aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2023-11-15 09:49:59 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2023-11-15 09:55:34 +0200
commit 999c3b70fc3b970727042ff0e4def04b2aa41652 (patch)
tree 48d9611c2eed72af0655ada1034695810d0ebc5b
parent a476aa23d46e6485148623c4ae45533df8357f1f (diff)
Improve extraction of summary from README.md in bdep-new
-rw-r--r-- bdep/new.cxx 80
-rw-r--r-- bdep/utility.hxx 3
-rw-r--r-- tests/new.testscript 4
3 files changed, 68 insertions, 19 deletions
diff --git a/bdep/new.cxx b/bdep/new.cxx
index dc094aa..a1b390f 100644
--- a/bdep/new.cxx
+++ b/bdep/new.cxx
@@ -184,7 +184,7 @@ namespace bdep
// unable to. The project name can be empty.
//
static string
- extract_summary (const path& f, const string& pkgn, const string& prjn)
+ extract_summary (const path& f, const string& pkg, const string& prj)
{
// README.md created by popular hosting services (GitHub, GitLab) have the
// following format (give or take a few blank lines in between):
@@ -197,8 +197,9 @@ namespace bdep
//
// # <name> - <summary>
//
- // Let's start simple by only support the first version and maybe
- // extend/complicate things later.
+ // We will also treat the heading that doesn't start with <name> as:
+ //
+ // # <summary>
//
try
{
@@ -212,25 +213,70 @@ namespace bdep
return !l.empty ();
};
- if (next ())
+ if (!next () || l.compare (0, 2, "# ") != 0)
+ return "";
+
+ l.erase (0, 2); // Remove `# `.
+
+ size_t m;
+ auto prefix = [&l, &m] (const string& n) -> bool
+ {
+ m = n.size ();
+ return (l.size () >= m &&
+ icasecmp (l.c_str (), n.c_str (), m) == 0 &&
+ (l.size () == m ||
+ (!alnum (l[m]) && // Separated.
+ l[m] != '_' && l[m] != '-' && l[m] != '+' && l[m] != '.')));
+ };
+
+ if (prefix (pkg) || (!prj.empty () && prefix (prj)))
{
- if ( icasecmp (l, "# " + pkgn) == 0 ||
- (!prjn.empty () && icasecmp (l, "# " + prjn) == 0))
+ size_t n (l.size ());
+ if (n > m) // # <name> - <summary>
{
- if (next ())
- {
- // Potential improvements:
- //
- // - Uppercase first letter.
- // - Strip trailing period, if any.
- // - Get only the first sentence.
- //
- return l;
- }
+ // Let's try to handle similar cases like `# <name>: <summary>`.
+ //
+ size_t i (m + 1);
+
+ for (; i != n && !alnum (l[i]); ++i) // Skip separators.
+ ;
+
+ l.erase (0, i);
+ }
+ else if (next ())
+ {
+ // # <name>
+ // <summary>
+ //
+ // Use the following line as is.
}
+ else
+ return "";
+ }
+ else
+ {
+ // # <summary>
+ //
+ // Use this line as is.
}
- return "";
+ // Sanitize the line.
+ //
+
+ // Keep only the first sentence and strip trailing period.
+ //
+ size_t p (l.find ('.'));
+ if (p != string::npos)
+ l.resize (p);
+
+ if (!trim (l).empty ())
+ {
+ // Uppercase the first letter.
+ //
+ ucase (l, 0, 1);
+ }
+
+ return l;
}
catch (const io_error& e)
{
diff --git a/bdep/utility.hxx b/bdep/utility.hxx
index c7d4666..e8678eb 100644
--- a/bdep/utility.hxx
+++ b/bdep/utility.hxx
@@ -46,6 +46,9 @@ namespace bdep
using butl::lcase;
using butl::icasecmp;
+ using butl::alpha;
+ using butl::alnum;
+
using butl::trim;
using butl::next_word;
using butl::sanitize_identifier;
diff --git a/tests/new.testscript b/tests/new.testscript
index 82d41f5..7175a30 100644
--- a/tests/new.testscript
+++ b/tests/new.testscript
@@ -1716,7 +1716,7 @@ i = [cmdline] $build install: config.install.root=./install &install/***
cat <<EOI >=libfoo/README.md &!libfoo/README.md;
# libfoo
- cool foo
+ Cool foo.
Some more stuff.
EOI
@@ -1732,7 +1732,7 @@ i = [cmdline] $build install: config.install.root=./install &install/***
created new library project libfoo in $~/libfoo/
EOE
test -f libfoo/.gitignore;
- sed -n -e 's/^summary: (.+)$/\1/p' libfoo/manifest >'cool foo';
+ sed -n -e 's/^summary: (.+)$/\1/p' libfoo/manifest >'Cool foo';
sed -n -e 's/^license: ([^ ]+).*$/\1/p' libfoo/manifest >'Apache-2.0'
}