From 999c3b70fc3b970727042ff0e4def04b2aa41652 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 15 Nov 2023 09:49:59 +0200 Subject: Improve extraction of summary from README.md in bdep-new --- bdep/new.cxx | 80 +++++++++++++++++++++++++++++++++++++++++----------- bdep/utility.hxx | 3 ++ tests/new.testscript | 4 +-- 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/bdep/new.cxx b/bdep/new.cxx index dc094aa..a1b390f 100644 --- a/bdep/new.cxx +++ b/bdep/new.cxx @@ -184,7 +184,7 @@ namespace bdep // unable to. The project name can be empty. // static string - extract_summary (const path& f, const string& pkgn, const string& prjn) + extract_summary (const path& f, const string& pkg, const string& prj) { // README.md created by popular hosting services (GitHub, GitLab) have the // following format (give or take a few blank lines in between): @@ -197,8 +197,9 @@ namespace bdep // // # <name> - <summary> // - // Let's start simple by only support the first version and maybe - // extend/complicate things later. + // We will also treat the heading that doesn't start with <name> as: + // + // # <summary> // try { @@ -212,25 +213,70 @@ namespace bdep return !l.empty (); }; - if (next ()) + if (!next () || l.compare (0, 2, "# ") != 0) + return ""; + + l.erase (0, 2); // Remove `# `. + + size_t m; + auto prefix = [&l, &m] (const string& n) -> bool + { + m = n.size (); + return (l.size () >= m && + icasecmp (l.c_str (), n.c_str (), m) == 0 && + (l.size () == m || + (!alnum (l[m]) && // Separated. + l[m] != '_' && l[m] != '-' && l[m] != '+' && l[m] != '.'))); + }; + + if (prefix (pkg) || (!prj.empty () && prefix (prj))) { - if ( icasecmp (l, "# " + pkgn) == 0 || - (!prjn.empty () && icasecmp (l, "# " + prjn) == 0)) + size_t n (l.size ()); + if (n > m) // # <name> - <summary> { - if (next ()) - { - // Potential improvements: - // - // - Uppercase first letter. - // - Strip trailing period, if any. - // - Get only the first sentence. - // - return l; - } + // Let's try to handle similar cases like `# <name>: <summary>`. 
+ // + size_t i (m + 1); + + for (; i != n && !alnum (l[i]); ++i) // Skip separators. + ; + + l.erase (0, i); + } + else if (next ()) + { + // # <name> + // <summary> + // + // Use the following line as is. } + else + return ""; + } + else + { + // # <summary> + // + // Use this line as is. } - return ""; + // Sanitize the line. + // + + // Keep only the first sentence and strip trailing period. + // + size_t p (l.find ('.')); + if (p != string::npos) + l.resize (p); + + if (!trim (l).empty ()) + { + // Uppercase the first letter. + // + ucase (l, 0, 1); + } + + return l; } catch (const io_error& e) { diff --git a/bdep/utility.hxx b/bdep/utility.hxx index c7d4666..e8678eb 100644 --- a/bdep/utility.hxx +++ b/bdep/utility.hxx @@ -46,6 +46,9 @@ namespace bdep using butl::lcase; using butl::icasecmp; + using butl::alpha; + using butl::alnum; + using butl::trim; using butl::next_word; using butl::sanitize_identifier; diff --git a/tests/new.testscript b/tests/new.testscript index 82d41f5..7175a30 100644 --- a/tests/new.testscript +++ b/tests/new.testscript @@ -1716,7 +1716,7 @@ i = [cmdline] $build install: config.install.root=./install &install/*** cat <=libfoo/README.md &!libfoo/README.md; # libfoo - cool foo + Cool foo. Some more stuff. EOI @@ -1732,7 +1732,7 @@ i = [cmdline] $build install: config.install.root=./install &install/*** created new library project libfoo in $~/libfoo/ EOE test -f libfoo/.gitignore; - sed -n -e 's/^summary: (.+)$/\1/p' libfoo/manifest >'cool foo'; + sed -n -e 's/^summary: (.+)$/\1/p' libfoo/manifest >'Cool foo'; sed -n -e 's/^license: ([^ ]+).*$/\1/p' libfoo/manifest >'Apache-2.0' } -- cgit v1.1