From d061c88ae81eb5e1354526f07f9f8d90d0732656 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 1 Nov 2024 11:58:42 +0200 Subject: Add comments parameter to string_parser functions --- libbutl/string-parser.cxx | 27 +++++++++++++++++++++++---- libbutl/string-parser.hxx | 10 +++++++--- tests/string-parser/driver.cxx | 24 +++++++++++++++++++----- tests/string-parser/testscript | 30 ++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 12 deletions(-) diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx index af5c1b3..aa7d52c 100644 --- a/libbutl/string-parser.cxx +++ b/libbutl/string-parser.cxx @@ -18,16 +18,34 @@ namespace butl } vector> - parse_quoted_position (const string& s, bool unquote) + parse_quoted_position (const string& s, bool unquote, bool comments) { vector> r; + + bool newline (true); for (auto b (s.begin ()), i (b), e (s.end ()); i != e; ) { - for (; i != e && space (*i); ++i) ; // Skip spaces. + // Skip spaces. + // + for (; i != e && space (*i); ++i) + { + if (*i == '\n') + newline = true; + } + + // Skip comment line. + // + if (comments && newline && i != e && *i == '#') + { + for (++i; i != e && *i != '\n'; ++i) ; + continue; + } if (i == e) // No more strings. break; + newline = false; + string s; char quoting ('\0'); // Current quoting mode, can be used as bool. size_t pos (i - b); // String position. @@ -74,9 +92,10 @@ namespace butl } vector - parse_quoted (const string& s, bool unquote) + parse_quoted (const string& s, bool unquote, bool comments) { - vector> sp (parse_quoted_position (s, unquote)); + vector> sp ( + parse_quoted_position (s, unquote, comments)); vector r; r.reserve (sp.size ()); diff --git a/libbutl/string-parser.hxx b/libbutl/string-parser.hxx index 9fc20c0..74280a2 100644 --- a/libbutl/string-parser.hxx +++ b/libbutl/string-parser.hxx @@ -26,18 +26,22 @@ namespace butl // Parse a whitespace-separated list of strings. Can contain single or // double quoted substrings. No escaping is supported. If unquote is true, - // return one-level unquoted values. Throw invalid_string in case of + // return one-level unquoted values. Optionally, assume that the passed + // string may contain comment lines (lines with `#` as the first non- + // whitespace character) and ignore them. Throw invalid_string in case of // invalid quoting. // LIBBUTL_SYMEXPORT std::vector - parse_quoted (const std::string&, bool unquote); + parse_quoted (const std::string&, bool unquote, bool comments = false); // As above but return a list of string and zero-based position pairs. // Position is useful for issuing diagnostics about an invalid string // during second-level parsing. // LIBBUTL_SYMEXPORT std::vector> - parse_quoted_position (const std::string&, bool unquote); + parse_quoted_position (const std::string&, + bool unquote, + bool comments = false); // Remove a single level of quotes. Note that the format or the // correctness of the quotation is not validated. diff --git a/tests/string-parser/driver.cxx b/tests/string-parser/driver.cxx index 8cba912..93d2088 100644 --- a/tests/string-parser/driver.cxx +++ b/tests/string-parser/driver.cxx @@ -14,13 +14,14 @@ using namespace std; using namespace butl::string_parser; -// Usage: argv[0] [-l] [-u] [-p] +// Usage: argv[0] [-l] [-u] [-p] [-c] // // Read and parse lines into strings from STDIN and print them to STDOUT. // // -l output each string on a separate line // -u unquote strings // -p output positions +// -c comments // int main (int argc, char* argv[]) @@ -29,6 +30,7 @@ try bool spl (false); // Print string per line. bool unquote (false); bool pos (false); + bool comments (false); for (int i (1); i != argc; ++i) { @@ -40,6 +42,8 @@ try unquote = true; else if (o == "-p") pos = true; + else if (o == "-c") + comments = true; else assert (false); } @@ -51,11 +55,8 @@ try cout.exceptions (ios::failbit | ios::badbit); - string l; - while (getline (cin, l)) + auto print = [spl, pos] (const vector>& v) { - vector> v (parse_quoted_position (l, unquote)); - if (!spl) { for (auto b (v.cbegin ()), i (b), e (v.cend ()); i != e; ++i) @@ -81,6 +82,19 @@ try cout << s.first << endl; } } + }; + + if (!comments) + { + string l; + while (getline (cin, l)) + print (parse_quoted_position (l, unquote)); + } + else + { + string s; + getline (cin, s, '\0'); + print (parse_quoted_position (s, unquote, true /* comments */)); } return 0; diff --git a/tests/string-parser/testscript b/tests/string-parser/testscript index d484e01..05c2807 100644 --- a/tests/string-parser/testscript +++ b/tests/string-parser/testscript @@ -30,6 +30,36 @@ x "y z EOO } + + : comments + : + { + $* -c <>EOO + # Comment 1 + # + abc #xyz + + # Comment 2 + # + abc# + + "# not a comment 3" #not-a-comment4 + + "abc + # not a comment 5 + " + # Comment 6 + EOI + abc + #xyz + abc# + "# not a comment 3" + #not-a-comment4 + "abc + # not a comment 5 + " + EOO + } } : invalid -- cgit v1.1