From 95c579df686f115c0fd3697f2723fa73476c4584 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 23 Mar 2021 18:50:55 +0300 Subject: Add regex_replace_parse() overloads --- libbutl/builtin.cxx | 75 ++++++++++++++++++++++++----------------------------- libbutl/regex.ixx | 17 ++++++++++++ libbutl/regex.mxx | 36 +++++++++++++++++++++++-- libbutl/regex.txx | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 43 deletions(-) (limited to 'libbutl') diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index 79ff968..a6bb94b 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -1632,15 +1632,6 @@ namespace butl string replacement; bool global; bool print; - - subst (const string& re, bool ic, string rp, bool gl, bool pr) - // - // Note that ECMAScript is implied if no grammar flag is specified. - // - : regex (re, ic ? regex::icase : regex::ECMAScript), - replacement (move (rp)), - global (gl), - print (pr) {} }; small_vector substs; @@ -1663,57 +1654,59 @@ namespace butl if (delim == '\\' || delim == '\n') fail () << "invalid delimiter for 's' command in '" << v << "'"; - size_t p (v.find (delim, 2)); - if (p == string::npos) - fail () << "unterminated 's' command regex in '" << v << "'"; - - string regex (v, 2, p - 2); - - // Empty regex matches nothing, so not of much use. - // - if (regex.empty ()) - fail () << "empty regex in 's' command in '" << v << "'"; - - size_t b (p + 1); - p = v.find (delim, b); - if (p == string::npos) - fail () << "unterminated 's' command replacement in '" << v << "'"; - - string replacement (v, b, p - b); - - // Parse the substitute command flags. + // Parse the substitute command regex (as string), replacement, and + // flags. 
// + pair<string, string> rf; bool icase (false); bool global (false); bool print (false); - char c; - for (++p; (c = v[p]) != '\0'; ++p) + try { - switch (c) + size_t e; + rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e); + + char c; + for (size_t i (e + 1); (c = v[i]) != '\0'; ++i) { - case 'i': icase = true; break; - case 'g': global = true; break; - case 'p': print = true; break; - default: + switch (c) { - fail () << "invalid 's' command flag '" << c << "' in '" << v - << "'"; + case 'i': icase = true; break; + case 'g': global = true; break; + case 'p': print = true; break; + default: + { + fail () << "invalid 's' command flag '" << c << "' in '" << v + << "'"; + } } } } + catch (const invalid_argument& e) + { + fail () << "invalid 's' command '" << v << "': " << e; + } + // Parse the regex and add the substitution to the list. + // try { - substs.emplace_back (regex, icase, - move (replacement), - global, print); + // Note that ECMAScript is implied if no grammar flag is specified. + // + regex re (rf.first, icase ? regex::icase : regex::ECMAScript); + + substs.push_back ({move (re), + move (rf.second), + global, + print}); } catch (const regex_error& e) { // Print regex_error description if meaningful (no space). // - fail () << "invalid regex '" << regex << "' in '" << v << "'" << e; + fail () << "invalid regex '" << rf.first << "' in '" << v << "'" + << e; } } diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx index dec15d1..805acd1 100644 --- a/libbutl/regex.ixx +++ b/libbutl/regex.ixx @@ -21,4 +21,21 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
return make_pair (move (r), match); } + + template <typename C> + inline std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const std::basic_string<C>& s, + std::regex_constants::syntax_option_type f) + { + return regex_replace_parse (s.c_str (), s.size (), f); + } + + template <typename C> + inline std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const C* s, + std::regex_constants::syntax_option_type f) + { + return regex_replace_parse ( + s, std::basic_string<C>::traits_type::length (s), f); + } } diff --git a/libbutl/regex.mxx b/libbutl/regex.mxx index 84b024f..b5490b1 100644 --- a/libbutl/regex.mxx +++ b/libbutl/regex.mxx @@ -14,8 +14,9 @@ #include <utility> // pair #include -#include <cstddef> // size_t -#include <utility> // move(), make_pair() +#include <cstddef> // size_t +#include <utility> // move(), make_pair() +#include <stdexcept> // invalid_argument #endif #if defined(__clang__) @@ -93,6 +94,37 @@ LIBBUTL_MODEXPORT namespace butl regex_replace_match (const std::basic_string<C>&, const std::basic_regex<C>&, const std::basic_string<C>& fmt); + + // Parse the '/<regex>/<format>/' replacement string into the regex/format + // pair. Other character can be used as a delimiter instead of '/'. Throw + // std::invalid_argument or std::regex_error on parsing error. + // + // Note: escaping of the delimiter character is not (yet) supported. + // + template <typename C> + std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const std::basic_string<C>&, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + template <typename C> + std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const C*, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + template <typename C> + std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const C*, size_t, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + // As above but return string instead of regex and do not fail if there is + // text after the last delimiter instead returning its position. 
+ // + template <typename C> + std::pair<std::basic_string<C>, std::basic_string<C>> + regex_replace_parse (const C*, size_t, size_t& end); } LIBBUTL_MODEXPORT namespace std diff --git a/libbutl/regex.txx b/libbutl/regex.txx index b785708..aa845be 100644 --- a/libbutl/regex.txx +++ b/libbutl/regex.txx @@ -278,4 +278,71 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. return match; } + + template <typename C> + std::pair<std::basic_regex<C>, std::basic_string<C>> + regex_replace_parse (const C* s, size_t n, + std::regex_constants::syntax_option_type f) + { + using namespace std; + + using string_type = basic_string<C>; + + size_t e; + pair<string_type, string_type> r (regex_replace_parse (s, n, e)); + + if (e != n) + throw invalid_argument ("junk after trailing delimiter"); + + return make_pair (basic_regex<C> (r.first, f), move (r.second)); + } + + template <typename C> + std::pair<std::basic_string<C>, std::basic_string<C>> + regex_replace_parse (const C* s, size_t n, size_t& e) + { + using namespace std; + + using string_type = basic_string<C>; + + if (n == 0) + throw invalid_argument ("no leading delimiter"); + + const C* b (s); // Save the beginning of the string. + + char delim (s[0]); + + // Position to the regex first character and find the regex-terminating + // delimiter. + // + --n; + ++s; + + const C* p (string_type::traits_type::find (s, n, delim)); + + if (p == nullptr) + throw invalid_argument ("no delimiter after regex"); + + // Empty regex matches nothing, so not of much use. + // + if (p == s) + throw invalid_argument ("empty regex"); + + // Save the regex. + // + string_type re (s, p - s); + + // Position to the format first character and find the trailing delimiter. + // + n -= p - s + 1; + s = p + 1; + + p = string_type::traits_type::find (s, n, delim); + + if (p == nullptr) + throw invalid_argument ("no delimiter after replacement"); + + e = p - b + 1; + return make_pair (move (re), string_type (s, p - s)); + } } -- cgit v1.1