From 0cf84e1f006988c114bdca36715d3a2c0601a7d5 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 30 Aug 2017 10:23:06 +0300 Subject: Generalize regex_replace_ex() function --- libbutl/regex.hxx | 29 ++++++++++++++++++---- libbutl/regex.ixx | 27 +++++++++++++++++++++ libbutl/regex.txx | 18 ++++++++------ tests/regex/buildfile | 7 ++++++ tests/regex/driver.cxx | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests/regex/testscript | 54 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 189 insertions(+), 12 deletions(-) create mode 100644 libbutl/regex.ixx create mode 100644 tests/regex/buildfile create mode 100644 tests/regex/driver.cxx create mode 100644 tests/regex/testscript diff --git a/libbutl/regex.hxx b/libbutl/regex.hxx index e4fd6a7..2105f05 100644 --- a/libbutl/regex.hxx +++ b/libbutl/regex.hxx @@ -14,13 +14,18 @@ namespace butl { - // Like std::regex_match() but extends the standard ECMA-262 - // substitution escape sequences with a subset of Perl sequences: + // Call specified append() function for non-matched substrings and matched + // substring replacements returning true if search succeeded. The function + // must be callable with the following signature: // - // \\, \u, \l, \U, \L, \E, \1, ..., \9 + // void + // append(basic_string<C>::iterator begin, basic_string<C>::iterator end); // - // Also return the resulting string as well as whether the search - // succeeded. + // The regex semantics is like that of std::regex_replace() but extends + // the standard ECMA-262 substitution escape sequences with a subset of + // Perl sequences: + // + // \\, \u, \l, \U, \L, \E, \1, ..., \9 // // Notes and limitations: // @@ -34,6 +39,19 @@ namespace butl // C++ locale (which is, unless changed, is the same as C locale and // both default to the POSIX locale aka "C"). 
+ // + template <typename C, typename F> + bool + regex_replace_ex (const std::basic_string<C>&, + const std::basic_regex<C>&, + const std::basic_string<C>& fmt, + F&& append, + std::regex_constants::match_flag_type = + std::regex_constants::match_default); + + // As above but concatenate non-matched substrings and matched substring + // replacements into a string returning it as well as whether the search + // succeeded. + // template <typename C> std::pair<std::basic_string<C>, bool> regex_replace_ex (const std::basic_string<C>&, @@ -52,6 +70,7 @@ namespace std operator<< (ostream&, const regex_error&); } +#include <libbutl/regex.ixx> #include <libbutl/regex.txx> #endif // LIBBUTL_REGEX_HXX diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx new file mode 100644 index 0000000..dd3ad1d --- /dev/null +++ b/libbutl/regex.ixx @@ -0,0 +1,27 @@ +// file : libbutl/regex.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <utility> // move(), make_pair() + +namespace butl +{ + template <typename C> + inline std::pair<std::basic_string<C>, bool> + regex_replace_ex (const std::basic_string<C>& s, + const std::basic_regex<C>& re, + const std::basic_string<C>& fmt, + std::regex_constants::match_flag_type flags) + { + using namespace std; + + using it = typename basic_string<C>::const_iterator; + + basic_string<C> r; + bool match (regex_replace_ex (s, re, fmt, + [&r] (it b, it e) {r.append (b, e);}, + flags)); + + return make_pair (move (r), match); + } +} diff --git a/libbutl/regex.txx b/libbutl/regex.txx index 52bddfd..536cabf 100644 --- a/libbutl/regex.txx +++ b/libbutl/regex.txx @@ -7,11 +7,12 @@ namespace butl { - template <typename C> - std::pair<std::basic_string<C>, bool> + template <typename C, typename F> + bool regex_replace_ex (const std::basic_string<C>& s, const std::basic_regex<C>& re, const std::basic_string<C>& fmt, + F&& append, std::regex_constants::match_flag_type flags) { using namespace std; @@ -24,7 +25,6 @@ namespace butl bool no_copy ((flags & std::regex_constants::format_no_copy) != 0); locale cl; // Copy of the global C++ locale. - string_type r; // Beginning of the last unmatched substring. 
// @@ -59,7 +59,7 @@ namespace butl // if (!no_copy) { - r.append (ub, m.prefix ().second); + append (ub, m.prefix ().second); ub = m.suffix ().first; } @@ -68,7 +68,7 @@ namespace butl // Append matched substring. // if (!no_copy) - r.append (m[0].first, m[0].second); + append (m[0].first, m[0].second); } else { @@ -100,6 +100,8 @@ namespace butl return c; }; + string_type r; + auto append_chr = [&r, &conv_chr] (C c) { r.push_back (conv_chr (c)); @@ -233,14 +235,16 @@ namespace butl } } } + + append (r.begin (), r.end ()); } } // Append the rightmost non-matched substring. // if (!no_copy) - r.append (ub, s.end ()); + append (ub, s.end ()); - return make_pair (move (r), match); + return match; } } diff --git a/tests/regex/buildfile b/tests/regex/buildfile new file mode 100644 index 0000000..baf4bca --- /dev/null +++ b/tests/regex/buildfile @@ -0,0 +1,7 @@ +# file : tests/regex/buildfile +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +import libs = libbutl%lib{butl} + +exe{driver}: {hxx cxx}{*} $libs test{testscript} diff --git a/tests/regex/driver.cxx b/tests/regex/driver.cxx new file mode 100644 index 0000000..054eb31 --- /dev/null +++ b/tests/regex/driver.cxx @@ -0,0 +1,66 @@ +// file : tests/regex/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include +#include +#include + +#include +#include // operator<<(ostream, exception) + +using namespace std; +using namespace butl; + +// Usage: argv[0] [-ffo] [-fnc] +// +// Perform substitution of matched substrings with formatted replacement +// strings using regex_replace_ex() function. If the string matches the regex +// then print the replacement to STDOUT and exit with zero code. Exit with +// code one if it doesn't match, and with code two on failure (print error +// description to STDERR). +// +// -ffo +// Use format_first_only replacement flag. 
+// +// -fnc +// Use format_no_copy replacement flag. +// +int +main (int argc, const char* argv[]) +try +{ + regex_constants::match_flag_type fl (regex_constants::match_default); + + int i (1); + for (; i != argc; ++i) + { + string op (argv[i]); + + if (op == "-ffo") + fl |= regex_constants::format_first_only; + else if (op == "-fnc") + fl |= regex_constants::format_no_copy; + else + break; + } + + assert (i + 3 == argc); + + string s (argv[i++]); + regex re (argv[i++]); + string fmt (argv[i]); + + auto r (regex_replace_ex (s, re, fmt, fl)); + + if (r.second) + cout << r.first << endl; + + return r.second ? 0 : 1; +} +catch (const exception& e) +{ + cerr << e << endl; + return 2; +} diff --git a/tests/regex/testscript b/tests/regex/testscript new file mode 100644 index 0000000..1af604c --- /dev/null +++ b/tests/regex/testscript @@ -0,0 +1,54 @@ +# file : tests/regex/testscript +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: match +: +{ + $* abcbd b x >axcxd : all + $* -ffo abcbd b x >axcbd : first-only + $* -fnc abcbd b x >xx : no-copy + + : ecma-escape + : + { + $* xay a '$b' >'x$by' : none + $* xay a '$' >'x$y' : none-term + $* xay a '$$' >'x$y' : self + $* xay a 'b$&c' >'xbacy' : match + $* xay a 'b$`c' >'xbxcy' : match-precede + $* xay a "b\\\$'c" >'xbycy' : match-follow + + : capture + : + $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '$1$10' >aj + } + + : perl-escape + : + { + $* xay a '\b' >'xby' : none + $* xay a '\' >'xy' : none-term + $* xay a '\\' >'x\y' : self + + : capture + : + $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '\1\10' >aa0 + + : upper + : + { + $* xay a '\U' >xy : none + $* xay a '\Uvz' >xVZy : repl + $* xay a '\Uv\Ez' >xVzy : end + $* aa a 'v\Uz' >vZvZ : locality + $* xay '(a)' '\U\1' >xAy : capt + $* x-y '(a?)-' '\U\1z' >xZy : capt-empty + $* xay a '\uvz' >xVzy : once + } + + : lower + : + $* xay a '\lVZ' >xvZy + } +} -- cgit v1.1