aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-08-30 10:23:06 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-08-30 20:57:48 +0300
commit0cf84e1f006988c114bdca36715d3a2c0601a7d5 (patch)
tree8f372d93ac2ed9bde3b57e1e4efe440b3d86d056
parentc9a062d44807803f1cdfcfe62d49ad1f18162baa (diff)
Generalize regex_replace_ex() function
-rw-r--r--libbutl/regex.hxx29
-rw-r--r--libbutl/regex.ixx27
-rw-r--r--libbutl/regex.txx18
-rw-r--r--tests/regex/buildfile7
-rw-r--r--tests/regex/driver.cxx66
-rw-r--r--tests/regex/testscript54
6 files changed, 189 insertions, 12 deletions
diff --git a/libbutl/regex.hxx b/libbutl/regex.hxx
index e4fd6a7..2105f05 100644
--- a/libbutl/regex.hxx
+++ b/libbutl/regex.hxx
@@ -14,13 +14,18 @@
namespace butl
{
- // Like std::regex_match() but extends the standard ECMA-262
- // substitution escape sequences with a subset of Perl sequences:
+ // Call specified append() function for non-matched substrings and matched
+ // substring replacements returning true if search succeeded. The function
+ // must be callable with the following signature:
//
- // \\, \u, \l, \U, \L, \E, \1, ..., \9
+ // void
+ // append(basic_string<C>::const_iterator begin, basic_string<C>::const_iterator end);
//
- // Also return the resulting string as well as whether the search
- // succeeded.
+ // The regex semantics is like that of std::regex_replace() except that the
+ // standard ECMA-262 substitution escape sequences are extended with a
+ // subset of Perl sequences:
+ //
+ // \\, \u, \l, \U, \L, \E, \1, ..., \9
//
// Notes and limitations:
//
@@ -34,6 +39,19 @@ namespace butl
// C++ locale (which is, unless changed, is the same as C locale and
// both default to the POSIX locale aka "C").
//
+ template <typename C, typename F>
+ bool
+ regex_replace_ex (const std::basic_string<C>&,
+ const std::basic_regex<C>&,
+ const std::basic_string<C>& fmt,
+ F&& append,
+ std::regex_constants::match_flag_type =
+ std::regex_constants::match_default);
+
+ // As above but concatenate non-matched substrings and matched substring
+ // replacements into a string returning it as well as whether the search
+ // succeeded.
+ //
template <typename C>
std::pair<std::basic_string<C>, bool>
regex_replace_ex (const std::basic_string<C>&,
@@ -52,6 +70,7 @@ namespace std
operator<< (ostream&, const regex_error&);
}
+#include <libbutl/regex.ixx>
#include <libbutl/regex.txx>
#endif // LIBBUTL_REGEX_HXX
diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx
new file mode 100644
index 0000000..dd3ad1d
--- /dev/null
+++ b/libbutl/regex.ixx
@@ -0,0 +1,27 @@
+// file : libbutl/regex.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <utility> // move(), make_pair()
+
+namespace butl
+{
+ template <typename C> // C: character type shared by the string, regex, and format.
+ inline std::pair<std::basic_string<C>, bool>
+ regex_replace_ex (const std::basic_string<C>& s,
+ const std::basic_regex<C>& re,
+ const std::basic_string<C>& fmt,
+ std::regex_constants::match_flag_type flags)
+ {
+ using namespace std;
+
+ using it = typename basic_string<C>::const_iterator; // Range type taken by the callback below.
+
+ basic_string<C> r; // Result: every range handed to the callback is appended here.
+ bool match (regex_replace_ex (s, re, fmt, // Delegate to the callback-taking overload.
+ [&r] (it b, it e) {r.append (b, e);},
+ flags));
+
+ return make_pair (move (r), match); // Concatenated string and the callback overload's result.
+ }
+}
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index 52bddfd..536cabf 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -7,11 +7,12 @@
namespace butl
{
- template <typename C>
- std::pair<std::basic_string<C>, bool>
+ template <typename C, typename F>
+ bool
regex_replace_ex (const std::basic_string<C>& s,
const std::basic_regex<C>& re,
const std::basic_string<C>& fmt,
+ F&& append,
std::regex_constants::match_flag_type flags)
{
using namespace std;
@@ -24,7 +25,6 @@ namespace butl
bool no_copy ((flags & std::regex_constants::format_no_copy) != 0);
locale cl; // Copy of the global C++ locale.
- string_type r;
// Beginning of the last unmatched substring.
//
@@ -59,7 +59,7 @@ namespace butl
//
if (!no_copy)
{
- r.append (ub, m.prefix ().second);
+ append (ub, m.prefix ().second);
ub = m.suffix ().first;
}
@@ -68,7 +68,7 @@ namespace butl
// Append matched substring.
//
if (!no_copy)
- r.append (m[0].first, m[0].second);
+ append (m[0].first, m[0].second);
}
else
{
@@ -100,6 +100,8 @@ namespace butl
return c;
};
+ string_type r;
+
auto append_chr = [&r, &conv_chr] (C c)
{
r.push_back (conv_chr (c));
@@ -233,14 +235,16 @@ namespace butl
}
}
}
+
+ append (r.begin (), r.end ());
}
}
// Append the rightmost non-matched substring.
//
if (!no_copy)
- r.append (ub, s.end ());
+ append (ub, s.end ());
- return make_pair (move (r), match);
+ return match;
}
}
diff --git a/tests/regex/buildfile b/tests/regex/buildfile
new file mode 100644
index 0000000..baf4bca
--- /dev/null
+++ b/tests/regex/buildfile
@@ -0,0 +1,7 @@
+# file : tests/regex/buildfile
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs test{testscript}
diff --git a/tests/regex/driver.cxx b/tests/regex/driver.cxx
new file mode 100644
index 0000000..054eb31
--- /dev/null
+++ b/tests/regex/driver.cxx
@@ -0,0 +1,66 @@
+// file : tests/regex/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <string>
+#include <cassert>
+#include <iostream>
+#include <exception>
+
+#include <libbutl/regex.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+
+using namespace std;
+using namespace butl;
+
+// Usage: argv[0] [-ffo] [-fnc] <string> <regex> <format>
+//
+// Perform substitution of matched substrings with formatted replacement
+// strings using regex_replace_ex() function. If the string matches the regex
+// then print the replacement to STDOUT and exit with zero code. Exit with
+// code one if it doesn't match, and with code two on failure (print error
+// description to STDERR).
+//
+// -ffo
+// Use format_first_only replacement flag.
+//
+// -fnc
+// Use format_no_copy replacement flag.
+//
+int
+main (int argc, const char* argv[])
+try
+{
+ regex_constants::match_flag_type fl (regex_constants::match_default);
+
+ int i (1); // Index of the next argument to examine (argv[0] is skipped).
+ for (; i != argc; ++i) // Consume the leading option arguments.
+ {
+ string op (argv[i]);
+
+ if (op == "-ffo")
+ fl |= regex_constants::format_first_only;
+ else if (op == "-fnc")
+ fl |= regex_constants::format_no_copy;
+ else
+ break; // First non-option argument: stop option parsing.
+ }
+
+ assert (i + 3 == argc); // Exactly <string> <regex> <format> must remain.
+
+ string s (argv[i++]);
+ regex re (argv[i++]);
+ string fmt (argv[i]);
+
+ auto r (regex_replace_ex (s, re, fmt, fl)); // r.first: result string, r.second: match flag.
+
+ if (r.second)
+ cout << r.first << endl;
+
+ return r.second ? 0 : 1; // Zero if matched, one otherwise.
+}
+catch (const exception& e) // Failure: print the description to STDERR, exit with code two.
+{
+ cerr << e << endl;
+ return 2;
+}
diff --git a/tests/regex/testscript b/tests/regex/testscript
new file mode 100644
index 0000000..1af604c
--- /dev/null
+++ b/tests/regex/testscript
@@ -0,0 +1,54 @@
+# file : tests/regex/testscript
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: match
+:
+{
+ $* abcbd b x >axcxd : all
+ $* -ffo abcbd b x >axcbd : first-only
+ $* -fnc abcbd b x >xx : no-copy
+
+ : ecma-escape
+ :
+ {
+ $* xay a '$b' >'x$by' : none
+ $* xay a '$' >'x$y' : none-term
+ $* xay a '$$' >'x$y' : self
+ $* xay a 'b$&c' >'xbacy' : match
+ $* xay a 'b$`c' >'xbxcy' : match-precede
+ $* xay a "b\\\$'c" >'xbycy' : match-follow
+
+ : capture
+ :
+ $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '$1$10' >aj
+ }
+
+ : perl-escape
+ :
+ {
+ $* xay a '\b' >'xby' : none
+ $* xay a '\' >'xy' : none-term
+ $* xay a '\\' >'x\y' : self
+
+ : capture
+ :
+ $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '\1\10' >aa0
+
+ : upper
+ :
+ {
+ $* xay a '\U' >xy : none
+ $* xay a '\Uvz' >xVZy : repl
+ $* xay a '\Uv\Ez' >xVzy : end
+ $* aa a 'v\Uz' >vZvZ : locality
+ $* xay '(a)' '\U\1' >xAy : capt
+ $* x-y '(a?)-' '\U\1z' >xZy : capt-empty
+ $* xay a '\uvz' >xVzy : once
+ }
+
+ : lower
+ :
+ $* xay a '\lVZ' >xvZy
+ }
+}