about summary refs log tree commit diff
path: root/libbutl/regex.txx
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2018-06-19 15:30:22 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2018-06-19 15:30:22 +0300
commit 06e915be138b0638e30083f84cecda0eb1bfc895 (patch)
tree 50f7eca40de25033116c6f6f75524ae5801dcc78 /libbutl/regex.txx
parent 338d8065f1b681da841fa0d79cc9265776ff1e1e (diff)
Add regex_replace_match() and rename regex_replace_ex() to regex_replace_search()
Diffstat (limited to 'libbutl/regex.txx')
-rw-r--r-- libbutl/regex.txx 376
1 files changed, 204 insertions, 172 deletions
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index a7a6c9a..fbe2885 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -4,13 +4,209 @@
LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
{
+  // Format the regex match results according to the format string and
+  // return the resulting string.
+  //
+  // The format string is processed left to right. The following sequences
+  // are recognized with everything else copied verbatim:
+  //
+  //   $$       literal '$'
+  //   $&       the matched substring (m[0])
+  //   $`       the match prefix (m.prefix ())
+  //   $'       the match suffix (m.suffix ())
+  //   $n, $nn  sub-expression with the 1-based index (one or two digits)
+  //   \n       sub-expression with the 1-based index (single digit)
+  //   \\       literal '\'
+  //   \u, \l   convert the next appended character to upper/lower case
+  //   \U, \L   convert all the subsequently appended characters to
+  //            upper/lower case
+  //   \E       stop the case conversion
+  //
+  // References to unmatched or non-existent sub-expressions expand to
+  // nothing. A '$' that does not start a valid sequence is copied as is
+  // while a '\' that does not start a valid sequence is dropped. The case
+  // conversion is performed using a copy of the global C++ locale.
+  //
+  template <typename C>
+  std::basic_string<C>
+  regex_replace_match_results (
+    const std::match_results<typename std::basic_string<C>::const_iterator>& m,
+    const std::basic_string<C>& fmt)
+  {
+    using namespace std;
+
+    using string_type = basic_string<C>;
+    using str_it      = typename string_type::const_iterator;
+
+    string_type r;
+
+    // Note that we are using char type literals with the assumption that
+    // being ASCII characters they will be properly "widened" to the
+    // corresponding literals of the C template parameter type.
+    //
+    // Return the decimal digit value or -1 if the character is not a digit.
+    //
+    auto digit = [] (C c) -> int
+    {
+      return c >= '0' && c <= '9' ? c - '0' : -1;
+    };
+
+    enum class case_conv {none, upper, lower, upper_once, lower_once}
+    mode (case_conv::none);
+
+    locale cl; // Copy of the global C++ locale.
+
+    // Convert the character according to the current mode. Note that the
+    // "once" modes are reset to none after converting a single character.
+    //
+    auto conv_chr = [&mode, &cl] (C c) -> C
+    {
+      switch (mode)
+      {
+      case case_conv::upper_once: mode = case_conv::none; // Fall through.
+      case case_conv::upper:      c = toupper (c, cl); break;
+      case case_conv::lower_once: mode = case_conv::none; // Fall through.
+      case case_conv::lower:      c = tolower (c, cl); break;
+      case case_conv::none:       break;
+      }
+      return c;
+    };
+
+    auto append_chr = [&r, &conv_chr] (C c) {r.push_back (conv_chr (c));};
+
+    auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
+    {
+      // Optimize for the common case.
+      //
+      if (mode == case_conv::none)
+        r.append (b, e);
+      else
+      {
+        for (str_it i (b); i != e; ++i)
+          r.push_back (conv_chr (*i));
+      }
+    };
+
+    size_t n (fmt.size ());
+    for (size_t i (0); i < n; ++i)
+    {
+      C c (fmt[i]);
+
+      switch (c)
+      {
+      case '$':
+        {
+          // Check if this is a $-based escape sequence. Interpret it
+          // accordingly if that's the case, treat '$' as a regular character
+          // otherwise.
+          //
+          c = fmt[++i]; // '\0' if last.
+
+          switch (c)
+          {
+          case '$': append_chr (c); break;
+          case '&': append_str (m[0].first, m[0].second); break;
+          case '`':
+            {
+              append_str (m.prefix ().first, m.prefix ().second);
+              break;
+            }
+          case '\'':
+            {
+              append_str (m.suffix ().first, m.suffix ().second);
+              break;
+            }
+          default:
+            {
+              // Check if this is a sub-expression 1-based index ($n or $nn).
+              // Append the matching substring if that's the case. Treat '$'
+              // as a regular character otherwise. Index greater than the
+              // sub-expression count is silently ignored.
+              //
+              // Note that we only advance past the second digit once we
+              // know the index is valid. Otherwise, for a sequence like
+              // "$00" we would step back by one character only and lose
+              // the first digit.
+              //
+              int si (digit (c));
+              size_t e (i); // Position of the last digit of the index.
+
+              if (si >= 0)
+              {
+                int d (digit (fmt[i + 1])); // '\0' if last.
+                if (d >= 0)
+                {
+                  si = si * 10 + d;
+                  e = i + 1;
+                }
+              }
+
+              if (si > 0)
+              {
+                i = e;
+
+                // m[0] refers to the matched substring. Note that we ignore
+                // unmatched sub-expression references.
+                //
+                if (static_cast<size_t> (si) < m.size () && m[si].matched)
+                  append_str (m[si].first, m[si].second);
+              }
+              else
+              {
+                // Not a $-based escape sequence so treat '$' as a regular
+                // character and re-process what follows it.
+                //
+                --i;
+                append_chr ('$');
+              }
+
+              break;
+            }
+          }
+
+          break;
+        }
+      case '\\':
+        {
+          c = fmt[++i]; // '\0' if last.
+
+          switch (c)
+          {
+          case '\\': append_chr (c); break;
+
+          case 'u': mode = case_conv::upper_once; break;
+          case 'l': mode = case_conv::lower_once; break;
+          case 'U': mode = case_conv::upper;      break;
+          case 'L': mode = case_conv::lower;      break;
+          case 'E': mode = case_conv::none;       break;
+          default:
+            {
+              // Check if this is a sub-expression 1-based index. Append the
+              // matching substring if that's the case. Skip '\\' otherwise.
+              // Index greater than the sub-expression count is silently
+              // ignored.
+              //
+              int si (digit (c));
+              if (si > 0)
+              {
+                // m[0] refers to the matched substring. Note that we ignore
+                // unmatched sub-expression references.
+                //
+                if (static_cast<size_t> (si) < m.size () && m[si].matched)
+                  append_str (m[si].first, m[si].second);
+              }
+              else
+                --i; // Re-process the character as a regular one.
+
+              break;
+            }
+          }
+
+          break;
+        }
+      default:
+        {
+          // Append a regular character.
+          //
+          append_chr (c);
+          break;
+        }
+      }
+    }
+
+    return r;
+  }
+
+  // Try to match the entire string against the regex and, if that succeeds,
+  // format the match results according to the format string using
+  // regex_replace_match_results().
+  //
+  // Return the formatted string and true if the string matches and an empty
+  // string and false otherwise.
+  //
+  template <typename C>
+  std::pair<std::basic_string<C>, bool>
+  regex_replace_match (const std::basic_string<C>& s,
+                       const std::basic_regex<C>& re,
+                       const std::basic_string<C>& fmt)
+  {
+    using namespace std;
+
+    using string_type = basic_string<C>;
+    using str_it      = typename string_type::const_iterator;
+
+    match_results<str_it> m;
+    bool match (regex_match (s, m, re));
+
+    // Note that both alternatives must be of the same string type for this
+    // to compile for character types other than char.
+    //
+    return make_pair (
+      match ? regex_replace_match_results (m, fmt) : string_type (),
+      match);
+  }
+
template <typename C, typename F>
bool
- regex_replace_ex (const std::basic_string<C>& s,
- const std::basic_regex<C>& re,
- const std::basic_string<C>& fmt,
- F&& append,
- std::regex_constants::match_flag_type flags)
+ regex_replace_search (const std::basic_string<C>& s,
+ const std::basic_regex<C>& re,
+ const std::basic_string<C>& fmt,
+ F&& append,
+ std::regex_constants::match_flag_type flags)
{
using namespace std;
@@ -18,10 +214,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
using str_it = typename string_type::const_iterator;
using regex_it = regex_iterator<str_it>;
- bool first_only ((flags & std::regex_constants::format_first_only) != 0);
- bool no_copy ((flags & std::regex_constants::format_no_copy) != 0);
-
- locale cl; // Copy of the global C++ locale.
+ bool first_only ((flags & regex_constants::format_first_only) != 0);
+ bool no_copy ((flags & regex_constants::format_no_copy) != 0);
// Beginning of the last unmatched substring.
//
@@ -72,169 +266,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// The standard implementation calls m.format() here. We perform our
// own formatting.
//
- // Note that we are using char type literals with the assumption that
- // being ASCII characters they will be properly "widened" to the
- // corresponding literals of the C template parameter type.
- //
- auto digit = [] (C c) -> int
- {
- return c >= '0' && c <= '9' ? c - '0' : -1;
- };
-
- enum class case_conv {none, upper, lower, upper_once, lower_once}
- mode (case_conv::none);
-
- auto conv_chr = [&mode, &cl] (C c) -> C
- {
- switch (mode)
- {
- case case_conv::upper_once: mode = case_conv::none; // Fall through.
- case case_conv::upper: c = toupper (c, cl); break;
- case case_conv::lower_once: mode = case_conv::none; // Fall through.
- case case_conv::lower: c = tolower (c, cl); break;
- case case_conv::none: break;
- }
- return c;
- };
-
- string_type r;
-
- auto append_chr = [&r, &conv_chr] (C c)
- {
- r.push_back (conv_chr (c));
- };
-
- auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
- {
- // Optimize for the common case.
- //
- if (mode == case_conv::none)
- r.append (b, e);
- else
- {
- for (str_it i (b); i != e; ++i)
- r.push_back (conv_chr (*i));
- }
- };
-
- size_t n (fmt.size ());
- for (size_t i (0); i < n; ++i)
- {
- C c (fmt[i]);
-
- switch (c)
- {
- case '$':
- {
- // Check if this is a $-based escape sequence. Interpret it
- // accordingly if that's the case, treat '$' as a regular
- // character otherwise.
- //
- c = fmt[++i]; // '\0' if last.
-
- switch (c)
- {
- case '$': append_chr (c); break;
- case '&': append_str (m[0].first, m[0].second); break;
- case '`':
- {
- append_str (m.prefix ().first, m.prefix ().second);
- break;
- }
- case '\'':
- {
- append_str (m.suffix ().first, m.suffix ().second);
- break;
- }
- default:
- {
- // Check if this is a sub-expression 1-based index ($n or
- // $nn). Append the matching substring if that's the case.
- // Treat '$' as a regular character otherwise. Index greater
- // than the sub-expression count is silently ignored.
- //
- int si (digit (c));
- if (si >= 0)
- {
- int d;
- if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last.
- {
- si = si * 10 + d;
- ++i;
- }
- }
-
- if (si > 0)
- {
- // m[0] refers to the matched substring. Note that we
- // ignore unmatched sub-expression references.
- //
- if (static_cast<size_t> (si) < m.size () && m[si].matched)
- append_str (m[si].first, m[si].second);
- }
- else
- {
- // Not a $-based escape sequence so treat '$' as a
- // regular character.
- //
- --i;
- append_chr ('$');
- }
-
- break;
- }
- }
-
- break;
- }
- case '\\':
- {
- c = fmt[++i]; // '\0' if last.
-
- switch (c)
- {
- case '\\': append_chr (c); break;
-
- case 'u': mode = case_conv::upper_once; break;
- case 'l': mode = case_conv::lower_once; break;
- case 'U': mode = case_conv::upper; break;
- case 'L': mode = case_conv::lower; break;
- case 'E': mode = case_conv::none; break;
- default:
- {
- // Check if this is a sub-expression 1-based index. Append
- // the matching substring if that's the case, Skip '\\'
- // otherwise. Index greater than the sub-expression count is
- // silently ignored.
- //
- int si (digit (c));
- if (si > 0)
- {
- // m[0] refers to the matched substring. Note that we
- // ignore unmatched sub-expression references.
- //
- if (static_cast<size_t> (si) < m.size () && m[si].matched)
- append_str (m[si].first, m[si].second);
- }
- else
- --i;
-
- break;
- }
- }
-
- break;
- }
- default:
- {
- // Append a regular character.
- //
- append_chr (c);
- break;
- }
- }
- }
-
+ string_type r (regex_replace_match_results (m, fmt));
append (r.begin (), r.end ());
}
}