diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2017-06-21 13:05:43 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2017-06-21 13:05:43 +0300 |
commit | f6c20ad37b2ececb446b5051837bccba93c81d7a (patch) | |
tree | 5500b098a06a46ae21ab6f61973dd78489a6a607 | |
parent | 5e538c45eb61bf9baa09cf2ef4a9a9148e8acab0 (diff) |
Move regex utilities to libbutl
-rw-r--r-- | build2/buildfile | 1 | ||||
-rw-r--r-- | build2/regex.cxx | 42 | ||||
-rw-r--r-- | build2/regex.hxx | 57 | ||||
-rw-r--r-- | build2/regex.txx | 215 | ||||
-rw-r--r-- | build2/test/script/builtin.cxx | 3 | ||||
-rw-r--r-- | build2/test/script/runner.cxx | 2 |
6 files changed, 2 insertions, 318 deletions
diff --git a/build2/buildfile b/build2/buildfile index 3a114e2..6d48718 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -26,7 +26,6 @@ exe{b}: \ {hxx cxx}{ operation } \ {hxx cxx}{ parser } \ {hxx cxx}{ prerequisite } \ - {hxx txx cxx}{ regex } \ {hxx cxx}{ rule } \ {hxx }{ rule-map } \ {hxx txx cxx}{ scheduler } \ diff --git a/build2/regex.cxx b/build2/regex.cxx deleted file mode 100644 index d96b860..0000000 --- a/build2/regex.cxx +++ /dev/null @@ -1,42 +0,0 @@ -// file : build2/regex.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include <build2/regex.hxx> - -#if defined(_MSC_VER) && _MSC_VER <= 1910 -# include <cstring> // strstr() -#endif - -#include <ostream> -#include <sstream> - -namespace std -{ - // Currently libstdc++ just returns the name of the exception (bug #67361). - // So we check that the description contains at least one space character. - // - // While VC's description is meaningful, it has an undesired prefix that - // resembles the following: 'regex_error(error_badrepeat): '. So we skip it. - // - ostream& - operator<< (ostream& o, const regex_error& e) - { - const char* d (e.what ()); - -#if defined(_MSC_VER) && _MSC_VER <= 1910 - const char* rd (strstr (d, "): ")); - if (rd != nullptr) - d = rd + 3; -#endif - - ostringstream os; - os << runtime_error (d); // Sanitize the description. - - string s (os.str ()); - if (s.find (' ') != string::npos) - o << ": " << s; - - return o; - } -} diff --git a/build2/regex.hxx b/build2/regex.hxx deleted file mode 100644 index 1fa261b..0000000 --- a/build2/regex.hxx +++ /dev/null @@ -1,57 +0,0 @@ -// file : build2/regex.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#ifndef BUILD2_REGEX_HXX -#define BUILD2_REGEX_HXX - -#include <regex> -#include <iosfwd> -#include <string> // basic_string - -#include <build2/types.hxx> -#include <build2/utility.hxx> - -namespace build2 -{ - // Like std::regex_match() but extends the standard ECMA-262 - // substitution escape sequences with a subset of Perl sequences: - // - // \\, \u, \l, \U, \L, \E, \1, ..., \9 - // - // Also return the resulting string as well as whether the search - // succeeded. - // - // Notes and limitations: - // - // - The only valid regex_constants flags are match_default, - // format_first_only (format_no_copy can easily be supported). - // - // - If backslash doesn't start any of the listed sequences then it is - // silently dropped and the following character is copied as is. - // - // - The character case conversion is performed according to the global - // C++ locale (which is, unless changed, is the same as C locale and - // both default to the POSIX locale aka "C"). - // - template <typename C> - pair<std::basic_string<C>, bool> - regex_replace_ex (const std::basic_string<C>&, - const std::basic_regex<C>&, - const std::basic_string<C>& fmt, - std::regex_constants::match_flag_type = - std::regex_constants::match_default); -} - -namespace std -{ - // Print regex error description but only if it is meaningful (this is also - // why we have to print leading colon). - // - ostream& - operator<< (ostream&, const regex_error&); -} - -#include <build2/regex.txx> - -#endif // BUILD2_REGEX_HXX diff --git a/build2/regex.txx b/build2/regex.txx deleted file mode 100644 index 1325de9..0000000 --- a/build2/regex.txx +++ /dev/null @@ -1,215 +0,0 @@ -// file : build2/regex.txx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -namespace build2 -{ - template <typename C> - pair<std::basic_string<C>, bool> - regex_replace_ex (const std::basic_string<C>& s, - const std::basic_regex<C>& re, - const std::basic_string<C>& fmt, - std::regex_constants::match_flag_type flags) - { - using namespace std; - - using string_type = basic_string<C>; - using str_it = typename string_type::const_iterator; - using regex_it = regex_iterator<str_it>; - - bool first_only ((flags & std::regex_constants::format_first_only) == - std::regex_constants::format_first_only); - - locale cl; // Copy of the global C++ locale. - string_type r; - - // Beginning of the last unmatched substring. - // - str_it ub (s.begin ()); - - regex_it b (s.begin (), s.end (), re, flags); - regex_it e; - bool match (b != e); - - for (regex_it i (b); i != e; ++i) - { - const match_results<str_it>& m (*i); - - // Copy the preceeding unmatched substring, save the beginning of the - // one that follows. - // - r.append (ub, m.prefix ().second); - ub = m.suffix ().first; - - if (first_only && i != b) - r.append (m[0].first, m[0].second); // Append matched substring. - else - { - // The standard implementation calls m.format() here. We perform our - // own formatting. - // - // Note that we are using char type literals with the assumption that - // being ASCII characters they will be properly "widened" to the - // corresponding literals of the C template parameter type. - // - auto digit = [] (C c) -> int - { - return c >= '0' && c <= '9' ? c - '0' : -1; - }; - - enum class case_conv {none, upper, lower, upper_once, lower_once} - mode (case_conv::none); - - auto conv_chr = [&mode, &cl] (C c) -> C - { - switch (mode) - { - case case_conv::upper_once: mode = case_conv::none; - case case_conv::upper: c = toupper (c, cl); break; - case case_conv::lower_once: mode = case_conv::none; - case case_conv::lower: c = tolower (c, cl); break; - case case_conv::none: break; - } - return c; - }; - - auto append_chr = [&r, &conv_chr] (C c) - { - r.push_back (conv_chr (c)); - }; - - auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e) - { - // Optimize for the common case. - // - if (mode == case_conv::none) - r.append (b, e); - else - { - for (str_it i (b); i != e; ++i) - r.push_back (conv_chr (*i)); - } - }; - - size_t n (fmt.size ()); - for (size_t i (0); i < n; ++i) - { - C c (fmt[i]); - - switch (c) - { - case '$': - { - // Check if this is a $-based escape sequence. Interpret it - // accordingly if that's the case, treat '$' as a regular - // character otherwise. - // - c = fmt[++i]; // '\0' if last. - - switch (c) - { - case '$': append_chr (c); break; - case '&': append_str (m[0].first, m[0].second); break; - case '`': - { - append_str (m.prefix ().first, m.prefix ().second); - break; - } - case '\'': - { - append_str (m.suffix ().first, m.suffix ().second); - break; - } - default: - { - // Check if this is a sub-expression 1-based index ($n or - // $nn). Append the matching substring if that's the case. - // Treat '$' as a regular character otherwise. Index greater - // than the sub-expression count is silently ignored. - // - int si (digit (c)); - if (si >= 0) - { - int d; - if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last. - { - si = si * 10 + d; - ++i; - } - } - - if (si > 0) - { - // m[0] refers to the matched substring. - // - if (static_cast<size_t> (si) < m.size ()) - append_str (m[si].first, m[si].second); - } - else - { - // Not a $-based escape sequence so treat '$' as a - // regular character. - // - --i; - append_chr ('$'); - } - - break; - } - } - - break; - } - case '\\': - { - c = fmt[++i]; // '\0' if last. - - switch (c) - { - case '\\': append_chr (c); break; - - case 'u': mode = case_conv::upper_once; break; - case 'l': mode = case_conv::lower_once; break; - case 'U': mode = case_conv::upper; break; - case 'L': mode = case_conv::lower; break; - case 'E': mode = case_conv::none; break; - default: - { - // Check if this is a sub-expression 1-based index. Append - // the matching substring if that's the case, Skip '\\' - // otherwise. Index greater than the sub-expression count is - // silently ignored. - // - int si (digit (c)); - if (si > 0) - { - // m[0] refers to the matched substring. - // - if (static_cast<size_t> (si) < m.size ()) - append_str (m[si].first, m[si].second); - } - else - --i; - - break; - } - } - - break; - } - default: - { - // Append a regular character. - // - append_chr (c); - break; - } - } - } - } - } - - r.append (ub, s.end ()); // Append the rightmost non-matched substring. - return make_pair (move (r), match); - } -} diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx index 42e02d8..2a8150d 100644 --- a/build2/test/script/builtin.cxx +++ b/build2/test/script/builtin.cxx @@ -14,12 +14,11 @@ #include <ostream> #include <sstream> +#include <libbutl/regex.hxx> #include <libbutl/path-io.hxx> // use default operator<< implementation #include <libbutl/fdstream.hxx> // fdopen_mode, fdstream_mode #include <libbutl/filesystem.hxx> // mkdir_status -#include <build2/regex.hxx> - #include <build2/test/script/script.hxx> // Strictly speaking a builtin which reads/writes from/to standard streams diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx index 8269f05..a4ead93 100644 --- a/build2/test/script/runner.cxx +++ b/build2/test/script/runner.cxx @@ -7,9 +7,9 @@ #include <set> #include <ios> // streamsize +#include <libbutl/regex.hxx> #include <libbutl/fdstream.hxx> // fdopen_mode, fdnull(), fddup() -#include <build2/regex.hxx> #include <build2/variable.hxx> #include <build2/filesystem.hxx> |