Move regex utilities to libbutl

author: Karen Arutyunov <karen@codesynthesis.com> 2017-06-21 13:05:43 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2017-06-21 13:05:43 +0300
commit: f6c20ad37b2ececb446b5051837bccba93c81d7a (patch)
tree: 5500b098a06a46ae21ab6f61973dd78489a6a607
parent: 5e538c45eb61bf9baa09cf2ef4a9a9148e8acab0 (diff)
6 files changed, 2 insertions, 318 deletions
diff --git a/build2/buildfile b/build2/buildfile
index 3a114e2..6d48718 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -26,7 +26,6 @@ exe{b}:                                                   \
             {hxx         cxx}{ operation                } \
             {hxx         cxx}{ parser                   } \
             {hxx         cxx}{ prerequisite             } \
-            {hxx     txx cxx}{ regex                    } \
             {hxx         cxx}{ rule                     } \
             {hxx            }{ rule-map                 } \
             {hxx     txx cxx}{ scheduler                } \
diff --git a/build2/regex.cxx b/build2/regex.cxx
deleted file mode 100644
index d96b860..0000000
--- a/build2/regex.cxx
+++ /dev/null
@@ -1,42 +0,0 @@
-// file      : build2/regex.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license   : MIT; see accompanying LICENSE file
-
-#include <build2/regex.hxx>
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
-#  include <cstring> // strstr()
-#endif
-
-#include <ostream>
-#include <sstream>
-
-namespace std
-{
-  // Currently libstdc++ just returns the name of the exception (bug #67361).
-  // So we check that the description contains at least one space character.
-  //
-  // While VC's description is meaningful, it has an undesired prefix that
-  // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
-  //
-  ostream&
-  operator<< (ostream& o, const regex_error& e)
-  {
-    const char* d (e.what ());
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
-    const char* rd (strstr (d, "): "));
-    if (rd != nullptr)
-      d = rd + 3;
-#endif
-
-    ostringstream os;
-    os << runtime_error (d); // Sanitize the description.
-
-    string s (os.str ());
-    if (s.find (' ') != string::npos)
-      o << ": " << s;
-
-    return o;
-  }
-}
diff --git a/build2/regex.hxx b/build2/regex.hxx
deleted file mode 100644
index 1fa261b..0000000
--- a/build2/regex.hxx
+++ /dev/null
@@ -1,57 +0,0 @@
-// file      : build2/regex.hxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license   : MIT; see accompanying LICENSE file
-
-#ifndef BUILD2_REGEX_HXX
-#define BUILD2_REGEX_HXX
-
-#include <regex>
-#include <iosfwd>
-#include <string> // basic_string
-
-#include <build2/types.hxx>
-#include <build2/utility.hxx>
-
-namespace build2
-{
-  // Like std::regex_replace() but extends the standard ECMA-262
-  // substitution escape sequences with a subset of Perl sequences:
-  //
-  // \\, \u, \l, \U, \L, \E, \1, ..., \9
-  //
-  // Also return the resulting string as well as whether the search
-  // succeeded.
-  //
-  // Notes and limitations:
-  //
-  // - The only valid regex_constants flags are match_default,
-  //   format_first_only (format_no_copy can easily be supported).
-  //
-  // - If backslash doesn't start any of the listed sequences then it is
-  //   silently dropped and the following character is copied as is.
-  //
-  // - The character case conversion is performed according to the global
-  //   C++ locale (which, unless changed, is the same as the C locale and
-  //   both default to the POSIX locale aka "C").
-  //
-  template <typename C>
-  pair<std::basic_string<C>, bool>
-  regex_replace_ex (const std::basic_string<C>&,
-                    const std::basic_regex<C>&,
-                    const std::basic_string<C>& fmt,
-                    std::regex_constants::match_flag_type =
-                      std::regex_constants::match_default);
-}
-
-namespace std
-{
-  // Print regex error description but only if it is meaningful (this is also
-  // why we have to print leading colon).
-  //
-  ostream&
-  operator<< (ostream&, const regex_error&);
-}
-
-#include <build2/regex.txx>
-
-#endif // BUILD2_REGEX_HXX
diff --git a/build2/regex.txx b/build2/regex.txx
deleted file mode 100644
index 1325de9..0000000
--- a/build2/regex.txx
+++ /dev/null
@@ -1,215 +0,0 @@
-// file      : build2/regex.txx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license   : MIT; see accompanying LICENSE file
-
-namespace build2
-{
-  template <typename C>
-  pair<std::basic_string<C>, bool>
-  regex_replace_ex (const std::basic_string<C>& s,
-                    const std::basic_regex<C>& re,
-                    const std::basic_string<C>& fmt,
-                    std::regex_constants::match_flag_type flags)
-  {
-    using namespace std;
-
-    using string_type = basic_string<C>;
-    using str_it      = typename string_type::const_iterator;
-    using regex_it    = regex_iterator<str_it>;
-
-    bool first_only ((flags & std::regex_constants::format_first_only) ==
-                     std::regex_constants::format_first_only);
-
-    locale cl; // Copy of the global C++ locale.
-    string_type r;
-
-    // Beginning of the last unmatched substring.
-    //
-    str_it ub (s.begin ());
-
-    regex_it b (s.begin (), s.end (), re, flags);
-    regex_it e;
-    bool match (b != e);
-
-    for (regex_it i (b); i != e; ++i)
-    {
-      const match_results<str_it>& m (*i);
-
-      // Copy the preceding unmatched substring, save the beginning of the
-      // one that follows.
-      //
-      r.append (ub, m.prefix ().second);
-      ub = m.suffix ().first;
-
-      if (first_only && i != b)
-        r.append (m[0].first, m[0].second); // Append matched substring.
-      else
-      {
-        // The standard implementation calls m.format() here. We perform our
-        // own formatting.
-        //
-        // Note that we are using char type literals with the assumption that
-        // being ASCII characters they will be properly "widened" to the
-        // corresponding literals of the C template parameter type.
-        //
-        auto digit = [] (C c) -> int
-        {
-          return c >= '0' && c <= '9' ? c - '0' : -1;
-        };
-
-        enum class case_conv {none, upper, lower, upper_once, lower_once}
-        mode (case_conv::none);
-
-        auto conv_chr = [&mode, &cl] (C c) -> C
-        {
-          switch (mode)
-          {
-          case case_conv::upper_once: mode = case_conv::none;
-          case case_conv::upper:      c = toupper (c, cl); break;
-          case case_conv::lower_once: mode = case_conv::none;
-          case case_conv::lower:      c = tolower (c, cl); break;
-          case case_conv::none:       break;
-          }
-          return c;
-        };
-
-        auto append_chr = [&r, &conv_chr] (C c)
-        {
-          r.push_back (conv_chr (c));
-        };
-
-        auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
-        {
-          // Optimize for the common case.
-          //
-          if (mode == case_conv::none)
-            r.append (b, e);
-          else
-          {
-            for (str_it i (b); i != e; ++i)
-              r.push_back (conv_chr (*i));
-          }
-        };
-
-        size_t n (fmt.size ());
-        for (size_t i (0); i < n; ++i)
-        {
-          C c (fmt[i]);
-
-          switch (c)
-          {
-          case '$':
-            {
-              // Check if this is a $-based escape sequence. Interpret it
-              // accordingly if that's the case, treat '$' as a regular
-              // character otherwise.
-              //
-              c = fmt[++i]; // '\0' if last.
-
-              switch (c)
-              {
-              case '$': append_chr (c); break;
-              case '&': append_str (m[0].first, m[0].second); break;
-              case '`':
-                {
-                  append_str (m.prefix ().first, m.prefix ().second);
-                  break;
-                }
-              case '\'':
-                {
-                  append_str (m.suffix ().first, m.suffix ().second);
-                  break;
-                }
-              default:
-                {
-                  // Check if this is a sub-expression 1-based index ($n or
-                  // $nn). Append the matching substring if that's the case.
-                  // Treat '$' as a regular character otherwise. Index greater
-                  // than the sub-expression count is silently ignored.
-                  //
-                  int si (digit (c));
-                  if (si >= 0)
-                  {
-                    int d;
-                    if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last.
-                    {
-                      si = si * 10 + d;
-                      ++i;
-                    }
-                  }
-
-                  if (si > 0)
-                  {
-                    // m[0] refers to the matched substring.
-                    //
-                    if (static_cast<size_t> (si) < m.size ())
-                      append_str (m[si].first, m[si].second);
-                  }
-                  else
-                  {
-                    // Not a $-based escape sequence so treat '$' as a
-                    // regular character.
-                    //
-                    --i;
-                    append_chr ('$');
-                  }
-
-                  break;
-                }
-              }
-
-              break;
-            }
-          case '\\':
-            {
-              c = fmt[++i]; // '\0' if last.
-
-              switch (c)
-              {
-              case '\\': append_chr (c); break;
-
-              case 'u': mode = case_conv::upper_once; break;
-              case 'l': mode = case_conv::lower_once; break;
-              case 'U': mode = case_conv::upper;      break;
-              case 'L': mode = case_conv::lower;      break;
-              case 'E': mode = case_conv::none;       break;
-              default:
-                {
-                  // Check if this is a sub-expression 1-based index. Append
-                  // the matching substring if that's the case, skip '\\'
-                  // otherwise. Index greater than the sub-expression count is
-                  // silently ignored.
-                  //
-                  int si (digit (c));
-                  if (si > 0)
-                  {
-                    // m[0] refers to the matched substring.
-                    //
-                    if (static_cast<size_t> (si) < m.size ())
-                      append_str (m[si].first, m[si].second);
-                  }
-                  else
-                    --i;
-
-                  break;
-                }
-              }
-
-              break;
-            }
-          default:
-            {
-              // Append a regular character.
-              //
-              append_chr (c);
-              break;
-            }
-          }
-        }
-      }
-    }
-
-    r.append (ub, s.end ()); // Append the rightmost non-matched substring.
-    return make_pair (move (r), match);
-  }
-}
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 42e02d8..2a8150d 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -14,12 +14,11 @@
 #include <ostream>
 #include <sstream>
 
+#include <libbutl/regex.hxx>
 #include <libbutl/path-io.hxx>    // use default operator<< implementation
 #include <libbutl/fdstream.hxx>   // fdopen_mode, fdstream_mode
 #include <libbutl/filesystem.hxx> // mkdir_status
 
-#include <build2/regex.hxx>
-
 #include <build2/test/script/script.hxx>
 
 // Strictly speaking a builtin which reads/writes from/to standard streams
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index 8269f05..a4ead93 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -7,9 +7,9 @@
 #include <set>
 #include <ios> // streamsize
 
+#include <libbutl/regex.hxx>
 #include <libbutl/fdstream.hxx> // fdopen_mode, fdnull(), fddup()
 
-#include <build2/regex.hxx>
 #include <build2/variable.hxx>
 #include <build2/filesystem.hxx>
author	Karen Arutyunov <karen@codesynthesis.com>	2017-06-21 13:05:43 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2017-06-21 13:05:43 +0300
commit	f6c20ad37b2ececb446b5051837bccba93c81d7a (patch)
tree	5500b098a06a46ae21ab6f61973dd78489a6a607
parent	5e538c45eb61bf9baa09cf2ef4a9a9148e8acab0 (diff)