Add sed builtin

author: Karen Arutyunov <karen@codesynthesis.com> 2017-01-31 22:08:38 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2017-02-03 23:57:27 +0300
commit: 044e2e1c1460fb060f677a366144b98905522754 (patch)
tree: 4cdd67e9bca323d74cf5cc514444019a70b4de95
parent: 31a4169c67045cfe37eed138b537930e259db1e9 (diff)
10 files changed, 1045 insertions, 149 deletions
diff --git a/build2/b.cxx b/build2/b.cxx
index e576435..b06459b 100644
--- a/build2/b.cxx
+++ b/build2/b.cxx
@@ -2,7 +2,10 @@
 // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
 // license   : MIT; see accompanying LICENSE file
 
-#include <string.h>    // strerror()
+#ifndef _WIN32
+#  include <signal.h> // signal()
+#endif
+
 #include <stdlib.h>    // getenv() _putenv()(_WIN32)
 
 #include <sstream>
@@ -82,6 +85,17 @@ main (int argc, char* argv[])
   {
     tracer trace ("main");
 
+    // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if
+    // the pipe reading end is closed. Note that by default this signal
+    // terminates a process. Also note that there is no way to disable this
+    // behavior on a file descriptor basis or for the write() function call.
+    //
+#ifndef _WIN32
+    if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
+      fail << "unable to ignore broken pipe (SIGPIPE) signal: "
+           << system_error (errno, system_category ()); // Sanitize.
+#endif
+
     // Parse the command line. We want to be able to specify options, vars,
     // and buildspecs in any order (it is really handy to just add -v at the
     // end of the command line).
diff --git a/build2/buildfile b/build2/buildfile
index 84e2f82..1ee7063 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -26,6 +26,7 @@ exe{b}:                                                   \
             {hxx         cxx}{ operation                } \
             {hxx         cxx}{ parser                   } \
             {hxx         cxx}{ prerequisite             } \
+            {hxx     txx cxx}{ regex                    } \
             {hxx         cxx}{ rule                     } \
             {hxx            }{ rule-map                 } \
             {hxx     txx cxx}{ scheduler                } \
diff --git a/build2/regex b/build2/regex
new file mode 100644
index 0000000..dc6dc96
--- /dev/null
+++ b/build2/regex
@@ -0,0 +1,57 @@
+// file      : build2/regex -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_REGEX
+#define BUILD2_REGEX
+
+#include <regex>
+#include <iosfwd>
+#include <string> // basic_string
+
+#include <build2/types>
+#include <build2/utility>
+
+namespace build2
+{
+  // Like std::regex_replace() but extends the standard ECMA-262
+  // substitution escape sequences with a subset of Perl sequences:
+  //
+  // \\, \u, \l, \U, \L, \E, \1, ..., \9
+  //
+  // Return the resulting string (first) as well as whether the search
+  // succeeded (second).
+  //
+  // Notes and limitations:
+  //
+  // - The only valid regex_constants flags are match_default,
+  //   format_first_only (format_no_copy can easily be supported).
+  //
+  // - If backslash doesn't start any of the listed sequences then it is
+  //   silently dropped and the following character is copied as is.
+  //
+  // - The character case conversion is performed according to the global
+  //   C++ locale (which, unless changed, is the same as the C locale and
+  //   both default to the POSIX locale aka "C").
+  //
+  template <typename C>
+  pair<std::basic_string<C>, bool>
+  regex_replace_ex (const std::basic_string<C>&,
+                    const std::basic_regex<C>&,
+                    const std::basic_string<C>& fmt,
+                    std::regex_constants::match_flag_type =
+                      std::regex_constants::match_default);
+}
+
+namespace std
+{
+  // Print the regex error description prefixed with ": " but only if it is
+  // meaningful (which is why the leading colon is printed here).
+  //
+  ostream&
+  operator<< (ostream&, const regex_error&);
+}
+
+#include <build2/regex.txx>
+
+#endif // BUILD2_REGEX
diff --git a/build2/regex.cxx b/build2/regex.cxx
new file mode 100644
index 0000000..40347b5
--- /dev/null
+++ b/build2/regex.cxx
@@ -0,0 +1,42 @@
+// file      : build2/regex.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <build2/regex>
+
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+#  include <cstring> // strstr()
+#endif
+
+#include <ostream>
+#include <sstream>
+
+namespace std
+{
+  // libstdc++ currently returns just the name of the exception as the
+  // description (bug #67361), so only a description with a space is printed.
+  //
+  // VC's description is meaningful but carries an undesired prefix that
+  // resembles the following: 'regex_error(error_badrepeat): '. It is skipped.
+  //
+  ostream&
+  operator<< (ostream& o, const regex_error& e)
+  {
+    const char* desc (e.what ());
+
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+    if (const char* sep = strstr (desc, "): "))
+      desc = sep + 3; // Continue right after the "): " separator.
+#endif
+
+    ostringstream buf;
+    buf << runtime_error (desc); // Sanitize the description.
+
+    const string text (buf.str ());
+    const bool meaningful (text.find (' ') != string::npos);
+    if (meaningful)
+      o << ": " << text;
+
+    return o;
+  }
+}
diff --git a/build2/regex.txx b/build2/regex.txx
new file mode 100644
index 0000000..1325de9
--- /dev/null
+++ b/build2/regex.txx
@@ -0,0 +1,215 @@
+// file      : build2/regex.txx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+  template <typename C>
+  pair<std::basic_string<C>, bool>
+  regex_replace_ex (const std::basic_string<C>& s,
+                    const std::basic_regex<C>& re,
+                    const std::basic_string<C>& fmt,
+                    std::regex_constants::match_flag_type flags)
+  {
+    using namespace std;
+
+    using string_type = basic_string<C>;
+    using str_it      = typename string_type::const_iterator;
+    using regex_it    = regex_iterator<str_it>;
+
+    bool first_only ((flags & std::regex_constants::format_first_only) ==
+                     std::regex_constants::format_first_only);
+
+    locale cl; // Copy of the global C++ locale.
+    string_type r;
+
+    // Beginning of the last unmatched substring.
+    //
+    str_it ub (s.begin ());
+
+    regex_it b (s.begin (), s.end (), re, flags);
+    regex_it e;
+    bool match (b != e); // True if there is at least one match.
+
+    for (regex_it i (b); i != e; ++i)
+    {
+      const match_results<str_it>& m (*i);
+
+      // Copy the preceding unmatched substring, save the beginning of the
+      // one that follows.
+      //
+      r.append (ub, m.prefix ().second);
+      ub = m.suffix ().first;
+
+      if (first_only && i != b)
+        r.append (m[0].first, m[0].second); // Append matched substring.
+      else
+      {
+        // The standard implementation calls m.format() here. We perform our
+        // own formatting.
+        //
+        // Note that we are using char type literals with the assumption that
+        // being ASCII characters they will be properly "widened" to the
+        // corresponding literals of the C template parameter type.
+        //
+        auto digit = [] (C c) -> int
+        {
+          return c >= '0' && c <= '9' ? c - '0' : -1;
+        };
+
+        enum class case_conv {none, upper, lower, upper_once, lower_once}
+        mode (case_conv::none);
+
+        auto conv_chr = [&mode, &cl] (C c) -> C
+        {
+          switch (mode)
+          {
+          case case_conv::upper_once: mode = case_conv::none; // Fall through.
+          case case_conv::upper:      c = toupper (c, cl); break;
+          case case_conv::lower_once: mode = case_conv::none; // Fall through.
+          case case_conv::lower:      c = tolower (c, cl); break;
+          case case_conv::none:       break;
+          }
+          return c;
+        };
+
+        auto append_chr = [&r, &conv_chr] (C c)
+        {
+          r.push_back (conv_chr (c));
+        };
+
+        auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
+        {
+          // Optimize for the common case.
+          //
+          if (mode == case_conv::none)
+            r.append (b, e);
+          else
+          {
+            for (str_it i (b); i != e; ++i)
+              r.push_back (conv_chr (*i));
+          }
+        };
+
+        size_t n (fmt.size ());
+        for (size_t i (0); i < n; ++i)
+        {
+          C c (fmt[i]);
+
+          switch (c)
+          {
+          case '$':
+            {
+              // Check if this is a $-based escape sequence. Interpret it
+              // accordingly if that's the case, treat '$' as a regular
+              // character otherwise.
+              //
+              c = fmt[++i]; // '\0' if last.
+
+              switch (c)
+              {
+              case '$': append_chr (c); break;
+              case '&': append_str (m[0].first, m[0].second); break;
+              case '`':
+                {
+                  append_str (m.prefix ().first, m.prefix ().second);
+                  break;
+                }
+              case '\'':
+                {
+                  append_str (m.suffix ().first, m.suffix ().second);
+                  break;
+                }
+              default:
+                {
+                  // Check if this is a sub-expression 1-based index ($n or
+                  // $nn, but not $0 or $00). Append the matching substring if
+                  // so, otherwise treat '$' as a regular character. An index
+                  // above the sub-expression count is silently ignored.
+                  //
+                  int si (digit (c));
+                  if (si >= 0)
+                  {
+                    int d (digit (fmt[i + 1])); // '\0' if last.
+                    if (d >= 0 && si * 10 + d > 0) // Don't consume "$00".
+                    {
+                      si = si * 10 + d;
+                      ++i;
+                    }
+                  }
+
+                  if (si > 0)
+                  {
+                    // m[0] refers to the matched substring.
+                    //
+                    if (static_cast<size_t> (si) < m.size ())
+                      append_str (m[si].first, m[si].second);
+                  }
+                  else
+                  {
+                    // Not a $-based escape sequence so treat '$' as a
+                    // regular character.
+                    //
+                    --i;
+                    append_chr ('$');
+                  }
+
+                  break;
+                }
+              }
+
+              break;
+            }
+          case '\\':
+            {
+              c = fmt[++i]; // '\0' if last.
+
+              switch (c)
+              {
+              case '\\': append_chr (c); break;
+
+              case 'u': mode = case_conv::upper_once; break;
+              case 'l': mode = case_conv::lower_once; break;
+              case 'U': mode = case_conv::upper;      break;
+              case 'L': mode = case_conv::lower;      break;
+              case 'E': mode = case_conv::none;       break;
+              default:
+                {
+                  // Check if this is a sub-expression 1-based index. Append
+                  // the matching substring if that's the case, Skip '\\'
+                  // otherwise. Index greater than the sub-expression count is
+                  // silently ignored.
+                  //
+                  int si (digit (c));
+                  if (si > 0)
+                  {
+                    // m[0] refers to the matched substring.
+                    //
+                    if (static_cast<size_t> (si) < m.size ())
+                      append_str (m[si].first, m[si].second);
+                  }
+                  else
+                    --i;
+
+                  break;
+                }
+              }
+
+              break;
+            }
+          default:
+            {
+              // Append a regular character.
+              //
+              append_chr (c);
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    r.append (ub, s.end ()); // Append the rightmost non-matched substring.
+    return make_pair (move (r), match);
+  }
+}
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 008ac32..3957adb 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -10,12 +10,17 @@
 #  include <sys/utime.h>
 #endif
 
+#include <locale>
 #include <thread>
+#include <ostream>
+#include <sstream>
 
 #include <butl/path-io>    // use default operator<< implementation
 #include <butl/fdstream>   // fdopen_mode, fdstream_mode
 #include <butl/filesystem> // mkdir_status
 
+#include <build2/regex>
+
 #include <build2/test/script/script>
 
 // Strictly speaking a builtin which reads/writes from/to standard streams
@@ -51,6 +56,74 @@ namespace build2
       //
       struct failed {};
 
+      // Collect an error message and, on destruction, print it in one go to
+      // the provided stream, then throw failed if requested. The message is
+      // prefixed with the builtin name.
+      //
+      // Move constructible-only, not assignable (based on diag_record).
+      //
+      class error_record
+      {
+      public:
+        error_record (ostream& os, bool fail, const char* builtin)
+            : os_ (os), fail_ (fail), empty_ (false)
+        {
+          ss_ << builtin << ": ";
+        }
+
+        // Older versions of libstdc++ don't have the ostringstream move
+        // support. Luckily, GCC doesn't seem to be actually needing move due
+        // to copy/move elision.
+        //
+#ifdef __GLIBCXX__
+        error_record (error_record&&);
+#else
+        error_record (error_record&& src)
+            : os_ (src.os_),
+              ss_ (move (src.ss_)),
+              fail_ (src.fail_),
+              empty_ (src.empty_)
+        {
+          src.empty_ = true;
+        }
+#endif
+
+        template <typename T>
+        friend const error_record&
+        operator<< (const error_record& rec, const T& v)
+        {
+          rec.ss_ << v;
+          return rec;
+        }
+
+        ~error_record () noexcept (false)
+        {
+          if (empty_)
+            return;
+
+          // The output stream can be in a bad state (for example, as a
+          // result of an unsuccessful attempt to report a previous error),
+          // so only print if it is still good.
+          //
+          if (os_.good ())
+          {
+            ss_.put ('\n');
+            os_ << ss_.str ();
+            os_.flush ();
+          }
+
+          if (fail_)
+            throw failed ();
+        }
+
+      private:
+        ostream& os_;
+        mutable ostringstream ss_;
+
+        bool fail_;
+        bool empty_;
+      };
+
       // Parse and normalize a path. Also, unless it is already absolute, make
       // the path absolute using the specified directory. Throw invalid_path
       // if the path is empty, and on parsing and normalization failures.
@@ -103,6 +176,11 @@ namespace build2
         uint8_t r (1);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "cat");
+        };
+
         try
         {
           ifdstream cin  (move (in),  fdstream_mode::binary);
@@ -154,15 +232,15 @@ namespace build2
           }
           catch (const io_error& e)
           {
-            cerr << "cat: unable to print ";
+            error_record d (error ());
+            d << "unable to print ";
 
             if (p.empty ())
-              cerr << "stdin";
+              d << "stdin";
             else
-              cerr << "'" << p << "'";
+              d << "'" << p << "'";
 
-            cerr << ": " << e << endl;
-            throw failed ();
+            d << ": " << e;
           }
 
           cin.close ();
@@ -171,15 +249,13 @@ namespace build2
         }
         catch (const invalid_path& e)
         {
-          cerr << "cat: invalid path '" << e.path << "'" << endl;
+          error (false) << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing cin, cout or writing to cerr (that's
-        // why need to check its state before writing).
+        // Can be thrown while creating/closing cin, cout or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "cat: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -215,8 +291,7 @@ namespace build2
           for (auto b (args.begin ()), i (b), e (args.end ()); i != e; ++i)
             cout << (i != b ? " " : "") << *i;
 
-          cout << endl;
-
+          cout << '\n';
           cout.close ();
           r = 0;
         }
@@ -291,6 +366,11 @@ namespace build2
         uint8_t r (1);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "mkdir");
+        };
+
         try
         {
           in.close ();
@@ -317,10 +397,7 @@ namespace build2
           // Create directories.
           //
           if (i == args.end ())
-          {
-            cerr << "mkdir: missing directory" << endl;
-            throw failed ();
-          }
+            error () << "missing directory";
 
           for (; i != args.end (); ++i)
           {
@@ -337,9 +414,7 @@ namespace build2
             }
             catch (const system_error& e)
             {
-              cerr << "mkdir: unable to create directory '" << p << "': "
-                   << e << endl;
-              throw failed ();
+              error () << "unable to create directory '" << p << "': " << e;
             }
           }
 
@@ -347,15 +422,13 @@ namespace build2
         }
         catch (const invalid_path& e)
         {
-          cerr << "mkdir: invalid path '" << e.path << "'" << endl;
+          error (false) << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing in, out or writing to cerr (that's why
-        // need to check its state before writing).
+        // Can be thrown while closing in, out or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "mkdir: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -403,6 +476,11 @@ namespace build2
         uint8_t r (1);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "rm");
+        };
+
         try
         {
           in.close ();
@@ -432,10 +510,7 @@ namespace build2
           // Remove entries.
           //
           if (i == args.end () && !force)
-          {
-            cerr << "rm: missing file" << endl;
-            throw failed ();
-          }
+            error () << "missing file";
 
           const dir_path& wd  (sp.wd_path);
           const dir_path& rwd (sp.root->wd_path);
@@ -445,11 +520,8 @@ namespace build2
             path p (parse_path (*i, wd));
 
             if (!p.sub (rwd) && !force)
-            {
-              cerr << "rm: '" << p << "' is out of working directory '" << rwd
-                   << "'" << endl;
-              throw failed ();
-            }
+              error () << "'" << p << "' is out of working directory '" << rwd
+                       << "'";
 
             try
             {
@@ -458,17 +530,11 @@ namespace build2
               if (dir_exists (d))
               {
                 if (!dir)
-                {
-                  cerr << "rm: '" << p << "' is a directory" << endl;
-                  throw failed ();
-                }
+                  error () << "'" << p << "' is a directory";
 
                 if (wd.sub (d))
-                {
-                  cerr << "rm: '" << p << "' contains test working directory '"
-                       << wd << "'" << endl;
-                  throw failed ();
-                }
+                  error () << "'" << p << "' contains test working directory '"
+                           << wd << "'";
 
                 // The call can result in rmdir_status::not_exist. That's not
                 // very likelly but there is also nothing bad about it.
@@ -480,8 +546,7 @@ namespace build2
             }
             catch (const system_error& e)
             {
-              cerr << "rm: unable to remove '" << p << "': " << e << endl;
-              throw failed ();
+              error () << "unable to remove '" << p << "': " << e;
             }
           }
 
@@ -489,15 +554,13 @@ namespace build2
         }
         catch (const invalid_path& e)
         {
-          cerr << "rm: invalid path '" << e.path << "'" << endl;
+          error (false) << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing in, out or writing to cerr (that's why
-        // need to check its state before writing).
+        // Can be thrown while closing in, out or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "rm: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -533,6 +596,11 @@ namespace build2
         uint8_t r (1);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "rmdir");
+        };
+
         try
         {
           in.close ();
@@ -559,10 +627,7 @@ namespace build2
           // Remove directories.
           //
           if (i == args.end () && !force)
-          {
-            cerr << "rmdir: missing directory" << endl;
-            throw failed ();
-          }
+            error () << "missing directory";
 
           const dir_path& wd  (sp.wd_path);
           const dir_path& rwd (sp.root->wd_path);
@@ -572,18 +637,12 @@ namespace build2
             dir_path p (path_cast<dir_path> (parse_path (*i, wd)));
 
             if (wd.sub (p))
-            {
-              cerr << "rmdir: '" << p << "' contains test working directory '"
-                   << wd << "'" << endl;
-              throw failed ();
-            }
+              error () << "'" << p << "' contains test working directory '"
+                       << wd << "'";
 
             if (!p.sub (rwd) && !force)
-            {
-              cerr << "rmdir: '" << p << "' is out of working directory '"
-                   << rwd << "'" << endl;
-              throw failed ();
-            }
+              error () << "'" << p << "' is out of working directory '"
+                       << rwd << "'";
 
             try
             {
@@ -596,8 +655,7 @@ namespace build2
             }
             catch (const system_error& e)
             {
-              cerr << "rmdir: unable to remove '" << p << "': " << e << endl;
-              throw failed ();
+              error () << "unable to remove '" << p << "': " << e;
             }
           }
 
@@ -605,15 +663,259 @@ namespace build2
         }
         catch (const invalid_path& e)
         {
-          cerr << "rmdir: invalid path '" << e.path << "'" << endl;
+          error (false) << "invalid path '" << e.path << "'";
+        }
+        // Can be thrown while closing in, out or writing to cerr.
+        //
+        catch (const io_error& e)
+        {
+          error (false) << e;
+        }
+        catch (const failed&)
+        {
+          // Diagnostics has already been issued.
+        }
+
+        cerr.close ();
+        return r;
+      }
+      catch (const std::exception&)
+      {
+        return 1;
+      }
+
+      // sed [-n] -e <script> [<file>]
+      //
+      // Read text from file, make editing changes according to script, and
+      // write the result to stdout. If file is not specified or is '-', read
+      // from stdin.
+      //
+      // -n
+      //    Suppress automatic printing of the pattern space at the end of the
+      //    script execution.
+      //
+      // -e <script>
+      //    Editing commands to be executed (required).
+      //
+      //  Currently, only single-command scripts using the following editing
+      //  commands are supported.
+      //
+      //  s/<regex>/<replacement>/<flags>
+      //    The supported flags are 'i' (case-insensitive search), 'g'
+      //    (substitute globally), 'p' (print if a replacement was made). If
+      //    regex starts with ^, then it only matches at the beginning of the
+      //    pattern space. Similarly, if it ends with $, then it only matches
+      //    at the end of the pattern space.
+      //
+      //    In replacement, besides the standard ECMAScript escape sequences a
+      //    subset of Perl-specific ones is recognized.
+      //
+      //  For more details read the builtin description in 'The build2
+      //  Testscript Language'.
+      //
+      // Note: must be executed asynchronously.
+      //
+      static uint8_t
+      sed (scope& sp,
+           const strings& args,
+           auto_fd in, auto_fd out, auto_fd err) noexcept
+      try
+      {
+        uint8_t r (1);
+        ofdstream cerr (move (err));
+
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "sed");
+        };
+
+        try
+        {
+          // Do not throw when failbit is set (getline() failed to extract any
+          // character).
+          //
+          ifdstream cin  (move (in), ifdstream::badbit);
+          ofdstream cout (move (out));
+
+          auto i (args.begin ());
+          auto e (args.end ());
+
+          // Process options.
+          //
+          bool auto_prn (true);
+
+          struct substitute
+          {
+            string regex;
+            string replacement;
+            bool icase  = false;
+            bool global = false;
+            bool print  = false;
+          };
+          optional<substitute> subst;
+
+          for (; i != e; ++i)
+          {
+            if (*i == "-n")
+              auto_prn = false;
+            else if (*i == "-e")
+            {
+              // Only a single script is supported.
+	      //
+              if (subst)
+                error () << "multiple scripts";
+
+              // If option has no value then bail out and report.
+              //
+              if (++i == e)
+                break;
+
+              const string& v (*i);
+              if (v.empty ())
+                error () << "empty script";
+
+              if (v[0] != 's')
+                error () << "only 's' command supported";
+
+              // Parse the substitute command.
+              //
+              if (v.size () < 2)
+                error () << "no delimiter for 's' command";
+
+              char delim (v[1]);
+              if (delim == '\\' || delim == '\n')
+                error () << "invalid delimiter for 's' command";
+
+              size_t p (v.find (delim, 2));
+              if (p == string::npos)
+                error () << "unterminated 's' command regex";
+
+              subst = substitute ();
+              subst->regex.assign (v, 2, p - 2);
+
+              // Empty regex matches nothing, so not of much use.
+              //
+              if (subst->regex.empty ())
+                error () << "empty regex in 's' command";
+
+              size_t b (p + 1);
+              p = v.find (delim, b);
+              if (p == string::npos)
+                error () << "unterminated 's' command replacement";
+
+              subst->replacement.assign (v, b, p - b);
+
+              // Parse the substitute command flags.
+              //
+              char c;
+              for (++p; (c = v[p]) != '\0'; ++p)
+              {
+                switch (c)
+                {
+                case 'i': subst->icase  = true; break;
+                case 'g': subst->global = true; break;
+                case 'p': subst->print  = true; break;
+                default:
+                  {
+                    error () << "invalid 's' command flag '" << c << "'";
+                  }
+                }
+              }
+            }
+            else
+            {
+              if (*i == "--")
+                ++i;
+
+              break;
+            }
+          }
+
+          if (!subst)
+            error () << "missing script";
+
+          // Path of a file to edit. An empty path represents stdin.
+          //
+          path p;
+          if (i != e)
+	  {
+	    if (*i != "-")
+              p = parse_path (*i, sp.wd_path);
+
+            ++i;
+	  }
+
+          if (i != e)
+	    error () << "unexpected argument";
+
+          // Note that ECMAScript is implied if no grammar flag is specified.
+          //
+          regex re (subst->regex,
+                    subst->icase ? regex::icase : regex::ECMAScript);
+
+          // Edit a file or STDIN.
+          //
+          try
+          {
+            // Open a file if specified.
+            //
+            if (!p.empty ())
+            {
+              cin.close (); // Flush and close.
+              cin.open (p);
+            }
+
+            // Read until failbit is set (throw on badbit).
+            //
+            string s;
+            while (getline (cin, s))
+            {
+              auto r (regex_replace_ex (s,
+                                        re,
+                                        subst->replacement,
+                                        subst->global
+                                        ? regex_constants::format_default
+                                        : regex_constants::format_first_only));
+
+              // Add newline regardless whether the source line is newline-
+              // terminated or not (in accordance with POSIX).
+              //
+              if (auto_prn || (r.second && subst->print))
+                cout << r.first << '\n';
+            }
+
+            cin.close ();
+            cout.close ();
+            r = 0;
+          }
+          catch (const io_error& e)
+          {
+            error_record d (error ());
+            d << "unable to edit ";
+
+            if (p.empty ())
+              d << "stdin";
+            else
+              d << "'" << p << "'";
+
+            d << ": " << e;
+          }
+        }
+        catch (const regex_error& e)
+        {
+          // Print regex_error description if meaningful (no space).
+          //
+          error (false) << "invalid regex" << e;
+        }
+        catch (const invalid_path& e)
+        {
+          error (false) << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing in, out or writing to cerr (that's why
-        // need to check its state before writing).
+        // Can be thrown while creating cin, cout or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "rmdir: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -654,30 +956,26 @@ namespace build2
         uint8_t r (2);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "test");
+        };
+
         try
         {
           in.close ();
           out.close ();
 
           if (args.size () < 2)
-          {
-            cerr << "test: missing path" << endl;
-            throw failed ();
-          }
+            error () << "missing path";
 
           bool file (args[0] == "-f");
 
           if (!file && args[0] != "-d")
-          {
-            cerr << "test: invalid option" << endl;
-            throw failed ();
-          }
+            error () << "invalid option";
 
           if (args.size () > 2)
-          {
-            cerr << "test: unexpected argument" << endl;
-            throw failed ();
-          }
+            error () << "unexpected argument";
 
           path p (parse_path (args[1], sp.wd_path));
 
@@ -687,21 +985,18 @@ namespace build2
           }
           catch (const system_error& e)
           {
-            cerr << "test: cannot test '" << p << "': " << e << endl;
-            throw failed ();
+            error () << "cannot test '" << p << "': " << e;
           }
         }
         catch (const invalid_path& e)
         {
-          cerr << "test: invalid path '" << e.path << "'" << endl;
+          error (false)  << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing in, out or writing to cerr (that's why
-        // need to check its state before writing).
+        // Can be thrown while closing in, out or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "test: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -740,16 +1035,18 @@ namespace build2
         uint8_t r (1);
         ofdstream cerr (move (err));
 
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "touch");
+        };
+
         try
         {
           in.close ();
           out.close ();
 
           if (args.empty ())
-          {
-            cerr << "touch: missing file" << endl;
-            throw failed ();
-          }
+            error () << "missing file";
 
           // Create files.
           //
@@ -783,25 +1080,17 @@ namespace build2
                 }
                 catch (const io_error& e)
                 {
-                  cerr << "touch: cannot create file '" << p << "': " << e
-                       << endl;
-                  throw failed ();
+                  error () << "cannot create file '" << p << "': " << e;
                 }
 
                 sp.clean ({cleanup_type::always, p}, true);
               }
               else
-              {
-                cerr << "touch: '" << p << "' exists and is not a file"
-                     << endl;
-                throw failed ();
-              }
+                error () << "'" << p << "' exists and is not a file";
             }
             catch (const system_error& e)
             {
-              cerr << "touch: cannot create/update '" << p << "': " << e
-                   << endl;
-              throw failed ();
+              error () << "cannot create/update '" << p << "': " << e;
             }
           }
 
@@ -809,15 +1098,13 @@ namespace build2
         }
         catch (const invalid_path& e)
         {
-          cerr << "touch: invalid path '" << e.path << "'" << endl;
+          error (false) << "invalid path '" << e.path << "'";
         }
-        // Can be thrown while closing in, out or writing to cerr (that's why
-        // need to check its state before writing).
+        // Can be thrown while closing in, out or writing to cerr.
         //
         catch (const io_error& e)
         {
-          if (cerr.good ())
-            cerr << "touch: " << e << endl;
+          error (false) << e;
         }
         catch (const failed&)
         {
@@ -896,6 +1183,7 @@ namespace build2
         {"mkdir", &sync_impl<&mkdir>},
         {"rm",    &sync_impl<&rm>},
         {"rmdir", &sync_impl<&rmdir>},
+        {"sed",   &async_impl<&sed>},
         {"test",  &sync_impl<&test>},
         {"touch", &sync_impl<&touch>},
         {"true",  &true_}
diff --git a/build2/test/script/regex b/build2/test/script/regex
index b25c1f1..1170b99 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -8,8 +8,9 @@
 #include <list>
 #include <regex>
 #include <locale>
+#include <string>        // basic_string
 #include <cstdint>       // uintptr_t
-#include <type_traits>   // make_unsigned, is_unsigned
+#include <type_traits>   // make_unsigned, enable_if, is_*
 #include <unordered_set>
 
 #include <build2/types>
@@ -25,7 +26,7 @@ namespace build2
       {
         using char_string = std::basic_string<char>;
 
-        enum class char_flags: std::uint16_t
+        enum class char_flags: uint16_t
         {
           icase = 0x1, // Case-insensitive match.
           idot  = 0x2, // Invert '.' escaping.
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index dcfaec9..751daec 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -5,12 +5,11 @@
 #include <build2/test/script/runner>
 
 #include <set>
-#include <ios>     // streamsize
-#include <cstring> // strstr()
-#include <sstream>
+#include <ios> // streamsize
 
 #include <butl/fdstream> // fdopen_mode, fdnull(), fddup()
 
+#include <build2/regex>
 #include <build2/filesystem>
 
 #include <build2/test/common>
@@ -21,39 +20,6 @@
 using namespace std;
 using namespace butl;
 
-namespace std
-{
-  // Print regex error description but only if it is meaningful (this is also
-  // why we have to print leading colon here).
-  //
-  // Currently libstdc++ just returns the name of the exception (bug #67361).
-  // So we check that the description contains at least one space character.
-  //
-  // While VC's description is meaningful, it has an undesired prefix that
-  // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
-  //
-  static ostream&
-  operator<< (ostream& o, const regex_error& e)
-  {
-    const char* d (e.what ());
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
-    const char* rd (strstr (d, "): "));
-    if (rd != nullptr)
-      d = rd + 3;
-#endif
-
-    ostringstream os;
-    os << runtime_error (d); // Sanitize the description.
-
-    string s (os.str ());
-    if (s.find (' ') != string::npos)
-      o << ": " << s;
-
-    return o;
-  }
-}
-
 namespace build2
 {
   namespace test
diff --git a/tests/test/script/builtin/buildfile b/tests/test/script/builtin/buildfile
index e5bac10..2a57c54 100644
--- a/tests/test/script/builtin/buildfile
+++ b/tests/test/script/builtin/buildfile
@@ -2,4 +2,4 @@
 # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
 # license   : MIT; see accompanying LICENSE file
 
-./: test{cat echo mkdir rm rmdir test touch} $b
+./: test{cat echo mkdir rm rmdir sed test touch} $b
diff --git a/tests/test/script/builtin/sed.test b/tests/test/script/builtin/sed.test
new file mode 100644
index 0000000..ef99539
--- /dev/null
+++ b/tests/test/script/builtin/sed.test
@@ -0,0 +1,312 @@
+# file      : tests/test/script/builtin/sed.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+.include ../common.test
+
+: arg
+:
+{
+  : auto-prn
+  :
+  {
+    $c <"sed -n -e 's/fox/bar/' <'foo'       " && $b : on
+    $c <"sed    -e 's/fox/bar/' <'foo' >'foo'" && $b : off
+  }
+
+  : script
+  :
+  {
+    : missed
+    :
+    $c <'sed' && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: missing script
+    EOE
+
+    : missed-val
+    :
+    $c <'sed -e' && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: missing script
+    EOE
+
+    : empty
+    :
+    $c <"sed -e ''" && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: empty script
+    EOE
+
+    : multiple
+    :
+    $c <"sed -e 's/a//' -e 's/a//'" && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: multiple scripts
+    EOE
+
+    : invalid
+    :
+    $c <"sed -e 'z'" && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: only 's' command supported
+    EOE
+  }
+
+  : file
+  :
+  {
+    : exist
+    :
+    $c <<EOI && $b
+    cat <'foo' >=f;
+    sed -e 's/foo/bar/' f >'bar'
+    EOI
+
+    : none
+    :
+    $c <<EOI && $b
+    sed -e 's/foo/bar/' <'foo' >'bar'
+    EOI
+
+    : dash
+    :
+    $c <<EOI && $b
+    sed -e 's/foo/bar/' - <'foo' >'bar'
+    EOI
+
+    : not-exist
+    :
+    $c <"sed -e 's/foo/bar/' f" && $b 2>>/~%EOE% != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    %sed: unable to edit '.+/1/f': .+%
+    EOE
+
+    : empty
+    :
+    $c <"sed -e 's/foo/bar/' ''" && $b 2>>/EOE != 0
+    testscript:1:1: error: sed exit status 1 != 0
+      info: stderr: test/1/stderr
+    sed: invalid path ''
+    EOE
+  }
+
+  : unexpected
+  :
+  $c <"sed -e 's/a//' a b" && $b 2>>/EOE != 0
+  testscript:1:1: error: sed exit status 1 != 0
+    info: stderr: test/1/stderr
+  sed: unexpected argument
+  EOE
+
+}
+
+: command
+:
+{
+  : subst
+  :
+  {
+    : parsing
+    :
+    {
+      : delim
+      :
+      {
+        : none
+        :
+        $c <"sed -e 's'" && $b 2>>/EOE != 0
+        testscript:1:1: error: sed exit status 1 != 0
+          info: stderr: test/1/stderr
+        sed: no delimiter for 's' command
+        EOE
+
+        : invalid
+        :
+        $c <"sed -e 's\\'" && $b 2>>/EOE != 0
+        testscript:1:1: error: sed exit status 1 != 0
+          info: stderr: test/1/stderr
+        sed: invalid delimiter for 's' command
+        EOE
+      }
+
+      : regex
+      :
+      {
+        : unterminated
+        :
+        $c <"sed -e 's/foo'" && $b 2>>/EOE != 0
+        testscript:1:1: error: sed exit status 1 != 0
+          info: stderr: test/1/stderr
+        sed: unterminated 's' command regex
+        EOE
+
+        : empty
+        :
+        $c <"sed -e 's///'" && $b 2>>/EOE != 0
+        testscript:1:1: error: sed exit status 1 != 0
+          info: stderr: test/1/stderr
+        sed: empty regex in 's' command
+        EOE
+
+        : invalid
+        :
+        : Note that old versions of libc++ (for example 1.1) do not detect some
+        : regex errors. For example '*' is parsed successfully.
+        :
+        $c <"sed -e 's/foo[/bar/'" && $b 2>>/~%EOE% != 0
+        testscript:1:1: error: sed exit status 1 != 0
+          info: stderr: test/1/stderr
+        %sed: invalid regex.*%
+        EOE
+      }
+
+      : unterminated-replacement
+      :
+      $c <"sed -e 's/foo/bar'" && $b 2>>/EOE != 0
+      testscript:1:1: error: sed exit status 1 != 0
+        info: stderr: test/1/stderr
+      sed: unterminated 's' command replacement
+      EOE
+
+      : invalid-flags
+      :
+      $c <"sed -e 's/foo/bar/a'" && $b 2>>/EOE != 0
+      testscript:1:1: error: sed exit status 1 != 0
+        info: stderr: test/1/stderr
+      sed: invalid 's' command flag 'a'
+      EOE
+    }
+
+    : exec
+    :
+    {
+      : flags
+      :
+      {
+        : global
+        :
+        {
+          $c <"sed -e 's/o/a/g' <'foo' >'faa'" && $b : on
+          $c <"sed -e 's/o/a/'  <'foo' >'fao'" && $b : off
+        }
+
+        : icase
+        :
+        {
+          $c <"sed -e 's/O/a/i' <'foo' >'fao'" && $b : on
+          $c <"sed -e 's/O/a/'  <'foo' >'foo'" && $b : off
+        }
+
+        : print
+        :
+        {
+          $c <"sed -n -e 's/o/a/p' <'foo' >'fao'" && $b : on-match
+          $c <"sed -n -e 's/o/a/'  <'foo'       " && $b : off-match
+          $c <"sed -n -e 's/u/a/p' <'foo'       " && $b : on-no-match
+        }
+      }
+
+      : search
+      {
+        : anchor
+        :
+        {
+          $c <"sed -n -e 's/^o/a/gp'  <'oof' >'aof'" && $b : begin
+          $c <"sed -n -e 's/o\$/a/gp' <'foo' >'foa'" && $b : end
+        }
+
+        : match
+        : Match corner cases
+        :
+        {
+          $c <"sed -n -e 's/a/b/p'  <'a'    >'b'   " && $b : full
+          $c <"sed -n -e 's/a/b/p'  <'ac'   >'bc'  " && $b : left
+          $c <"sed -n -e 's/a/b/p'  <'ca'   >'cb'  " && $b : right
+          $c <"sed -n -e 's/a/b/pg' <'xaax' >'xbbx'" && $b : adjacent
+        }
+      }
+
+      : replacement
+      :
+      {
+        : ecma-escape
+        :
+        {
+          $c <"sed <'xay' -e 's/a/\$b/'       >'x\$by'" && $b : none
+          $c <"sed <'xay' -e 's/a/\$/'        >'x\$y' " && $b : none-term
+          $c <"sed <'xay' -e 's/a/\$\$/'      >'x\$y' " && $b : self
+          $c <"sed <'xay' -e 's/a/b\$&c/'     >'xbacy'" && $b : match
+          $c <"sed <'xay' -e 's/a/b\$`c/'     >'xbxcy'" && $b : match-precede
+          $c <"sed <'xay' -e \"s/a/b\\\$'c/\" >'xbycy'" && $b : match-follow
+
+          : capture
+          :
+          $c <<EOI && $b
+          sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/$1$10/' >'aj'
+          EOI
+        }
+
+        : perl-escape
+        :
+        {
+          $c <"sed <'xay' -e 's/a/\\b/'  >'xby' " && $b : none
+          $c <"sed <'xay' -e 's/a/\\/'   >'xy'  " && $b : none-term
+          $c <"sed <'xay' -e 's/a/\\\\/' >'x\\y'" && $b : self
+
+          : capture
+          :
+          $c <<EOI && $b
+          sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/\1\10/' >'aa0'
+          EOI
+
+          : upper
+          :
+          {
+            $c <"sed <'xay' -e 's/a/\\U/'           >'xy'  " && $b : none
+            $c <"sed <'xay' -e 's/a/\\Uvz/'         >'xVZy'" && $b : repl
+            $c <"sed <'xay' -e 's/a/\\Uv\\Ez/'      >'xVzy'" && $b : end
+            $c <"sed <'aa'  -e 's/a/v\\Uz/g'        >'vZvZ'" && $b : locality
+            $c <"sed <'xay' -e 's/\(a\)/\\U\\1/'    >'xAy' " && $b : capt
+            $c <"sed <'x-y' -e 's/\(a?\)-/\\U\\1z/' >'xZy' " && $b : capt-empty
+            $c <"sed <'xay' -e 's/a/\\uvz/'         >'xVzy'" && $b : once
+          }
+
+          : lower
+          :
+          {
+            $c <"sed <'xay' -e 's/a/\\lVZ/' >'xvZy'" && $b : once
+          }
+        }
+      }
+
+      $c <"sed -e 's/a//' <:'b' >'b'" && $b : no-newline
+      $c <"sed -e 's/a//' <:''      " && $b : empty-stdin
+
+      : empty-file
+      :
+      $c <<EOI && $b
+      touch f;
+      sed -e 's/a//' f
+      EOI
+    }
+  }
+}
+
+: big
+:
+: Run sed on big input (about 3MB) to test that the builtin is asynchronous.
+:
+{
+  s="------------------------------------------------------------------------"
+  s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+  s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+  s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+  $c <"cat <'$s' | sed -e 's/^x//' >'$s'" && $b
+}
author	Karen Arutyunov <karen@codesynthesis.com>	2017-01-31 22:08:38 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2017-02-03 23:57:27 +0300
commit	044e2e1c1460fb060f677a366144b98905522754 (patch)
tree	4cdd67e9bca323d74cf5cc514444019a70b4de95
parent	31a4169c67045cfe37eed138b537930e259db1e9 (diff)