aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-01-31 22:08:38 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-02-03 23:57:27 +0300
commit044e2e1c1460fb060f677a366144b98905522754 (patch)
tree4cdd67e9bca323d74cf5cc514444019a70b4de95
parent31a4169c67045cfe37eed138b537930e259db1e9 (diff)
Add sed builtin
-rw-r--r--build2/b.cxx16
-rw-r--r--build2/buildfile1
-rw-r--r--build2/regex57
-rw-r--r--build2/regex.cxx42
-rw-r--r--build2/regex.txx215
-rw-r--r--build2/test/script/builtin.cxx506
-rw-r--r--build2/test/script/regex5
-rw-r--r--build2/test/script/runner.cxx38
-rw-r--r--tests/test/script/builtin/buildfile2
-rw-r--r--tests/test/script/builtin/sed.test312
10 files changed, 1045 insertions, 149 deletions
diff --git a/build2/b.cxx b/build2/b.cxx
index e576435..b06459b 100644
--- a/build2/b.cxx
+++ b/build2/b.cxx
@@ -2,7 +2,10 @@
// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
// license : MIT; see accompanying LICENSE file
-#include <string.h> // strerror()
+#ifndef _WIN32
+# include <signal.h> // signal()
+#endif
+
#include <stdlib.h> // getenv() _putenv()(_WIN32)
#include <sstream>
@@ -82,6 +85,17 @@ main (int argc, char* argv[])
{
tracer trace ("main");
+ // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if
+ // the pipe reading end is closed. Note that by default this signal
+ // terminates a process. Also note that there is no way to disable this
+ // behavior on a file descriptor basis or for the write() function call.
+ //
+#ifndef _WIN32
+ if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
+ fail << "unable to ignore broken pipe (SIGPIPE) signal: "
+ << system_error (errno, system_category ()); // Sanitize.
+#endif
+
// Parse the command line. We want to be able to specify options, vars,
// and buildspecs in any order (it is really handy to just add -v at the
// end of the command line).
diff --git a/build2/buildfile b/build2/buildfile
index 84e2f82..1ee7063 100644
--- a/build2/buildfile
+++ b/build2/buildfile
@@ -26,6 +26,7 @@ exe{b}: \
{hxx cxx}{ operation } \
{hxx cxx}{ parser } \
{hxx cxx}{ prerequisite } \
+ {hxx txx cxx}{ regex } \
{hxx cxx}{ rule } \
{hxx }{ rule-map } \
{hxx txx cxx}{ scheduler } \
diff --git a/build2/regex b/build2/regex
new file mode 100644
index 0000000..dc6dc96
--- /dev/null
+++ b/build2/regex
@@ -0,0 +1,57 @@
+// file : build2/regex -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_REGEX
+#define BUILD2_REGEX
+
+#include <regex>
+#include <iosfwd>
+#include <string> // basic_string
+
+#include <build2/types>
+#include <build2/utility>
+
+namespace build2
+{
+ // Like std::regex_replace() but extends the standard ECMA-262
+ // substitution escape sequences with a subset of Perl sequences:
+ //
+ // \\, \u, \l, \U, \L, \E, \1, ..., \9
+ //
+ // Also return the resulting string as well as whether the search
+ // succeeded.
+ //
+ // Notes and limitations:
+ //
+ // - The only valid regex_constants flags are match_default,
+ // format_first_only (format_no_copy can easily be supported).
+ //
+ // - If backslash doesn't start any of the listed sequences then it is
+ // silently dropped and the following character is copied as is.
+ //
+ // - The character case conversion is performed according to the global
+ // C++ locale (which, unless changed, is the same as the C locale; both
+ // default to the POSIX locale, aka "C").
+ //
+ template <typename C>
+ pair<std::basic_string<C>, bool>
+ regex_replace_ex (const std::basic_string<C>&,
+ const std::basic_regex<C>&,
+ const std::basic_string<C>& fmt,
+ std::regex_constants::match_flag_type =
+ std::regex_constants::match_default);
+}
+
+namespace std
+{
+ // Print regex error description but only if it is meaningful (this is also
+ // why we have to print leading colon).
+ //
+ ostream&
+ operator<< (ostream&, const regex_error&);
+}
+
+#include <build2/regex.txx>
+
+#endif // BUILD2_REGEX
diff --git a/build2/regex.cxx b/build2/regex.cxx
new file mode 100644
index 0000000..40347b5
--- /dev/null
+++ b/build2/regex.cxx
@@ -0,0 +1,42 @@
+// file : build2/regex.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/regex>
+
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+# include <cstring> // strstr()
+#endif
+
+#include <ostream>
+#include <sstream>
+
+namespace std
+{
+ // Currently libstdc++ just returns the name of the exception (bug #67361).
+ // So we check that the description contains at least one space character.
+ //
+ // While VC's description is meaningful, it has an undesired prefix that
+ // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
+ //
+ ostream&
+ operator<< (ostream& o, const regex_error& e)
+ {
+ const char* d (e.what ());
+
+#if defined(_MSC_VER) && _MSC_VER <= 1910
+ const char* rd (strstr (d, "): "));
+ if (rd != nullptr)
+ d = rd + 3;
+#endif
+
+ ostringstream os;
+ os << runtime_error (d); // Sanitize the description.
+
+ string s (os.str ());
+ if (s.find (' ') != string::npos)
+ o << ": " << s;
+
+ return o;
+ }
+}
diff --git a/build2/regex.txx b/build2/regex.txx
new file mode 100644
index 0000000..1325de9
--- /dev/null
+++ b/build2/regex.txx
@@ -0,0 +1,215 @@
+// file : build2/regex.txx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ template <typename C>
+ pair<std::basic_string<C>, bool>
+ regex_replace_ex (const std::basic_string<C>& s,
+ const std::basic_regex<C>& re,
+ const std::basic_string<C>& fmt,
+ std::regex_constants::match_flag_type flags)
+ {
+ using namespace std;
+
+ using string_type = basic_string<C>;
+ using str_it = typename string_type::const_iterator;
+ using regex_it = regex_iterator<str_it>;
+
+ bool first_only ((flags & std::regex_constants::format_first_only) ==
+ std::regex_constants::format_first_only);
+
+ locale cl; // Copy of the global C++ locale.
+ string_type r;
+
+ // Beginning of the last unmatched substring.
+ //
+ str_it ub (s.begin ());
+
+ regex_it b (s.begin (), s.end (), re, flags);
+ regex_it e;
+ bool match (b != e);
+
+ for (regex_it i (b); i != e; ++i)
+ {
+ const match_results<str_it>& m (*i);
+
+ // Copy the preceding unmatched substring, save the beginning of the
+ // one that follows.
+ //
+ r.append (ub, m.prefix ().second);
+ ub = m.suffix ().first;
+
+ if (first_only && i != b)
+ r.append (m[0].first, m[0].second); // Append matched substring.
+ else
+ {
+ // The standard implementation calls m.format() here. We perform our
+ // own formatting.
+ //
+ // Note that we are using char type literals with the assumption that
+ // being ASCII characters they will be properly "widened" to the
+ // corresponding literals of the C template parameter type.
+ //
+ auto digit = [] (C c) -> int
+ {
+ return c >= '0' && c <= '9' ? c - '0' : -1;
+ };
+
+ enum class case_conv {none, upper, lower, upper_once, lower_once}
+ mode (case_conv::none);
+
+ auto conv_chr = [&mode, &cl] (C c) -> C
+ {
+ switch (mode)
+ {
+ case case_conv::upper_once: mode = case_conv::none;
+ case case_conv::upper: c = toupper (c, cl); break;
+ case case_conv::lower_once: mode = case_conv::none;
+ case case_conv::lower: c = tolower (c, cl); break;
+ case case_conv::none: break;
+ }
+ return c;
+ };
+
+ auto append_chr = [&r, &conv_chr] (C c)
+ {
+ r.push_back (conv_chr (c));
+ };
+
+ auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
+ {
+ // Optimize for the common case.
+ //
+ if (mode == case_conv::none)
+ r.append (b, e);
+ else
+ {
+ for (str_it i (b); i != e; ++i)
+ r.push_back (conv_chr (*i));
+ }
+ };
+
+ size_t n (fmt.size ());
+ for (size_t i (0); i < n; ++i)
+ {
+ C c (fmt[i]);
+
+ switch (c)
+ {
+ case '$':
+ {
+ // Check if this is a $-based escape sequence. Interpret it
+ // accordingly if that's the case, treat '$' as a regular
+ // character otherwise.
+ //
+ c = fmt[++i]; // '\0' if last.
+
+ switch (c)
+ {
+ case '$': append_chr (c); break;
+ case '&': append_str (m[0].first, m[0].second); break;
+ case '`':
+ {
+ append_str (m.prefix ().first, m.prefix ().second);
+ break;
+ }
+ case '\'':
+ {
+ append_str (m.suffix ().first, m.suffix ().second);
+ break;
+ }
+ default:
+ {
+ // Check if this is a sub-expression 1-based index ($n or
+ // $nn). Append the matching substring if that's the case.
+ // Treat '$' as a regular character otherwise. Index greater
+ // than the sub-expression count is silently ignored.
+ //
+ int si (digit (c));
+ if (si >= 0)
+ {
+ int d;
+ if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last.
+ {
+ si = si * 10 + d;
+ ++i;
+ }
+ }
+
+ if (si > 0)
+ {
+ // m[0] refers to the matched substring.
+ //
+ if (static_cast<size_t> (si) < m.size ())
+ append_str (m[si].first, m[si].second);
+ }
+ else
+ {
+ // Not a $-based escape sequence so treat '$' as a
+ // regular character.
+ //
+ --i;
+ append_chr ('$');
+ }
+
+ break;
+ }
+ }
+
+ break;
+ }
+ case '\\':
+ {
+ c = fmt[++i]; // '\0' if last.
+
+ switch (c)
+ {
+ case '\\': append_chr (c); break;
+
+ case 'u': mode = case_conv::upper_once; break;
+ case 'l': mode = case_conv::lower_once; break;
+ case 'U': mode = case_conv::upper; break;
+ case 'L': mode = case_conv::lower; break;
+ case 'E': mode = case_conv::none; break;
+ default:
+ {
+ // Check if this is a sub-expression 1-based index. Append
+ // the matching substring if that's the case. Skip '\\'
+ // otherwise. Index greater than the sub-expression count is
+ // silently ignored.
+ //
+ int si (digit (c));
+ if (si > 0)
+ {
+ // m[0] refers to the matched substring.
+ //
+ if (static_cast<size_t> (si) < m.size ())
+ append_str (m[si].first, m[si].second);
+ }
+ else
+ --i;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ default:
+ {
+ // Append a regular character.
+ //
+ append_chr (c);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ r.append (ub, s.end ()); // Append the rightmost non-matched substring.
+ return make_pair (move (r), match);
+ }
+}
diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx
index 008ac32..3957adb 100644
--- a/build2/test/script/builtin.cxx
+++ b/build2/test/script/builtin.cxx
@@ -10,12 +10,17 @@
# include <sys/utime.h>
#endif
+#include <locale>
#include <thread>
+#include <ostream>
+#include <sstream>
#include <butl/path-io> // use default operator<< implementation
#include <butl/fdstream> // fdopen_mode, fdstream_mode
#include <butl/filesystem> // mkdir_status
+#include <build2/regex>
+
#include <build2/test/script/script>
// Strictly speaking a builtin which reads/writes from/to standard streams
@@ -51,6 +56,74 @@ namespace build2
//
struct failed {};
+ // Accumulate an error message, print it atomically in dtor to the
+ // provided stream and throw failed afterwards if requested. Prefixes
+ // the message with the builtin name.
+ //
+ // Move constructible-only, not assignable (based on diag_record).
+ //
+ class error_record
+ {
+ public:
+ template <typename T>
+ friend const error_record&
+ operator<< (const error_record& r, const T& x)
+ {
+ r.ss_ << x;
+ return r;
+ }
+
+ error_record (ostream& o, bool fail, const char* name)
+ : os_ (o), fail_ (fail), empty_ (false)
+ {
+ ss_ << name << ": ";
+ }
+
+ // Older versions of libstdc++ don't have the ostringstream move
+ // support. Luckily, GCC doesn't seem to actually need the move due
+ // to copy/move elision.
+ //
+#ifdef __GLIBCXX__
+ error_record (error_record&&);
+#else
+ error_record (error_record&& r)
+ : os_ (r.os_),
+ ss_ (move (r.ss_)),
+ fail_ (r.fail_),
+ empty_ (r.empty_)
+ {
+ r.empty_ = true;
+ }
+#endif
+
+ ~error_record () noexcept (false)
+ {
+ if (!empty_)
+ {
+ // The output stream can be in a bad state (for example as a
+ // result of unsuccessful attempt to report a previous error), so
+ // we check it.
+ //
+ if (os_.good ())
+ {
+ ss_.put ('\n');
+ os_ << ss_.str ();
+ os_.flush ();
+ }
+
+ if (fail_)
+ throw failed ();
+ }
+ }
+
+ private:
+ ostream& os_;
+ mutable ostringstream ss_;
+
+ bool fail_;
+ bool empty_;
+ };
+
// Parse and normalize a path. Also, unless it is already absolute, make
// the path absolute using the specified directory. Throw invalid_path
// if the path is empty, and on parsing and normalization failures.
@@ -103,6 +176,11 @@ namespace build2
uint8_t r (1);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "cat");
+ };
+
try
{
ifdstream cin (move (in), fdstream_mode::binary);
@@ -154,15 +232,15 @@ namespace build2
}
catch (const io_error& e)
{
- cerr << "cat: unable to print ";
+ error_record d (error ());
+ d << "unable to print ";
if (p.empty ())
- cerr << "stdin";
+ d << "stdin";
else
- cerr << "'" << p << "'";
+ d << "'" << p << "'";
- cerr << ": " << e << endl;
- throw failed ();
+ d << ": " << e;
}
cin.close ();
@@ -171,15 +249,13 @@ namespace build2
}
catch (const invalid_path& e)
{
- cerr << "cat: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing cin, cout or writing to cerr (that's
- // why need to check its state before writing).
+ // Can be thrown while creating/closing cin, cout or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "cat: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -215,8 +291,7 @@ namespace build2
for (auto b (args.begin ()), i (b), e (args.end ()); i != e; ++i)
cout << (i != b ? " " : "") << *i;
- cout << endl;
-
+ cout << '\n';
cout.close ();
r = 0;
}
@@ -291,6 +366,11 @@ namespace build2
uint8_t r (1);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "mkdir");
+ };
+
try
{
in.close ();
@@ -317,10 +397,7 @@ namespace build2
// Create directories.
//
if (i == args.end ())
- {
- cerr << "mkdir: missing directory" << endl;
- throw failed ();
- }
+ error () << "missing directory";
for (; i != args.end (); ++i)
{
@@ -337,9 +414,7 @@ namespace build2
}
catch (const system_error& e)
{
- cerr << "mkdir: unable to create directory '" << p << "': "
- << e << endl;
- throw failed ();
+ error () << "unable to create directory '" << p << "': " << e;
}
}
@@ -347,15 +422,13 @@ namespace build2
}
catch (const invalid_path& e)
{
- cerr << "mkdir: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing in, out or writing to cerr (that's why
- // need to check its state before writing).
+ // Can be thrown while closing in, out or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "mkdir: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -403,6 +476,11 @@ namespace build2
uint8_t r (1);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "rm");
+ };
+
try
{
in.close ();
@@ -432,10 +510,7 @@ namespace build2
// Remove entries.
//
if (i == args.end () && !force)
- {
- cerr << "rm: missing file" << endl;
- throw failed ();
- }
+ error () << "missing file";
const dir_path& wd (sp.wd_path);
const dir_path& rwd (sp.root->wd_path);
@@ -445,11 +520,8 @@ namespace build2
path p (parse_path (*i, wd));
if (!p.sub (rwd) && !force)
- {
- cerr << "rm: '" << p << "' is out of working directory '" << rwd
- << "'" << endl;
- throw failed ();
- }
+ error () << "'" << p << "' is out of working directory '" << rwd
+ << "'";
try
{
@@ -458,17 +530,11 @@ namespace build2
if (dir_exists (d))
{
if (!dir)
- {
- cerr << "rm: '" << p << "' is a directory" << endl;
- throw failed ();
- }
+ error () << "'" << p << "' is a directory";
if (wd.sub (d))
- {
- cerr << "rm: '" << p << "' contains test working directory '"
- << wd << "'" << endl;
- throw failed ();
- }
+ error () << "'" << p << "' contains test working directory '"
+ << wd << "'";
// The call can result in rmdir_status::not_exist. That's not
// very likelly but there is also nothing bad about it.
@@ -480,8 +546,7 @@ namespace build2
}
catch (const system_error& e)
{
- cerr << "rm: unable to remove '" << p << "': " << e << endl;
- throw failed ();
+ error () << "unable to remove '" << p << "': " << e;
}
}
@@ -489,15 +554,13 @@ namespace build2
}
catch (const invalid_path& e)
{
- cerr << "rm: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing in, out or writing to cerr (that's why
- // need to check its state before writing).
+ // Can be thrown while closing in, out or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "rm: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -533,6 +596,11 @@ namespace build2
uint8_t r (1);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "rmdir");
+ };
+
try
{
in.close ();
@@ -559,10 +627,7 @@ namespace build2
// Remove directories.
//
if (i == args.end () && !force)
- {
- cerr << "rmdir: missing directory" << endl;
- throw failed ();
- }
+ error () << "missing directory";
const dir_path& wd (sp.wd_path);
const dir_path& rwd (sp.root->wd_path);
@@ -572,18 +637,12 @@ namespace build2
dir_path p (path_cast<dir_path> (parse_path (*i, wd)));
if (wd.sub (p))
- {
- cerr << "rmdir: '" << p << "' contains test working directory '"
- << wd << "'" << endl;
- throw failed ();
- }
+ error () << "'" << p << "' contains test working directory '"
+ << wd << "'";
if (!p.sub (rwd) && !force)
- {
- cerr << "rmdir: '" << p << "' is out of working directory '"
- << rwd << "'" << endl;
- throw failed ();
- }
+ error () << "'" << p << "' is out of working directory '"
+ << rwd << "'";
try
{
@@ -596,8 +655,7 @@ namespace build2
}
catch (const system_error& e)
{
- cerr << "rmdir: unable to remove '" << p << "': " << e << endl;
- throw failed ();
+ error () << "unable to remove '" << p << "': " << e;
}
}
@@ -605,15 +663,259 @@ namespace build2
}
catch (const invalid_path& e)
{
- cerr << "rmdir: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
+ }
+ // Can be thrown while closing in, out or writing to cerr.
+ //
+ catch (const io_error& e)
+ {
+ error (false) << e;
+ }
+ catch (const failed&)
+ {
+ // Diagnostics has already been issued.
+ }
+
+ cerr.close ();
+ return r;
+ }
+ catch (const std::exception&)
+ {
+ return 1;
+ }
+
+ // sed [-n] -e <script> [<file>]
+ //
+ // Read text from file, make editing changes according to script, and
+ // write the result to stdout. If file is not specified or is '-', read
+ // from stdin.
+ //
+ // -n
+ // Suppress automatic printing of the pattern space at the end of the
+ // script execution.
+ //
+ // -e <script>
+ // Editing commands to be executed (required).
+ //
+ // Currently, only single-command scripts using the following editing
+ // commands are supported.
+ //
+ // s/<regex>/<replacement>/<flags>
+ // The supported flags are 'i' (case-insensitive search), 'g'
+ // (substitute globally), 'p' (print if a replacement was made). If
+ // regex starts with ^, then it only matches at the beginning of the
+ // pattern space. Similarly, if it ends with $, then it only matches
+ // at the end of the pattern space.
+ //
+ // In replacement, besides the standard ECMAScript escape sequences a
+ // subset of Perl-specific ones is recognized.
+ //
+ // For more details read the builtin description in 'The build2
+ // Testscript Language'.
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ sed (scope& sp,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+ uint8_t r (1);
+ ofdstream cerr (move (err));
+
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "sed");
+ };
+
+ try
+ {
+ // Do not throw when failbit is set (getline() failed to extract any
+ // character).
+ //
+ ifdstream cin (move (in), ifdstream::badbit);
+ ofdstream cout (move (out));
+
+ auto i (args.begin ());
+ auto e (args.end ());
+
+ // Process options.
+ //
+ bool auto_prn (true);
+
+ struct substitute
+ {
+ string regex;
+ string replacement;
+ bool icase = false;
+ bool global = false;
+ bool print = false;
+ };
+ optional<substitute> subst;
+
+ for (; i != e; ++i)
+ {
+ if (*i == "-n")
+ auto_prn = false;
+ else if (*i == "-e")
+ {
+ // Only a single script is supported.
+ //
+ if (subst)
+ error () << "multiple scripts";
+
+ // If option has no value then bail out and report.
+ //
+ if (++i == e)
+ break;
+
+ const string& v (*i);
+ if (v.empty ())
+ error () << "empty script";
+
+ if (v[0] != 's')
+ error () << "only 's' command supported";
+
+ // Parse the substitute command.
+ //
+ if (v.size () < 2)
+ error () << "no delimiter for 's' command";
+
+ char delim (v[1]);
+ if (delim == '\\' || delim == '\n')
+ error () << "invalid delimiter for 's' command";
+
+ size_t p (v.find (delim, 2));
+ if (p == string::npos)
+ error () << "unterminated 's' command regex";
+
+ subst = substitute ();
+ subst->regex.assign (v, 2, p - 2);
+
+ // Empty regex matches nothing, so not of much use.
+ //
+ if (subst->regex.empty ())
+ error () << "empty regex in 's' command";
+
+ size_t b (p + 1);
+ p = v.find (delim, b);
+ if (p == string::npos)
+ error () << "unterminated 's' command replacement";
+
+ subst->replacement.assign (v, b, p - b);
+
+ // Parse the substitute command flags.
+ //
+ char c;
+ for (++p; (c = v[p]) != '\0'; ++p)
+ {
+ switch (c)
+ {
+ case 'i': subst->icase = true; break;
+ case 'g': subst->global = true; break;
+ case 'p': subst->print = true; break;
+ default:
+ {
+ error () << "invalid 's' command flag '" << c << "'";
+ }
+ }
+ }
+ }
+ else
+ {
+ if (*i == "--")
+ ++i;
+
+ break;
+ }
+ }
+
+ if (!subst)
+ error () << "missing script";
+
+ // Path of a file to edit. An empty path represents stdin.
+ //
+ path p;
+ if (i != e)
+ {
+ if (*i != "-")
+ p = parse_path (*i, sp.wd_path);
+
+ ++i;
+ }
+
+ if (i != e)
+ error () << "unexpected argument";
+
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ regex re (subst->regex,
+ subst->icase ? regex::icase : regex::ECMAScript);
+
+ // Edit a file or STDIN.
+ //
+ try
+ {
+ // Open a file if specified.
+ //
+ if (!p.empty ())
+ {
+ cin.close (); // Flush and close.
+ cin.open (p);
+ }
+
+ // Read until failbit is set (throw on badbit).
+ //
+ string s;
+ while (getline (cin, s))
+ {
+ auto r (regex_replace_ex (s,
+ re,
+ subst->replacement,
+ subst->global
+ ? regex_constants::format_default
+ : regex_constants::format_first_only));
+
+ // Add newline regardless whether the source line is newline-
+ // terminated or not (in accordance with POSIX).
+ //
+ if (auto_prn || (r.second && subst->print))
+ cout << r.first << '\n';
+ }
+
+ cin.close ();
+ cout.close ();
+ r = 0;
+ }
+ catch (const io_error& e)
+ {
+ error_record d (error ());
+ d << "unable to edit ";
+
+ if (p.empty ())
+ d << "stdin";
+ else
+ d << "'" << p << "'";
+
+ d << ": " << e;
+ }
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful (no space: adds ": ").
+ //
+ error (false) << "invalid regex" << e;
+ }
+ catch (const invalid_path& e)
+ {
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing in, out or writing to cerr (that's why
- // need to check its state before writing).
+ // Can be thrown while creating cin, cout or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "rmdir: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -654,30 +956,26 @@ namespace build2
uint8_t r (2);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "test");
+ };
+
try
{
in.close ();
out.close ();
if (args.size () < 2)
- {
- cerr << "test: missing path" << endl;
- throw failed ();
- }
+ error () << "missing path";
bool file (args[0] == "-f");
if (!file && args[0] != "-d")
- {
- cerr << "test: invalid option" << endl;
- throw failed ();
- }
+ error () << "invalid option";
if (args.size () > 2)
- {
- cerr << "test: unexpected argument" << endl;
- throw failed ();
- }
+ error () << "unexpected argument";
path p (parse_path (args[1], sp.wd_path));
@@ -687,21 +985,18 @@ namespace build2
}
catch (const system_error& e)
{
- cerr << "test: cannot test '" << p << "': " << e << endl;
- throw failed ();
+ error () << "cannot test '" << p << "': " << e;
}
}
catch (const invalid_path& e)
{
- cerr << "test: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing in, out or writing to cerr (that's why
- // need to check its state before writing).
+ // Can be thrown while closing in, out or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "test: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -740,16 +1035,18 @@ namespace build2
uint8_t r (1);
ofdstream cerr (move (err));
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "touch");
+ };
+
try
{
in.close ();
out.close ();
if (args.empty ())
- {
- cerr << "touch: missing file" << endl;
- throw failed ();
- }
+ error () << "missing file";
// Create files.
//
@@ -783,25 +1080,17 @@ namespace build2
}
catch (const io_error& e)
{
- cerr << "touch: cannot create file '" << p << "': " << e
- << endl;
- throw failed ();
+ error () << "cannot create file '" << p << "': " << e;
}
sp.clean ({cleanup_type::always, p}, true);
}
else
- {
- cerr << "touch: '" << p << "' exists and is not a file"
- << endl;
- throw failed ();
- }
+ error () << "'" << p << "' exists and is not a file";
}
catch (const system_error& e)
{
- cerr << "touch: cannot create/update '" << p << "': " << e
- << endl;
- throw failed ();
+ error () << "cannot create/update '" << p << "': " << e;
}
}
@@ -809,15 +1098,13 @@ namespace build2
}
catch (const invalid_path& e)
{
- cerr << "touch: invalid path '" << e.path << "'" << endl;
+ error (false) << "invalid path '" << e.path << "'";
}
- // Can be thrown while closing in, out or writing to cerr (that's why
- // need to check its state before writing).
+ // Can be thrown while closing in, out or writing to cerr.
//
catch (const io_error& e)
{
- if (cerr.good ())
- cerr << "touch: " << e << endl;
+ error (false) << e;
}
catch (const failed&)
{
@@ -896,6 +1183,7 @@ namespace build2
{"mkdir", &sync_impl<&mkdir>},
{"rm", &sync_impl<&rm>},
{"rmdir", &sync_impl<&rmdir>},
+ {"sed", &async_impl<&sed>},
{"test", &sync_impl<&test>},
{"touch", &sync_impl<&touch>},
{"true", &true_}
diff --git a/build2/test/script/regex b/build2/test/script/regex
index b25c1f1..1170b99 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -8,8 +8,9 @@
#include <list>
#include <regex>
#include <locale>
+#include <string> // basic_string
#include <cstdint> // uintptr_t
-#include <type_traits> // make_unsigned, is_unsigned
+#include <type_traits> // make_unsigned, enable_if, is_*
#include <unordered_set>
#include <build2/types>
@@ -25,7 +26,7 @@ namespace build2
{
using char_string = std::basic_string<char>;
- enum class char_flags: std::uint16_t
+ enum class char_flags: uint16_t
{
icase = 0x1, // Case-insensitive match.
idot = 0x2, // Invert '.' escaping.
diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx
index dcfaec9..751daec 100644
--- a/build2/test/script/runner.cxx
+++ b/build2/test/script/runner.cxx
@@ -5,12 +5,11 @@
#include <build2/test/script/runner>
#include <set>
-#include <ios> // streamsize
-#include <cstring> // strstr()
-#include <sstream>
+#include <ios> // streamsize
#include <butl/fdstream> // fdopen_mode, fdnull(), fddup()
+#include <build2/regex>
#include <build2/filesystem>
#include <build2/test/common>
@@ -21,39 +20,6 @@
using namespace std;
using namespace butl;
-namespace std
-{
- // Print regex error description but only if it is meaningful (this is also
- // why we have to print leading colon here).
- //
- // Currently libstdc++ just returns the name of the exception (bug #67361).
- // So we check that the description contains at least one space character.
- //
- // While VC's description is meaningful, it has an undesired prefix that
- // resembles the following: 'regex_error(error_badrepeat): '. So we skip it.
- //
- static ostream&
- operator<< (ostream& o, const regex_error& e)
- {
- const char* d (e.what ());
-
-#if defined(_MSC_VER) && _MSC_VER <= 1910
- const char* rd (strstr (d, "): "));
- if (rd != nullptr)
- d = rd + 3;
-#endif
-
- ostringstream os;
- os << runtime_error (d); // Sanitize the description.
-
- string s (os.str ());
- if (s.find (' ') != string::npos)
- o << ": " << s;
-
- return o;
- }
-}
-
namespace build2
{
namespace test
diff --git a/tests/test/script/builtin/buildfile b/tests/test/script/builtin/buildfile
index e5bac10..2a57c54 100644
--- a/tests/test/script/builtin/buildfile
+++ b/tests/test/script/builtin/buildfile
@@ -2,4 +2,4 @@
# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-./: test{cat echo mkdir rm rmdir test touch} $b
+./: test{cat echo mkdir rm rmdir sed test touch} $b
diff --git a/tests/test/script/builtin/sed.test b/tests/test/script/builtin/sed.test
new file mode 100644
index 0000000..ef99539
--- /dev/null
+++ b/tests/test/script/builtin/sed.test
@@ -0,0 +1,312 @@
+# file : tests/test/script/builtin/sed.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+.include ../common.test
+
+: arg
+:
+{
+ : auto-prn
+ :
+ {
+ $c <"sed -n -e 's/fox/bar/' <'foo' " && $b : on
+ $c <"sed -e 's/fox/bar/' <'foo' >'foo'" && $b : off
+ }
+
+ : script
+ :
+ {
+ : missed
+ :
+ $c <'sed' && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: missing script
+ EOE
+
+ : missed-val
+ :
+ $c <'sed -e' && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: missing script
+ EOE
+
+ : empty
+ :
+ $c <"sed -e ''" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: empty script
+ EOE
+
+ : multiple
+ :
+ $c <"sed -e 's/a//' -e 's/a//'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: multiple scripts
+ EOE
+
+ : invalid
+ :
+ $c <"sed -e 'z'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: only 's' command supported
+ EOE
+ }
+
+ : file
+ :
+ {
+ : exist
+ :
+ $c <<EOI && $b
+ cat <'foo' >=f;
+ sed -e 's/foo/bar/' f >'bar'
+ EOI
+
+ : none
+ :
+ $c <<EOI && $b
+ sed -e 's/foo/bar/' <'foo' >'bar'
+ EOI
+
+ : dash
+ :
+ $c <<EOI && $b
+ sed -e 's/foo/bar/' - <'foo' >'bar'
+ EOI
+
+ : not-exist
+ :
+ $c <"sed -e 's/foo/bar/' f" && $b 2>>/~%EOE% != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ %sed: unable to edit '.+/1/f': .+%
+ EOE
+
+ : empty
+ :
+ $c <"sed -e 's/foo/bar/' ''" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: invalid path ''
+ EOE
+ }
+
+ : unexpected
+ :
+ $c <"sed -e 's/a//' a b" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: unexpected argument
+ EOE
+
+}
+
+: command
+:
+{
+ : subst
+ :
+ {
+ : parsing
+ :
+ {
+ : delim
+ :
+ {
+ : none
+ :
+ $c <"sed -e 's'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: no delimiter for 's' command
+ EOE
+
+ : invalid
+ :
+ $c <"sed -e 's\\'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: invalid delimiter for 's' command
+ EOE
+ }
+
+ : regex
+ :
+ {
+ : unterminated
+ :
+ $c <"sed -e 's/foo'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: unterminated 's' command regex
+ EOE
+
+ : empty
+ :
+ $c <"sed -e 's///'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: empty regex in 's' command
+ EOE
+
+ : invalid
+ :
+ : Note that old versions of libc++ (for example 1.1) do not detect some
+ : regex errors. For example '*' is parsed successfully.
+ :
+ $c <"sed -e 's/foo[/bar/'" && $b 2>>/~%EOE% != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ %sed: invalid regex.*%
+ EOE
+ }
+
+ : unterminated-replacement
+ :
+ $c <"sed -e 's/foo/bar'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: unterminated 's' command replacement
+ EOE
+
+ : invalid-flags
+ :
+ $c <"sed -e 's/foo/bar/a'" && $b 2>>/EOE != 0
+ testscript:1:1: error: sed exit status 1 != 0
+ info: stderr: test/1/stderr
+ sed: invalid 's' command flag 'a'
+ EOE
+ }
+
+ : exec
+ :
+ {
+ : flags
+ :
+ {
+ : global
+ :
+ {
+ $c <"sed -e 's/o/a/g' <'foo' >'faa'" && $b : on
+ $c <"sed -e 's/o/a/' <'foo' >'fao'" && $b : off
+ }
+
+ : icase
+ :
+ {
+ $c <"sed -e 's/O/a/i' <'foo' >'fao'" && $b : on
+ $c <"sed -e 's/O/a/' <'foo' >'foo'" && $b : off
+ }
+
+ : print
+ :
+ {
+ $c <"sed -n -e 's/o/a/p' <'foo' >'fao'" && $b : on-match
+ $c <"sed -n -e 's/o/a/' <'foo' " && $b : off-match
+ $c <"sed -n -e 's/u/a/p' <'foo' " && $b : on-no-match
+ }
+ }
+
+ : search
+ {
+ : anchor
+ :
+ {
+ $c <"sed -n -e 's/^o/a/gp' <'oof' >'aof'" && $b : begin
+ $c <"sed -n -e 's/o\$/a/gp' <'foo' >'foa'" && $b : end
+ }
+
+ : match
+ : Match corner cases
+ :
+ {
+ $c <"sed -n -e 's/a/b/p' <'a' >'b' " && $b : full
+ $c <"sed -n -e 's/a/b/p' <'ac' >'bc' " && $b : left
+ $c <"sed -n -e 's/a/b/p' <'ca' >'cb' " && $b : right
+ $c <"sed -n -e 's/a/b/pg' <'xaax' >'xbbx'" && $b : adjacent
+ }
+ }
+
+ : replacement
+ :
+ {
+ : ecma-escape
+ :
+ {
+ $c <"sed <'xay' -e 's/a/\$b/' >'x\$by'" && $b : none
+ $c <"sed <'xay' -e 's/a/\$/' >'x\$y' " && $b : none-term
+ $c <"sed <'xay' -e 's/a/\$\$/' >'x\$y' " && $b : self
+ $c <"sed <'xay' -e 's/a/b\$&c/' >'xbacy'" && $b : match
+ $c <"sed <'xay' -e 's/a/b\$`c/' >'xbxcy'" && $b : match-precede
+ $c <"sed <'xay' -e \"s/a/b\\\$'c/\" >'xbycy'" && $b : match-follow
+
+ : capture
+ :
+ $c <<EOI && $b
+ sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/$1$10/' >'aj'
+ EOI
+ }
+
+ : perl-escape
+ :
+ {
+ $c <"sed <'xay' -e 's/a/\\b/' >'xby' " && $b : none
+ $c <"sed <'xay' -e 's/a/\\/' >'xy' " && $b : none-term
+ $c <"sed <'xay' -e 's/a/\\\\/' >'x\\y'" && $b : self
+
+ : capture
+ :
+ $c <<EOI && $b
+ sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/\1\10/' >'aa0'
+ EOI
+
+ : upper
+ :
+ {
+ $c <"sed <'xay' -e 's/a/\\U/' >'xy' " && $b : none
+ $c <"sed <'xay' -e 's/a/\\Uvz/' >'xVZy'" && $b : repl
+ $c <"sed <'xay' -e 's/a/\\Uv\\Ez/' >'xVzy'" && $b : end
+ $c <"sed <'aa' -e 's/a/v\\Uz/g' >'vZvZ'" && $b : locality
+ $c <"sed <'xay' -e 's/\(a\)/\\U\\1/' >'xAy' " && $b : capt
+ $c <"sed <'x-y' -e 's/\(a?\)-/\\U\\1z/' >'xZy' " && $b : capt-empty
+ $c <"sed <'xay' -e 's/a/\\uvz/' >'xVzy'" && $b : once
+ }
+
+ : lower
+ :
+ {
+ $c <"sed <'xay' -e 's/a/\\lVZ/' >'xvZy'" && $b : once
+ }
+ }
+ }
+
+ $c <"sed -e 's/a//' <:'b' >'b'" && $b : no-newline
+ $c <"sed -e 's/a//' <:'' " && $b : empty-stdin
+
+ : empty-file
+ :
+ $c <<EOI && $b
+ touch f;
+ sed -e 's/a//' f
+ EOI
+ }
+ }
+}
+
+: big
+:
+: Sed a big file (about 3MB) to test that the builtin is asynchronous.
+:
+{
+ s="------------------------------------------------------------------------"
+ s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+ s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+ s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s"
+ $c <"cat <'$s' | sed -e 's/^x//' >'$s'" && $b
+}