diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2017-01-31 22:08:38 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2017-02-03 23:57:27 +0300 |
commit | 044e2e1c1460fb060f677a366144b98905522754 (patch) | |
tree | 4cdd67e9bca323d74cf5cc514444019a70b4de95 | |
parent | 31a4169c67045cfe37eed138b537930e259db1e9 (diff) |
Add sed builtin
-rw-r--r-- | build2/b.cxx | 16 | ||||
-rw-r--r-- | build2/buildfile | 1 | ||||
-rw-r--r-- | build2/regex | 57 | ||||
-rw-r--r-- | build2/regex.cxx | 42 | ||||
-rw-r--r-- | build2/regex.txx | 215 | ||||
-rw-r--r-- | build2/test/script/builtin.cxx | 506 | ||||
-rw-r--r-- | build2/test/script/regex | 5 | ||||
-rw-r--r-- | build2/test/script/runner.cxx | 38 | ||||
-rw-r--r-- | tests/test/script/builtin/buildfile | 2 | ||||
-rw-r--r-- | tests/test/script/builtin/sed.test | 312 |
10 files changed, 1045 insertions, 149 deletions
diff --git a/build2/b.cxx b/build2/b.cxx index e576435..b06459b 100644 --- a/build2/b.cxx +++ b/build2/b.cxx @@ -2,7 +2,10 @@ // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file -#include <string.h> // strerror() +#ifndef _WIN32 +# include <signal.h> // signal() +#endif + #include <stdlib.h> // getenv() _putenv()(_WIN32) #include <sstream> @@ -82,6 +85,17 @@ main (int argc, char* argv[]) { tracer trace ("main"); + // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if + // the pipe reading end is closed. Note that by default this signal + // terminates a process. Also note that there is no way to disable this + // behavior on a file descriptor basis or for the write() function call. + // +#ifndef _WIN32 + if (signal (SIGPIPE, SIG_IGN) == SIG_ERR) + fail << "unable to ignore broken pipe (SIGPIPE) signal: " + << system_error (errno, system_category ()); // Sanitize. +#endif + // Parse the command line. We want to be able to specify options, vars, // and buildspecs in any order (it is really handy to just add -v at the // end of the command line). 
diff --git a/build2/buildfile b/build2/buildfile index 84e2f82..1ee7063 100644 --- a/build2/buildfile +++ b/build2/buildfile @@ -26,6 +26,7 @@ exe{b}: \ {hxx cxx}{ operation } \ {hxx cxx}{ parser } \ {hxx cxx}{ prerequisite } \ + {hxx txx cxx}{ regex } \ {hxx cxx}{ rule } \ {hxx }{ rule-map } \ {hxx txx cxx}{ scheduler } \ diff --git a/build2/regex b/build2/regex new file mode 100644 index 0000000..dc6dc96 --- /dev/null +++ b/build2/regex @@ -0,0 +1,57 @@ +// file : build2/regex -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUILD2_REGEX +#define BUILD2_REGEX + +#include <regex> +#include <iosfwd> +#include <string> // basic_string + +#include <build2/types> +#include <build2/utility> + +namespace build2 +{ + // Like std::regex_replace() but extends the standard ECMA-262 + // substitution escape sequences with a subset of Perl sequences: + // + // \\, \u, \l, \U, \L, \E, \1, ..., \9 + // + // Also return the resulting string as well as whether the search + // succeeded. + // + // Notes and limitations: + // + // - The only valid regex_constants flags are match_default, + // format_first_only (format_no_copy can easily be supported). + // + // - If backslash doesn't start any of the listed sequences then it is + // silently dropped and the following character is copied as is. + // + // - The character case conversion is performed according to the global + // C++ locale (which, unless changed, is the same as C locale and + // both default to the POSIX locale aka "C"). + // + template <typename C> + pair<std::basic_string<C>, bool> + regex_replace_ex (const std::basic_string<C>&, + const std::basic_regex<C>&, + const std::basic_string<C>& fmt, + std::regex_constants::match_flag_type = + std::regex_constants::match_default); +} + +namespace std +{ + // Print regex error description but only if it is meaningful (this is also + // why we have to print leading colon). 
+ // + ostream& + operator<< (ostream&, const regex_error&); +} + +#include <build2/regex.txx> + +#endif // BUILD2_REGEX diff --git a/build2/regex.cxx b/build2/regex.cxx new file mode 100644 index 0000000..40347b5 --- /dev/null +++ b/build2/regex.cxx @@ -0,0 +1,42 @@ +// file : build2/regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <build2/regex> + +#if defined(_MSC_VER) && _MSC_VER <= 1910 +# include <cstring> // strstr() +#endif + +#include <ostream> +#include <sstream> + +namespace std +{ + // Currently libstdc++ just returns the name of the exception (bug #67361). + // So we check that the description contains at least one space character. + // + // While VC's description is meaningful, it has an undesired prefix that + // resembles the following: 'regex_error(error_badrepeat): '. So we skip it. + // + ostream& + operator<< (ostream& o, const regex_error& e) + { + const char* d (e.what ()); + +#if defined(_MSC_VER) && _MSC_VER <= 1910 + const char* rd (strstr (d, "): ")); + if (rd != nullptr) + d = rd + 3; +#endif + + ostringstream os; + os << runtime_error (d); // Sanitize the description. 
+ + string s (os.str ()); + if (s.find (' ') != string::npos) + o << ": " << s; + + return o; + } +} diff --git a/build2/regex.txx b/build2/regex.txx new file mode 100644 index 0000000..1325de9 --- /dev/null +++ b/build2/regex.txx @@ -0,0 +1,215 @@ +// file : build2/regex.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + template <typename C> + pair<std::basic_string<C>, bool> + regex_replace_ex (const std::basic_string<C>& s, + const std::basic_regex<C>& re, + const std::basic_string<C>& fmt, + std::regex_constants::match_flag_type flags) + { + using namespace std; + + using string_type = basic_string<C>; + using str_it = typename string_type::const_iterator; + using regex_it = regex_iterator<str_it>; + + bool first_only ((flags & std::regex_constants::format_first_only) == + std::regex_constants::format_first_only); + + locale cl; // Copy of the global C++ locale. + string_type r; + + // Beginning of the last unmatched substring. + // + str_it ub (s.begin ()); + + regex_it b (s.begin (), s.end (), re, flags); + regex_it e; + bool match (b != e); + + for (regex_it i (b); i != e; ++i) + { + const match_results<str_it>& m (*i); + + // Copy the preceding unmatched substring, save the beginning of the + // one that follows. + // + r.append (ub, m.prefix ().second); + ub = m.suffix ().first; + + if (first_only && i != b) + r.append (m[0].first, m[0].second); // Append matched substring. + else + { + // The standard implementation calls m.format() here. We perform our + // own formatting. + // + // Note that we are using char type literals with the assumption that + // being ASCII characters they will be properly "widened" to the + // corresponding literals of the C template parameter type. + // + auto digit = [] (C c) -> int + { + return c >= '0' && c <= '9' ? 
c - '0' : -1; + }; + + enum class case_conv {none, upper, lower, upper_once, lower_once} + mode (case_conv::none); + + auto conv_chr = [&mode, &cl] (C c) -> C + { + switch (mode) + { + case case_conv::upper_once: mode = case_conv::none; + case case_conv::upper: c = toupper (c, cl); break; + case case_conv::lower_once: mode = case_conv::none; + case case_conv::lower: c = tolower (c, cl); break; + case case_conv::none: break; + } + return c; + }; + + auto append_chr = [&r, &conv_chr] (C c) + { + r.push_back (conv_chr (c)); + }; + + auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e) + { + // Optimize for the common case. + // + if (mode == case_conv::none) + r.append (b, e); + else + { + for (str_it i (b); i != e; ++i) + r.push_back (conv_chr (*i)); + } + }; + + size_t n (fmt.size ()); + for (size_t i (0); i < n; ++i) + { + C c (fmt[i]); + + switch (c) + { + case '$': + { + // Check if this is a $-based escape sequence. Interpret it + // accordingly if that's the case, treat '$' as a regular + // character otherwise. + // + c = fmt[++i]; // '\0' if last. + + switch (c) + { + case '$': append_chr (c); break; + case '&': append_str (m[0].first, m[0].second); break; + case '`': + { + append_str (m.prefix ().first, m.prefix ().second); + break; + } + case '\'': + { + append_str (m.suffix ().first, m.suffix ().second); + break; + } + default: + { + // Check if this is a sub-expression 1-based index ($n or + // $nn). Append the matching substring if that's the case. + // Treat '$' as a regular character otherwise. Index greater + // than the sub-expression count is silently ignored. + // + int si (digit (c)); + if (si >= 0) + { + int d; + if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last. + { + si = si * 10 + d; + ++i; + } + } + + if (si > 0) + { + // m[0] refers to the matched substring. 
+ // + if (static_cast<size_t> (si) < m.size ()) + append_str (m[si].first, m[si].second); + } + else + { + // Not a $-based escape sequence so treat '$' as a + // regular character. + // + --i; + append_chr ('$'); + } + + break; + } + } + + break; + } + case '\\': + { + c = fmt[++i]; // '\0' if last. + + switch (c) + { + case '\\': append_chr (c); break; + + case 'u': mode = case_conv::upper_once; break; + case 'l': mode = case_conv::lower_once; break; + case 'U': mode = case_conv::upper; break; + case 'L': mode = case_conv::lower; break; + case 'E': mode = case_conv::none; break; + default: + { + // Check if this is a sub-expression 1-based index. Append + // the matching substring if that's the case, Skip '\\' + // otherwise. Index greater than the sub-expression count is + // silently ignored. + // + int si (digit (c)); + if (si > 0) + { + // m[0] refers to the matched substring. + // + if (static_cast<size_t> (si) < m.size ()) + append_str (m[si].first, m[si].second); + } + else + --i; + + break; + } + } + + break; + } + default: + { + // Append a regular character. + // + append_chr (c); + break; + } + } + } + } + } + + r.append (ub, s.end ()); // Append the rightmost non-matched substring. 
+ return make_pair (move (r), match); + } +} diff --git a/build2/test/script/builtin.cxx b/build2/test/script/builtin.cxx index 008ac32..3957adb 100644 --- a/build2/test/script/builtin.cxx +++ b/build2/test/script/builtin.cxx @@ -10,12 +10,17 @@ # include <sys/utime.h> #endif +#include <locale> #include <thread> +#include <ostream> +#include <sstream> #include <butl/path-io> // use default operator<< implementation #include <butl/fdstream> // fdopen_mode, fdstream_mode #include <butl/filesystem> // mkdir_status +#include <build2/regex> + #include <build2/test/script/script> // Strictly speaking a builtin which reads/writes from/to standard streams @@ -51,6 +56,74 @@ namespace build2 // struct failed {}; + // Accumulate an error message, print it atomically in dtor to the + // provided stream and throw failed afterwards if requested. Prefixes + // the message with the builtin name. + // + // Move constructible-only, not assignable (based on diag_record). + // + class error_record + { + public: + template <typename T> + friend const error_record& + operator<< (const error_record& r, const T& x) + { + r.ss_ << x; + return r; + } + + error_record (ostream& o, bool fail, const char* name) + : os_ (o), fail_ (fail), empty_ (false) + { + ss_ << name << ": "; + } + + // Older versions of libstdc++ don't have the ostringstream move + // support. Luckily, GCC doesn't seem to be actually needing move due + // to copy/move elision. + // +#ifdef __GLIBCXX__ + error_record (error_record&&); +#else + error_record (error_record&& r) + : os_ (r.os_), + ss_ (move (r.ss_)), + fail_ (r.fail_), + empty_ (r.empty_) + { + r.empty_ = true; + } +#endif + + ~error_record () noexcept (false) + { + if (!empty_) + { + // The output stream can be in a bad state (for example as a + // result of unsuccessful attempt to report a previous error), so + // we check it. 
+ // + if (os_.good ()) + { + ss_.put ('\n'); + os_ << ss_.str (); + os_.flush (); + } + + if (fail_) + throw failed (); + } + } + + private: + ostream& os_; + mutable ostringstream ss_; + + bool fail_; + bool empty_; + }; + // Parse and normalize a path. Also, unless it is already absolute, make // the path absolute using the specified directory. Throw invalid_path // if the path is empty, and on parsing and normalization failures. @@ -103,6 +176,11 @@ namespace build2 uint8_t r (1); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "cat"); + }; + try { ifdstream cin (move (in), fdstream_mode::binary); @@ -154,15 +232,15 @@ namespace build2 } catch (const io_error& e) { - cerr << "cat: unable to print "; + error_record d (error ()); + d << "unable to print "; if (p.empty ()) - cerr << "stdin"; + d << "stdin"; else - cerr << "'" << p << "'"; + d << "'" << p << "'"; - cerr << ": " << e << endl; - throw failed (); + d << ": " << e; } cin.close (); @@ -171,15 +249,13 @@ namespace build2 } catch (const invalid_path& e) { - cerr << "cat: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing cin, cout or writing to cerr (that's - // why need to check its state before writing). + // Can be thrown while creating/closing cin, cout or writing to cerr. // catch (const io_error& e) { - if (cerr.good ()) - cerr << "cat: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -215,8 +291,7 @@ namespace build2 for (auto b (args.begin ()), i (b), e (args.end ()); i != e; ++i) cout << (i != b ? " " : "") << *i; - cout << endl; - + cout << '\n'; cout.close (); r = 0; } @@ -291,6 +366,11 @@ namespace build2 uint8_t r (1); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "mkdir"); + }; + try { in.close (); @@ -317,10 +397,7 @@ namespace build2 // Create directories. 
// if (i == args.end ()) - { - cerr << "mkdir: missing directory" << endl; - throw failed (); - } + error () << "missing directory"; for (; i != args.end (); ++i) { @@ -337,9 +414,7 @@ namespace build2 } catch (const system_error& e) { - cerr << "mkdir: unable to create directory '" << p << "': " - << e << endl; - throw failed (); + error () << "unable to create directory '" << p << "': " << e; } } @@ -347,15 +422,13 @@ namespace build2 } catch (const invalid_path& e) { - cerr << "mkdir: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing in, out or writing to cerr (that's why - // need to check its state before writing). + // Can be thrown while closing in, out or writing to cerr. // catch (const io_error& e) { - if (cerr.good ()) - cerr << "mkdir: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -403,6 +476,11 @@ namespace build2 uint8_t r (1); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "rm"); + }; + try { in.close (); @@ -432,10 +510,7 @@ namespace build2 // Remove entries. 
+ // if (i == args.end () && !force) - { - cerr << "rm: missing file" << endl; - throw failed (); - } + error () << "missing file"; const dir_path& wd (sp.wd_path); const dir_path& rwd (sp.root->wd_path); @@ -445,11 +520,8 @@ namespace build2 path p (parse_path (*i, wd)); if (!p.sub (rwd) && !force) - { - cerr << "rm: '" << p << "' is out of working directory '" << rwd - << "'" << endl; - throw failed (); - } + error () << "'" << p << "' is out of working directory '" << rwd + << "'"; try { @@ -458,17 +530,11 @@ namespace build2 if (dir_exists (d)) { if (!dir) - { - cerr << "rm: '" << p << "' is a directory" << endl; - throw failed (); - } + error () << "'" << p << "' is a directory"; if (wd.sub (d)) - { - cerr << "rm: '" << p << "' contains test working directory '" - << wd << "'" << endl; - throw failed (); - } + error () << "'" << p << "' contains test working directory '" + << wd << "'"; // The call can result in rmdir_status::not_exist. That's not // very likely but there is also nothing bad about it. @@ -480,8 +546,7 @@ namespace build2 } catch (const system_error& e) { - cerr << "rm: unable to remove '" << p << "': " << e << endl; - throw failed (); + error () << "unable to remove '" << p << "': " << e; } } @@ -489,15 +554,13 @@ namespace build2 } catch (const invalid_path& e) { - cerr << "rm: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing in, out or writing to cerr (that's why - // need to check its state before writing). + // Can be thrown while closing in, out or writing to cerr. 
+ // catch (const io_error& e) { - if (cerr.good ()) - cerr << "rm: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -533,6 +596,11 @@ namespace build2 uint8_t r (1); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "rmdir"); + }; + try { in.close (); @@ -559,10 +627,7 @@ namespace build2 // Remove directories. 
// if (i == args.end () && !force) - { - cerr << "rmdir: missing directory" << endl; - throw failed (); - } + error () << "missing directory"; const dir_path& wd (sp.wd_path); const dir_path& rwd (sp.root->wd_path); @@ -572,18 +637,12 @@ namespace build2 dir_path p (path_cast<dir_path> (parse_path (*i, wd))); if (wd.sub (p)) - { - cerr << "rmdir: '" << p << "' contains test working directory '" - << wd << "'" << endl; - throw failed (); - } + error () << "'" << p << "' contains test working directory '" + << wd << "'"; if (!p.sub (rwd) && !force) - { - cerr << "rmdir: '" << p << "' is out of working directory '" - << rwd << "'" << endl; - throw failed (); - } + error () << "'" << p << "' is out of working directory '" + << rwd << "'"; try { @@ -596,8 +655,7 @@ namespace build2 } catch (const system_error& e) { - cerr << "rmdir: unable to remove '" << p << "': " << e << endl; - throw failed (); + error () << "unable to remove '" << p << "': " << e; } } @@ -605,15 +663,259 @@ namespace build2 } catch (const invalid_path& e) { - cerr << "rmdir: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // sed [-n] -e <script> [<file>] + // + // Read text from file, make editing changes according to script, and + // write the result to stdout. If file is not specified or is '-', read + // from stdin. + // + // -n + // Suppress automatic printing of the pattern space at the end of the + // script execution. + // + // -e <script> + // Editing commands to be executed (required). + // + // Currently, only single-command scripts using the following editing + // commands are supported. 
+ // + // s/<regex>/<replacement>/<flags> + // The supported flags are 'i' (case-insensitive search), 'g' + // (substitute globally), 'p' (print if a replacement was made). If + // regex starts with ^, then it only matches at the beginning of the + // pattern space. Similarly, if it ends with $, then it only matches + // at the end of the pattern space. + // + // In replacement, besides the standard ECMAScript escape sequences a + // subset of Perl-specific ones is recognized. + // + // For more details read the builtin description in 'The build2 + // Testscript Language'. + // + // Note: must be executed asynchronously. + // + static uint8_t + sed (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "sed"); + }; + + try + { + // Do not throw when failbit is set (getline() failed to extract any + // character). + // + ifdstream cin (move (in), ifdstream::badbit); + ofdstream cout (move (out)); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. + // + bool auto_prn (true); + + struct substitute + { + string regex; + string replacement; + bool icase = false; + bool global = false; + bool print = false; + }; + optional<substitute> subst; + + for (; i != e; ++i) + { + if (*i == "-n") + auto_prn = false; + else if (*i == "-e") + { + // Only a single script is supported. + // + if (subst) + error () << "multiple scripts"; + + // If option has no value then bail out and report. + // + if (++i == e) + break; + + const string& v (*i); + if (v.empty ()) + error () << "empty script"; + + if (v[0] != 's') + error () << "only 's' command supported"; + + // Parse the substitute command. 
+ // + if (v.size () < 2) + error () << "no delimiter for 's' command"; + + char delim (v[1]); + if (delim == '\\' || delim == '\n') + error () << "invalid delimiter for 's' command"; + + size_t p (v.find (delim, 2)); + if (p == string::npos) + error () << "unterminated 's' command regex"; + + subst = substitute (); + subst->regex.assign (v, 2, p - 2); + + // Empty regex matches nothing, so not of much use. + // + if (subst->regex.empty ()) + error () << "empty regex in 's' command"; + + size_t b (p + 1); + p = v.find (delim, b); + if (p == string::npos) + error () << "unterminated 's' command replacement"; + + subst->replacement.assign (v, b, p - b); + + // Parse the substitute command flags. + // + char c; + for (++p; (c = v[p]) != '\0'; ++p) + { + switch (c) + { + case 'i': subst->icase = true; break; + case 'g': subst->global = true; break; + case 'p': subst->print = true; break; + default: + { + error () << "invalid 's' command flag '" << c << "'"; + } + } + } + } + else + { + if (*i == "--") + ++i; + + break; + } + } + + if (!subst) + error () << "missing script"; + + // Path of a file to edit. An empty path represents stdin. + // + path p; + if (i != e) + { + if (*i != "-") + p = parse_path (*i, sp.wd_path); + + ++i; + } + + if (i != e) + error () << "unexpected argument"; + + // Note that ECMAScript is implied if no grammar flag is specified. + // + regex re (subst->regex, + subst->icase ? regex::icase : regex::ECMAScript); + + // Edit a file or STDIN. + // + try + { + // Open a file if specified. + // + if (!p.empty ()) + { + cin.close (); // Flush and close. + cin.open (p); + } + + // Read until failbit is set (throw on badbit). + // + string s; + while (getline (cin, s)) + { + auto r (regex_replace_ex (s, + re, + subst->replacement, + subst->global + ? regex_constants::format_default + : regex_constants::format_first_only)); + + // Add newline regardless whether the source line is newline- + // terminated or not (in accordance with POSIX). 
+ // + if (auto_prn || (r.second && subst->print)) + cout << r.first << '\n'; + } + + cin.close (); + cout.close (); + r = 0; + } + catch (const io_error& e) + { + error_record d (error ()); + d << "unable to edit "; + + if (p.empty ()) + d << "stdin"; + else + d << "'" << p << "'"; + + d << ": " << e; + } + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + error (false) << "invalid regex" << e; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing in, out or writing to cerr (that's why - // need to check its state before writing). + // Can be thrown while creating cin, cout or writing to cerr. // catch (const io_error& e) { - if (cerr.good ()) - cerr << "rmdir: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -654,30 +956,26 @@ namespace build2 uint8_t r (2); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "test"); + }; + try { in.close (); out.close (); if (args.size () < 2) - { - cerr << "test: missing path" << endl; - throw failed (); - } + error () << "missing path"; bool file (args[0] == "-f"); if (!file && args[0] != "-d") - { - cerr << "test: invalid option" << endl; - throw failed (); - } + error () << "invalid option"; if (args.size () > 2) - { - cerr << "test: unexpected argument" << endl; - throw failed (); - } + error () << "unexpected argument"; path p (parse_path (args[1], sp.wd_path)); @@ -687,21 +985,18 @@ namespace build2 } catch (const system_error& e) { - cerr << "test: cannot test '" << p << "': " << e << endl; - throw failed (); + error () << "cannot test '" << p << "': " << e; } } catch (const invalid_path& e) { - cerr << "test: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing in, out or writing to cerr (that's why - // need to check its state before 
writing). + // Can be thrown while closing in, out or writing to cerr. // catch (const io_error& e) { - if (cerr.good ()) - cerr << "test: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -740,16 +1035,18 @@ namespace build2 uint8_t r (1); ofdstream cerr (move (err)); + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "touch"); + }; + try { in.close (); out.close (); if (args.empty ()) - { - cerr << "touch: missing file" << endl; - throw failed (); - } + error () << "missing file"; // Create files. // @@ -783,25 +1080,17 @@ namespace build2 } catch (const io_error& e) { - cerr << "touch: cannot create file '" << p << "': " << e - << endl; - throw failed (); + error () << "cannot create file '" << p << "': " << e; } sp.clean ({cleanup_type::always, p}, true); } else - { - cerr << "touch: '" << p << "' exists and is not a file" - << endl; - throw failed (); - } + error () << "'" << p << "' exists and is not a file"; } catch (const system_error& e) { - cerr << "touch: cannot create/update '" << p << "': " << e - << endl; - throw failed (); + error () << "cannot create/update '" << p << "': " << e; } } @@ -809,15 +1098,13 @@ namespace build2 } catch (const invalid_path& e) { - cerr << "touch: invalid path '" << e.path << "'" << endl; + error (false) << "invalid path '" << e.path << "'"; } - // Can be thrown while closing in, out or writing to cerr (that's why - // need to check its state before writing). + // Can be thrown while closing in, out or writing to cerr. 
// catch (const io_error& e) { - if (cerr.good ()) - cerr << "touch: " << e << endl; + error (false) << e; } catch (const failed&) { @@ -896,6 +1183,7 @@ namespace build2 {"mkdir", &sync_impl<&mkdir>}, {"rm", &sync_impl<&rm>}, {"rmdir", &sync_impl<&rmdir>}, + {"sed", &async_impl<&sed>}, {"test", &sync_impl<&test>}, {"touch", &sync_impl<&touch>}, {"true", &true_} diff --git a/build2/test/script/regex b/build2/test/script/regex index b25c1f1..1170b99 100644 --- a/build2/test/script/regex +++ b/build2/test/script/regex @@ -8,8 +8,9 @@ #include <list> #include <regex> #include <locale> +#include <string> // basic_string #include <cstdint> // uintptr_t -#include <type_traits> // make_unsigned, is_unsigned +#include <type_traits> // make_unsigned, enable_if, is_* #include <unordered_set> #include <build2/types> @@ -25,7 +26,7 @@ namespace build2 { using char_string = std::basic_string<char>; - enum class char_flags: std::uint16_t + enum class char_flags: uint16_t { icase = 0x1, // Case-insensitive match. idot = 0x2, // Invert '.' escaping. diff --git a/build2/test/script/runner.cxx b/build2/test/script/runner.cxx index dcfaec9..751daec 100644 --- a/build2/test/script/runner.cxx +++ b/build2/test/script/runner.cxx @@ -5,12 +5,11 @@ #include <build2/test/script/runner> #include <set> -#include <ios> // streamsize -#include <cstring> // strstr() -#include <sstream> +#include <ios> // streamsize #include <butl/fdstream> // fdopen_mode, fdnull(), fddup() +#include <build2/regex> #include <build2/filesystem> #include <build2/test/common> @@ -21,39 +20,6 @@ using namespace std; using namespace butl; -namespace std -{ - // Print regex error description but only if it is meaningful (this is also - // why we have to print leading colon here). - // - // Currently libstdc++ just returns the name of the exception (bug #67361). - // So we check that the description contains at least one space character. 
- // - // While VC's description is meaningful, it has an undesired prefix that - // resembles the following: 'regex_error(error_badrepeat): '. So we skip it. - // - static ostream& - operator<< (ostream& o, const regex_error& e) - { - const char* d (e.what ()); - -#if defined(_MSC_VER) && _MSC_VER <= 1910 - const char* rd (strstr (d, "): ")); - if (rd != nullptr) - d = rd + 3; -#endif - - ostringstream os; - os << runtime_error (d); // Sanitize the description. - - string s (os.str ()); - if (s.find (' ') != string::npos) - o << ": " << s; - - return o; - } -} - namespace build2 { namespace test diff --git a/tests/test/script/builtin/buildfile b/tests/test/script/builtin/buildfile index e5bac10..2a57c54 100644 --- a/tests/test/script/builtin/buildfile +++ b/tests/test/script/builtin/buildfile @@ -2,4 +2,4 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -./: test{cat echo mkdir rm rmdir test touch} $b +./: test{cat echo mkdir rm rmdir sed test touch} $b diff --git a/tests/test/script/builtin/sed.test b/tests/test/script/builtin/sed.test new file mode 100644 index 0000000..ef99539 --- /dev/null +++ b/tests/test/script/builtin/sed.test @@ -0,0 +1,312 @@ +# file : tests/test/script/builtin/sed.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +.include ../common.test + +: arg +: +{ + : auto-prn + : + { + $c <"sed -n -e 's/fox/bar/' <'foo' " && $b : on + $c <"sed -e 's/fox/bar/' <'foo' >'foo'" && $b : off + } + + : script + : + { + : missed + : + $c <'sed' && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: missing script + EOE + + : missed-val + : + $c <'sed -e' && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: missing script + EOE + + : empty + : + $c <"sed -e ''" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: 
stderr: test/1/stderr + sed: empty script + EOE + + : multiple + : + $c <"sed -e 's/a//' -e 's/a//'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: multiple scripts + EOE + + : invalid + : + $c <"sed -e 'z'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: only 's' command supported + EOE + } + + : file + : + { + : exist + : + $c <<EOI && $b + cat <'foo' >=f; + sed -e 's/foo/bar/' f >'bar' + EOI + + : none + : + $c <<EOI && $b + sed -e 's/foo/bar/' <'foo' >'bar' + EOI + + : dash + : + $c <<EOI && $b + sed -e 's/foo/bar/' - <'foo' >'bar' + EOI + + : not-exist + : + $c <"sed -e 's/foo/bar/' f" && $b 2>>/~%EOE% != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + %sed: unable to edit '.+/1/f': .+% + EOE + + : empty + : + $c <"sed -e 's/foo/bar/' ''" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: invalid path '' + EOE + } + + : unexpected + : + $c <"sed -e 's/a//' a b" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: unexpected argument + EOE + +} + +: command +: +{ + : subst + : + { + : parsing + : + { + : delim + : + { + : none + : + $c <"sed -e 's'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: no delimiter for 's' command + EOE + + : invalid + : + $c <"sed -e 's\\'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: invalid delimiter for 's' command + EOE + } + + : regex + : + { + : unterminated + : + $c <"sed -e 's/foo'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: unterminated 's' command regex + EOE + + : empty + : + $c <"sed -e 's///'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: empty 
regex in 's' command + EOE + + : invalid + : + : Note that old versions of libc++ (for example 1.1) do not detect some + : regex errors. For example '*' is parsed successfully. + : + $c <"sed -e 's/foo[/bar/'" && $b 2>>/~%EOE% != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + %sed: invalid regex.*% + EOE + } + + : unterminated-replacement + : + $c <"sed -e 's/foo/bar'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: unterminated 's' command replacement + EOE + + : invalid-flags + : + $c <"sed -e 's/foo/bar/a'" && $b 2>>/EOE != 0 + testscript:1:1: error: sed exit status 1 != 0 + info: stderr: test/1/stderr + sed: invalid 's' command flag 'a' + EOE + } + + : exec + : + { + : flags + : + { + : global + : + { + $c <"sed -e 's/o/a/g' <'foo' >'faa'" && $b : on + $c <"sed -e 's/o/a/' <'foo' >'fao'" && $b : off + } + + : icase + : + { + $c <"sed -e 's/O/a/i' <'foo' >'fao'" && $b : on + $c <"sed -e 's/O/a/' <'foo' >'foo'" && $b : off + } + + : print + : + { + $c <"sed -n -e 's/o/a/p' <'foo' >'fao'" && $b : on-match + $c <"sed -n -e 's/o/a/' <'foo' " && $b : off-match + $c <"sed -n -e 's/u/a/p' <'foo' " && $b : on-no-match + } + } + + : search + { + : anchor + : + { + $c <"sed -n -e 's/^o/a/gp' <'oof' >'aof'" && $b : begin + $c <"sed -n -e 's/o\$/a/gp' <'foo' >'foa'" && $b : end + } + + : match + : Match corner cases + : + { + $c <"sed -n -e 's/a/b/p' <'a' >'b' " && $b : full + $c <"sed -n -e 's/a/b/p' <'ac' >'bc' " && $b : left + $c <"sed -n -e 's/a/b/p' <'ca' >'cb' " && $b : right + $c <"sed -n -e 's/a/b/pg' <'xaax' >'xbbx'" && $b : adjacent + } + } + + : replacement + : + { + : ecma-escape + : + { + $c <"sed <'xay' -e 's/a/\$b/' >'x\$by'" && $b : none + $c <"sed <'xay' -e 's/a/\$/' >'x\$y' " && $b : none-term + $c <"sed <'xay' -e 's/a/\$\$/' >'x\$y' " && $b : self + $c <"sed <'xay' -e 's/a/b\$&c/' >'xbacy'" && $b : match + $c <"sed <'xay' -e 's/a/b\$`c/' >'xbxcy'" && $b : 
match-precede + $c <"sed <'xay' -e \"s/a/b\\\$'c/\" >'xbycy'" && $b : match-follow + + : capture + : + $c <<EOI && $b + sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/$1$10/' >'aj' + EOI + } + + : perl-escape + : + { + $c <"sed <'xay' -e 's/a/\\b/' >'xby' " && $b : none + $c <"sed <'xay' -e 's/a/\\/' >'xy' " && $b : none-term + $c <"sed <'xay' -e 's/a/\\\\/' >'x\\y'" && $b : self + + : capture + : + $c <<EOI && $b + sed <'abcdefghij' -e 's/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/\1\10/' >'aa0' + EOI + + : upper + : + { + $c <"sed <'xay' -e 's/a/\\U/' >'xy' " && $b : none + $c <"sed <'xay' -e 's/a/\\Uvz/' >'xVZy'" && $b : repl + $c <"sed <'xay' -e 's/a/\\Uv\\Ez/' >'xVzy'" && $b : end + $c <"sed <'aa' -e 's/a/v\\Uz/g' >'vZvZ'" && $b : locality + $c <"sed <'xay' -e 's/\(a\)/\\U\\1/' >'xAy' " && $b : capt + $c <"sed <'x-y' -e 's/\(a?\)-/\\U\\1z/' >'xZy' " && $b : capt-empty + $c <"sed <'xay' -e 's/a/\\uvz/' >'xVzy'" && $b : once + } + + : lower + : + { + $c <"sed <'xay' -e 's/a/\\lVZ/' >'xvZy'" && $b : once + } + } + } + + $c <"sed -e 's/a//' <:'b' >'b'" && $b : no-newline + $c <"sed -e 's/a//' <:'' " && $b : empty-stdin + + : empty-file + : + $c <<EOI && $b + touch f; + sed -e 's/a//' f + EOI + } + } +} + +: big +: +: Sed a big file (about 3MB) to test that the builtin is asynchronous. +: +{ + s="------------------------------------------------------------------------" + s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s" + s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s" + s="$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s" + $c <"cat <'$s' | sed -e 's/^x//' >'$s'" && $b +} |