diff options
| | | |
|---|---|---|
| author | Karen Arutyunov <karen@codesynthesis.com> | 2020-11-19 18:30:11 +0300 |
| committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-11-25 14:52:00 +0300 |
| commit | 18de2b7b5b1b063e7a074878b4e3b3ccdd5c6ebc (patch) | |
| tree | 36c60d9dd494b1b4cb714fea9900899362273e2f | |
| parent | ae43c5780651d594b1ec76e99330cd6ef082b0c5 (diff) | |
Allow multiple -e options for sed builtin
-rw-r--r-- | libbutl/builtin.cxx | 128 |
-rw-r--r-- | tests/builtin/sed.testscript | 55 |
2 files changed, 120 insertions, 63 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index 7a2c024..5c1c875 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -1569,7 +1569,7 @@ namespace butl return 1; } - // sed [-n|--quiet] [-i|--in-place] -e|--expression <script> [<file>] + // sed [-n|--quiet] [-i|--in-place] (-e|--expression <script>)... [<file>] // // Note: must be executed asynchronously. // @@ -1615,71 +1615,95 @@ namespace butl if (ops.expression ().empty ()) fail () << "missing script"; - // Only a single script is supported. - // - if (ops.expression ().size () != 1) - fail () << "multiple scripts"; - - struct + struct subst { - string regex; + std::regex regex; string replacement; - bool icase = false; - bool global = false; - bool print = false; - } subst; + bool global; + bool print; + + subst (const string& re, bool ic, string rp, bool gl, bool pr) + // + // Note that ECMAScript is implied if no grammar flag is specified. + // + : regex (re, ic ? regex::icase : regex::ECMAScript), + replacement (move (rp)), + global (gl), + print (pr) {} + }; + + small_vector<subst, 1> substs; + for (const string& v: ops.expression ()) { - const string& v (ops.expression ()[0]); if (v.empty ()) fail () << "empty script"; if (v[0] != 's') - fail () << "only 's' command supported"; + fail () << "unknown command in '" << v << "': only 's' command " + << "supported"; // Parse the substitute command. // if (v.size () < 2) - fail () << "no delimiter for 's' command"; + fail () << "no delimiter for 's' command in '" << v << "'"; char delim (v[1]); if (delim == '\\' || delim == '\n') - fail () << "invalid delimiter for 's' command"; + fail () << "invalid delimiter for 's' command in '" << v << "'"; size_t p (v.find (delim, 2)); if (p == string::npos) - fail () << "unterminated 's' command regex"; + fail () << "unterminated 's' command regex in '" << v << "'"; - subst.regex.assign (v, 2, p - 2); + string regex (v, 2, p - 2); // Empty regex matches nothing, so not of much use. 
// - if (subst.regex.empty ()) - fail () << "empty regex in 's' command"; + if (regex.empty ()) + fail () << "empty regex in 's' command in '" << v << "'"; size_t b (p + 1); p = v.find (delim, b); if (p == string::npos) - fail () << "unterminated 's' command replacement"; + fail () << "unterminated 's' command replacement in '" << v << "'"; - subst.replacement.assign (v, b, p - b); + string replacement (v, b, p - b); // Parse the substitute command flags. // + bool icase (false); + bool global (false); + bool print (false); + char c; for (++p; (c = v[p]) != '\0'; ++p) { switch (c) { - case 'i': subst.icase = true; break; - case 'g': subst.global = true; break; - case 'p': subst.print = true; break; + case 'i': icase = true; break; + case 'g': global = true; break; + case 'p': print = true; break; default: { - fail () << "invalid 's' command flag '" << c << "'"; + fail () << "invalid 's' command flag '" << c << "' in '" << v + << "'"; } } } + + try + { + substs.emplace_back (regex, icase, + move (replacement), + global, print); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + fail () << "invalid regex '" << regex << "' in '" << v << "'" << e; + } } // Path of a file to edit. An empty path represents stdin. @@ -1738,10 +1762,6 @@ namespace butl rm = auto_rmfile (tp); } - // Note that ECMAScript is implied if no grammar flag is specified. - // - regex re (subst.regex, subst.icase ? regex::icase : regex::ECMAScript); - // Edit a file or STDIN. // try @@ -1756,22 +1776,42 @@ namespace butl // Read until failbit is set (throw on badbit). // - string s; - while (getline (cin, s)) + string ps; + while (getline (cin, ps)) { - auto r (regex_replace_search ( - s, - re, - subst.replacement, - subst.global - ? 
regex_constants::format_default - : regex_constants::format_first_only)); + bool prn (!ops.quiet ()); + + for (const subst& s: substs) + { + auto r (regex_replace_search ( + ps, + s.regex, + s.replacement, + s.global + ? regex_constants::format_default + : regex_constants::format_first_only)); + + // If the regex matches, then override the pattern space with the + // replacement result and print it and proceed to the next line, + // if requested. + // + if (r.second) + { + ps = move (r.first); + + if (s.print) + { + prn = true; + break; + } + } + } // Add newline regardless whether the source line is newline- // terminated or not (in accordance with POSIX). // - if (!ops.quiet () || (r.second && subst.print)) - cout << r.first << '\n'; + if (prn) + cout << ps << '\n'; } cin.close (); @@ -1801,12 +1841,6 @@ namespace butl d << ": " << e; } } - catch (const regex_error& e) - { - // Print regex_error description if meaningful (no space). - // - error () << "invalid regex" << e; - } // Can be thrown while creating cin, cout or writing to cerr. 
// catch (const io_error& e) diff --git a/tests/builtin/sed.testscript b/tests/builtin/sed.testscript index ad26483..7fbc9b2 100644 --- a/tests/builtin/sed.testscript +++ b/tests/builtin/sed.testscript @@ -89,16 +89,10 @@ test.options += -c sed: empty script EOE - : multiple - : - $* -e 's/a//' -e 's/a//' 2>>EOE != 0 - sed: multiple scripts - EOE - : invalid : $* -e 'z' 2>>EOE != 0 - sed: only 's' command supported + sed: unknown command in 'z': only 's' command supported EOE } @@ -156,13 +150,13 @@ test.options += -c : none : $* -e 's' 2>>EOE != 0 - sed: no delimiter for 's' command + sed: no delimiter for 's' command in 's' EOE : invalid : $* -e 's\\' 2>>EOE != 0 - sed: invalid delimiter for 's' command + sed: invalid delimiter for 's' command in 's\\' EOE } @@ -171,14 +165,14 @@ test.options += -c { : unterminated : - $* -e 's/foo' 2>>/EOE != 0 - sed: unterminated 's' command regex + $* -e 's/foo' 2>>EOE != 0 + sed: unterminated 's' command regex in 's/foo' EOE : empty : $* -e 's///' 2>>EOE != 0 - sed: empty regex in 's' command + sed: empty regex in 's' command in 's///' EOE : invalid @@ -187,20 +181,20 @@ test.options += -c : regex errors. For example '*' is parsed successfully. 
: $* -e 's/foo[/bar/' 2>>~%EOE% != 0 - %sed: invalid regex.*% + %sed: invalid regex 'foo\[' in 's/foo\[/bar/'.*% EOE } : unterminated-replacement : - $* -e 's/foo/bar' 2>>/EOE != 0 - sed: unterminated 's' command replacement + $* -e 's/foo/bar' 2>>EOE != 0 + sed: unterminated 's' command replacement in 's/foo/bar' EOE : invalid-flags : $* -e 's/foo/bar/a' 2>>EOE != 0 - sed: invalid 's' command flag 'a' + sed: invalid 's' command flag 'a' in 's/foo/bar/a' EOE } @@ -314,6 +308,35 @@ test.options += -c } } } + + : multiple + : + { + $* -e 's/b/x/' -e 's/x/y/' -e 's/c/z/' <'abc' >'ayz' : replace-replacement + + : new-cycle + : + $* -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO + abc + klm + dxe + EOI + axc + klm + dye + EOO + + : quiet + : + $* -n -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO + abc + klm + dxe + EOI + axc + dye + EOO + } } : in-place |