From 221a53ed3d18217d06f7d8e0bdf9ce315ca2413c Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Mon, 1 Apr 2024 10:23:37 +0200
Subject: Add $string.replace() function

---
 libbuild2/functions-regex.cxx    |   2 +
 libbuild2/functions-string.cxx   | 139 ++++++++++++++++++++++++++++++++++++++-
 tests/function/string/testscript |  47 +++++++++++++
 3 files changed, 187 insertions(+), 1 deletion(-)

diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx
index a7fcf55..cf3ffd0 100644
--- a/libbuild2/functions-regex.cxx
+++ b/libbuild2/functions-regex.cxx
@@ -775,6 +775,8 @@ namespace build2
     // If both `format_first_only` and `format_no_copy` flags are specified
     // then the result will only contain the replacement of the first match.
     //
+    // See also `$string.replace()`.
+    //
     f[".replace"] += [](value v, string re, string fmt, optional<names> flags)
     {
       return replace (move (v), re, fmt, move (flags));
diff --git a/libbuild2/functions-string.cxx b/libbuild2/functions-string.cxx
index 367923f..0458724 100644
--- a/libbuild2/functions-string.cxx
+++ b/libbuild2/functions-string.cxx
@@ -8,6 +8,110 @@ using namespace std;
 
 namespace build2
 {
+  // Replace occurrences of substring fv with tv in string s according to
+  // the optional flags fs (icase, first_only, last_only). Throw
+  // invalid_argument if a flag is unrecognized or fv is empty.
+  //
+  static string
+  replace (string&& s, value&& fv, value&& tv, optional<names>&& fs)
+  {
+    bool ic (false), fo (false), lo (false);
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else if (s == "first_only")
+          fo = true;
+        else if (s == "last_only")
+          lo = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    string f (convert<string> (move (fv)));
+    string t (convert<string> (move (tv)));
+
+    if (f.empty ())
+      throw invalid_argument ("empty substring");
+
+    if (!s.empty ())
+    {
+      // Note that we don't cache s.size () since the string size will be
+      // changing as we are replacing. In fact, we may end up with an empty
+      // string after a replacement.
+
+      size_t fn (f.size ());
+
+      // Look for the substring forward in the [p, n) range.
+      //
+      auto find = [&s, &f, fn, ic] (size_t p) -> size_t
+      {
+        for (size_t n (s.size ()); p != n; ++p)
+        {
+          if (n - p >= fn &&
+              (ic
+               ? icasecmp (f, s.c_str () + p, fn)
+               : s.compare (p, fn, f)) == 0)
+            return p;
+        }
+
+        return string::npos;
+      };
+
+      // Look for the substring backward in the [0, n) range.
+      //
+      auto rfind = [&s, &f, fn, ic] (size_t n) -> size_t
+      {
+        if (n >= fn)
+        {
+          n -= fn; // Don't consider characters out of range.
+
+          for (size_t p (n);; )
+          {
+            if ((ic
+                 ? icasecmp (f, s.c_str () + p, fn)
+                 : s.compare (p, fn, f)) == 0)
+              return p;
+
+            if (p-- == 0) // Position 0 has been checked above.
+              break;
+          }
+        }
+
+        return string::npos;
+      };
+
+      if (fo || lo)
+      {
+        size_t p (lo ? rfind (s.size ()) : find (0));
+
+        if (fo && lo && p != string::npos)
+        {
+          if (p != find (0))
+            p = string::npos;
+        }
+
+        if (p != string::npos)
+          s.replace (p, fn, t);
+      }
+      else
+      {
+        // Resume the search right after the inserted <to> so that we never
+        // rescan the replacement (which may itself contain <from>).
+        //
+        for (size_t p (0); (p = find (p)) != string::npos; p += t.size ())
+          s.replace (p, fn, t);
+      }
+    }
+
+    return s;
+  }
+
   static size_t
   find_index (const strings& vs, value&& v, optional<names>&& fs)
   {
@@ -32,7 +136,7 @@ namespace build2
                      }));
 
     return i != vs.end () ? i - vs.begin () : vs.size ();
-  };
+  }
 
   void
   string_functions (function_map& m)
@@ -76,6 +180,39 @@ namespace build2
                        convert<string> (move (y))) == 0;
     };
 
+    // $string.replace(<untyped>, <from>, <to> [, <flags>])
+    // $replace(<string>, <from>, <to> [, <flags>])
+    //
+    // Replace occurrences of substring <from> with <to> in a string. The
+    // <from> substring must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase       - compare ignoring case
+    //
+    //     first_only  - only replace the first match
+    //
+    //     last_only   - only replace the last match
+    //
+    //
+    // If both `first_only` and `last_only` flags are specified, then <from>
+    // is replaced only if it occurs in the string once.
+    //
+    // See also `$regex.replace()`.
+    //
+    f["replace"] += [](string s, value f, value t, optional<names> fs)
+    {
+      return replace (move (s), move (f), move (t), move (fs));
+    };
+
+    f[".replace"] += [](names s, value f, value t, optional<names> fs)
+    {
+      return names {
+        name (
+          replace (
+            convert<string> (move (s)), move (f), move (t), move (fs)))};
+    };
+
     // $string.trim(<untyped>)
     // $trim(<string>)
     //
diff --git a/tests/function/string/testscript b/tests/function/string/testscript
index 96f5c52..244ace8 100644
--- a/tests/function/string/testscript
+++ b/tests/function/string/testscript
@@ -25,6 +25,53 @@
   }
 }
 
+: replace
+:
+{
+  : basics
+  :
+  {
+    $* <'print $string.replace( abcb, b, BB)' >'aBBcBB' : expand
+    $* <'print $string.replace( aabbccbb, bb, B)' >'aaBccB' : shrink
+    $* <'print $string.replace( abcb, b, bb)' >'abbcbb' : to-contains-from
+    $* <'print $replace([string] abc, b, B)' >'aBc' : typed
+    $* <'print $replace([string] "", b, B)' >'' : empty
+    $* <'print $replace([string] bbb, b, "")' >'' : to-empty
+  }
+
+  : icase
+  :
+  {
+    $* <'print $string.replace(abcB, b, X, icase)' >'aXcX'
+  }
+
+  : first
+  :
+  {
+    $* <'print $string.replace(babc, b, B, first_only)' >'Babc' : first
+    $* <'print $string.replace(abcb, b, B, first_only)' >'aBcb' : middle
+    $* <'print $string.replace(b, b, B, first_only)' >'B' : only
+  }
+
+  : last
+  :
+  {
+    $* <'print $string.replace(bac, b, B, last_only)' >'Bac' : first
+    $* <'print $string.replace(babc, b, B, last_only)' >'baBc' : middle
+    $* <'print $string.replace(abcb, b, B, last_only)' >'abcB' : last
+    $* <'print $string.replace(b, b, B, last_only)' >'B' : only
+  }
+
+  : first-and-last
+  :
+  {
+    $* <'print $string.replace(ac, b, B, first_only last_only)' >'ac' : zero
+    $* <'print $string.replace(abc, b, B, first_only last_only)' >'aBc' : one
+    $* <'print $string.replace(abcb, b, B, first_only last_only)' >'abcb' : two
+    $* <'print $string.replace(b, b, B, first_only last_only)' >'B' : only
+  }
+}
+
 : trim
 :
 {
-- 
cgit v1.1