From 2fc53c801eb551154f0a2aa96522cf3182a65b7a Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Mon, 20 May 2024 09:34:16 +0200
Subject: Add $string.contains(), $string.starts_with(), $string.ends_with()

Also fix bug in $string.replace().
---
 libbuild2/functions-regex.cxx    |   3 +
 libbuild2/functions-string.cxx   | 246 ++++++++++++++++++++++++++++++++-------
 tests/function/string/testscript |  74 +++++++++++-
 3 files changed, 276 insertions(+), 47 deletions(-)

diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx
index cf3ffd0..c46f6f5 100644
--- a/libbuild2/functions-regex.cxx
+++ b/libbuild2/functions-regex.cxx
@@ -688,6 +688,9 @@ namespace build2
     // If both `return_match` and `return_subs` flags are specified then the
     // sub-string that matches the whole regular expression comes first.
     //
+    // See also `$string.contains()`, `$string.starts_with()`,
+    // `$string.ends_with()`.
+    //
     f[".search"] += [](value v, string re, optional<names> flags)
     {
       return search (move (v), re, move (flags));
diff --git a/libbuild2/functions-string.cxx b/libbuild2/functions-string.cxx
index b7e0a17..eccc6c7 100644
--- a/libbuild2/functions-string.cxx
+++ b/libbuild2/functions-string.cxx
@@ -8,6 +8,136 @@ using namespace std;
 
 namespace build2
 {
+  // Look for the substring forwards in the [p, n) range.
+  //
+  static inline size_t
+  find (const string& s, size_t p, const string& ss, bool ic)
+  {
+    size_t sn (ss.size ());
+
+    for (size_t n (s.size ()); p != n; ++p)
+    {
+      if (n - p >= sn &&
+          (ic
+           ? icasecmp (ss, s.c_str () + p, sn)
+           : s.compare (p, sn, ss)) == 0)
+        return p;
+    }
+
+    return string::npos;
+  }
+
+  // Look for the substring backwards in the [0, n) range.
+  //
+  static inline size_t
+  rfind (const string& s, size_t n, const string& ss, bool ic)
+  {
+    size_t sn (ss.size ());
+
+    if (n >= sn)
+    {
+      n -= sn; // Don't consider characters out of range.
+
+      for (size_t p (n);; )
+      {
+        if ((ic
+             ? icasecmp (ss, s.c_str () + p, sn)
+             : s.compare (p, sn, ss)) == 0)
+          return p;
+
+        if (p-- == 0) // Position 0 has been checked so we are done.
+          break;
+      }
+    }
+
+    return string::npos;
+  }
+
+  static bool
+  contains (const string& s, value&& ssv, optional<names>&& fs)
+  {
+    bool ic (false), once (false);
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else if (s == "once")
+          once = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string ss (convert<string> (move (ssv)));
+
+    if (ss.empty ())
+      throw invalid_argument ("empty substring");
+
+    size_t p (find (s, 0, ss, ic));
+
+    if (once && p != string::npos && p != rfind (s, s.size (), ss, ic))
+      p = string::npos;
+
+    return p != string::npos;
+  }
+
+  static bool
+  starts_with (const string& s, value&& pfv, optional<names>&& fs)
+  {
+    bool ic (false);
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string pf (convert<string> (move (pfv)));
+
+    if (pf.empty ())
+      throw invalid_argument ("empty prefix");
+
+    return find (s, 0, pf, ic) == 0;
+  }
+
+  static bool
+  ends_with (const string& s, value&& sfv, optional<names>&& fs)
+  {
+    bool ic (false);
+    if (fs)
+    {
+      for (name& f: *fs)
+      {
+        string s (convert<string> (move (f)));
+
+        if (s == "icase")
+          ic = true;
+        else
+          throw invalid_argument ("invalid flag '" + s + '\'');
+      }
+    }
+
+    const string sf (convert<string> (move (sfv)));
+
+    if (sf.empty ())
+      throw invalid_argument ("empty suffix");
+
+    size_t n (s.size ());
+    size_t p (rfind (s, n, sf, ic));
+
+    return p != string::npos && p + sf.size () == n;
+  }
+
   static string
   replace (string&& s, value&& fv, value&& tv, optional<names>&& fs)
   {
@@ -43,52 +173,13 @@ namespace build2
 
       size_t fn (f.size ());
 
-      // Look for the substring forward in the [p, n) range.
-      //
-      auto find = [&s, &f, fn, ic] (size_t p) -> size_t
-      {
-        for (size_t n (s.size ()); p != n; ++p)
-        {
-          if (n - p >= fn &&
-              (ic
-               ? icasecmp (f, s.c_str () + p, fn)
-               : s.compare (p, fn, f)) == 0)
-            return p;
-        }
-
-        return string::npos;
-      };
-
-      // Look for the substring backard in the [0, n) range.
-      //
-      auto rfind = [&s, &f, fn, ic] (size_t n) -> size_t
-      {
-        if (n >= fn)
-        {
-          n -= fn; // Don't consider characters out of range.
-
-          for (size_t p (n);; )
-          {
-            if ((ic
-                 ? icasecmp (f, s.c_str () + p, fn)
-                 : s.compare (p, fn, f)) == 0)
-              return p;
-
-            if (--p == 0)
-              break;
-          }
-        }
-
-        return string::npos;
-      };
-
       if (fo || lo)
       {
-        size_t p (lo ? rfind (s.size ()) : find (0));
+        size_t p (lo ? rfind (s, s.size (), f, ic) : find (s, 0, f, ic));
 
         if (fo && lo && p != string::npos)
         {
-          if (p != find (0))
+          if (p != find (s, 0, f, ic))
             p = string::npos;
         }
 
@@ -97,7 +188,9 @@ namespace build2
       }
       else
       {
-        for (size_t p (0); (p = find (0)) != string::npos; p += fn)
+        size_t tn (t.size ());
+
+        for (size_t p (0); (p = find (s, p, f, ic)) != string::npos; p += tn)
           s.replace (p, fn, t);
       }
     }
@@ -173,6 +266,75 @@ namespace build2
                        convert<string> (move (y))) == 0;
     };
 
+    // $string.contains(<untyped>, <untyped>[, <flags>])
+    // $contains(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) contains the given substring
+    // (second argument). The substring must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    //     once   - check if the substring occurs exactly once
+    //
+    // See also `$string.starts_with()`, `$string.ends_with()`,
+    // `$regex.search()`.
+    //
+    f["contains"] += [](string s, value ss, optional<names> fs)
+    {
+      return contains (move (s), move (ss), move (fs));
+    };
+
+    f[".contains"] += [](names s, value ss, optional<names> fs)
+    {
+      return contains (convert<string> (move (s)), move (ss), move (fs));
+    };
+
+    // $string.starts_with(<untyped>, <untyped>[, <flags>])
+    // $starts_with(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) begins with the given prefix
+    // (second argument). The prefix must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    // See also `$string.contains()`.
+    //
+    f["starts_with"] += [](string s, value pf, optional<names> fs)
+    {
+      return starts_with (move (s), move (pf), move (fs));
+    };
+
+    f[".starts_with"] += [](names s, value pf, optional<names> fs)
+    {
+      return starts_with (convert<string> (move (s)), move (pf), move (fs));
+    };
+
+    // $string.ends_with(<untyped>, <untyped>[, <flags>])
+    // $ends_with(<string>, <string>[, <flags>])
+    //
+    // Check if the string (first argument) ends with the given suffix (second
+    // argument). The suffix must not be empty.
+    //
+    // The following flags are supported:
+    //
+    //     icase  - compare ignoring case
+    //
+    // See also `$string.contains()`.
+    //
+    f["ends_with"] += [](string s, value sf, optional<names> fs)
+    {
+      return ends_with (move (s), move (sf), move (fs));
+    };
+
+    f[".ends_with"] += [](names s, value sf, optional<names> fs)
+    {
+      return ends_with (convert<string> (move (s)), move (sf), move (fs));
+    };
+
     // $string.replace(<untyped>, <from>, <to> [, <flags>])
     // $replace(<string>, <from>, <to> [, <flags>])
     //
diff --git a/tests/function/string/testscript b/tests/function/string/testscript
index 244ace8..8eb5760 100644
--- a/tests/function/string/testscript
+++ b/tests/function/string/testscript
@@ -25,17 +25,81 @@
   }
 }
 
+: contains
+:
+{
+  : basics
+  :
+  {
+    $* <'print $string.contains(  abcd, bc)' >'true'  : true
+    $* <'print $string.contains(  abcd, ac)' >'false' : false
+    $* <'print $contains([string] abcd, cd)' >'true'  : typed
+  }
+
+  : icase
+  :
+  {
+    $* <'print $string.contains(aBcD, bC, icase)' >'true' : true
+  }
+
+  : once
+  :
+  {
+    $* <'print $string.contains(abcdabcd, da, once)' >'true'  : true
+    $* <'print $string.contains(abcdabcd, bc, once)' >'false' : false
+    $* <'print $string.contains(abcdefgh, ab, once)' >'true'  : true-begin
+  }
+}
+
+: starts_with
+:
+{
+  : basics
+  :
+  {
+    $* <'print $string.starts_with(  abcd, ab)'   >'true'  : true
+    $* <'print $string.starts_with(  abcd, bc)'   >'false' : false
+    $* <'print $starts_with([string] abcd, abcd)' >'true'  : typed
+  }
+
+  : icase
+  :
+  {
+    $* <'print $string.starts_with(aBcD, Ab, icase)' >'true' : true
+  }
+}
+
+: ends_with
+:
+{
+  : basics
+  :
+  {
+    $* <'print $string.ends_with(  abcd, cd)'   >'true'  : true
+    $* <'print $string.ends_with(  abcd, bc)'   >'false' : false
+    $* <'print $string.ends_with(  abcd, abce)' >'false' : false-whole
+    $* <'print $ends_with([string] abcd, abcd)' >'true'  : typed
+  }
+
+  : icase
+  :
+  {
+    $* <'print $string.ends_with(aBcD, Cd, icase)' >'true' : true
+  }
+}
+
 : replace
 :
 {
   : basics
   :
   {
-    $* <'print $string.replace(  abcb,     b, BB)' >'aBBcBB' : expand
-    $* <'print $string.replace(  aabbccbb, bb, B)' >'aaBccB' : shrink
-    $* <'print $replace([string] abc,      b, B)'  >'aBc'    : typed
-    $* <'print $replace([string] "",       b, B)'  >''       : empty
-    $* <'print $replace([string] bbb, b, "")'      >''       : to-empty
+    $* <'print $string.replace(  abcb,     b, BB)' >'aBBcBB' : expand
+    $* <'print $string.replace(  aabbccbb, bb, B)' >'aaBccB' : shrink
+    $* <'print $replace([string] abc,      b, B)'  >'aBc'    : typed
+    $* <'print $replace([string] "",       b, B)'  >''       : empty
+    $* <'print $replace([string] bbb,      b, "")' >''       : to-empty
+    $* <'print $replace([string] bb,       b, Bb)' >'BbBb'   : no-recursion
   }
 
   : icase
-- 
cgit v1.1