aboutsummaryrefslogtreecommitdiff
path: root/libbutl
diff options
context:
space:
mode:
Diffstat (limited to 'libbutl')
-rw-r--r--libbutl/builtin.cxx75
-rw-r--r--libbutl/regex.ixx17
-rw-r--r--libbutl/regex.mxx36
-rw-r--r--libbutl/regex.txx67
4 files changed, 152 insertions, 43 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index 79ff968..a6bb94b 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -1632,15 +1632,6 @@ namespace butl
string replacement;
bool global;
bool print;
-
- subst (const string& re, bool ic, string rp, bool gl, bool pr)
- //
- // Note that ECMAScript is implied if no grammar flag is specified.
- //
- : regex (re, ic ? regex::icase : regex::ECMAScript),
- replacement (move (rp)),
- global (gl),
- print (pr) {}
};
small_vector<subst, 1> substs;
@@ -1663,57 +1654,59 @@ namespace butl
if (delim == '\\' || delim == '\n')
fail () << "invalid delimiter for 's' command in '" << v << "'";
- size_t p (v.find (delim, 2));
- if (p == string::npos)
- fail () << "unterminated 's' command regex in '" << v << "'";
-
- string regex (v, 2, p - 2);
-
- // Empty regex matches nothing, so not of much use.
- //
- if (regex.empty ())
- fail () << "empty regex in 's' command in '" << v << "'";
-
- size_t b (p + 1);
- p = v.find (delim, b);
- if (p == string::npos)
- fail () << "unterminated 's' command replacement in '" << v << "'";
-
- string replacement (v, b, p - b);
-
- // Parse the substitute command flags.
+ // Parse the substitute command regex (as string), replacement, and
+ // flags.
//
+ pair<string, string> rf;
bool icase (false);
bool global (false);
bool print (false);
- char c;
- for (++p; (c = v[p]) != '\0'; ++p)
+ try
{
- switch (c)
+ size_t e;
+ rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e);
+
+ char c;
+ for (size_t i (e + 1); (c = v[i]) != '\0'; ++i)
{
- case 'i': icase = true; break;
- case 'g': global = true; break;
- case 'p': print = true; break;
- default:
+ switch (c)
{
- fail () << "invalid 's' command flag '" << c << "' in '" << v
- << "'";
+ case 'i': icase = true; break;
+ case 'g': global = true; break;
+ case 'p': print = true; break;
+ default:
+ {
+ fail () << "invalid 's' command flag '" << c << "' in '" << v
+ << "'";
+ }
}
}
}
+ catch (const invalid_argument& e)
+ {
+ fail () << "invalid 's' command '" << v << "': " << e;
+ }
+ // Parse the regex and add the substitution to the list.
+ //
try
{
- substs.emplace_back (regex, icase,
- move (replacement),
- global, print);
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ regex re (rf.first, icase ? regex::icase : regex::ECMAScript);
+
+ substs.push_back ({move (re),
+ move (rf.second),
+ global,
+ print});
}
catch (const regex_error& e)
{
// Print regex_error description if meaningful (no space).
//
- fail () << "invalid regex '" << regex << "' in '" << v << "'" << e;
+ fail () << "invalid regex '" << rf.first << "' in '" << v << "'"
+ << e;
}
}
diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx
index dec15d1..805acd1 100644
--- a/libbutl/regex.ixx
+++ b/libbutl/regex.ixx
@@ -21,4 +21,21 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return make_pair (move (r), match);
}
+
+ template <typename C>
+ inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const std::basic_string<C>& s,
+                      std::regex_constants::syntax_option_type f)
+ {
+   // Delegate to the pointer/length overload.
+   //
+   const C* d (s.data ());
+   const auto n (s.length ());
+   return regex_replace_parse (d, n, f);
+ }
+
+ template <typename C>
+ inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s,
+                      std::regex_constants::syntax_option_type f)
+ {
+   // Measure the NUL-terminated string and delegate to the pointer/length
+   // overload.
+   //
+   using traits = typename std::basic_string<C>::traits_type;
+
+   return regex_replace_parse (s, traits::length (s), f);
+ }
}
diff --git a/libbutl/regex.mxx b/libbutl/regex.mxx
index 84b024f..b5490b1 100644
--- a/libbutl/regex.mxx
+++ b/libbutl/regex.mxx
@@ -14,8 +14,9 @@
#include <utility> // pair
#include <locale>
-#include <cstddef> // size_t
-#include <utility> // move(), make_pair()
+#include <cstddef> // size_t
+#include <utility> // move(), make_pair()
+#include <stdexcept> // invalid_argument
#endif
#if defined(__clang__)
@@ -93,6 +94,37 @@ LIBBUTL_MODEXPORT namespace butl
regex_replace_match (const std::basic_string<C>&,
const std::basic_regex<C>&,
const std::basic_string<C>& fmt);
+
+ // Parse the '/<regex>/<format>/' replacement string into the regex/format
+ // pair. Any other character can be used as the delimiter instead of '/'.
+ // Throw std::invalid_argument or std::regex_error on parsing error.
+ //
+ // Note: escaping of the delimiter character is not (yet) supported.
+ //
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const std::basic_string<C>&,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C*,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C*, size_t,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ // As above but return the regex as a string and do not fail if there is text
+ // after the last delimiter, instead returning the position following it.
+ //
+ template <typename C>
+ std::pair<std::basic_string<C>, std::basic_string<C>>
+ regex_replace_parse (const C*, size_t, size_t& end);
}
LIBBUTL_MODEXPORT namespace std
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index b785708..aa845be 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -278,4 +278,71 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return match;
}
+
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s, size_t n,
+                      std::regex_constants::syntax_option_type f)
+ {
+   // Split the string into the regex and format text first. The end position
+   // is the one following the trailing delimiter.
+   //
+   size_t end;
+   auto parts (regex_replace_parse (s, n, end));
+
+   // This overload requires the trailing delimiter to be the last character.
+   //
+   if (end != n)
+     throw std::invalid_argument ("junk after trailing delimiter");
+
+   // Compile the regex with the requested flags.
+   //
+   std::basic_regex<C> re (parts.first, f);
+   return std::make_pair (std::move (re), std::move (parts.second));
+ }
+
+ // Split the '<delim><regex><delim><format><delim>[<text>]' string of length
+ // n into the regex/format pair of strings, using the first character as the
+ // delimiter. Set e to the position following the trailing delimiter
+ // (relative to the beginning of s). Throw std::invalid_argument if the
+ // string is empty, any of the delimiters is missing, or the regex is empty.
+ //
+ template <typename C>
+ std::pair<std::basic_string<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s, size_t n, size_t& e)
+ {
+   using namespace std;
+
+   using string_type = basic_string<C>;
+
+   if (n == 0)
+     throw invalid_argument ("no leading delimiter");
+
+   const C* b (s); // Save the beginning of the string.
+
+   // Note that the delimiter must have the string's character type: keeping
+   // it in a plain char would truncate a wide delimiter and the searches
+   // below would then look for the wrong character.
+   //
+   C delim (s[0]);
+
+   // Position to the regex first character and find the regex-terminating
+   // delimiter.
+   //
+   --n;
+   ++s;
+
+   const C* p (string_type::traits_type::find (s, n, delim));
+
+   if (p == nullptr)
+     throw invalid_argument ("no delimiter after regex");
+
+   // Empty regex matches nothing, so not of much use.
+   //
+   if (p == s)
+     throw invalid_argument ("empty regex");
+
+   // Save the regex.
+   //
+   string_type re (s, p - s);
+
+   // Position to the format first character and find the trailing delimiter.
+   // Note that the remaining length excludes the regex and its delimiter.
+   //
+   n -= p - s + 1;
+   s = p + 1;
+
+   p = string_type::traits_type::find (s, n, delim);
+
+   if (p == nullptr)
+     throw invalid_argument ("no delimiter after replacement");
+
+   // Return the position past the trailing delimiter so that the caller can
+   // continue parsing (or detect junk) from there.
+   //
+   e = p - b + 1;
+   return make_pair (move (re), string_type (s, p - s));
+ }
}