diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2024-03-19 13:24:44 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2024-03-21 06:44:06 +0200 |
commit | 3a55e033e4fc9a18ede99c4f9dd69fd30c383cf7 (patch) | |
tree | 1c35062021a261479ff5f38d95dd22cfe34fbb38 /libbutl/utility.ixx | |
parent | 736b0f25003c92b3903798ce0a768230480d8f4b (diff) |
In particular, this version can be used to parse lines while observing
blanks.
Diffstat (limited to 'libbutl/utility.ixx')
-rw-r--r-- | libbutl/utility.ixx | 60 |
1 files changed, 60 insertions, 0 deletions
// From: diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx
//       index 0ce33a7..fda1ce5 100644, hunk @@ -210,6 +210,66 @@ namespace butl
//
// The function below is the addition made by this hunk. In the diff it is
// inserted after a function ending in `return e - b;` and before
// sanitize_identifier().

// Advance [b, e) to the next word in the first n characters of s, where
// words are separated by single d1 or d2 characters and consecutive
// delimiters delimit empty words. Return the length of the word (e - b).
//
// On entry b and e describe the previous word (both 0 for the first call)
// and m carries state between the calls (0 for the first call). When there
// are no more words, b and e are both set to n and 0 is returned. Note that
// 0 is also returned for an empty word, so the return value alone does not
// signal the end.
//
// An empty word has b == e pointing at a delimiter. Consider these corner
// cases, each of which should yield two words:
//
// \n
// a\n
// \na
//
// An empty word is represented by the position of its trailing delimiter,
// unless that delimiter is the last character, in which case there is
// nothing after it to point at. Hence the m state:
//
// 0, 1 -- number of delimiters to skip before parsing the next word
// 2    -- the delimiter just found is the last character and the next call
//         must fake an empty word positioned at that delimiter
//
inline std::size_t
next_word (const std::string& s,
           std::size_t n, std::size_t& b, std::size_t& e, std::size_t& m,
           char d1, char d2)
{
  // Continue from where the previous word ended.
  //
  b = e;

  // A pending trailing empty word: report it at the delimiter position and
  // leave one delimiter to be skipped by the next call.
  //
  if (m >= 2)
  {
    m -= 1;
    return 0;
  }

  // Step over the delimiter that terminated the previous word, if any.
  //
  b += m;

  // Nothing left to parse.
  //
  if (b == n)
  {
    e = n;
    return 0;
  }

  // Scan forward to the first delimiter or to the end of the range.
  //
  std::size_t i (b);
  while (i != n && s[i] != d1 && s[i] != d2)
    ++i;

  e = i;

  if (i == n)
    m = 0;                      // No trailing delimiter.
  else
    m = (i + 1 == n ? 2 : 1);   // The last-character delimiter is special.

  return e - b;
}