From 3a55e033e4fc9a18ede99c4f9dd69fd30c383cf7 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Tue, 19 Mar 2024 13:24:44 +0200
Subject: Add next_word() overload that doesn't skip consecutive delimiters

In particular, this version can be used to parse lines while observing
blanks.
---
 libbutl/utility.ixx | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'libbutl/utility.ixx')

diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx
index 0ce33a7..fda1ce5 100644
--- a/libbutl/utility.ixx
+++ b/libbutl/utility.ixx
@@ -210,6 +210,66 @@ namespace butl
     return e - b;
   }
 
+  inline std::size_t
+  next_word (const std::string& s,
+             std::size_t n, std::size_t& b, std::size_t& e, std::size_t& m,
+             char d1, char d2)
+  {
+    // Find the next word in the first n characters of s, where each d1 or d2
+    // character separates two words (so adjacent delimiters produce empty
+    // words). Set b and e to the word's [b, e) range and return its length
+    // (0 for an empty word and at the end, where b == e == n). NOTE(review):
+    // presumably the caller starts with b, e, and m all 0 -- TODO confirm.
+    //
+    // An empty word will necessarily be represented as b and e being the
+    // position of a delimiter. Consider these corner cases (in all three we
+    // should produce two words): "\n", "a\n", and "\na".
+    //
+    // It feels sensible to represent an empty word as the position of the
+    // trailing delimiter except if it is the last character (the first two
+    // cases). Thus the additional m state, which, if 0 or 1, is the number
+    // of delimiters to skip before parsing the next word and, if 2, marks a
+    // last-character delimiter whose empty word is faked by the next call.
+    if (b != e)
+      b = e; // Continue from where the previous word ended.
+
+    if (m > 1)
+    {
+      --m;      // If m was 2: stay on the delimiter, the next call skips it.
+      return 0; // The faked empty word (b == e, unchanged).
+    }
+
+    // Skip the leading delimiter, if any.
+    //
+    b += m;
+
+    if (b == n)
+    {
+      e = n; // Nothing left: report an empty range at the end.
+      return 0;
+    }
+
+    // Find the first trailing delimiter (e ends up as n if there is none).
+    //
+    m = 0;
+    for (e = b; e != n; ++e)
+    {
+      if (s[e] == d1 || s[e] == d2)
+      {
+        m = 1; // One delimiter to skip on the next call.
+
+        // Handle the special case of the delimiter being the last character.
+        //
+        if (e + 1 == n)
+          ++m;
+
+        break;
+      }
+    }
+
+    return e - b;
+  }
+
   inline std::string&
   sanitize_identifier (std::string& s)
   {
-- 
cgit v1.1