1 files changed, 175 insertions, 63 deletions
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 9514bbd..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -1,39 +1,10 @@
 // file      : libbutl/manifest-parser.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-parser.mxx>
-#endif
+#include <libbutl/manifest-parser.hxx>
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#include <vector>
-#include <cstdint>
-#include <utility>
-#include <stdexcept>
-
-#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#endif
-
-#endif
+#include <cassert>
 
 using namespace std;
 
@@ -177,41 +148,136 @@ namespace butl
   {
     using iterator = string::const_iterator;
 
-    auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+    // Parse the value differently depending on whether it is multi-line or
+    // not.
+    //
+    if (v.find ('\n') == string::npos) // Single-line.
+    {
+      auto space = [] (char c) {return c == ' ' || c == '\t';};
 
-    iterator i (v.begin ());
-    iterator e (v.end ());
+      iterator i (v.begin ());
+      iterator e (v.end ());
 
-    string r;
-    size_t n (0);
-    for (char c; i != e && (c = *i) != ';'; ++i)
-    {
-      // Unescape ';' character.
+      string r;
+      size_t n (0);
+      for (char c; i != e && (c = *i) != ';'; ++i)
+      {
+        // Unescape ';' and '\' characters.
+        //
+        if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+          c = *++i;
+
+        r += c;
+
+        if (!space (c))
+          n = r.size ();
+      }
+
+      // Strip trailing spaces from the value.
       //
-      if (c == '\\' && i + 1 != e && *(i + 1) == ';')
-        c = *++i;
+      if (r.size () != n)
+        r.resize (n);
 
-      r += c;
+      // Find the beginning of the comment (i).
+      //
+      if (i != e)
+      {
+        // Skip spaces.
+        //
+        for (++i; i != e && space (*i); ++i);
+      }
 
-      if (!space (c))
-        n = r.size ();
+      return make_pair (move (r), string (i, e));
     }
+    else // Multi-line.
+    {
+      string r;
+      string c;
 
-    // Strip the value trailing spaces.
-    //
-    if (r.size () != n)
-      r.resize (n);
+      // Parse the value lines until the comment separator is encountered or
+      // the end of the value is reached. Add these lines to the resulting
+      // value, unescaping them if required.
+      //
+      // Note that we only need to unescape lines which have the '\+;' form.
+      //
+      auto i (v.begin ());
+      auto e (v.end ());
 
-    // Find beginning of a comment (i).
-    //
-    if (i != e)
-    {
-      // Skip spaces.
+      while (i != e)
+      {
+        // Find the end of the line (le) and, while at it, the first
+        // non-backslash character on it (nb).
+        //
+        auto le (i);
+        auto nb (e);
+        for (; le != e && *le != '\n'; ++le)
+        {
+          if (nb == e && *le != '\\')
+            nb = le;
+        }
+
+        // Position to the beginning of the next line if the end of the value
+        // is not reached, and to the end of the value otherwise.
+        //
+        auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+        // If the first non-backslash character is ';' and it is the last
+        // character on the line, then this is either the comment separator or
+        // an escape sequence.
+        //
+        if (nb != e && *nb == ';' && nb + 1 == le)
+        {
+          // If ';' is the first (and thus the only) character on the line,
+          // then this is the comment separator and we bail out from this
+          // loop. Note that in this case we need to trim the trailing newline
+          // (but only one) from the resulting value since it is considered
+          // a part of the separator.
+          //
+          if (nb == i)
+          {
+            if (!r.empty ())
+            {
+              assert (r.back () == '\n');
+              r.pop_back ();
+            }
+
+            next ();
+            break;
+          }
+          //
+          // Otherwise, this is an escape sequence, so unescape it. To do that,
+          // just take the rightmost half of the line:
+          //
+          // \;     -> ;
+          // \\;    -> \;
+          // \\\;   -> \;
+          // \\\\;  -> \\;
+          // \\\\\; -> \\;
+          //
+          else
+            i += (le - i) / 2;
+        }
+
+        // Add the line to the resulting value together with the trailing
+        // newline, if present.
+        //
+        r.append (i, le);
+
+        if (le != e)
+          r += '\n';
+
+        next ();
+      }
+
+      // If we haven't reached the end of the value then it means we've
+      // encountered the comment separator. In this case save the remaining
+      // value part as a comment.
       //
-      for (++i; i != e && space (*i); ++i);
-    }
+      if (i != e)
+        c = string (i, e);
 
-    return make_pair (move (r), string (i, e));
+      return make_pair (move (r), move (c));
+    }
   }
 
   void manifest_parser::
@@ -251,7 +317,8 @@ namespace butl
     string& v (r.value);
     string::size_type n (0); // Size of last non-space character (simple mode).
 
-    // Detect the multi-line mode introductor.
+    // Detect the old-fashioned multi-line mode introducer (like in
+    // 'foo:\<newline>').
     //
     bool ml (false);
     if (c == '\\')
@@ -266,11 +333,46 @@ namespace butl
         ml = true;
       }
       else if (eos (p))
+      {
+        c = p;     // Set to EOF.
         ml = true;
+      }
       else
         unget (c);
     }
 
+    // Detect the new-fashioned multi-line mode introducer (like in
+    // 'foo:<newline>\<newline>').
+    //
+    if (!ml && c == '\n')
+    {
+      get ();
+      xchar p1 (peek ());
+
+      if (p1 == '\\')
+      {
+        get ();
+        xchar p2 (peek ());
+
+        if (p2 == '\n')
+        {
+          get (); // Newline is not part of the value so skip it.
+          c = peek ();
+          ml = true;
+        }
+        else if (eos (p2))
+        {
+          c = p2;    // Set to EOF.
+          ml = true;
+        }
+        else
+          unget (p1);  // Unget '\\'. Note: '\n' will be ungot below.
+      }
+
+      if (!ml)
+        unget (c); // Unget '\n'.
+    }
+
     // Multi-line value starts from the line that follows the name.
     //
     if (ml)
@@ -281,7 +383,7 @@ namespace butl
 
     // The nl flag signals that the preceding character was a "special
     // newline", that is, a newline that was part of the milti-line mode
-    // introductor or an escape sequence.
+    // introducer or an escape sequence.
     //
     for (bool nl (ml); !eos (c); c = peek ())
     {
@@ -299,7 +401,7 @@ namespace butl
       //
       // The first block handles the special sequence that starts with
       // a special newline. In multi-line mode, this is an "immediate
-      // termination" where we "use" the newline from the introductor.
+      // termination" where we "use" the newline from the introducer.
       // Note also that in the simple mode the special sequence can
       // only start with a special (i.e., escaped) newline.
       //
@@ -472,11 +574,21 @@ namespace butl
   static inline string
   format (const string& n, uint64_t l, uint64_t c, const string& d)
   {
-    ostringstream os;
+    using std::to_string;
+
+    string r;
     if (!n.empty ())
-      os << n << ':';
-    os << l << ':' << c << ": error: " << d;
-    return os.str ();
+    {
+      r += n;
+      r += ':';
+    }
+
+    r += to_string (l);
+    r += ':';
+    r += to_string (c);
+    r += ": error: ";
+    r += d;
+    return r;
   }
 
   manifest_parsing::