From 72648921ec28903615698a61aeff4799e1ca9a7d Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Tue, 9 Jun 2015 19:50:58 +0200
Subject: Implement low-level manifest parser

---
 bpkg/buildfile                   |   5 +
 bpkg/manifest-parser             | 141 ++++++++++++
 bpkg/manifest-parser.cxx         | 453 +++++++++++++++++++++++++++++++++++++++
 build/.gitignore                 |   1 +
 build/bootstrap.build            |   6 +
 build/root.build                 |   8 +
 buildfile                        |   7 +
 tests/.gitignore                 |   1 +
 tests/buildfile                  |   7 +
 tests/manifest-parser/buildfile  |   7 +
 tests/manifest-parser/driver.cxx | 210 ++++++++++++++++++
 11 files changed, 846 insertions(+)
 create mode 100644 bpkg/buildfile
 create mode 100644 bpkg/manifest-parser
 create mode 100644 bpkg/manifest-parser.cxx
 create mode 100644 build/.gitignore
 create mode 100644 build/bootstrap.build
 create mode 100644 build/root.build
 create mode 100644 buildfile
 create mode 100644 tests/.gitignore
 create mode 100644 tests/buildfile
 create mode 100644 tests/manifest-parser/buildfile
 create mode 100644 tests/manifest-parser/driver.cxx
diff --git a/bpkg/buildfile b/bpkg/buildfile
new file mode 100644
index 0000000..b7c1c9f
--- /dev/null
+++ b/bpkg/buildfile
@@ -0,0 +1,5 @@
+# file      : bpkg/buildfile
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+lib{bpkg}: cxx{manifest-parser}
diff --git a/bpkg/manifest-parser b/bpkg/manifest-parser
new file mode 100644
index 0000000..2b1e4a5
--- /dev/null
+++ b/bpkg/manifest-parser
@@ -0,0 +1,141 @@
+// file      : bpkg/manifest-parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BPKG_MANIFEST_PARSER
+#define BPKG_MANIFEST_PARSER
+
+#include <string>
+#include <iosfwd>
+#include <cstdint>   // uint64_t
+#include <stdexcept> // runtime_error
+
+namespace bpkg
+{
+  class manifest_parsing: public std::runtime_error
+  {
+  public:
+    manifest_parsing (const std::string& name,
+                      std::uint64_t line,
+                      std::uint64_t column,
+                      const std::string& description);
+
+    std::string name;
+    std::uint64_t line;
+    std::uint64_t column;
+    std::string description;
+  };
+
+  class manifest_parser
+  {
+  public:
+    manifest_parser (std::istream& is, const std::string& name)
+        : is_ (is), name_ (name) {}
+
+    const std::string&
+    name () const {return name_;}
+
+    struct name_value_type
+    {
+      std::string name;
+      std::string value;
+
+      std::uint64_t name_line;
+      std::uint64_t name_column;
+
+      std::uint64_t value_line;
+      std::uint64_t value_column;
+    };
+
+    // The first returned pair is special "start-of-manifest" with
+    // empty name and value being the format version: {"", "<ver>"}.
+    // After that we have a sequence of ordinary pairs which are
+    // the manifest. At the end of the manifest we have the special
+    // "end-of-manifest" pair with empty name and value: {"", ""}.
+    // After that we can either get another start-of-manifest pair
+    // (in which case the whole sequence repeats from the beginning)
+    // or we get another end-of-manifest pair which signals the end
+    // of stream (aka EOF). To put it another way, the parse sequence
+    // always has the following form:
+    //
+    // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
+    //
+    name_value_type
+    next ();
+
+  private:
+    class xchar
+    {
+    public:
+      typedef std::char_traits<char> traits_type;
+      typedef traits_type::int_type int_type;
+      typedef traits_type::char_type char_type;
+
+      xchar (int_type v, std::uint64_t l, std::uint64_t c)
+          : v_ (v), l_ (l), c_ (c) {}
+
+      operator char_type () const {return static_cast<char_type> (v_);}
+
+      int_type
+      value () const {return v_;}
+
+      std::uint64_t line () const {return l_;}
+      std::uint64_t column () const {return c_;}
+
+    private:
+      int_type v_;
+      std::uint64_t l_;
+      std::uint64_t c_;
+    };
+
+  private:
+    void
+    parse_name (name_value_type&);
+
+    void
+    parse_value (name_value_type&);
+
+    // Skip spaces and return the first peeked non-space character.
+    //
+    xchar
+    skip_spaces ();
+
+    // Character interface.
+    //
+  private:
+    xchar
+    peek ();
+
+    xchar
+    get ();
+
+    void
+    unget (const xchar&);
+
+    // Tests.
+    //
+    bool
+    is_eos (const xchar& c) const
+    {
+      return c.value () == xchar::traits_type::eof ();
+    }
+
+  private:
+    enum {start, body, eos} s_ = start;
+    std::string version_; // Current format version.
+
+  private:
+    std::istream& is_;
+    const std::string name_;
+
+    std::uint64_t l_ {1};
+    std::uint64_t c_ {1};
+
+    bool unget_ {false};
+    xchar buf_ {0, 0, 0};
+
+    bool eos_ {false};
+  };
+}
+
+#endif // BPKG_MANIFEST_PARSER
diff --git a/bpkg/manifest-parser.cxx b/bpkg/manifest-parser.cxx
new file mode 100644
index 0000000..374de3e
--- /dev/null
+++ b/bpkg/manifest-parser.cxx
@@ -0,0 +1,453 @@
+// file      : bpkg/manifest-parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <bpkg/manifest-parser>
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+namespace bpkg
+{
+  using name_value = manifest_parser::name_value_type;
+
+  name_value manifest_parser::
+  next ()
+  {
+    if (s_ == eos)
+      return name_value {"", "", l_, c_, l_, c_};
+
+    xchar c (skip_spaces ());
+
+    // Here is the problem: if we are in the 'body' state (that is,
+    // we are parsing inside the manifest) and we see the special
+    // empty name, then before returning the "start" pair for the
+    // next manifest, we have to return the "end" pair. One way
+    // would be to cache the "start" pair and return it on the
+    // next call of next(). But that would require quite a bit
+    // of extra logic. The alternative is to detect the beginning
+    // of the empty name before parsing too far. This way, the
+    // next call to next() will start parsing where we left of
+    // and return the "start" pair naturally.
+    //
+    if (s_ == body && c == ':')
+    {
+      s_ = start;
+      uint64_t ln (c.line ()), cn (c.column ());
+      return name_value {"", "", ln, cn, ln, cn};
+    }
+
+    // Regardless of the state, what should come next is a name,
+    // potentially the special empty one.
+    //
+    name_value r;
+    parse_name (r);
+
+    skip_spaces ();
+    c = get ();
+
+    if (is_eos (c))
+    {
+      // This is ok as long as the name is empty.
+      //
+      if (!r.name.empty ())
+        throw manifest_parsing (name_, c.line (), c.column (),
+                                "':' expected after name");
+
+      s_ = eos;
+
+      // The "end" pair.
+      //
+      r.value_line = r.name_line;
+      r.value_column = r.name_column;
+      return r;
+    }
+
+    if (c != ':')
+      throw manifest_parsing (name_, c.line (), c.column (),
+                              "':' expected after name");
+
+    skip_spaces ();
+    parse_value (r);
+
+    c = peek ();
+
+    // The character after the value should be either a newline or eos.
+    //
+    assert (c == '\n' || is_eos (c));
+
+    if (c == '\n')
+      get ();
+
+    // Now figure out whether what we've got makes sense, depending
+    // on the state we are in.
+    //
+    if (s_ == start)
+    {
+      // Start of the (next) manifest. The first pair should be the
+      // special empty name/format version.
+      //
+      if (!r.name.empty ())
+        throw manifest_parsing (name_, r.name_line, r.name_column,
+                                "format version pair expected");
+
+      // The version value is only mandatory for the first manifest in
+      // a sequence.
+      //
+      if (r.value.empty ())
+      {
+        if (version_.empty ())
+          throw manifest_parsing (name_, r.value_line, r.value_column,
+                                  "format version value expected");
+        r.value = version_;
+      }
+      else
+      {
+        version_ = r.value; // Update with the latest.
+
+        if (version_ != "1")
+          throw manifest_parsing (name_, r.value_line, r.value_column,
+                                  "unsupported format version " + version_);
+      }
+
+      s_ = body;
+    }
+    else
+    {
+      // Parsing the body of the manifest.
+      //
+
+      // Should have been handled by the special case above.
+      //
+      assert (!r.name.empty ());
+    }
+
+    return r;
+  }
+
+  void manifest_parser::
+  parse_name (name_value& r)
+  {
+    xchar c (peek ());
+
+    r.name_line = c.line ();
+    r.name_column = c.column ();
+
+    for (; !is_eos (c); c = peek ())
+    {
+      if (c == ':' || c == ' ' || c == '\t' || c == '\n')
+        break;
+
+      r.name += c;
+      get ();
+    }
+  }
+
+  void manifest_parser::
+  parse_value (name_value& r)
+  {
+    xchar c (peek ());
+
+    r.value_line = c.line ();
+    r.value_column = c.column ();
+
+    string& v (r.value);
+    string::size_type n (0); // Size of last non-space character (simpel mode).
+
+    // Detect the multi-line mode introductor.
+    //
+    bool ml (false);
+    if (c == '\\')
+    {
+      get ();
+      xchar p (peek ());
+
+      if (p == '\n')
+      {
+        get (); // Newline is not part of the value so skip it.
+        c = peek ();
+        ml = true;
+      }
+      else if (is_eos (p))
+        ml = true;
+      else
+        unget (c);
+    }
+
+    // The nl flag signals that the preceding character was a "special
+    // newline", that is, a newline that was part of the milti-line mode
+    // introductor or an escape sequence.
+    //
+    for (bool nl (ml); !is_eos (c); c = peek ())
+    {
+      // Detect the special "\n\\\n" sequence. In the multi-line mode,
+      // this is a "terminator". In the simple mode, this is a way to
+      // specify a newline.
+      //
+      // The key idea here is this: if we "swallowed" any characters
+      // (i.e., called get() without a matching unget()), then we
+      // have to restart the loop in order to do all the tests for
+      // the next character. Also, for this to work, we can only
+      // add one character to v, which limits us to maximum three
+      // characters look-ahead: one in v, one "ungot", and one
+      // peeked.
+      //
+      // The first block handles the special sequence that starts with
+      // a special newline. In multi-line mode, this is an "immediate
+      // termination" where we "use" the newline from the introductor.
+      // Note also that in the simple mode the special sequence can
+      // only start with a special (i.e., escaped) newline.
+      //
+      if (nl)
+      {
+        nl = false;
+
+        if (c == '\\')
+        {
+          get ();
+          xchar c1 (peek ());
+
+          if (c1 == '\n' || is_eos (c1))
+          {
+            if (ml)
+              break;
+            else
+            {
+              if (c1 == '\n')
+                get ();
+
+              v += '\n'; // Literal newline.
+              n++;
+              continue; // Restart from the next character.
+            }
+          }
+          else
+            unget (c); // Fall through.
+        }
+      }
+
+      if (c == '\n')
+      {
+        if (ml)
+        {
+          get ();
+          xchar c1 (peek ());
+
+          if (c1 == '\\')
+          {
+            get ();
+            xchar c2 (peek ());
+
+            if (c2 == '\n' || is_eos (c2))
+              break;
+            else
+            {
+              v += '\n';
+              unget (c1);
+              continue; // Restart from c1 (slash).
+            }
+          }
+          else
+            unget (c); // Fall through.
+        }
+        else
+          break; // Simple value terminator.
+      }
+
+      // Detect the newline escape sequence. The same look-ahead
+      // approach as above.
+      //
+      if (c == '\\')
+      {
+        get ();
+        xchar c1 (peek ());
+
+        if (c1 == '\n' || is_eos (c1))
+        {
+          if (c1 == '\n')
+          {
+            get ();
+            nl = true; // This is a special newline.
+          }
+          continue; // Restart from the next character.
+        }
+        else if (c1 == '\\')
+        {
+          get ();
+          xchar c2 (peek ());
+
+          if (c2 == '\n' || is_eos (c1))
+          {
+            v += '\\';
+            n++;
+            // Restart from c2 (newline/eos).
+          }
+          else
+          {
+            v += '\\';
+            n++;
+            unget (c1); // Restart from c1 (second slash).
+          }
+
+          continue;
+        }
+        else
+          unget (c); // Fall through.
+      }
+
+      get ();
+      v += c;
+
+      if (!ml)
+      {
+        if (c != ' ' && c != '\t')
+          n++;
+      }
+    }
+
+    // Cut off trailing whitespaces.
+    //
+    if (!ml)
+      v.resize (n);
+  }
+
+  manifest_parser::xchar manifest_parser::
+  skip_spaces ()
+  {
+    xchar c (peek ());
+    bool start (c.column () == 1);
+
+    for (; !is_eos (c); c = peek ())
+    {
+      switch (c)
+      {
+      case ' ':
+      case '\t':
+        break;
+      case '\n':
+        {
+          // Skip empty lines.
+          //
+          if (!start)
+            return c;
+
+          break;
+        }
+      case '#':
+        {
+          // We only recognize '#' as a start of a comment at the beginning
+          // of the line (sans leading spaces).
+          //
+          if (!start)
+            return c;
+
+          get ();
+
+          // Read until newline or eos.
+          //
+          for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
+            get ();
+
+          continue;
+        }
+      default:
+        return c; // Not a space.
+      }
+
+      get ();
+    }
+
+    return c;
+  }
+
+  // Character interface.
+  //
+
+  manifest_parser::xchar manifest_parser::
+  peek ()
+  {
+    if (unget_)
+      return buf_;
+    else
+    {
+      if (eos_)
+        return xchar (xchar::traits_type::eof (), l_, c_);
+      else
+      {
+        xchar::int_type v (is_.peek ());
+
+        if (v == xchar::traits_type::eof ())
+          eos_ = true;
+
+        return xchar (v, l_, c_);
+      }
+    }
+  }
+
+  manifest_parser::xchar manifest_parser::
+  get ()
+  {
+    if (unget_)
+    {
+      unget_ = false;
+      return buf_;
+    }
+    else
+    {
+      // When is_.get () returns eof, the failbit is also set (stupid,
+      // isn't?) which may trigger an exception. To work around this
+      // we will call peek() first and only call get() if it is not
+      // eof. But we can only call peek() on eof once; any subsequent
+      // calls will spoil the failbit (even more stupid).
+      //
+      xchar c (peek ());
+
+      if (!is_eos (c))
+      {
+        is_.get ();
+
+        if (c == '\n')
+        {
+          l_++;
+          c_ = 1;
+        }
+        else
+          c_++;
+      }
+
+      return c;
+    }
+  }
+
+  void manifest_parser::
+  unget (const xchar& c)
+  {
+    // Because iostream::unget cannot work once eos is reached,
+    // we have to provide our own implementation.
+    //
+    buf_ = c;
+    unget_ = true;
+  }
+
+  // manifest_parsing
+  //
+
+  static string
+  format (const string& n, uint64_t l, uint64_t c, const string& d)
+  {
+    ostringstream os;
+    if (!n.empty ())
+      os << n << ':';
+    os << l << ':' << c << ": error: " << d;
+    return os.str ();
+  }
+
+  manifest_parsing::
+  manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d)
+      : runtime_error (format (n, l, c, d)),
+        name (n), line (l), column (c), description (d)
+  {
+  }
+}
diff --git a/build/.gitignore b/build/.gitignore
new file mode 100644
index 0000000..225c27f
--- /dev/null
+++ b/build/.gitignore
@@ -0,0 +1 @@
+config.build
diff --git a/build/bootstrap.build b/build/bootstrap.build
new file mode 100644
index 0000000..0fff8ad
--- /dev/null
+++ b/build/bootstrap.build
@@ -0,0 +1,6 @@
+# file      : build/bootstrap.build
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+project = libbpkg
+using config
diff --git a/build/root.build b/build/root.build
new file mode 100644
index 0000000..dc1c297
--- /dev/null
+++ b/build/root.build
@@ -0,0 +1,8 @@
+# file      : build/root.build
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+using cxx
+
+cxx.std = 14
+cxx.poptions += -I$src_root
diff --git a/buildfile b/buildfile
new file mode 100644
index 0000000..b9c4507
--- /dev/null
+++ b/buildfile
@@ -0,0 +1,7 @@
+# file      : buildfile
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+d = bpkg/ tests/
+.: $d
+include $d
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..e54525b
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1 @@
+driver
diff --git a/tests/buildfile b/tests/buildfile
new file mode 100644
index 0000000..8832040
--- /dev/null
+++ b/tests/buildfile
@@ -0,0 +1,7 @@
+# file      : tests/buildfile
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+d = manifest-parser/
+.: $d
+include $d
diff --git a/tests/manifest-parser/buildfile b/tests/manifest-parser/buildfile
new file mode 100644
index 0000000..4fcf735
--- /dev/null
+++ b/tests/manifest-parser/buildfile
@@ -0,0 +1,7 @@
+# file      : tests/manifest-parser/buildfile
+# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../bpkg/lib{bpkg}
+
+include ../../bpkg/
diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx
new file mode 100644
index 0000000..b86b806
--- /dev/null
+++ b/tests/manifest-parser/driver.cxx
@@ -0,0 +1,210 @@
+// file      : tests/manifest-parser/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <vector>
+#include <string>
+#include <utility> // pair
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <bpkg/manifest-parser>
+
+using namespace std;
+using namespace bpkg;
+
+using pairs = vector<pair<string, string>>;
+
+static bool
+parse (const char* manifest, const pairs& expected);
+
+static bool
+fail (const char* manifest);
+
+int
+main ()
+{
+  // Whitespaces and comments.
+  //
+  assert (parse (" \t", {{"",""}}));
+  assert (parse (" \t\n \n\n", {{"",""}}));
+  assert (parse ("# one\n  #two", {{"",""}}));
+
+  // Test encountering eos at various points.
+  //
+  assert (parse ("", {{"",""}}));
+  assert (parse (" ", {{"",""}}));
+  assert (parse ("\n", {{"",""}}));
+  assert (fail ("a"));
+  assert (parse (":1\na:", {{"","1"},{"a", ""},{"",""},{"",""}}));
+
+  // Invalid manifests.
+  //
+  assert (fail ("a:"));          // format version pair expected
+  assert (fail (":"));           // format version value expected
+  assert (fail (":9"));          // unsupported format version
+  assert (fail ("a"));           // ':' expected after name
+  assert (fail ("a b"));         // ':' expected after name
+  assert (fail ("a\tb"));        // ':' expected after name
+  assert (fail ("a\nb"));        // ':' expected after name
+  assert (fail (":1\na:b\n:9")); // unsupported format version
+
+  // Empty manifest.
+  //
+  assert (parse (":1", {{"","1"},{"",""},{"",""}}));
+  assert (parse (" \t :1", {{"","1"},{"",""},{"",""}}));
+  assert (parse (" \t : 1", {{"","1"},{"",""},{"",""}}));
+  assert (parse (" \t : 1 ", {{"","1"},{"",""},{"",""}}));
+  assert (parse (":1\n", {{"","1"},{"",""},{"",""}}));
+  assert (parse (":1 \n", {{"","1"},{"",""},{"",""}}));
+
+  // Single manifest.
+  //
+  assert (parse (":1\na:x", {{"","1"},{"a", "x"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\n", {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\nb:y",
+                 {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\n\tb : y\n  #comment",
+                 {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}}));
+
+  // Multiple manifests.
+  //
+  assert (parse (":1\na:x\n:\nb:y",
+                 {{"","1"},{"a", "x"},{"",""},
+                  {"","1"},{"b", "y"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\n:1\nb:y",
+                 {{"","1"},{"a", "x"},{"",""},
+                  {"","1"},{"b", "y"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\n:\nb:y\n:\nc:z\n",
+                 {{"","1"},{"a", "x"},{"",""},
+                  {"","1"},{"b", "y"},{"",""},
+                  {"","1"},{"c", "z"},{"",""},{"",""}}));
+
+  // Name parsing.
+  //
+  assert (parse (":1\nabc:", {{"","1"},{"abc",""},{"",""},{"",""}}));
+  assert (parse (":1\nabc :", {{"","1"},{"abc",""},{"",""},{"",""}}));
+  assert (parse (":1\nabc\t:", {{"","1"},{"abc",""},{"",""},{"",""}}));
+
+  // Simple value parsing.
+  //
+  assert (parse (":1\na: \t xyz \t ", {{"","1"},{"a","xyz"},{"",""},{"",""}}));
+
+  // Simple value escaping.
+  //
+  assert (parse (":1\na:x\\", {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\\\ny", {{"","1"},{"a","xy"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\\\\\nb:",
+                 {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}}));
+  assert (parse (":1\na:x\\\\\\\nb:",
+                 {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}}));
+
+  // Simple value literal newline.
+  //
+  assert (parse (":1\na:x\\\n\\",
+                 {{"","1"},{"a","x\n"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\\\n\\\ny",
+                 {{"","1"},{"a","x\ny"},{"",""},{"",""}}));
+  assert (parse (":1\na:x\\\n\\\ny\\\n\\\nz",
+                 {{"","1"},{"a","x\ny\nz"},{"",""},{"",""}}));
+
+  // Multi-line value parsing.
+  //
+  assert (parse (":1\na:\\", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (parse (":1\na:\\x", {{"","1"},{"a", "\\x"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n\\", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n\\\n", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n\\x\n\\",
+                 {{"","1"},{"a", "\\x"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\nx\ny", {{"","1"},{"a", "x\ny"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n \n#\t\n\\",
+                 {{"","1"},{"a", " \n#\t"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\n\n\n\\", {{"","1"},{"a", "\n"},{"",""},{"",""}}));
+
+  // Multi-line value escaping.
+  //
+  assert (parse (":1\na:\\\nx\\",
+                 {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\nx\\\ny\n\\",
+                 {{"","1"},{"a","xy"},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\nx\\\\\n\\\nb:",
+                 {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}}));
+  assert (parse (":1\na:\\\nx\\\\\\\n\\\nb:",
+                 {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}}));
+}
+
+static std::ostream&
+operator<< (std::ostream& os, const pairs& ps)
+{
+  os << '{';
+
+  bool f (true);
+  for (const auto& p: ps)
+    os << (f ? (f = false, "") : ",")
+       << '{' << p.first << ',' << p.second << '}';
+
+  os << '}';
+  return os;
+}
+
+static pairs
+parse (const char* m)
+{
+  istringstream is (m);
+  is.exceptions (istream::failbit | istream::badbit);
+  manifest_parser p (is, "");
+
+  pairs r;
+
+  for (bool eom (true), eos (false); !eos; )
+  {
+    auto nv (p.next ());
+
+    if (nv.name.empty () && nv.value.empty ()) // End pair.
+    {
+      eos = eom;
+      eom = true;
+    }
+    else
+      eom = false;
+
+    r.emplace_back (nv.name, nv.value); // move
+  }
+
+  return r;
+}
+
+static bool
+parse (const char* m, const pairs& e)
+{
+  pairs r (parse (m));
+
+  if (r != e)
+  {
+    cerr << "actual: " << r << endl
+         << "expect: " << e << endl;
+
+    return false;
+  }
+
+  return true;
+}
+
+static bool
+fail (const char* m)
+{
+  try
+  {
+    pairs r (parse (m));
+    cerr << "nofail: " << r << endl;
+    return false;
+  }
+  catch (const manifest_parsing& e)
+  {
+    cerr << e.what () << endl;
+  }
+
+  return true;
+}
-- 
cgit v1.1