14 files changed, 1385 insertions, 22 deletions
diff --git a/butl/buildfile b/butl/buildfile
index 1a9787a..fcb5f86 100644
--- a/butl/buildfile
+++ b/butl/buildfile
@@ -2,27 +2,30 @@
 # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
 # license   : MIT; see accompanying LICENSE file
 
-lib{butl}:                         \
-{hxx         cxx}{ base64        } \
-{hxx         cxx}{ char-scanner  } \
-{hxx            }{ export        } \
-{hxx ixx     cxx}{ fdstream      } \
-{hxx ixx     cxx}{ filesystem    } \
-{hxx            }{ multi-index   } \
-{hxx            }{ optional      } \
-{hxx         cxx}{ pager         } \
-{hxx ixx txx cxx}{ path          } \
-{hxx            }{ path-io       } \
-{hxx            }{ path-map      } \
-{hxx     txx    }{ prefix-map    } \
-{hxx ixx     cxx}{ process       } \
-{hxx         cxx}{ sha256        } \
-{hxx     txx    }{ string-table  } \
-{hxx         cxx}{ timestamp     } \
-{hxx         cxx}{ triplet       } \
-{hxx ixx        }{ utility       } \
-{hxx            }{ vector-view   } \
-{hxx            }{ version       }
+lib{butl}:                               \
+{hxx         cxx}{ base64              } \
+{hxx         cxx}{ char-scanner        } \
+{hxx            }{ export              } \
+{hxx ixx     cxx}{ fdstream            } \
+{hxx ixx     cxx}{ filesystem          } \
+{hxx            }{ manifest-forward    } \
+{hxx         cxx}{ manifest-parser     } \
+{hxx         cxx}{ manifest-serializer } \
+{hxx            }{ multi-index         } \
+{hxx            }{ optional            } \
+{hxx         cxx}{ pager               } \
+{hxx ixx txx cxx}{ path                } \
+{hxx            }{ path-io             } \
+{hxx            }{ path-map            } \
+{hxx     txx    }{ prefix-map          } \
+{hxx ixx     cxx}{ process             } \
+{hxx         cxx}{ sha256              } \
+{hxx     txx    }{ string-table        } \
+{hxx         cxx}{ timestamp           } \
+{hxx         cxx}{ triplet             } \
+{hxx ixx        }{ utility             } \
+{hxx            }{ vector-view         } \
+{hxx            }{ version             }
 
 # Exclude these from compilation on non-Windows targets.
 #
diff --git a/butl/manifest-forward b/butl/manifest-forward
new file mode 100644
index 0000000..5dc5060
--- /dev/null
+++ b/butl/manifest-forward
@@ -0,0 +1,15 @@
+// file      : butl/manifest-forward -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_MANIFEST_FORWARD
+#define BUTL_MANIFEST_FORWARD
+
+namespace butl
+{
+  class manifest_parser;
+  class manifest_serializer;
+  class manifest_name_value;
+}
+
+#endif // BUTL_MANIFEST_FORWARD
diff --git a/butl/manifest-parser b/butl/manifest-parser
new file mode 100644
index 0000000..a005b34
--- /dev/null
+++ b/butl/manifest-parser
@@ -0,0 +1,94 @@
+// file      : butl/manifest-parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_MANIFEST_PARSER
+#define BUTL_MANIFEST_PARSER
+
+#include <string>
+#include <iosfwd>
+#include <cstdint>   // uint64_t
+#include <stdexcept> // runtime_error
+
+#include <butl/export>
+
+#include <butl/char-scanner>
+
+namespace butl
+{
+  class LIBBUTL_EXPORT manifest_parsing: public std::runtime_error
+  {
+  public:
+    manifest_parsing (const std::string& name,
+                      std::uint64_t line,
+                      std::uint64_t column,
+                      const std::string& description);
+
+    std::string name;
+    std::uint64_t line;
+    std::uint64_t column;
+    std::string description;
+  };
+
+  class manifest_name_value
+  {
+  public:
+    std::string name;
+    std::string value;
+
+    std::uint64_t name_line;
+    std::uint64_t name_column;
+
+    std::uint64_t value_line;
+    std::uint64_t value_column;
+
+    bool
+    empty () const {return name.empty () && value.empty ();}
+  };
+
+  class LIBBUTL_EXPORT manifest_parser: protected butl::char_scanner
+  {
+  public:
+    manifest_parser (std::istream& is, const std::string& name)
+        : char_scanner (is), name_ (name) {}
+
+    const std::string&
+    name () const {return name_;}
+
+    // The first returned pair is special "start-of-manifest" with
+    // empty name and value being the format version: {"", "<ver>"}.
+    // After that we have a sequence of ordinary pairs which are
+    // the manifest. At the end of the manifest we have the special
+    // "end-of-manifest" pair with empty name and value: {"", ""}.
+    // After that we can either get another start-of-manifest pair
+    // (in which case the whole sequence repeats from the beginning)
+    // or we get another end-of-manifest pair which signals the end
+    // of stream (aka EOF). To put it another way, the parse sequence
+    // always has the following form:
+    //
+    // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
+    //
+    manifest_name_value
+    next ();
+
+  private:
+    void
+    parse_name (manifest_name_value&);
+
+    void
+    parse_value (manifest_name_value&);
+
+    // Skip spaces and return the first peeked non-space character.
+    //
+    xchar
+    skip_spaces ();
+
+  private:
+    const std::string name_;
+
+    enum {start, body, end} s_ = start;
+    std::string version_; // Current format version.
+  };
+}
+
+#endif // BUTL_MANIFEST_PARSER
diff --git a/butl/manifest-parser.cxx b/butl/manifest-parser.cxx
new file mode 100644
index 0000000..ec26ca8
--- /dev/null
+++ b/butl/manifest-parser.cxx
@@ -0,0 +1,379 @@
+// file      : butl/manifest-parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <butl/manifest-parser>
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+namespace butl
+{
+  using parsing = manifest_parsing;
+  using name_value = manifest_name_value;
+
+  name_value manifest_parser::
+  next ()
+  {
+    if (s_ == end)
+      return name_value {"", "", line, column, line, column};
+
+    xchar c (skip_spaces ());
+
+    // Here is the problem: if we are in the 'body' state (that is,
+    // we are parsing inside the manifest) and we see the special
+    // empty name, then before returning the "start" pair for the
+    // next manifest, we have to return the "end" pair. One way
+    // would be to cache the "start" pair and return it on the
+    // next call of next(). But that would require quite a bit
+    // of extra logic. The alternative is to detect the beginning
+    // of the empty name before parsing too far. This way, the
+    // next call to next() will start parsing where we left off
+    // and return the "start" pair naturally.
+    //
+    if (s_ == body && c == ':')
+    {
+      s_ = start;
+      return name_value {"", "", c.line, c.column, c.line, c.column};
+    }
+
+    // Regardless of the state, what should come next is a name,
+    // potentially the special empty one.
+    //
+    name_value r;
+    parse_name (r);
+
+    skip_spaces ();
+    c = get ();
+
+    if (eos (c))
+    {
+      // This is ok as long as the name is empty.
+      //
+      if (!r.name.empty ())
+        throw parsing (name_, c.line, c.column, "':' expected after name");
+
+      s_ = end;
+
+      // The "end" pair.
+      //
+      r.value_line = r.name_line;
+      r.value_column = r.name_column;
+      return r;
+    }
+
+    if (c != ':')
+      throw parsing (name_, c.line, c.column, "':' expected after name");
+
+    skip_spaces ();
+    parse_value (r);
+
+    c = peek ();
+
+    // The character after the value should be either a newline or eos.
+    //
+    assert (c == '\n' || eos (c));
+
+    if (c == '\n')
+      get ();
+
+    // Now figure out whether what we've got makes sense, depending
+    // on the state we are in.
+    //
+    if (s_ == start)
+    {
+      // Start of the (next) manifest. The first pair should be the
+      // special empty name/format version.
+      //
+      if (!r.name.empty ())
+        throw parsing (name_, r.name_line, r.name_column,
+                       "format version pair expected");
+
+      // The version value is only mandatory for the first manifest in
+      // a sequence.
+      //
+      if (r.value.empty ())
+      {
+        if (version_.empty ())
+          throw parsing (name_, r.value_line, r.value_column,
+                         "format version value expected");
+        r.value = version_;
+      }
+      else
+      {
+        version_ = r.value; // Update with the latest.
+
+        if (version_ != "1")
+          throw parsing (name_, r.value_line, r.value_column,
+                         "unsupported format version " + version_);
+      }
+
+      s_ = body;
+    }
+    else
+    {
+      // Parsing the body of the manifest.
+      //
+
+      // Should have been handled by the special case above.
+      //
+      assert (!r.name.empty ());
+    }
+
+    return r;
+  }
+
+  void manifest_parser::
+  parse_name (name_value& r)
+  {
+    xchar c (peek ());
+
+    r.name_line = c.line;
+    r.name_column = c.column;
+
+    for (; !eos (c); c = peek ())
+    {
+      if (c == ':' || c == ' ' || c == '\t' || c == '\n')
+        break;
+
+      r.name += c;
+      get ();
+    }
+  }
+
+  void manifest_parser::
+  parse_value (name_value& r)
+  {
+    xchar c (peek ());
+
+    r.value_line = c.line;
+    r.value_column = c.column;
+
+    string& v (r.value);
+    string::size_type n (0); // Size up to last non-space char (simple mode).
+
+    // Detect the multi-line mode introductor.
+    //
+    bool ml (false);
+    if (c == '\\')
+    {
+      get ();
+      xchar p (peek ());
+
+      if (p == '\n')
+      {
+        get (); // Newline is not part of the value so skip it.
+        c = peek ();
+        ml = true;
+      }
+      else if (eos (p))
+        ml = true;
+      else
+        unget (c);
+    }
+
+    // The nl flag signals that the preceding character was a "special
+    // newline", that is, a newline that was part of the multi-line mode
+    // introductor or an escape sequence.
+    //
+    for (bool nl (ml); !eos (c); c = peek ())
+    {
+      // Detect the special "\n\\\n" sequence. In the multi-line mode,
+      // this is a "terminator". In the simple mode, this is a way to
+      // specify a newline.
+      //
+      // The key idea here is this: if we "swallowed" any characters
+      // (i.e., called get() without a matching unget()), then we
+      // have to restart the loop in order to do all the tests for
+      // the next character. Also, for this to work, we can only
+      // add one character to v, which limits us to maximum three
+      // characters look-ahead: one in v, one "ungot", and one
+      // peeked.
+      //
+      // The first block handles the special sequence that starts with
+      // a special newline. In multi-line mode, this is an "immediate
+      // termination" where we "use" the newline from the introductor.
+      // Note also that in the simple mode the special sequence can
+      // only start with a special (i.e., escaped) newline.
+      //
+      if (nl)
+      {
+        nl = false;
+
+        if (c == '\\')
+        {
+          get ();
+          xchar c1 (peek ());
+
+          if (c1 == '\n' || eos (c1))
+          {
+            if (ml)
+              break;
+            else
+            {
+              if (c1 == '\n')
+                get ();
+
+              v += '\n'; // Literal newline.
+              n = v.size ();
+              continue; // Restart from the next character.
+            }
+          }
+          else
+            unget (c); // Fall through.
+        }
+      }
+
+      if (c == '\n')
+      {
+        if (ml)
+        {
+          get ();
+          xchar c1 (peek ());
+
+          if (c1 == '\\')
+          {
+            get ();
+            xchar c2 (peek ());
+
+            if (c2 == '\n' || eos (c2))
+              break;
+            else
+            {
+              v += '\n';
+              unget (c1);
+              continue; // Restart from c1 (slash).
+            }
+          }
+          else
+            unget (c); // Fall through.
+        }
+        else
+          break; // Simple value terminator.
+      }
+
+      // Detect the newline escape sequence. The same look-ahead
+      // approach as above.
+      //
+      if (c == '\\')
+      {
+        get ();
+        xchar c1 (peek ());
+
+        if (c1 == '\n' || eos (c1))
+        {
+          if (c1 == '\n')
+          {
+            get ();
+            nl = true; // This is a special newline.
+          }
+          continue; // Restart from the next character.
+        }
+        else if (c1 == '\\')
+        {
+          get ();
+          xchar c2 (peek ());
+
+          if (c2 == '\n' || eos (c2))
+          {
+            v += '\\';
+            n = v.size ();
+            // Restart from c2 (newline/eos).
+          }
+          else
+          {
+            v += '\\';
+            n = v.size ();
+            unget (c1); // Restart from c1 (second slash).
+          }
+
+          continue;
+        }
+        else
+          unget (c); // Fall through.
+      }
+
+      get ();
+      v += c;
+
+      if (!ml && c != ' ' && c != '\t')
+        n = v.size ();
+    }
+
+    // Cut off trailing whitespaces.
+    //
+    if (!ml)
+      v.resize (n);
+  }
+
+  manifest_parser::xchar manifest_parser::
+  skip_spaces ()
+  {
+    xchar c (peek ());
+    bool start (c.column == 1);
+
+    for (; !eos (c); c = peek ())
+    {
+      switch (c)
+      {
+      case ' ':
+      case '\t':
+        break;
+      case '\n':
+        {
+          // Skip empty lines.
+          //
+          if (!start)
+            return c;
+
+          break;
+        }
+      case '#':
+        {
+          // We only recognize '#' as a start of a comment at the beginning
+          // of the line (sans leading spaces).
+          //
+          if (!start)
+            return c;
+
+          get ();
+
+          // Read until newline or eos.
+          //
+          for (c = peek (); !eos (c) && c != '\n'; c = peek ())
+            get ();
+
+          continue;
+        }
+      default:
+        return c; // Not a space.
+      }
+
+      get ();
+    }
+
+    return c;
+  }
+
+  // manifest_parsing
+  //
+
+  static string
+  format (const string& n, uint64_t l, uint64_t c, const string& d)
+  {
+    ostringstream os;
+    if (!n.empty ())
+      os << n << ':';
+    os << l << ':' << c << ": error: " << d;
+    return os.str ();
+  }
+
+  manifest_parsing::
+  manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d)
+      : runtime_error (format (n, l, c, d)),
+        name (n), line (l), column (c), description (d)
+  {
+  }
+}
diff --git a/butl/manifest-serializer b/butl/manifest-serializer
new file mode 100644
index 0000000..6d7eeec
--- /dev/null
+++ b/butl/manifest-serializer
@@ -0,0 +1,75 @@
+// file      : butl/manifest-serializer -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_MANIFEST_SERIALIZER
+#define BUTL_MANIFEST_SERIALIZER
+
+#include <string>
+#include <iosfwd>
+#include <cstddef>   // size_t
+#include <stdexcept> // runtime_error
+
+#include <butl/export>
+
+namespace butl
+{
+  class LIBBUTL_EXPORT manifest_serialization: public std::runtime_error
+  {
+  public:
+    manifest_serialization (const std::string& name,
+                            const std::string& description);
+
+    std::string name;
+    std::string description;
+  };
+
+  class LIBBUTL_EXPORT manifest_serializer
+  {
+  public:
+    manifest_serializer (std::ostream& os, const std::string& name)
+        : os_ (os), name_ (name) {}
+
+    const std::string&
+    name () const {return name_;}
+
+    // The first name-value pair should be the special "start-of-manifest"
+    // with empty name and value being the format version. After that we
+    // have a sequence of ordinary pairs which are the manifest. At the
+    // end of the manifest we have the special "end-of-manifest" pair
+    // with empty name and value. After that we can either have another
+    // start-of-manifest pair (in which case the whole sequence repeats
+    // from the beginning) or we get another end-of-manifest pair which
+    // signals the end of stream.
+    //
+    void
+    next (const std::string& name, const std::string& value);
+
+    // Write a comment. The supplied text is prefixed with "# " and
+    // terminated with a newline.
+    //
+    void
+    comment (const std::string&);
+
+  private:
+    void
+    check_name (const std::string&);
+
+    // Write 'n' characters from 's' (assuming there are no newlines)
+    // split into multiple lines at or near the 78 characters
+    // boundary. The first line starts at the 'column' offset.
+    //
+    void
+    write_value (std::size_t column, const char* s, std::size_t n);
+
+  private:
+    enum {start, body, end} s_ = start;
+    std::string version_; // Current format version.
+
+  private:
+    std::ostream& os_;
+    const std::string name_;
+  };
+}
+
+#endif // BUTL_MANIFEST_SERIALIZER
diff --git a/butl/manifest-serializer.cxx b/butl/manifest-serializer.cxx
new file mode 100644
index 0000000..c45aaba
--- /dev/null
+++ b/butl/manifest-serializer.cxx
@@ -0,0 +1,238 @@
+// file      : butl/manifest-serializer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <butl/manifest-serializer>
+
+#include <ostream>
+#include <cassert>
+
+using namespace std;
+
+namespace butl
+{
+  using serialization = manifest_serialization;
+
+  void manifest_serializer::
+  next (const string& n, const string& v)
+  {
+    switch (s_)
+    {
+    case start:
+      {
+        if (!n.empty ())
+          throw serialization (name_, "format version pair expected");
+
+        if (v.empty ())
+        {
+          // End of manifests.
+          //
+          os_.flush ();
+          s_ = end;
+          break;
+        }
+
+        if (v != "1")
+          throw serialization (name_, "unsupported format version " + v);
+
+        os_ << ':';
+
+        if (v != version_)
+        {
+          os_ << ' ' << v;
+          version_ = v;
+        }
+
+        os_ << endl;
+        s_ = body;
+        break;
+      }
+    case body:
+      {
+        if (n.empty ())
+        {
+          if (!v.empty ())
+            throw serialization (name_, "non-empty value in end pair");
+
+          s_ = start;
+          break;
+        }
+
+        check_name (n);
+
+        os_ << n << ':';
+
+        if (!v.empty ())
+        {
+          os_ << ' ';
+
+          // Use the multi-line mode in any of the following cases:
+          //
+          // - name is too long (say longer than 37 (78/2 - 2) characters;
+          //   we cannot start on the next line since that would start the
+          //   multi-line mode)
+          // - value contains newlines
+          // - value contains leading/trailing whitespaces
+          //
+          if (n.size () > 37 ||
+              v.find ('\n') != string::npos ||
+              v.front () == ' ' || v.front () == '\t' ||
+              v.back () == ' ' || v.back () == '\t')
+          {
+            os_ << "\\" << endl; // Multi-line mode introductor.
+
+            // Chunk the value into fragments separated by newlines.
+            //
+            for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i))
+            {
+              if (p == string::npos)
+              {
+                // Last chunk.
+                //
+                write_value (0, v.c_str () + i, v.size () - i);
+                break;
+              }
+
+              write_value (0, v.c_str () + i, p - i);
+              os_ << endl;
+              i = p + 1;
+            }
+
+            os_ << endl << "\\"; // Multi-line mode terminator.
+          }
+          else
+            write_value (n.size () + 2, v.c_str (), v.size ());
+        }
+
+        os_ << endl;
+        break;
+      }
+    case end:
+      {
+        throw serialization (name_, "serialization after eos");
+      }
+    }
+  }
+
+  void manifest_serializer::
+  comment (const string& t)
+  {
+    if (s_ == end)
+      throw serialization (name_, "serialization after eos");
+
+    os_ << '#';
+
+    if (!t.empty ())
+      os_ << ' ' << t;
+
+    os_ << endl;
+  }
+
+  void manifest_serializer::
+  check_name (const string& n)
+  {
+    if (n[0] == '#')
+      throw serialization (name_, "name starts with '#'");
+
+    for (char c: n)
+    {
+      switch (c)
+      {
+      case ' ':
+      case '\t':
+      case '\n': throw serialization (name_, "name contains whitespace");
+      case ':': throw serialization (name_, "name contains ':'");
+      default: break;
+      }
+    }
+  }
+
+  void manifest_serializer::
+  write_value (size_t cl, const char* s, size_t n)
+  {
+    char c ('\0');
+
+    // The idea is to break on the 77th character (i.e., write it
+    // on the next line) which means we have written 76 characters
+    // on this line plus 2 for '\' and '\n', which gives us 78.
+    //
+    for (const char* e (s + n); s != e; s++, cl++)
+    {
+      c = *s;
+      bool br (false); // Break the line.
+
+      // If this is a whitespace, see if it's a good place to break the
+      // line.
+      //
+      if (c == ' ' || c == '\t')
+      {
+        // Find the next whitespace (or the end) and see if it is a better
+        // place.
+        //
+        for (const char* w (s + 1); ; w++)
+        {
+          if (w == e || *w == ' ' || *w == '\t')
+          {
+            // Is this whitespace past where we need to break? Also see
+            // below the "hard" break case for why we use 78 at the end.
+            //
+            if (cl + static_cast<size_t> (w - s) > (w != e ? 77 : 78))
+            {
+              // Only break if this whitespace is close enough to
+              // the end of the line.
+              //
+              br = (cl > 57);
+            }
+
+            break;
+          }
+        }
+      }
+
+      // Do we have to do a "hard" break (i.e., without a whitespace)?
+      // If there is just one character left, then instead of writing
+      // '\' and then the character on the next line, we might as well
+      // write it on this line.
+      //
+      if (cl == (s + 1 != e ? 77 : 78))
+        br = true;
+
+      if (br)
+      {
+        os_ << '\\' << endl;
+        cl = 0;
+      }
+
+      os_ << c;
+    }
+
+    // What comes next is always a newline. If the last character that
+    // we have written is a backslash, escape it.
+    //
+    if (c == '\\')
+      os_ << '\\';
+  }
+
+  // manifest_serialization
+  //
+
+  static string
+  format (const string& n, const string& d)
+  {
+    string r;
+    if (!n.empty ())
+    {
+      r += n;
+      r += ": ";
+    }
+    r += "error: ";
+    r += d;
+    return r;
+  }
+
+  manifest_serialization::
+  manifest_serialization (const string& n, const string& d)
+      : runtime_error (format (n, d)), name (n), description (d)
+  {
+  }
+}
diff --git a/tests/buildfile b/tests/buildfile
index fd2589a..84a17aa 100644
--- a/tests/buildfile
+++ b/tests/buildfile
@@ -2,7 +2,8 @@
 # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
 # license   : MIT; see accompanying LICENSE file
 
-d = base64/ cpfile/ dir-iterator/ fdstream/ link/ pager/ path/ prefix-map/ \
+d = base64/ cpfile/ dir-iterator/ fdstream/ link/ manifest-parser/ \
+    manifest-serializer/ manifest-roundtrip/ pager/ path/ prefix-map/ \
     process/ sha256/ strcase/ timestamp/ triplet/
 
 ./: $d
diff --git a/tests/manifest-parser/buildfile b/tests/manifest-parser/buildfile
new file mode 100644
index 0000000..9173bdb
--- /dev/null
+++ b/tests/manifest-parser/buildfile
@@ -0,0 +1,7 @@
+# file      : tests/manifest-parser/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+
+include ../../butl/
diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx
new file mode 100644
index 0000000..bab60a8
--- /dev/null
+++ b/tests/manifest-parser/driver.cxx
@@ -0,0 +1,207 @@
+// file      : tests/manifest-parser/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <vector>
+#include <string>
+#include <utility> // pair
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <butl/manifest-parser>
+
+using namespace std;
+using namespace butl;
+
+using pairs = vector<pair<string, string>>;
+
+static bool
+test (const char* manifest, const pairs& expected);
+
+static bool
+fail (const char* manifest);
+
+int
+main ()
+{
+  // Whitespaces and comments.
+  //
+  assert (test (" \t", {{"",""}}));
+  assert (test (" \t\n \n\n", {{"",""}}));
+  assert (test ("# one\n  #two", {{"",""}}));
+
+  // Test encountering eos at various points.
+  //
+  assert (test ("", {{"",""}}));
+  assert (test (" ", {{"",""}}));
+  assert (test ("\n", {{"",""}}));
+  assert (fail ("a"));
+  assert (test (":1\na:", {{"","1"},{"a", ""},{"",""},{"",""}}));
+
+  // Invalid manifests.
+  //
+  assert (fail ("a:"));          // format version pair expected
+  assert (fail (":"));           // format version value expected
+  assert (fail (":9"));          // unsupported format version
+  assert (fail ("a"));           // ':' expected after name
+  assert (fail ("a b"));         // ':' expected after name
+  assert (fail ("a\tb"));        // ':' expected after name
+  assert (fail ("a\nb"));        // ':' expected after name
+  assert (fail (":1\na:b\n:9")); // unsupported format version
+
+  // Empty manifest.
+  //
+  assert (test (":1", {{"","1"},{"",""},{"",""}}));
+  assert (test (" \t :1", {{"","1"},{"",""},{"",""}}));
+  assert (test (" \t : 1", {{"","1"},{"",""},{"",""}}));
+  assert (test (" \t : 1 ", {{"","1"},{"",""},{"",""}}));
+  assert (test (":1\n", {{"","1"},{"",""},{"",""}}));
+  assert (test (":1 \n", {{"","1"},{"",""},{"",""}}));
+
+  // Single manifest.
+  //
+  assert (test (":1\na:x", {{"","1"},{"a", "x"},{"",""},{"",""}}));
+  assert (test (":1\na:x\n", {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (test (":1\na:x\nb:y",
+                {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}}));
+  assert (test (":1\na:x\n\tb : y\n  #comment",
+                {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}}));
+
+  // Multiple manifests.
+  //
+  assert (test (":1\na:x\n:\nb:y",
+                {{"","1"},{"a", "x"},{"",""},
+                 {"","1"},{"b", "y"},{"",""},{"",""}}));
+  assert (test (":1\na:x\n:1\nb:y",
+                {{"","1"},{"a", "x"},{"",""},
+                 {"","1"},{"b", "y"},{"",""},{"",""}}));
+  assert (test (":1\na:x\n:\nb:y\n:\nc:z\n",
+                {{"","1"},{"a", "x"},{"",""},
+                 {"","1"},{"b", "y"},{"",""},
+                 {"","1"},{"c", "z"},{"",""},{"",""}}));
+
+  // Name parsing.
+  //
+  assert (test (":1\nabc:", {{"","1"},{"abc",""},{"",""},{"",""}}));
+  assert (test (":1\nabc :", {{"","1"},{"abc",""},{"",""},{"",""}}));
+  assert (test (":1\nabc\t:", {{"","1"},{"abc",""},{"",""},{"",""}}));
+
+  // Simple value parsing.
+  //
+  assert (test (":1\na: \t xyz \t ", {{"","1"},{"a","xyz"},{"",""},{"",""}}));
+
+  // Simple value escaping.
+  //
+  assert (test (":1\na:x\\", {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (test (":1\na:x\\\ny", {{"","1"},{"a","xy"},{"",""},{"",""}}));
+  assert (test (":1\na:x\\\\\nb:",
+                {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}}));
+  assert (test (":1\na:x\\\\\\\nb:",
+                {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}}));
+
+  // Simple value literal newline.
+  //
+  assert (test (":1\na:x\\\n\\", {{"","1"},{"a","x\n"},{"",""},{"",""}}));
+  assert (test (":1\na:x\\\n\\\ny", {{"","1"},{"a","x\ny"},{"",""},{"",""}}));
+  assert (test (":1\na:x\\\n\\\ny\\\n\\\nz",
+                {{"","1"},{"a","x\ny\nz"},{"",""},{"",""}}));
+
+  // Multi-line value parsing.
+  //
+  assert (test (":1\na:\\", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (test (":1\na:\\x", {{"","1"},{"a", "\\x"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n\\", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n\\\n", {{"","1"},{"a", ""},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n\\x\n\\",
+                {{"","1"},{"a", "\\x"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\nx\ny", {{"","1"},{"a", "x\ny"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n \n#\t\n\\",
+                {{"","1"},{"a", " \n#\t"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\n\n\n\\", {{"","1"},{"a", "\n"},{"",""},{"",""}}));
+
+  // Multi-line value escaping.
+  //
+  assert (test (":1\na:\\\nx\\", {{"","1"},{"a","x"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\nx\\\ny\n\\",
+                {{"","1"},{"a","xy"},{"",""},{"",""}}));
+  assert (test (":1\na:\\\nx\\\\\n\\\nb:",
+                {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}}));
+  assert (test (":1\na:\\\nx\\\\\\\n\\\nb:",
+                {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}}));
+}
+
+static ostream&
+operator<< (ostream& os, const pairs& ps)
+{
+  os << '{';
+
+  bool f (true);
+  for (const auto& p: ps)
+    os << (f ? (f = false, "") : ",")
+       << '{' << p.first << ',' << p.second << '}';
+
+  os << '}';
+  return os;
+}
+
+static pairs
+parse (const char* m)
+{
+  istringstream is (m);
+  is.exceptions (istream::failbit | istream::badbit);
+  manifest_parser p (is, "");
+
+  pairs r;
+
+  for (bool eom (true), eos (false); !eos; )
+  {
+    manifest_name_value nv (p.next ());
+
+    if (nv.empty ()) // End pair.
+    {
+      eos = eom;
+      eom = true;
+    }
+    else
+      eom = false;
+
+    r.emplace_back (move (nv.name), move (nv.value)); // nv not used again.
+  }
+
+  return r;
+}
+
+static bool
+test (const char* m, const pairs& e)
+{
+  pairs r (parse (m));
+
+  if (r != e)
+  {
+    cerr << "actual: " << r << endl
+         << "expect: " << e << endl;
+
+    return false;
+  }
+
+  return true;
+}
+
+static bool
+fail (const char* m)
+{
+  try
+  {
+    pairs r (parse (m));
+    cerr << "nofail: " << r << endl;
+    return false;
+  }
+  catch (const manifest_parsing& e)
+  {
+    //cerr << e.what () << endl;
+  }
+
+  return true;
+}
diff --git a/tests/manifest-roundtrip/buildfile b/tests/manifest-roundtrip/buildfile
new file mode 100644
index 0000000..78e5a08
--- /dev/null
+++ b/tests/manifest-roundtrip/buildfile
@@ -0,0 +1,8 @@
+# file      : tests/manifest-roundtrip/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+exe{driver}: test.roundtrip = manifest
+
+include ../../butl/
diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx
new file mode 100644
index 0000000..e1ce5b8
--- /dev/null
+++ b/tests/manifest-roundtrip/driver.cxx
@@ -0,0 +1,52 @@
+// file      : tests/manifest-roundtrip/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <butl/fdstream>
+#include <butl/manifest-parser>
+#include <butl/manifest-serializer>
+
+using namespace std;
+using namespace butl;
+
+int
+main (int argc, char* argv[])
+{
+  if (argc != 2)
+  {
+    cerr << "usage: " << argv[0] << " <file>" << endl;
+    return 1;
+  }
+
+  try
+  {
+    ifdstream ifs (argv[1]);
+    manifest_parser p (ifs, argv[1]);
+
+    stdout_fdmode (fdstream_mode::binary); // Write in binary mode.
+    manifest_serializer s (cout, "stdout");
+
+    for (bool eom (true), eos (false); !eos; )
+    {
+      manifest_name_value nv (p.next ());
+
+      if (nv.empty ()) // End pair.
+      {
+        eos = eom;
+        eom = true;
+      }
+      else
+        eom = false;
+
+      s.next (nv.name, nv.value);
+    }
+  }
+  catch (const exception& e)
+  {
+    cerr << e.what () << endl;
+    return 1;
+  }
+}
diff --git a/tests/manifest-roundtrip/manifest b/tests/manifest-roundtrip/manifest
new file mode 100644
index 0000000..23c2730
--- /dev/null
+++ b/tests/manifest-roundtrip/manifest
@@ -0,0 +1,32 @@
+: 1
+name: libbpkg
+version: 1.0.1
+summary: build2 package manager library
+license: MIT
+tags: c++, package, manager, bpkg
+description: A very very very very very very very very very very very very\
+ very very very very very very very very very very very very very very very\
+ very very long description.
+changes: \
+1.0.1
+  - Fixed a  very very very very very very very very very very very very very\
+  very annoying bug.
+1.0.0
+  - First public release
+  - Lots of really cool features
+\
+url: http://www.codesynthesis.com/projects/libstudxml/
+email: build-users@codesynthesis.com; Public mailing list, posts by\
+ non-members are allowed but moderated.
+package-email: boris@codesynthesis.com; Direct email to the author.
+depends: libbutl
+depends: * build2
+depends: ?* bpkg
+requires: ?* linux | windows
+requires: c++11
+:
+path: c:\windows\\
+path: \
+
+c:\windows\\
+\
diff --git a/tests/manifest-serializer/buildfile b/tests/manifest-serializer/buildfile
new file mode 100644
index 0000000..0325323
--- /dev/null
+++ b/tests/manifest-serializer/buildfile
@@ -0,0 +1,7 @@
+# file      : tests/manifest-serializer/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+
+include ../../butl/
diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx
new file mode 100644
index 0000000..250272d
--- /dev/null
+++ b/tests/manifest-serializer/driver.cxx
@@ -0,0 +1,245 @@
+// file      : tests/manifest-serializer/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <vector>
+#include <string>
+#include <utility> // pair
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <butl/manifest-serializer>
+
+using namespace std;
+using namespace butl;
+
+using pairs = vector<pair<string, string>>;
+
+static bool
+test (const pairs& manifest, const string& expected);
+
+static bool
+fail (const pairs& manifest);
+
+int
+main ()
+{
+  // Comments (serialize() hands "#" pairs to comment()). Note that all the
+  // checks run inside assert() and are compiled out if NDEBUG is defined.
+  assert (test ({{"#", ""}}, "#\n"));
+  assert (test ({{"#", "x"}}, "# x\n"));
+  assert (test ({{"#", "x"},{"#", "y"},{"#", ""}}, "# x\n# y\n#\n"));
+  assert (fail ({{"",""},{"#", "x"}})); // serialization after eos
+
+  // Empty manifest stream (nothing, or a lone end pair, yields no output).
+  //
+  assert (test ({}, ""));
+  assert (test ({{"",""}}, ""));
+
+  // Empty manifest (a format version pair directly followed by an end pair).
+  //
+  assert (test ({{"","1"},{"",""},{"",""}}, ": 1\n"));
+  assert (test ({{"","1"},{"",""},{"","1"},{"",""},{"",""}}, ": 1\n:\n"));
+
+  // Invalid manifests.
+  //
+  assert (fail ({{"a",""}}));                  // format version pair expected
+  assert (fail ({{"","1"},{"",""},{"a",""}})); // format version pair expected
+  assert (fail ({{"","9"}}));                  // unsupported format version 9
+  assert (fail ({{"","1"},{"","x"}}));         // non-empty value in end pair
+  assert (fail ({{"",""},{"","1"}}));          // serialization after eos
+
+  // Single manifest.
+  //
+  assert (test ({{"","1"},{"a","x"},{"",""},{"",""}}, ": 1\na: x\n"));
+  assert (test ({{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}},
+                ": 1\na: x\nb: y\n"));
+  assert (test ({{"","1"},{"#","c"},{"a","x"},{"",""},{"",""}},
+                ": 1\n# c\na: x\n"));
+
+  // Multiple manifests.
+  //
+  assert (test ({{"","1"},{"a","x"},{"",""},
+                 {"","1"},{"b","y"},{"",""},{"",""}}, ": 1\na: x\n:\nb: y\n"));
+  assert (test ({{"","1"},{"a","x"},{"",""},
+                 {"","1"},{"b","y"},{"",""},
+                 {"","1"},{"c","z"},{"",""},{"",""}},
+                ": 1\na: x\n:\nb: y\n:\nc: z\n"));
+
+  // Invalid name ('#', ':', space, tab, or newline in the name).
+  //
+  assert (fail ({{"","1"},{"#a",""}}));
+  assert (fail ({{"","1"},{"a:b",""}}));
+  assert (fail ({{"","1"},{"a b",""}}));
+  assert (fail ({{"","1"},{"a\tb",""}}));
+  assert (fail ({{"","1"},{"a\n",""}}));
+
+  // Simple value.
+  //
+  assert (test ({{"","1"},{"a",""},{"",""},{"",""}}, ": 1\na:\n"));
+  assert (test ({{"","1"},{"a","x y z"},{"",""},{"",""}}, ": 1\na: x y z\n"));
+
+  // Long simple value (newline escaping). Below, lN is the input value and
+  // eN the expected text, with a backslash-newline at each expected break.
+
+  // "Solid" text/hard break.
+  //
+  string l1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n"
+             "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space too early/hard break.
+  //
+  string l2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n"
+             "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space/soft break.
+  //
+  string l3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxx"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxx\\\n"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space with a better one/soft break.
+  //
+  string l4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxx xxxxxxxxx"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyy yyyyyyyyyy"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxx xxxxxxxxx\\\n"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyy yyyyyyyyyy\\\n"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  assert (test ({{"","1"},{"a",l1},{"",""},{"",""}}, ": 1\na: " + e1 + "\n"));
+  assert (test ({{"","1"},{"a",l2},{"",""},{"",""}}, ": 1\na: " + e2 + "\n"));
+  assert (test ({{"","1"},{"a",l3},{"",""},{"",""}}, ": 1\na: " + e3 + "\n"));
+  assert (test ({{"","1"},{"a",l4},{"",""},{"",""}}, ": 1\na: " + e4 + "\n"));
+
+
+  // Multi-line value.
+  //
+  string n ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+  assert (test ({{"","1"},{n,"x"},{"",""},{"",""}},
+                ": 1\n" + n + ": \\\nx\n\\\n"));
+  assert (test ({{"","1"},{"a","\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\n\n\\\n"));
+  assert (test ({{"","1"},{"a","\n\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\n\n\n\\\n"));
+  assert (test ({{"","1"},{"a","\nx\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\nx\n\n\\\n"));
+  assert (test ({{"","1"},{"a","x\ny\nz"},{"",""},{"",""}},
+                ": 1\na: \\\nx\ny\nz\n\\\n"));
+  assert (test ({{"","1"},{"a"," x"},{"",""},{"",""}},
+                ": 1\na: \\\n x\n\\\n"));
+  assert (test ({{"","1"},{"a","x "},{"",""},{"",""}},
+                ": 1\na: \\\nx \n\\\n"));
+  assert (test ({{"","1"},{"a"," x "},{"",""},{"",""}},
+                ": 1\na: \\\n x \n\\\n"));
+
+  // Extra three x's are for the leading name part ("a: ") that we
+  // don't have.
+  //
+  assert (test ({{"","1"},{"a","\nxxx" + l1},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e1 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l2},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e2 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l3},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e3 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l4},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e4 + "\n\\\n"));
+
+  // Backslash escaping (simple and multi-line).
+  //
+  assert (test ({{"","1"},{"a","c:\\"},{"",""},{"",""}},
+                ": 1\na: c:\\\\\n"));
+  assert (test ({{"","1"},{"a","c:\\\nd:\\"},{"",""},{"",""}},
+                ": 1\na: \\\nc:\\\\\nd:\\\\\n\\\n"));
+}
+
+static string
+serialize (const pairs& m)
+{
+  // Have stream errors thrown; a "#" name stands for a comment line.
+  //
+  ostringstream out;
+  out.exceptions (ios_base::failbit | ios_base::badbit);
+  manifest_serializer ser (out, "");
+
+  for (const pair<string, string>& nv: m)
+    if (nv.first == "#")
+      ser.comment (nv.second);
+    else
+      ser.next (nv.first, nv.second);
+
+  return out.str ();
+}
+
+static bool
+test (const pairs& m, const string& e)
+{
+  // Serialize m and compare with e, printing both to stderr on a mismatch.
+  //
+  const string actual (serialize (m));
+
+  if (actual == e)
+    return true;
+
+  cerr << "actual:" << endl << "'" << actual << "'" << endl;
+  cerr << "expect:" << endl << "'" << e << "'" << endl;
+
+  return false;
+}
+
+static bool
+fail (const pairs& m)
+{
+  // Succeeds only if serializing m throws manifest_serialization.
+  //
+  try
+  {
+    const string out (serialize (m));
+    cerr << "nofail: " << out << endl;
+    return false;
+  }
+  catch (const manifest_serialization&)
+  {
+    return true; // The expected outcome.
+  }
+}