From f4660720e3ab0dc70d31fd39d48199590810ab03 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 11 Jun 2015 16:53:03 +0200 Subject: Implement manifest serializer --- bpkg/buildfile | 3 +- bpkg/manifest-serializer | 73 +++++++++++ bpkg/manifest-serializer.cxx | 238 ++++++++++++++++++++++++++++++++++ tests/buildfile | 2 +- tests/manifest-roundtrip/buildfile | 9 ++ tests/manifest-roundtrip/driver.cxx | 57 ++++++++ tests/manifest-roundtrip/manifest | 29 +++++ tests/manifest-serializer/buildfile | 7 + tests/manifest-serializer/driver.cxx | 245 +++++++++++++++++++++++++++++++++++ 9 files changed, 661 insertions(+), 2 deletions(-) create mode 100644 bpkg/manifest-serializer create mode 100644 bpkg/manifest-serializer.cxx create mode 100644 tests/manifest-roundtrip/buildfile create mode 100644 tests/manifest-roundtrip/driver.cxx create mode 100644 tests/manifest-roundtrip/manifest create mode 100644 tests/manifest-serializer/buildfile create mode 100644 tests/manifest-serializer/driver.cxx diff --git a/bpkg/buildfile b/bpkg/buildfile index b7c1c9f..45d9d89 100644 --- a/bpkg/buildfile +++ b/bpkg/buildfile @@ -2,4 +2,5 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -lib{bpkg}: cxx{manifest-parser} +import libs += libhello +lib{bpkg}: cxx{manifest-parser manifest-serializer} $libs diff --git a/bpkg/manifest-serializer b/bpkg/manifest-serializer new file mode 100644 index 0000000..2c5a09b --- /dev/null +++ b/bpkg/manifest-serializer @@ -0,0 +1,73 @@ +// file : bpkg/manifest-serializer -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BPKG_MANIFEST_SERIALIZER +#define BPKG_MANIFEST_SERIALIZER + +#include +#include +#include // size_t +#include // runtime_error + +namespace bpkg +{ + class manifest_serialization: public std::runtime_error + { + public: + manifest_serialization (const std::string& name, + const std::string& 
description); + + std::string name; + std::string description; + }; + + class manifest_serializer + { + public: + manifest_serializer (std::ostream& os, const std::string& name) + : os_ (os), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first name-value pair should be the special "start-of-manifest" + // with empty name and value being the format version. After that we + // have a sequence of ordinary pairs which are the manifest. At the + // end of the manifest we have the special "end-of-manifest" pair + // with empty name and value. After that we can either have another + // start-of-manifest pair (in which case the whole sequence repeats + // from the beginning) or we get another end-of-manifest pair which + // signals the end of stream. + // + void + next (const std::string& name, const std::string& value); + + // Write a comment. The supplied text is prefixed with "# " and + // terminated with a newline. + // + void + comment (const std::string&); + + private: + void + check_name (const std::string&); + + // Write 'n' characters from 's' (assuming there are no newlines) + // split into multiple lines at or near the 78 characters + // boundary. The first line starts at the 'column' offset. + // + void + write_value (std::size_t column, const char* s, std::size_t n); + + private: + enum {start, body, end} s_ = start; + std::string version_; // Current format version. 
+ + private: + std::ostream& os_; + const std::string name_; + }; +} + +#endif // BPKG_MANIFEST_SERIALIZER diff --git a/bpkg/manifest-serializer.cxx b/bpkg/manifest-serializer.cxx new file mode 100644 index 0000000..1d9fd89 --- /dev/null +++ b/bpkg/manifest-serializer.cxx @@ -0,0 +1,238 @@ +// file : bpkg/manifest-serializer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace bpkg +{ + using serialization = manifest_serialization; + + void manifest_serializer:: + next (const string& n, const string& v) + { + switch (s_) + { + case start: + { + if (!n.empty ()) + throw serialization (name_, "format version pair expected"); + + if (v.empty ()) + { + // End of manifests. + // + os_.flush (); + s_ = end; + break; + } + + if (v != "1") + throw serialization (name_, "unsupported format version " + v); + + os_ << ':'; + + if (v != version_) + { + os_ << ' ' << v; + version_ = v; + } + + os_ << endl; + s_ = body; + break; + } + case body: + { + if (n.empty ()) + { + if (!v.empty ()) + throw serialization (name_, "non-empty value in end pair"); + + s_ = start; + break; + } + + check_name (n); + + os_ << n << ':'; + + if (!v.empty ()) + { + os_ << ' '; + + // Use the multi-line mode in any of the following cases: + // + // - name is too long (say longer than 37 (78/2 - 2) characters; + // we cannot start on the next line since that would start the + // multi-line mode) + // - value contains newlines + // - value contains leading/trailing whitespaces + // + if (n.size () > 37 || + v.find ('\n') != string::npos || + v.front () == ' ' || v.front () == '\t' || + v.back () == ' ' || v.back () == '\t') + { + os_ << "\\" << endl; // Multi-line mode introductor. + + // Chunk the value into fragments separated by newlines. + // + for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i)) + { + if (p == string::npos) + { + // Last chunk. 
+ // + write_value (0, v.c_str () + i, v.size () - i); + break; + } + + write_value (0, v.c_str () + i, p - i); + os_ << endl; + i = p + 1; + } + + os_ << endl << "\\"; // Multi-line mode terminator. + } + else + write_value (n.size () + 2, v.c_str (), v.size ()); + } + + os_ << endl; + break; + } + case end: + { + throw serialization (name_, "serialization after eos"); + } + } + } + + void manifest_serializer:: + comment (const string& t) + { + if (s_ == end) + throw serialization (name_, "serialization after eos"); + + os_ << '#'; + + if (!t.empty ()) + os_ << ' ' << t; + + os_ << endl; + } + + void manifest_serializer:: + check_name (const string& n) + { + if (n[0] == '#') + throw serialization (name_, "name starts with '#'"); + + for (char c: n) + { + switch (c) + { + case ' ': + case '\t': + case '\n': throw serialization (name_, "name contains whitespace"); + case ':': throw serialization (name_, "name contains ':'"); + default: break; + } + } + } + + void manifest_serializer:: + write_value (size_t cl, const char* s, size_t n) + { + char c ('\0'); + + // The idea is to break on the 77th character (i.e., write it + // on the next line) which means we have written 76 characters + // on this line plus 2 for '\' and '\n', which gives us 78. + // + for (const char* e (s + n); s != e; s++, cl++) + { + c = *s; + bool br (false); // Break the line. + + // If this is a whitespace, see if it's a good place to break the + // line. + // + if (c == ' ' || c == '\t') + { + // Find the next whitespace (or the end) and see if it is a better + // place. + // + for (const char* w (s + 1); ; w++) + { + if (w == e || *w == ' ' || *w == '\t') + { + // Is this whitespace past where we need to break? Also see + // below the "hard" break case for why we use 78 at the end. + // + if (cl + static_cast (w - s) > (w != e ? 77 : 78)) + { + // Only break if this whitespace is close enough to + // the end of the line. 
+ // + br = (cl > 57); + } + + break; + } + } + } + + // Do we have to do a "hard" break (i.e., without a whitespace)? + // If there is just one character left, then instead of writing + // '\' and then the character on the next line, we might as well + // write it on this line. + // + if (cl == (s + 1 != e ? 77 : 78)) + br = true; + + if (br) + { + os_ << '\\' << endl; + cl = 0; + } + + os_ << c; + } + + // What comes next is always a newline. If the last character that + // we have written is a backslash, escape it. + // + if (c == '\\') + os_ << '\\'; + } + + // manifest_serialization + // + + static string + format (const string& n, const string& d) + { + string r; + if (!n.empty ()) + { + r += n; + r += ": "; + } + r += "error: "; + r += d; + return r; + } + + manifest_serialization:: + manifest_serialization (const string& n, const string& d) + : runtime_error (format (n, d)), name (n), description (d) + { + } +} diff --git a/tests/buildfile b/tests/buildfile index 8832040..d7b8b7f 100644 --- a/tests/buildfile +++ b/tests/buildfile @@ -2,6 +2,6 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -d = manifest-parser/ +d = manifest-parser/ manifest-serializer/ manifest-roundtrip/ .: $d include $d diff --git a/tests/manifest-roundtrip/buildfile b/tests/manifest-roundtrip/buildfile new file mode 100644 index 0000000..0883303 --- /dev/null +++ b/tests/manifest-roundtrip/buildfile @@ -0,0 +1,9 @@ +# file : tests/manifest-roundtrip/buildfile +# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +exe{driver}: cxx{driver} ../../bpkg/lib{bpkg} + +include ../../bpkg/ + +# test: ./driver manifest | diff -u manifest - diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx new file mode 100644 index 0000000..45abc67 --- /dev/null +++ b/tests/manifest-roundtrip/driver.cxx @@ -0,0 +1,57 @@ +// file : tests/manifest-roundtrip/driver.cxx 
-*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +using namespace std; +using namespace bpkg; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " " << endl; + return 1; + } + + try + { + ifstream ifs; + ifs.exceptions (ifstream::badbit | ifstream::failbit); + ifs.open (argv[1], ifstream::in | ifstream::binary); + + manifest_parser p (ifs, ""); + manifest_serializer s (cout, "stdout"); + + for (bool eom (true), eos (false); !eos; ) + { + auto nv (p.next ()); + + if (nv.empty ()) // End pair. + { + eos = eom; + eom = true; + } + else + eom = false; + + s.next (nv.name, nv.value); + } + } + catch (const ios_base::failure&) + { + cerr << "io failure" << endl; + return 1; + } + catch (const std::exception& e) + { + cerr << e.what () << endl; + return 1; + } +} diff --git a/tests/manifest-roundtrip/manifest b/tests/manifest-roundtrip/manifest new file mode 100644 index 0000000..31d6b28 --- /dev/null +++ b/tests/manifest-roundtrip/manifest @@ -0,0 +1,29 @@ +: 1 +name: libbpkg +version: 1.0.1 +summary: build2 package manager library +license: MIT +tags: c++, package, manager, bpkg +description: A very very very very very very very very very very very very\ + very very very very very very very very very very very very very very very\ + very very long description. +changes: \ +1.0.1 + - Fixed a very very very very very very very very very very very very very\ + very annoying bug. +1.0.0 + - Firts public release + - Lots of really cool features +\ +url: http://www.codesynthesis.com/projects/libstudxml/ +email: build-users@codesynthesis.com; Public mailing list, posts by\ + non-members are allowed but moderated. +package-email: boris@codesynthesis.com; Direct email to the author. 
+depends: libbutl +requires: c++11 +: +path: c:\windows\\ +path: \ + +c:\windows\\ +\ diff --git a/tests/manifest-serializer/buildfile b/tests/manifest-serializer/buildfile new file mode 100644 index 0000000..b1dfb78 --- /dev/null +++ b/tests/manifest-serializer/buildfile @@ -0,0 +1,7 @@ +# file : tests/manifest-serializer/buildfile +# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +exe{driver}: cxx{driver} ../../bpkg/lib{bpkg} + +include ../../bpkg/ diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx new file mode 100644 index 0000000..21bca08 --- /dev/null +++ b/tests/manifest-serializer/driver.cxx @@ -0,0 +1,245 @@ +// file : tests/manifest-serializer/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include +#include // pair +#include +#include +#include + +#include + +using namespace std; +using namespace bpkg; + +using pairs = vector>; + +static bool +test (const pairs& manifest, const string& expected); + +static bool +fail (const pairs& manifest); + +int +main () +{ + // Comments. + // + assert (test ({{"#", ""}}, "#\n")); + assert (test ({{"#", "x"}}, "# x\n")); + assert (test ({{"#", "x"},{"#", "y"},{"#", ""}}, "# x\n# y\n#\n")); + assert (fail ({{"",""},{"#", "x"}})); // serialization after eos + + // Empty manifest stream. + // + assert (test ({}, "")); + assert (test ({{"",""}}, "")); + + // Empty manifest. + // + assert (test ({{"","1"},{"",""},{"",""}}, ": 1\n")); + assert (test ({{"","1"},{"",""},{"","1"},{"",""},{"",""}}, ": 1\n:\n")); + + // Invalid manifests. 
+ // + assert (fail ({{"a",""}})); // format version pair expected + assert (fail ({{"","1"},{"",""},{"a",""}})); // format version pair expected + assert (fail ({{"","9"}})); // unsupported format version 9 + assert (fail ({{"","1"},{"","x"}})); // non-empty value in end pair + assert (fail ({{"",""},{"","1"}})); // serialization after eos + + // Single manifest. + // + assert (test ({{"","1"},{"a","x"},{"",""},{"",""}}, ": 1\na: x\n")); + assert (test ({{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}}, + ": 1\na: x\nb: y\n")); + assert (test ({{"","1"},{"#","c"},{"a","x"},{"",""},{"",""}}, + ": 1\n# c\na: x\n")); + + // Multiple manifests. + // + assert (test ({{"","1"},{"a","x"},{"",""}, + {"","1"},{"b","y"},{"",""},{"",""}}, ": 1\na: x\n:\nb: y\n")); + assert (test ({{"","1"},{"a","x"},{"",""}, + {"","1"},{"b","y"},{"",""}, + {"","1"},{"c","z"},{"",""},{"",""}}, + ": 1\na: x\n:\nb: y\n:\nc: z\n")); + + // Invalid name. + // + assert (fail ({{"","1"},{"#a",""}})); + assert (fail ({{"","1"},{"a:b",""}})); + assert (fail ({{"","1"},{"a b",""}})); + assert (fail ({{"","1"},{"a\tb",""}})); + assert (fail ({{"","1"},{"a\n",""}})); + + // Simple value. + // + assert (test ({{"","1"},{"a",""},{"",""},{"",""}}, ": 1\na:\n")); + assert (test ({{"","1"},{"a","x y z"},{"",""},{"",""}}, ": 1\na: x y z\n")); + + // Long simple value (newline escaping). + // + + // "Solid" text/hard break. + // + string l1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + string e1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n" + "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n" + "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + // Space too early/hard break. 
+ // + string l2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + string e2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n" + "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n" + "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + // Space/soft break. + // + string l3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxx" + " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + string e3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxx\\\n" + " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n" + " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + // Space with a better one/soft break. + // + string l4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxx xxxxxxxxx" + " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyy yyyyyyyyyy" + " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + string e4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxx xxxxxxxxx\\\n" + " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyy yyyyyyyyyy\\\n" + " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + assert (test ({{"","1"},{"a",l1},{"",""},{"",""}}, ": 1\na: " + e1 + "\n")); + assert (test ({{"","1"},{"a",l2},{"",""},{"",""}}, ": 1\na: " + e2 + "\n")); + assert (test ({{"","1"},{"a",l3},{"",""},{"",""}}, ": 1\na: " + e3 + "\n")); + assert (test ({{"","1"},{"a",l4},{"",""},{"",""}}, ": 1\na: " + e4 + "\n")); + + + // Multi-line value. 
+ // + string n ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + assert (test ({{"","1"},{n,"x"},{"",""},{"",""}}, + ": 1\n" + n + ": \\\nx\n\\\n")); + assert (test ({{"","1"},{"a","\n"},{"",""},{"",""}}, + ": 1\na: \\\n\n\n\\\n")); + assert (test ({{"","1"},{"a","\n\n"},{"",""},{"",""}}, + ": 1\na: \\\n\n\n\n\\\n")); + assert (test ({{"","1"},{"a","\nx\n"},{"",""},{"",""}}, + ": 1\na: \\\n\nx\n\n\\\n")); + assert (test ({{"","1"},{"a","x\ny\nz"},{"",""},{"",""}}, + ": 1\na: \\\nx\ny\nz\n\\\n")); + assert (test ({{"","1"},{"a"," x"},{"",""},{"",""}}, + ": 1\na: \\\n x\n\\\n")); + assert (test ({{"","1"},{"a","x "},{"",""},{"",""}}, + ": 1\na: \\\nx \n\\\n")); + assert (test ({{"","1"},{"a"," x "},{"",""},{"",""}}, + ": 1\na: \\\n x \n\\\n")); + + // Extra three x's are for the leading name part ("a: ") that we + // don't have. + // + assert (test ({{"","1"},{"a","\nxxx" + l1},{"",""},{"",""}}, + ": 1\na: \\\n\nxxx" + e1 + "\n\\\n")); + assert (test ({{"","1"},{"a","\nxxx" + l2},{"",""},{"",""}}, + ": 1\na: \\\n\nxxx" + e2 + "\n\\\n")); + assert (test ({{"","1"},{"a","\nxxx" + l3},{"",""},{"",""}}, + ": 1\na: \\\n\nxxx" + e3 + "\n\\\n")); + assert (test ({{"","1"},{"a","\nxxx" + l4},{"",""},{"",""}}, + ": 1\na: \\\n\nxxx" + e4 + "\n\\\n")); + + // Backslash escaping (simple and multi-line). 
+ // + assert (test ({{"","1"},{"a","c:\\"},{"",""},{"",""}}, + ": 1\na: c:\\\\\n")); + assert (test ({{"","1"},{"a","c:\\\nd:\\"},{"",""},{"",""}}, + ": 1\na: \\\nc:\\\\\nd:\\\\\n\\\n")); +} + +static string +serialize (const pairs& m) +{ + ostringstream os; + os.exceptions (istream::failbit | istream::badbit); + manifest_serializer s (os, ""); + + for (const auto& p: m) + { + if (p.first != "#") + s.next (p.first, p.second); + else + s.comment (p.second); + } + + return os.str (); +} + +static bool +test (const pairs& m, const string& e) +{ + string r (serialize (m)); + + if (r != e) + { + cerr << "actual:" << endl << "'" << r << "'"<< endl + << "expect:" << endl << "'" << e << "'"<< endl; + + return false; + } + + return true; +} + +static bool +fail (const pairs& m) +{ + try + { + string r (serialize (m)); + cerr << "nofail: " << r << endl; + return false; + } + catch (const manifest_serialization& e) + { + //cerr << e.what () << endl; + } + + return true; +} -- cgit v1.1