From 25a9484378ddaae9602ec54532cdc03b1f1924ef Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 29 Sep 2016 21:54:14 +0300 Subject: Add manifest_parser and manifest_serializer --- butl/buildfile | 45 +++-- butl/manifest-forward | 15 ++ butl/manifest-parser | 94 +++++++++ butl/manifest-parser.cxx | 379 +++++++++++++++++++++++++++++++++++ butl/manifest-serializer | 75 +++++++ butl/manifest-serializer.cxx | 238 ++++++++++++++++++++++ tests/buildfile | 3 +- tests/manifest-parser/buildfile | 7 + tests/manifest-parser/driver.cxx | 207 +++++++++++++++++++ tests/manifest-roundtrip/buildfile | 8 + tests/manifest-roundtrip/driver.cxx | 52 +++++ tests/manifest-roundtrip/manifest | 32 +++ tests/manifest-serializer/buildfile | 7 + tests/manifest-serializer/driver.cxx | 245 ++++++++++++++++++++++ 14 files changed, 1385 insertions(+), 22 deletions(-) create mode 100644 butl/manifest-forward create mode 100644 butl/manifest-parser create mode 100644 butl/manifest-parser.cxx create mode 100644 butl/manifest-serializer create mode 100644 butl/manifest-serializer.cxx create mode 100644 tests/manifest-parser/buildfile create mode 100644 tests/manifest-parser/driver.cxx create mode 100644 tests/manifest-roundtrip/buildfile create mode 100644 tests/manifest-roundtrip/driver.cxx create mode 100644 tests/manifest-roundtrip/manifest create mode 100644 tests/manifest-serializer/buildfile create mode 100644 tests/manifest-serializer/driver.cxx diff --git a/butl/buildfile b/butl/buildfile index 1a9787a..fcb5f86 100644 --- a/butl/buildfile +++ b/butl/buildfile @@ -2,27 +2,30 @@ # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -lib{butl}: \ -{hxx cxx}{ base64 } \ -{hxx cxx}{ char-scanner } \ -{hxx }{ export } \ -{hxx ixx cxx}{ fdstream } \ -{hxx ixx cxx}{ filesystem } \ -{hxx }{ multi-index } \ -{hxx }{ optional } \ -{hxx cxx}{ pager } \ -{hxx ixx txx cxx}{ path } \ -{hxx }{ path-io } \ -{hxx }{ path-map } \ -{hxx txx }{ prefix-map 
} \ -{hxx ixx cxx}{ process } \ -{hxx cxx}{ sha256 } \ -{hxx txx }{ string-table } \ -{hxx cxx}{ timestamp } \ -{hxx cxx}{ triplet } \ -{hxx ixx }{ utility } \ -{hxx }{ vector-view } \ -{hxx }{ version } +lib{butl}: \ +{hxx cxx}{ base64 } \ +{hxx cxx}{ char-scanner } \ +{hxx }{ export } \ +{hxx ixx cxx}{ fdstream } \ +{hxx ixx cxx}{ filesystem } \ +{hxx }{ manifest-forward } \ +{hxx cxx}{ manifest-parser } \ +{hxx cxx}{ manifest-serializer } \ +{hxx }{ multi-index } \ +{hxx }{ optional } \ +{hxx cxx}{ pager } \ +{hxx ixx txx cxx}{ path } \ +{hxx }{ path-io } \ +{hxx }{ path-map } \ +{hxx txx }{ prefix-map } \ +{hxx ixx cxx}{ process } \ +{hxx cxx}{ sha256 } \ +{hxx txx }{ string-table } \ +{hxx cxx}{ timestamp } \ +{hxx cxx}{ triplet } \ +{hxx ixx }{ utility } \ +{hxx }{ vector-view } \ +{hxx }{ version } # Exclude these from compilation on non-Windows targets. # diff --git a/butl/manifest-forward b/butl/manifest-forward new file mode 100644 index 0000000..5dc5060 --- /dev/null +++ b/butl/manifest-forward @@ -0,0 +1,15 @@ +// file : butl/manifest-forward -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUTL_MANIFEST_FORWARD +#define BUTL_MANIFEST_FORWARD + +namespace butl +{ + class manifest_parser; + class manifest_serializer; + class manifest_name_value; +} + +#endif // BUTL_MANIFEST_FORWARD diff --git a/butl/manifest-parser b/butl/manifest-parser new file mode 100644 index 0000000..a005b34 --- /dev/null +++ b/butl/manifest-parser @@ -0,0 +1,94 @@ +// file : butl/manifest-parser -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUTL_MANIFEST_PARSER +#define BUTL_MANIFEST_PARSER + +#include +#include +#include // uint64_t +#include // runtime_error + +#include + +#include + +namespace butl +{ + class LIBBUTL_EXPORT manifest_parsing: public std::runtime_error + { + public: + manifest_parsing (const 
std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + class manifest_name_value + { + public: + std::string name; + std::string value; + + std::uint64_t name_line; + std::uint64_t name_column; + + std::uint64_t value_line; + std::uint64_t value_column; + + bool + empty () const {return name.empty () && value.empty ();} + }; + + class LIBBUTL_EXPORT manifest_parser: protected butl::char_scanner + { + public: + manifest_parser (std::istream& is, const std::string& name) + : char_scanner (is), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first returned pair is special "start-of-manifest" with + // empty name and value being the format version: {"", ""}. + // After that we have a sequence of ordinary pairs which are + // the manifest. At the end of the manifest we have the special + // "end-of-manifest" pair with empty name and value: {"", ""}. + // After that we can either get another start-of-manifest pair + // (in which case the whole sequence repeats from the beginning) + // or we get another end-of-manifest pair which signals the end + // of stream (aka EOF). To put it another way, the parse sequence + // always has the following form: + // + // ({"", ""} {"", ""}* {"", ""})* {"", ""} + // + manifest_name_value + next (); + + private: + void + parse_name (manifest_name_value&); + + void + parse_value (manifest_name_value&); + + // Skip spaces and return the first peeked non-space character. + // + xchar + skip_spaces (); + + private: + const std::string name_; + + enum {start, body, end} s_ = start; + std::string version_; // Current format version. 
+ }; +} + +#endif // BUTL_MANIFEST_PARSER diff --git a/butl/manifest-parser.cxx b/butl/manifest-parser.cxx new file mode 100644 index 0000000..ec26ca8 --- /dev/null +++ b/butl/manifest-parser.cxx @@ -0,0 +1,379 @@ +// file : butl/manifest-parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace butl +{ + using parsing = manifest_parsing; + using name_value = manifest_name_value; + + name_value manifest_parser:: + next () + { + if (s_ == end) + return name_value {"", "", line, column, line, column}; + + xchar c (skip_spaces ()); + + // Here is the problem: if we are in the 'body' state (that is, + // we are parsing inside the manifest) and we see the special + // empty name, then before returning the "start" pair for the + // next manifest, we have to return the "end" pair. One way + // would be to cache the "start" pair and return it on the + // next call of next(). But that would require quite a bit + // of extra logic. The alternative is to detect the beginning + // of the empty name before parsing too far. This way, the + // next call to next() will start parsing where we left off + // and return the "start" pair naturally. + // + if (s_ == body && c == ':') + { + s_ = start; + return name_value {"", "", c.line, c.column, c.line, c.column}; + } + + // Regardless of the state, what should come next is a name, + // potentially the special empty one. + // + name_value r; + parse_name (r); + + skip_spaces (); + c = get (); + + if (eos (c)) + { + // This is ok as long as the name is empty. + // + if (!r.name.empty ()) + throw parsing (name_, c.line, c.column, "':' expected after name"); + + s_ = end; + + // The "end" pair.
+ // + r.value_line = r.name_line; + r.value_column = r.name_column; + return r; + } + + if (c != ':') + throw parsing (name_, c.line, c.column, "':' expected after name"); + + skip_spaces (); + parse_value (r); + + c = peek (); + + // The character after the value should be either a newline or eos. + // + assert (c == '\n' || eos (c)); + + if (c == '\n') + get (); + + // Now figure out whether what we've got makes sense, depending + // on the state we are in. + // + if (s_ == start) + { + // Start of the (next) manifest. The first pair should be the + // special empty name/format version. + // + if (!r.name.empty ()) + throw parsing (name_, r.name_line, r.name_column, + "format version pair expected"); + + // The version value is only mandatory for the first manifest in + // a sequence. + // + if (r.value.empty ()) + { + if (version_.empty ()) + throw parsing (name_, r.value_line, r.value_column, + "format version value expected"); + r.value = version_; + } + else + { + version_ = r.value; // Update with the latest. + + if (version_ != "1") + throw parsing (name_, r.value_line, r.value_column, + "unsupported format version " + version_); + } + + s_ = body; + } + else + { + // Parsing the body of the manifest. + // + + // Should have been handled by the special case above. + // + assert (!r.name.empty ()); + } + + return r; + } + + void manifest_parser:: + parse_name (name_value& r) + { + xchar c (peek ()); + + r.name_line = c.line; + r.name_column = c.column; + + for (; !eos (c); c = peek ()) + { + if (c == ':' || c == ' ' || c == '\t' || c == '\n') + break; + + r.name += c; + get (); + } + } + + void manifest_parser:: + parse_value (name_value& r) + { + xchar c (peek ()); + + r.value_line = c.line; + r.value_column = c.column; + + string& v (r.value); + string::size_type n (0); // Size of last non-space character (simple mode). + + // Detect the multi-line mode introductor. 
+ // + bool ml (false); + if (c == '\\') + { + get (); + xchar p (peek ()); + + if (p == '\n') + { + get (); // Newline is not part of the value so skip it. + c = peek (); + ml = true; + } + else if (eos (p)) + ml = true; + else + unget (c); + } + + // The nl flag signals that the preceding character was a "special + // newline", that is, a newline that was part of the multi-line mode + // introductor or an escape sequence. + // + for (bool nl (ml); !eos (c); c = peek ()) + { + // Detect the special "\n\\\n" sequence. In the multi-line mode, + // this is a "terminator". In the simple mode, this is a way to + // specify a newline. + // + // The key idea here is this: if we "swallowed" any characters + // (i.e., called get() without a matching unget()), then we + // have to restart the loop in order to do all the tests for + // the next character. Also, for this to work, we can only + // add one character to v, which limits us to maximum three + // characters look-ahead: one in v, one "ungot", and one + // peeked. + // + // The first block handles the special sequence that starts with + // a special newline. In multi-line mode, this is an "immediate + // termination" where we "use" the newline from the introductor. + // Note also that in the simple mode the special sequence can + // only start with a special (i.e., escaped) newline. + // + if (nl) + { + nl = false; + + if (c == '\\') + { + get (); + xchar c1 (peek ()); + + if (c1 == '\n' || eos (c1)) + { + if (ml) + break; + else + { + if (c1 == '\n') + get (); + + v += '\n'; // Literal newline. + n = v.size (); + continue; // Restart from the next character. + } + } + else + unget (c); // Fall through. + } + } + + if (c == '\n') + { + if (ml) + { + get (); + xchar c1 (peek ()); + + if (c1 == '\\') + { + get (); + xchar c2 (peek ()); + + if (c2 == '\n' || eos (c2)) + break; + else + { + v += '\n'; + unget (c1); + continue; // Restart from c1 (slash). + } + } + else + unget (c); // Fall through.
+ } + else + break; // Simple value terminator. + } + + // Detect the newline escape sequence. The same look-ahead + // approach as above. + // + if (c == '\\') + { + get (); + xchar c1 (peek ()); + + if (c1 == '\n' || eos (c1)) + { + if (c1 == '\n') + { + get (); + nl = true; // This is a special newline. + } + continue; // Restart from the next character. + } + else if (c1 == '\\') + { + get (); + xchar c2 (peek ()); + + if (c2 == '\n' || eos (c2)) + { + v += '\\'; + n = v.size (); + // Restart from c2 (newline/eos). + } + else + { + v += '\\'; + n = v.size (); + unget (c1); // Restart from c1 (second slash). + } + + continue; + } + else + unget (c); // Fall through. + } + + get (); + v += c; + + if (!ml && c != ' ' && c != '\t') + n = v.size (); + } + + // Cut off trailing whitespaces. + // + if (!ml) + v.resize (n); + } + + manifest_parser::xchar manifest_parser:: + skip_spaces () + { + xchar c (peek ()); + bool start (c.column == 1); + + for (; !eos (c); c = peek ()) + { + switch (c) + { + case ' ': + case '\t': + break; + case '\n': + { + // Skip empty lines. + // + if (!start) + return c; + + break; + } + case '#': + { + // We only recognize '#' as a start of a comment at the beginning + // of the line (sans leading spaces). + // + if (!start) + return c; + + get (); + + // Read until newline or eos. + // + for (c = peek (); !eos (c) && c != '\n'; c = peek ()) + get (); + + continue; + } + default: + return c; // Not a space.
+ } + + get (); + } + + return c; + } + + // manifest_parsing + // + + static string + format (const string& n, uint64_t l, uint64_t c, const string& d) + { + ostringstream os; + if (!n.empty ()) + os << n << ':'; + os << l << ':' << c << ": error: " << d; + return os.str (); + } + + manifest_parsing:: + manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d) + : runtime_error (format (n, l, c, d)), + name (n), line (l), column (c), description (d) + { + } +} diff --git a/butl/manifest-serializer b/butl/manifest-serializer new file mode 100644 index 0000000..6d7eeec --- /dev/null +++ b/butl/manifest-serializer @@ -0,0 +1,75 @@ +// file : butl/manifest-serializer -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUTL_MANIFEST_SERIALIZER +#define BUTL_MANIFEST_SERIALIZER + +#include +#include +#include // size_t +#include // runtime_error + +#include + +namespace butl +{ + class LIBBUTL_EXPORT manifest_serialization: public std::runtime_error + { + public: + manifest_serialization (const std::string& name, + const std::string& description); + + std::string name; + std::string description; + }; + + class LIBBUTL_EXPORT manifest_serializer + { + public: + manifest_serializer (std::ostream& os, const std::string& name) + : os_ (os), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first name-value pair should be the special "start-of-manifest" + // with empty name and value being the format version. After that we + // have a sequence of ordinary pairs which are the manifest. At the + // end of the manifest we have the special "end-of-manifest" pair + // with empty name and value. After that we can either have another + // start-of-manifest pair (in which case the whole sequence repeats + // from the beginning) or we get another end-of-manifest pair which + // signals the end of stream. 
+ // + void + next (const std::string& name, const std::string& value); + + // Write a comment. The supplied text is prefixed with "# " and + // terminated with a newline. + // + void + comment (const std::string&); + + private: + void + check_name (const std::string&); + + // Write 'n' characters from 's' (assuming there are no newlines) + // split into multiple lines at or near the 78 characters + // boundary. The first line starts at the 'column' offset. + // + void + write_value (std::size_t column, const char* s, std::size_t n); + + private: + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + private: + std::ostream& os_; + const std::string name_; + }; +} + +#endif // BUTL_MANIFEST_SERIALIZER diff --git a/butl/manifest-serializer.cxx b/butl/manifest-serializer.cxx new file mode 100644 index 0000000..c45aaba --- /dev/null +++ b/butl/manifest-serializer.cxx @@ -0,0 +1,238 @@ +// file : butl/manifest-serializer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace butl +{ + using serialization = manifest_serialization; + + void manifest_serializer:: + next (const string& n, const string& v) + { + switch (s_) + { + case start: + { + if (!n.empty ()) + throw serialization (name_, "format version pair expected"); + + if (v.empty ()) + { + // End of manifests. 
+ // + os_.flush (); + s_ = end; + break; + } + + if (v != "1") + throw serialization (name_, "unsupported format version " + v); + + os_ << ':'; + + if (v != version_) + { + os_ << ' ' << v; + version_ = v; + } + + os_ << endl; + s_ = body; + break; + } + case body: + { + if (n.empty ()) + { + if (!v.empty ()) + throw serialization (name_, "non-empty value in end pair"); + + s_ = start; + break; + } + + check_name (n); + + os_ << n << ':'; + + if (!v.empty ()) + { + os_ << ' '; + + // Use the multi-line mode in any of the following cases: + // + // - name is too long (say longer than 37 (78/2 - 2) characters; + // we cannot start on the next line since that would start the + // multi-line mode) + // - value contains newlines + // - value contains leading/trailing whitespaces + // + if (n.size () > 37 || + v.find ('\n') != string::npos || + v.front () == ' ' || v.front () == '\t' || + v.back () == ' ' || v.back () == '\t') + { + os_ << "\\" << endl; // Multi-line mode introductor. + + // Chunk the value into fragments separated by newlines. + // + for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i)) + { + if (p == string::npos) + { + // Last chunk. + // + write_value (0, v.c_str () + i, v.size () - i); + break; + } + + write_value (0, v.c_str () + i, p - i); + os_ << endl; + i = p + 1; + } + + os_ << endl << "\\"; // Multi-line mode terminator. 
+ } + else + write_value (n.size () + 2, v.c_str (), v.size ()); + } + + os_ << endl; + break; + } + case end: + { + throw serialization (name_, "serialization after eos"); + } + } + } + + void manifest_serializer:: + comment (const string& t) + { + if (s_ == end) + throw serialization (name_, "serialization after eos"); + + os_ << '#'; + + if (!t.empty ()) + os_ << ' ' << t; + + os_ << endl; + } + + void manifest_serializer:: + check_name (const string& n) + { + if (n[0] == '#') + throw serialization (name_, "name starts with '#'"); + + for (char c: n) + { + switch (c) + { + case ' ': + case '\t': + case '\n': throw serialization (name_, "name contains whitespace"); + case ':': throw serialization (name_, "name contains ':'"); + default: break; + } + } + } + + void manifest_serializer:: + write_value (size_t cl, const char* s, size_t n) + { + char c ('\0'); + + // The idea is to break on the 77th character (i.e., write it + // on the next line) which means we have written 76 characters + // on this line plus 2 for '\' and '\n', which gives us 78. + // + for (const char* e (s + n); s != e; s++, cl++) + { + c = *s; + bool br (false); // Break the line. + + // If this is a whitespace, see if it's a good place to break the + // line. + // + if (c == ' ' || c == '\t') + { + // Find the next whitespace (or the end) and see if it is a better + // place. + // + for (const char* w (s + 1); ; w++) + { + if (w == e || *w == ' ' || *w == '\t') + { + // Is this whitespace past where we need to break? Also see + // below the "hard" break case for why we use 78 at the end. + // + if (cl + static_cast (w - s) > (w != e ? 77 : 78)) + { + // Only break if this whitespace is close enough to + // the end of the line. + // + br = (cl > 57); + } + + break; + } + } + } + + // Do we have to do a "hard" break (i.e., without a whitespace)? 
+ // If there is just one character left, then instead of writing + // '\' and then the character on the next line, we might as well + // write it on this line. + // + if (cl == (s + 1 != e ? 77 : 78)) + br = true; + + if (br) + { + os_ << '\\' << endl; + cl = 0; + } + + os_ << c; + } + + // What comes next is always a newline. If the last character that + // we have written is a backslash, escape it. + // + if (c == '\\') + os_ << '\\'; + } + + // manifest_serialization + // + + static string + format (const string& n, const string& d) + { + string r; + if (!n.empty ()) + { + r += n; + r += ": "; + } + r += "error: "; + r += d; + return r; + } + + manifest_serialization:: + manifest_serialization (const string& n, const string& d) + : runtime_error (format (n, d)), name (n), description (d) + { + } +} diff --git a/tests/buildfile b/tests/buildfile index fd2589a..84a17aa 100644 --- a/tests/buildfile +++ b/tests/buildfile @@ -2,7 +2,8 @@ # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -d = base64/ cpfile/ dir-iterator/ fdstream/ link/ pager/ path/ prefix-map/ \ +d = base64/ cpfile/ dir-iterator/ fdstream/ link/ manifest-parser/ \ + manifest-serializer/ manifest-roundtrip/ pager/ path/ prefix-map/ \ process/ sha256/ strcase/ timestamp/ triplet/ ./: $d diff --git a/tests/manifest-parser/buildfile b/tests/manifest-parser/buildfile new file mode 100644 index 0000000..9173bdb --- /dev/null +++ b/tests/manifest-parser/buildfile @@ -0,0 +1,7 @@ +# file : tests/manifest-parser/buildfile +# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +exe{driver}: cxx{driver} ../../butl/lib{butl} + +include ../../butl/ diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx new file mode 100644 index 0000000..bab60a8 --- /dev/null +++ b/tests/manifest-parser/driver.cxx @@ -0,0 +1,207 @@ +// file : tests/manifest-parser/driver.cxx -*- C++ -*- +//
copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include +#include // pair +#include +#include +#include + +#include + +using namespace std; +using namespace butl; + +using pairs = vector>; + +static bool +test (const char* manifest, const pairs& expected); + +static bool +fail (const char* manifest); + +int +main () +{ + // Whitespaces and comments. + // + assert (test (" \t", {{"",""}})); + assert (test (" \t\n \n\n", {{"",""}})); + assert (test ("# one\n #two", {{"",""}})); + + // Test encountering eos at various points. + // + assert (test ("", {{"",""}})); + assert (test (" ", {{"",""}})); + assert (test ("\n", {{"",""}})); + assert (fail ("a")); + assert (test (":1\na:", {{"","1"},{"a", ""},{"",""},{"",""}})); + + // Invalid manifests. + // + assert (fail ("a:")); // format version pair expected + assert (fail (":")); // format version value expected + assert (fail (":9")); // unsupported format version + assert (fail ("a")); // ':' expected after name + assert (fail ("a b")); // ':' expected after name + assert (fail ("a\tb")); // ':' expected after name + assert (fail ("a\nb")); // ':' expected after name + assert (fail (":1\na:b\n:9")); // unsupported format version + + // Empty manifest. + // + assert (test (":1", {{"","1"},{"",""},{"",""}})); + assert (test (" \t :1", {{"","1"},{"",""},{"",""}})); + assert (test (" \t : 1", {{"","1"},{"",""},{"",""}})); + assert (test (" \t : 1 ", {{"","1"},{"",""},{"",""}})); + assert (test (":1\n", {{"","1"},{"",""},{"",""}})); + assert (test (":1 \n", {{"","1"},{"",""},{"",""}})); + + // Single manifest. + // + assert (test (":1\na:x", {{"","1"},{"a", "x"},{"",""},{"",""}})); + assert (test (":1\na:x\n", {{"","1"},{"a","x"},{"",""},{"",""}})); + assert (test (":1\na:x\nb:y", + {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}})); + assert (test (":1\na:x\n\tb : y\n #comment", + {{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}})); + + // Multiple manifests. 
+ // + assert (test (":1\na:x\n:\nb:y", + {{"","1"},{"a", "x"},{"",""}, + {"","1"},{"b", "y"},{"",""},{"",""}})); + assert (test (":1\na:x\n:1\nb:y", + {{"","1"},{"a", "x"},{"",""}, + {"","1"},{"b", "y"},{"",""},{"",""}})); + assert (test (":1\na:x\n:\nb:y\n:\nc:z\n", + {{"","1"},{"a", "x"},{"",""}, + {"","1"},{"b", "y"},{"",""}, + {"","1"},{"c", "z"},{"",""},{"",""}})); + + // Name parsing. + // + assert (test (":1\nabc:", {{"","1"},{"abc",""},{"",""},{"",""}})); + assert (test (":1\nabc :", {{"","1"},{"abc",""},{"",""},{"",""}})); + assert (test (":1\nabc\t:", {{"","1"},{"abc",""},{"",""},{"",""}})); + + // Simple value parsing. + // + assert (test (":1\na: \t xyz \t ", {{"","1"},{"a","xyz"},{"",""},{"",""}})); + + // Simple value escaping. + // + assert (test (":1\na:x\\", {{"","1"},{"a","x"},{"",""},{"",""}})); + assert (test (":1\na:x\\\ny", {{"","1"},{"a","xy"},{"",""},{"",""}})); + assert (test (":1\na:x\\\\\nb:", + {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}})); + assert (test (":1\na:x\\\\\\\nb:", + {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}})); + + // Simple value literal newline. + // + assert (test (":1\na:x\\\n\\", {{"","1"},{"a","x\n"},{"",""},{"",""}})); + assert (test (":1\na:x\\\n\\\ny", {{"","1"},{"a","x\ny"},{"",""},{"",""}})); + assert (test (":1\na:x\\\n\\\ny\\\n\\\nz", + {{"","1"},{"a","x\ny\nz"},{"",""},{"",""}})); + + // Multi-line value parsing. 
+ // + assert (test (":1\na:\\", {{"","1"},{"a", ""},{"",""},{"",""}})); + assert (test (":1\na:\\\n", {{"","1"},{"a", ""},{"",""},{"",""}})); + assert (test (":1\na:\\x", {{"","1"},{"a", "\\x"},{"",""},{"",""}})); + assert (test (":1\na:\\\n\\", {{"","1"},{"a", ""},{"",""},{"",""}})); + assert (test (":1\na:\\\n\\\n", {{"","1"},{"a", ""},{"",""},{"",""}})); + assert (test (":1\na:\\\n\\x\n\\", + {{"","1"},{"a", "\\x"},{"",""},{"",""}})); + assert (test (":1\na:\\\nx\ny", {{"","1"},{"a", "x\ny"},{"",""},{"",""}})); + assert (test (":1\na:\\\n \n#\t\n\\", + {{"","1"},{"a", " \n#\t"},{"",""},{"",""}})); + assert (test (":1\na:\\\n\n\n\\", {{"","1"},{"a", "\n"},{"",""},{"",""}})); + + // Multi-line value escaping. + // + assert (test (":1\na:\\\nx\\", {{"","1"},{"a","x"},{"",""},{"",""}})); + assert (test (":1\na:\\\nx\\\ny\n\\", + {{"","1"},{"a","xy"},{"",""},{"",""}})); + assert (test (":1\na:\\\nx\\\\\n\\\nb:", + {{"","1"},{"a","x\\"},{"b",""},{"",""},{"",""}})); + assert (test (":1\na:\\\nx\\\\\\\n\\\nb:", + {{"","1"},{"a","x\\\\"},{"b",""},{"",""},{"",""}})); +} + +static ostream& +operator<< (ostream& os, const pairs& ps) +{ + os << '{'; + + bool f (true); + for (const auto& p: ps) + os << (f ? (f = false, "") : ",") + << '{' << p.first << ',' << p.second << '}'; + + os << '}'; + return os; +} + +static pairs +parse (const char* m) +{ + istringstream is (m); + is.exceptions (istream::failbit | istream::badbit); + manifest_parser p (is, ""); + + pairs r; + + for (bool eom (true), eos (false); !eos; ) + { + manifest_name_value nv (p.next ()); + + if (nv.empty ()) // End pair. 
+ { + eos = eom; + eom = true; + } + else + eom = false; + + r.emplace_back (nv.name, nv.value); // move + } + + return r; +} + +static bool +test (const char* m, const pairs& e) +{ + pairs r (parse (m)); + + if (r != e) + { + cerr << "actual: " << r << endl + << "expect: " << e << endl; + + return false; + } + + return true; +} + +static bool +fail (const char* m) +{ + try + { + pairs r (parse (m)); + cerr << "nofail: " << r << endl; + return false; + } + catch (const manifest_parsing& e) + { + //cerr << e.what () << endl; + } + + return true; +} diff --git a/tests/manifest-roundtrip/buildfile b/tests/manifest-roundtrip/buildfile new file mode 100644 index 0000000..78e5a08 --- /dev/null +++ b/tests/manifest-roundtrip/buildfile @@ -0,0 +1,8 @@ +# file : tests/manifest-roundtrip/buildfile +# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +exe{driver}: cxx{driver} ../../butl/lib{butl} +exe{driver}: test.roundtrip = manifest + +include ../../butl/ diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx new file mode 100644 index 0000000..e1ce5b8 --- /dev/null +++ b/tests/manifest-roundtrip/driver.cxx @@ -0,0 +1,52 @@ +// file : tests/manifest-roundtrip/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include +#include + +using namespace std; +using namespace butl; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " " << endl; + return 1; + } + + try + { + ifdstream ifs (argv[1]); + manifest_parser p (ifs, argv[1]); + + stdout_fdmode (fdstream_mode::binary); // Write in binary mode. + manifest_serializer s (cout, "stdout"); + + for (bool eom (true), eos (false); !eos; ) + { + manifest_name_value nv (p.next ()); + + if (nv.empty ()) // End pair. 
+ { + eos = eom; + eom = true; + } + else + eom = false; + + s.next (nv.name, nv.value); + } + } + catch (const exception& e) + { + cerr << e.what () << endl; + return 1; + } +} diff --git a/tests/manifest-roundtrip/manifest b/tests/manifest-roundtrip/manifest new file mode 100644 index 0000000..23c2730 --- /dev/null +++ b/tests/manifest-roundtrip/manifest @@ -0,0 +1,32 @@ +: 1 +name: libbpkg +version: 1.0.1 +summary: build2 package manager library +license: MIT +tags: c++, package, manager, bpkg +description: A very very very very very very very very very very very very\ + very very very very very very very very very very very very very very very\ + very very long description. +changes: \ +1.0.1 + - Fixed a very very very very very very very very very very very very very\ + very annoying bug. +1.0.0 + - Firts public release + - Lots of really cool features +\ +url: http://www.codesynthesis.com/projects/libstudxml/ +email: build-users@codesynthesis.com; Public mailing list, posts by\ + non-members are allowed but moderated. +package-email: boris@codesynthesis.com; Direct email to the author. 
+depends: libbutl
+depends: * build2
+depends: ?* bpkg
+requires: ?* linux | windows
+requires: c++11
+:
+path: c:\windows\\
+path: \
+
+c:\windows\\
+\
diff --git a/tests/manifest-serializer/buildfile b/tests/manifest-serializer/buildfile
new file mode 100644
index 0000000..0325323
--- /dev/null
+++ b/tests/manifest-serializer/buildfile
@@ -0,0 +1,7 @@
+# file      : tests/manifest-serializer/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+
+include ../../butl/
diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx
new file mode 100644
index 0000000..250272d
--- /dev/null
+++ b/tests/manifest-serializer/driver.cxx
@@ -0,0 +1,264 @@
+// file      : tests/manifest-serializer/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+// The tests below are driven by assert() so keep assertions enabled even
+// if the driver is compiled with NDEBUG defined.
+//
+#undef NDEBUG
+
+#include <vector>
+#include <string>
+#include <utility> // pair
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <butl/manifest-serializer>
+
+using namespace std;
+using namespace butl;
+
+// Manifest stream as a sequence of name/value pairs. The "#" name denotes a
+// comment; all other pairs are passed to the serializer as is.
+//
+using pairs = vector<pair<string, string>>;
+
+// Serialize the manifest and return true if the result matches expected.
+// Otherwise print both to stderr and return false.
+//
+static bool
+test (const pairs& manifest, const string& expected);
+
+// Return true if serializing the manifest throws manifest_serialization.
+// Otherwise print the serialized text to stderr and return false.
+//
+static bool
+fail (const pairs& manifest);
+
+int
+main ()
+{
+  // Comments.
+  //
+  assert (test ({{"#", ""}}, "#\n"));
+  assert (test ({{"#", "x"}}, "# x\n"));
+  assert (test ({{"#", "x"},{"#", "y"},{"#", ""}}, "# x\n# y\n#\n"));
+  assert (fail ({{"",""},{"#", "x"}})); // serialization after eos
+
+  // Empty manifest stream.
+  //
+  assert (test ({}, ""));
+  assert (test ({{"",""}}, ""));
+
+  // Empty manifest.
+  //
+  assert (test ({{"","1"},{"",""},{"",""}}, ": 1\n"));
+  assert (test ({{"","1"},{"",""},{"","1"},{"",""},{"",""}}, ": 1\n:\n"));
+
+  // Invalid manifests.
+  //
+  assert (fail ({{"a",""}})); // format version pair expected
+  assert (fail ({{"","1"},{"",""},{"a",""}})); // format version pair expected
+  assert (fail ({{"","9"}})); // unsupported format version 9
+  assert (fail ({{"","1"},{"","x"}})); // non-empty value in end pair
+  assert (fail ({{"",""},{"","1"}})); // serialization after eos
+
+  // Single manifest.
+  //
+  assert (test ({{"","1"},{"a","x"},{"",""},{"",""}}, ": 1\na: x\n"));
+  assert (test ({{"","1"},{"a","x"},{"b","y"},{"",""},{"",""}},
+                ": 1\na: x\nb: y\n"));
+  assert (test ({{"","1"},{"#","c"},{"a","x"},{"",""},{"",""}},
+                ": 1\n# c\na: x\n"));
+
+  // Multiple manifests.
+  //
+  assert (test ({{"","1"},{"a","x"},{"",""},
+                 {"","1"},{"b","y"},{"",""},{"",""}}, ": 1\na: x\n:\nb: y\n"));
+  assert (test ({{"","1"},{"a","x"},{"",""},
+                 {"","1"},{"b","y"},{"",""},
+                 {"","1"},{"c","z"},{"",""},{"",""}},
+                ": 1\na: x\n:\nb: y\n:\nc: z\n"));
+
+  // Invalid name.
+  //
+  assert (fail ({{"","1"},{"#a",""}}));
+  assert (fail ({{"","1"},{"a:b",""}}));
+  assert (fail ({{"","1"},{"a b",""}}));
+  assert (fail ({{"","1"},{"a\tb",""}}));
+  assert (fail ({{"","1"},{"a\n",""}}));
+
+  // Simple value.
+  //
+  assert (test ({{"","1"},{"a",""},{"",""},{"",""}}, ": 1\na:\n"));
+  assert (test ({{"","1"},{"a","x y z"},{"",""},{"",""}}, ": 1\na: x y z\n"));
+
+  // Long simple value (newline escaping).
+  //
+
+  // "Solid" text/hard break.
+  //
+  string l1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e1 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n"
+             "Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space too early/hard break.
+  //
+  string l2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e2 ("x xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\\\n"
+             "Yyyyyyyyyyyyyyyyy yyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             "Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz z"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space/soft break.
+  //
+  string l3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxx"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e3 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxxxxxxxxxxxx\\\n"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyyyyyyyyyyyyy\\\n"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  // Space with a better one/soft break.
+  //
+  string l4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxx xxxxxxxxx"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyy yyyyyyyyyy"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  string e4 ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+             "xxxxxxxxx xxxxxxxxx\\\n"
+             " Yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+             "yyyyyyyyyyyyyyyyyy yyyyyyyyyy\\\n"
+             " Zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+             "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+  assert (test ({{"","1"},{"a",l1},{"",""},{"",""}}, ": 1\na: " + e1 + "\n"));
+  assert (test ({{"","1"},{"a",l2},{"",""},{"",""}}, ": 1\na: " + e2 + "\n"));
+  assert (test ({{"","1"},{"a",l3},{"",""},{"",""}}, ": 1\na: " + e3 + "\n"));
+  assert (test ({{"","1"},{"a",l4},{"",""},{"",""}}, ": 1\na: " + e4 + "\n"));
+
+
+  // Multi-line value.
+  //
+  string n ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+  assert (test ({{"","1"},{n,"x"},{"",""},{"",""}},
+                ": 1\n" + n + ": \\\nx\n\\\n"));
+  assert (test ({{"","1"},{"a","\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\n\n\\\n"));
+  assert (test ({{"","1"},{"a","\n\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\n\n\n\\\n"));
+  assert (test ({{"","1"},{"a","\nx\n"},{"",""},{"",""}},
+                ": 1\na: \\\n\nx\n\n\\\n"));
+  assert (test ({{"","1"},{"a","x\ny\nz"},{"",""},{"",""}},
+                ": 1\na: \\\nx\ny\nz\n\\\n"));
+  assert (test ({{"","1"},{"a"," x"},{"",""},{"",""}},
+                ": 1\na: \\\n x\n\\\n"));
+  assert (test ({{"","1"},{"a","x "},{"",""},{"",""}},
+                ": 1\na: \\\nx \n\\\n"));
+  assert (test ({{"","1"},{"a"," x "},{"",""},{"",""}},
+                ": 1\na: \\\n x \n\\\n"));
+
+  // Extra three x's are for the leading name part ("a: ") that we
+  // don't have.
+  //
+  assert (test ({{"","1"},{"a","\nxxx" + l1},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e1 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l2},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e2 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l3},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e3 + "\n\\\n"));
+  assert (test ({{"","1"},{"a","\nxxx" + l4},{"",""},{"",""}},
+                ": 1\na: \\\n\nxxx" + e4 + "\n\\\n"));
+
+  // Backslash escaping (simple and multi-line).
+  //
+  assert (test ({{"","1"},{"a","c:\\"},{"",""},{"",""}},
+                ": 1\na: c:\\\\\n"));
+  assert (test ({{"","1"},{"a","c:\\\nd:\\"},{"",""},{"",""}},
+                ": 1\na: \\\nc:\\\\\nd:\\\\\n\\\n"));
+}
+
+// Serialize the manifest to a string. Stream errors are reported as
+// exceptions rather than silently producing truncated output.
+//
+static string
+serialize (const pairs& m)
+{
+  ostringstream os;
+  os.exceptions (ostream::failbit | ostream::badbit);
+  manifest_serializer s (os, "");
+
+  for (const auto& p: m)
+  {
+    if (p.first != "#")
+      s.next (p.first, p.second);
+    else
+      s.comment (p.second);
+  }
+
+  return os.str ();
+}
+
+static bool
+test (const pairs& m, const string& e)
+{
+  string r (serialize (m));
+
+  if (r != e)
+  {
+    cerr << "actual:" << endl << "'" << r << "'"<< endl
+         << "expect:" << endl << "'" << e << "'"<< endl;
+
+    return false;
+  }
+
+  return true;
+}
+
+static bool
+fail (const pairs& m)
+{
+  try
+  {
+    string r (serialize (m));
+    cerr << "nofail: " << r << endl;
+    return false;
+  }
+  catch (const manifest_serialization& e)
+  {
+    // Expected failure. Uncomment to see the diagnostics.
+    //
+    //cerr << e.what () << endl;
+  }
+
+  return true;
+}
--
cgit v1.1