From f4660720e3ab0dc70d31fd39d48199590810ab03 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 11 Jun 2015 16:53:03 +0200 Subject: Implement manifest serializer --- bpkg/buildfile | 3 +- bpkg/manifest-serializer | 73 +++++++++++++ bpkg/manifest-serializer.cxx | 238 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 bpkg/manifest-serializer create mode 100644 bpkg/manifest-serializer.cxx (limited to 'bpkg') diff --git a/bpkg/buildfile b/bpkg/buildfile index b7c1c9f..45d9d89 100644 --- a/bpkg/buildfile +++ b/bpkg/buildfile @@ -2,4 +2,5 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -lib{bpkg}: cxx{manifest-parser} +import libs += libhello +lib{bpkg}: cxx{manifest-parser manifest-serializer} $libs diff --git a/bpkg/manifest-serializer b/bpkg/manifest-serializer new file mode 100644 index 0000000..2c5a09b --- /dev/null +++ b/bpkg/manifest-serializer @@ -0,0 +1,73 @@ +// file : bpkg/manifest-serializer -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BPKG_MANIFEST_SERIALIZER +#define BPKG_MANIFEST_SERIALIZER + +#include +#include +#include // size_t +#include // runtime_error + +namespace bpkg +{ + class manifest_serialization: public std::runtime_error + { + public: + manifest_serialization (const std::string& name, + const std::string& description); + + std::string name; + std::string description; + }; + + class manifest_serializer + { + public: + manifest_serializer (std::ostream& os, const std::string& name) + : os_ (os), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first name-value pair should be the special "start-of-manifest" + // with empty name and value being the format version. After that we + // have a sequence of ordinary pairs which are the manifest. At the + // end of the manifest we have the special "end-of-manifest" pair + // with empty name and value. After that we can either have another + // start-of-manifest pair (in which case the whole sequence repeats + // from the beginning) or we get another end-of-manifest pair which + // signals the end of stream. + // + void + next (const std::string& name, const std::string& value); + + // Write a comment. The supplied text is prefixed with "# " and + // terminated with a newline. + // + void + comment (const std::string&); + + private: + void + check_name (const std::string&); + + // Write 'n' characters from 's' (assuming there are no newlines) + // split into multiple lines at or near the 78 characters + // boundary. The first line starts at the 'column' offset. + // + void + write_value (std::size_t column, const char* s, std::size_t n); + + private: + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + private: + std::ostream& os_; + const std::string name_; + }; +} + +#endif // BPKG_MANIFEST_SERIALIZER diff --git a/bpkg/manifest-serializer.cxx b/bpkg/manifest-serializer.cxx new file mode 100644 index 0000000..1d9fd89 --- /dev/null +++ b/bpkg/manifest-serializer.cxx @@ -0,0 +1,238 @@ +// file : bpkg/manifest-serializer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace bpkg +{ + using serialization = manifest_serialization; + + void manifest_serializer:: + next (const string& n, const string& v) + { + switch (s_) + { + case start: + { + if (!n.empty ()) + throw serialization (name_, "format version pair expected"); + + if (v.empty ()) + { + // End of manifests. + // + os_.flush (); + s_ = end; + break; + } + + if (v != "1") + throw serialization (name_, "unsupported format version " + v); + + os_ << ':'; + + if (v != version_) + { + os_ << ' ' << v; + version_ = v; + } + + os_ << endl; + s_ = body; + break; + } + case body: + { + if (n.empty ()) + { + if (!v.empty ()) + throw serialization (name_, "non-empty value in end pair"); + + s_ = start; + break; + } + + check_name (n); + + os_ << n << ':'; + + if (!v.empty ()) + { + os_ << ' '; + + // Use the multi-line mode in any of the following cases: + // + // - name is too long (say longer than 37 (78/2 - 2) characters; + // we cannot start on the next line since that would start the + // multi-line mode) + // - value contains newlines + // - value contains leading/trailing whitespaces + // + if (n.size () > 37 || + v.find ('\n') != string::npos || + v.front () == ' ' || v.front () == '\t' || + v.back () == ' ' || v.back () == '\t') + { + os_ << "\\" << endl; // Multi-line mode introductor. + + // Chunk the value into fragments separated by newlines. + // + for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i)) + { + if (p == string::npos) + { + // Last chunk. + // + write_value (0, v.c_str () + i, v.size () - i); + break; + } + + write_value (0, v.c_str () + i, p - i); + os_ << endl; + i = p + 1; + } + + os_ << endl << "\\"; // Multi-line mode terminator. + } + else + write_value (n.size () + 2, v.c_str (), v.size ()); + } + + os_ << endl; + break; + } + case end: + { + throw serialization (name_, "serialization after eos"); + } + } + } + + void manifest_serializer:: + comment (const string& t) + { + if (s_ == end) + throw serialization (name_, "serialization after eos"); + + os_ << '#'; + + if (!t.empty ()) + os_ << ' ' << t; + + os_ << endl; + } + + void manifest_serializer:: + check_name (const string& n) + { + if (n[0] == '#') + throw serialization (name_, "name starts with '#'"); + + for (char c: n) + { + switch (c) + { + case ' ': + case '\t': + case '\n': throw serialization (name_, "name contains whitespace"); + case ':': throw serialization (name_, "name contains ':'"); + default: break; + } + } + } + + void manifest_serializer:: + write_value (size_t cl, const char* s, size_t n) + { + char c ('\0'); + + // The idea is to break on the 77th character (i.e., write it + // on the next line) which means we have written 76 characters + // on this line plus 2 for '\' and '\n', which gives us 78. + // + for (const char* e (s + n); s != e; s++, cl++) + { + c = *s; + bool br (false); // Break the line. + + // If this is a whitespace, see if it's a good place to break the + // line. + // + if (c == ' ' || c == '\t') + { + // Find the next whitespace (or the end) and see if it is a better + // place. + // + for (const char* w (s + 1); ; w++) + { + if (w == e || *w == ' ' || *w == '\t') + { + // Is this whitespace past where we need to break? Also see + // below the "hard" break case for why we use 78 at the end. + // + if (cl + static_cast (w - s) > (w != e ? 77 : 78)) + { + // Only break if this whitespace is close enough to + // the end of the line. + // + br = (cl > 57); + } + + break; + } + } + } + + // Do we have to do a "hard" break (i.e., without a whitespace)? + // If there is just one character left, then instead of writing + // '\' and then the character on the next line, we might as well + // write it on this line. + // + if (cl == (s + 1 != e ? 77 : 78)) + br = true; + + if (br) + { + os_ << '\\' << endl; + cl = 0; + } + + os_ << c; + } + + // What comes next is always a newline. I the last character that + // we have written is a backslash, escape it. + // + if (c == '\\') + os_ << '\\'; + } + + // manifest_serialization + // + + static string + format (const string& n, const string& d) + { + string r; + if (!n.empty ()) + { + r += n; + r += ": "; + } + r += "error: "; + r += d; + return r; + } + + manifest_serialization:: + manifest_serialization (const string& n, const string& d) + : runtime_error (format (n, d)), name (n), description (d) + { + } +} -- cgit v1.1