aboutsummaryrefslogtreecommitdiff
path: root/bpkg
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-06-11 16:53:03 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-06-11 16:53:03 +0200
commitf4660720e3ab0dc70d31fd39d48199590810ab03 (patch)
treeb8396646af57e03855aa93d798920d987fa3b0b7 /bpkg
parent2ffe5ac2998b90c004de4f13e199c7ee965c3f75 (diff)
Implement manifest serializer
Diffstat (limited to 'bpkg')
-rw-r--r--bpkg/buildfile3
-rw-r--r--bpkg/manifest-serializer73
-rw-r--r--bpkg/manifest-serializer.cxx238
3 files changed, 313 insertions, 1 deletions
diff --git a/bpkg/buildfile b/bpkg/buildfile
index b7c1c9f..45d9d89 100644
--- a/bpkg/buildfile
+++ b/bpkg/buildfile
@@ -2,4 +2,5 @@
# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-lib{bpkg}: cxx{manifest-parser}
+import libs += libhello
+lib{bpkg}: cxx{manifest-parser manifest-serializer} $libs
diff --git a/bpkg/manifest-serializer b/bpkg/manifest-serializer
new file mode 100644
index 0000000..2c5a09b
--- /dev/null
+++ b/bpkg/manifest-serializer
@@ -0,0 +1,73 @@
+// file : bpkg/manifest-serializer -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BPKG_MANIFEST_SERIALIZER
+#define BPKG_MANIFEST_SERIALIZER
+
+#include <string>
+#include <iosfwd>
+#include <cstddef> // size_t
+#include <stdexcept> // runtime_error
+
+namespace bpkg
+{
+ class manifest_serialization: public std::runtime_error
+ {
+ public:
+ manifest_serialization (const std::string& name,
+ const std::string& description);
+
+ std::string name;
+ std::string description;
+ };
+
+ class manifest_serializer
+ {
+ public:
+ manifest_serializer (std::ostream& os, const std::string& name)
+ : os_ (os), name_ (name) {}
+
+ const std::string&
+ name () const {return name_;}
+
+ // The first name-value pair should be the special "start-of-manifest"
+ // with empty name and value being the format version. After that we
+ // have a sequence of ordinary pairs which are the manifest. At the
+ // end of the manifest we have the special "end-of-manifest" pair
+ // with empty name and value. After that we can either have another
+ // start-of-manifest pair (in which case the whole sequence repeats
+ // from the beginning) or we get another end-of-manifest pair which
+ // signals the end of stream.
+ //
+ void
+ next (const std::string& name, const std::string& value);
+
+ // Write a comment. The supplied text is prefixed with "# " and
+ // terminated with a newline.
+ //
+ void
+ comment (const std::string&);
+
+ private:
+ void
+ check_name (const std::string&);
+
+ // Write 'n' characters from 's' (assuming there are no newlines)
+ // split into multiple lines at or near the 78 characters
+ // boundary. The first line starts at the 'column' offset.
+ //
+ void
+ write_value (std::size_t column, const char* s, std::size_t n);
+
+ private:
+ enum {start, body, end} s_ = start;
+ std::string version_; // Current format version.
+
+ private:
+ std::ostream& os_;
+ const std::string name_;
+ };
+}
+
+#endif // BPKG_MANIFEST_SERIALIZER
diff --git a/bpkg/manifest-serializer.cxx b/bpkg/manifest-serializer.cxx
new file mode 100644
index 0000000..1d9fd89
--- /dev/null
+++ b/bpkg/manifest-serializer.cxx
@@ -0,0 +1,238 @@
+// file : bpkg/manifest-serializer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <bpkg/manifest-serializer>
+
+#include <ostream>
+#include <cassert>
+
+using namespace std;
+
+namespace bpkg
+{
+ using serialization = manifest_serialization;
+
+ void manifest_serializer::
+ next (const string& n, const string& v)
+ {
+ switch (s_)
+ {
+ case start:
+ {
+ if (!n.empty ())
+ throw serialization (name_, "format version pair expected");
+
+ if (v.empty ())
+ {
+ // End of manifests.
+ //
+ os_.flush ();
+ s_ = end;
+ break;
+ }
+
+ if (v != "1")
+ throw serialization (name_, "unsupported format version " + v);
+
+ os_ << ':';
+
+ if (v != version_)
+ {
+ os_ << ' ' << v;
+ version_ = v;
+ }
+
+ os_ << endl;
+ s_ = body;
+ break;
+ }
+ case body:
+ {
+ if (n.empty ())
+ {
+ if (!v.empty ())
+ throw serialization (name_, "non-empty value in end pair");
+
+ s_ = start;
+ break;
+ }
+
+ check_name (n);
+
+ os_ << n << ':';
+
+ if (!v.empty ())
+ {
+ os_ << ' ';
+
+ // Use the multi-line mode in any of the following cases:
+ //
+ // - name is too long (say longer than 37 (78/2 - 2) characters;
+ // we cannot start on the next line since that would start the
+ // multi-line mode)
+ // - value contains newlines
+ // - value contains leading/trailing whitespaces
+ //
+ if (n.size () > 37 ||
+ v.find ('\n') != string::npos ||
+ v.front () == ' ' || v.front () == '\t' ||
+ v.back () == ' ' || v.back () == '\t')
+ {
+ os_ << "\\" << endl; // Multi-line mode introductor.
+
+ // Chunk the value into fragments separated by newlines.
+ //
+ for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i))
+ {
+ if (p == string::npos)
+ {
+ // Last chunk.
+ //
+ write_value (0, v.c_str () + i, v.size () - i);
+ break;
+ }
+
+ write_value (0, v.c_str () + i, p - i);
+ os_ << endl;
+ i = p + 1;
+ }
+
+ os_ << endl << "\\"; // Multi-line mode terminator.
+ }
+ else
+ write_value (n.size () + 2, v.c_str (), v.size ());
+ }
+
+ os_ << endl;
+ break;
+ }
+ case end:
+ {
+ throw serialization (name_, "serialization after eos");
+ }
+ }
+ }
+
+ void manifest_serializer::
+ comment (const string& t)
+ {
+ if (s_ == end)
+ throw serialization (name_, "serialization after eos");
+
+ os_ << '#';
+
+ if (!t.empty ())
+ os_ << ' ' << t;
+
+ os_ << endl;
+ }
+
+ void manifest_serializer::
+ check_name (const string& n)
+ {
+ if (n[0] == '#')
+ throw serialization (name_, "name starts with '#'");
+
+ for (char c: n)
+ {
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n': throw serialization (name_, "name contains whitespace");
+ case ':': throw serialization (name_, "name contains ':'");
+ default: break;
+ }
+ }
+ }
+
+ void manifest_serializer::
+ write_value (size_t cl, const char* s, size_t n)
+ {
+ char c ('\0');
+
+ // The idea is to break on the 77th character (i.e., write it
+ // on the next line) which means we have written 76 characters
+ // on this line plus 2 for '\' and '\n', which gives us 78.
+ //
+ for (const char* e (s + n); s != e; s++, cl++)
+ {
+ c = *s;
+ bool br (false); // Break the line.
+
+ // If this is a whitespace, see if it's a good place to break the
+ // line.
+ //
+ if (c == ' ' || c == '\t')
+ {
+ // Find the next whitespace (or the end) and see if it is a better
+ // place.
+ //
+ for (const char* w (s + 1); ; w++)
+ {
+ if (w == e || *w == ' ' || *w == '\t')
+ {
+ // Is this whitespace past where we need to break? Also see
+ // below the "hard" break case for why we use 78 at the end.
+ //
+ if (cl + static_cast<size_t> (w - s) > (w != e ? 77 : 78))
+ {
+ // Only break if this whitespace is close enough to
+ // the end of the line.
+ //
+ br = (cl > 57);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Do we have to do a "hard" break (i.e., without a whitespace)?
+ // If there is just one character left, then instead of writing
+ // '\' and then the character on the next line, we might as well
+ // write it on this line.
+ //
+ if (cl == (s + 1 != e ? 77 : 78))
+ br = true;
+
+ if (br)
+ {
+ os_ << '\\' << endl;
+ cl = 0;
+ }
+
+ os_ << c;
+ }
+
+ // What comes next is always a newline. I the last character that
+ // we have written is a backslash, escape it.
+ //
+ if (c == '\\')
+ os_ << '\\';
+ }
+
+ // manifest_serialization
+ //
+
+ static string
+ format (const string& n, const string& d)
+ {
+ string r;
+ if (!n.empty ())
+ {
+ r += n;
+ r += ": ";
+ }
+ r += "error: ";
+ r += d;
+ return r;
+ }
+
+ manifest_serialization::
+ manifest_serialization (const string& n, const string& d)
+ : runtime_error (format (n, d)), name (n), description (d)
+ {
+ }
+}