aboutsummaryrefslogtreecommitdiff
path: root/bpkg
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2016-09-29 22:02:28 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2016-09-29 22:02:28 +0300
commitf4a338ee1a5eb993fb7f9688588b77b12ad944c2 (patch)
treeda7bea25e838d376eddb1263a839aca50571c4a4 /bpkg
parent4dc2c7b9fbbe7bec57524d71fd5330e2d1782c52 (diff)
Move manifest_parser and manifest_serializer to libbutl
Diffstat (limited to 'bpkg')
-rw-r--r--bpkg/buildfile2
-rw-r--r--bpkg/manifest41
-rw-r--r--bpkg/manifest-parser94
-rw-r--r--bpkg/manifest-parser.cxx379
-rw-r--r--bpkg/manifest-serializer75
-rw-r--r--bpkg/manifest-serializer.cxx238
-rw-r--r--bpkg/manifest.cxx5
7 files changed, 21 insertions, 813 deletions
diff --git a/bpkg/buildfile b/bpkg/buildfile
index 6d4e09e..22eeb98 100644
--- a/bpkg/buildfile
+++ b/bpkg/buildfile
@@ -7,8 +7,6 @@ import int_libs = libbutl%lib{butl}
lib{bpkg}: \
{hxx }{ export } \
{hxx cxx}{ manifest } \
-{hxx cxx}{ manifest-parser } \
-{hxx cxx}{ manifest-serializer } \
{hxx }{ version } \
$int_libs
diff --git a/bpkg/manifest b/bpkg/manifest
index e0b8421..688cbf2 100644
--- a/bpkg/manifest
+++ b/bpkg/manifest
@@ -15,15 +15,12 @@
#include <butl/path>
#include <butl/optional>
+#include <butl/manifest-forward>
#include <bpkg/export>
namespace bpkg
{
- class manifest_parser;
- class manifest_serializer;
- class manifest_name_value;
-
using strings = std::vector<std::string>;
class LIBBPKG_EXPORT version
@@ -353,20 +350,20 @@ namespace bpkg
// Create individual package manifest.
//
- package_manifest (manifest_parser&, bool ignore_unknown = false);
+ package_manifest (butl::manifest_parser&, bool ignore_unknown = false);
// Create an element of the package list manifest.
//
- package_manifest (manifest_parser&,
- manifest_name_value start,
+ package_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
bool ignore_unknown = false);
void
- serialize (manifest_serializer&) const;
+ serialize (butl::manifest_serializer&) const;
private:
- package_manifest (manifest_parser&,
- manifest_name_value start,
+ package_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
bool in_list,
bool ignore_unknown);
};
@@ -384,10 +381,10 @@ namespace bpkg
public:
package_manifests () = default;
- package_manifests (manifest_parser&, bool ignore_unknown = false);
+ package_manifests (butl::manifest_parser&, bool ignore_unknown = false);
void
- serialize (manifest_serializer&) const;
+ serialize (butl::manifest_serializer&) const;
};
class LIBBPKG_EXPORT repository_location
@@ -573,13 +570,13 @@ namespace bpkg
public:
repository_manifest () = default; // VC export.
- repository_manifest (manifest_parser&, bool ignore_unknown = false);
- repository_manifest (manifest_parser&,
- manifest_name_value start,
+ repository_manifest (butl::manifest_parser&, bool ignore_unknown = false);
+ repository_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
bool ignore_unknown = false);
void
- serialize (manifest_serializer&) const;
+ serialize (butl::manifest_serializer&) const;
};
class LIBBPKG_EXPORT repository_manifests:
@@ -591,10 +588,10 @@ namespace bpkg
using base_type::base_type;
repository_manifests () = default;
- repository_manifests (manifest_parser&, bool ignore_unknown = false);
+ repository_manifests (butl::manifest_parser&, bool ignore_unknown = false);
void
- serialize (manifest_serializer&) const;
+ serialize (butl::manifest_serializer&) const;
};
class LIBBPKG_EXPORT signature_manifest
@@ -612,12 +609,12 @@ namespace bpkg
public:
signature_manifest () = default;
- signature_manifest (manifest_parser&, bool ignore_unknown = false);
+ signature_manifest (butl::manifest_parser&, bool ignore_unknown = false);
// Serialize sha256sum and base64-encoded representation of the signature.
//
void
- serialize (manifest_serializer&) const;
+ serialize (butl::manifest_serializer&) const;
private:
// Used for delegating in public constructor. Strictly speaking is not
@@ -625,8 +622,8 @@ namespace bpkg
// a manifest list, but kept for the consistency with other manifests
// implementations.
//
- signature_manifest (manifest_parser&,
- manifest_name_value start,
+ signature_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
bool ignore_unknown);
};
}
diff --git a/bpkg/manifest-parser b/bpkg/manifest-parser
deleted file mode 100644
index 8cbb768..0000000
--- a/bpkg/manifest-parser
+++ /dev/null
@@ -1,94 +0,0 @@
-// file : bpkg/manifest-parser -*- C++ -*-
-// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#ifndef BPKG_MANIFEST_PARSER
-#define BPKG_MANIFEST_PARSER
-
-#include <string>
-#include <iosfwd>
-#include <cstdint> // uint64_t
-#include <stdexcept> // runtime_error
-
-#include <butl/char-scanner>
-
-#include <bpkg/export>
-
-namespace bpkg
-{
- class LIBBPKG_EXPORT manifest_parsing: public std::runtime_error
- {
- public:
- manifest_parsing (const std::string& name,
- std::uint64_t line,
- std::uint64_t column,
- const std::string& description);
-
- std::string name;
- std::uint64_t line;
- std::uint64_t column;
- std::string description;
- };
-
- class manifest_name_value
- {
- public:
- std::string name;
- std::string value;
-
- std::uint64_t name_line;
- std::uint64_t name_column;
-
- std::uint64_t value_line;
- std::uint64_t value_column;
-
- bool
- empty () const {return name.empty () && value.empty ();}
- };
-
- class LIBBPKG_EXPORT manifest_parser: protected butl::char_scanner
- {
- public:
- manifest_parser (std::istream& is, const std::string& name)
- : char_scanner (is), name_ (name) {}
-
- const std::string&
- name () const {return name_;}
-
- // The first returned pair is special "start-of-manifest" with
- // empty name and value being the format version: {"", "<ver>"}.
- // After that we have a sequence of ordinary pairs which are
- // the manifest. At the end of the manifest we have the special
- // "end-of-manifest" pair with empty name and value: {"", ""}.
- // After that we can either get another start-of-manifest pair
- // (in which case the whole sequence repeats from the beginning)
- // or we get another end-of-manifest pair which signals the end
- // of stream (aka EOF). To put it another way, the parse sequence
- // always has the following form:
- //
- // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
- //
- manifest_name_value
- next ();
-
- private:
- void
- parse_name (manifest_name_value&);
-
- void
- parse_value (manifest_name_value&);
-
- // Skip spaces and return the first peeked non-space character.
- //
- xchar
- skip_spaces ();
-
- private:
- const std::string name_;
-
- enum {start, body, end} s_ = start;
- std::string version_; // Current format version.
- };
-}
-
-#endif // BPKG_MANIFEST_PARSER
diff --git a/bpkg/manifest-parser.cxx b/bpkg/manifest-parser.cxx
deleted file mode 100644
index 58e920a..0000000
--- a/bpkg/manifest-parser.cxx
+++ /dev/null
@@ -1,379 +0,0 @@
-// file : bpkg/manifest-parser.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <bpkg/manifest-parser>
-
-#include <cassert>
-#include <sstream>
-
-using namespace std;
-
-namespace bpkg
-{
- using parsing = manifest_parsing;
- using name_value = manifest_name_value;
-
- name_value manifest_parser::
- next ()
- {
- if (s_ == end)
- return name_value {"", "", line, column, line, column};
-
- xchar c (skip_spaces ());
-
- // Here is the problem: if we are in the 'body' state (that is,
- // we are parsing inside the manifest) and we see the special
- // empty name, then before returning the "start" pair for the
- // next manifest, we have to return the "end" pair. One way
- // would be to cache the "start" pair and return it on the
- // next call of next(). But that would require quite a bit
- // of extra logic. The alternative is to detect the beginning
- // of the empty name before parsing too far. This way, the
- // next call to next() will start parsing where we left of
- // and return the "start" pair naturally.
- //
- if (s_ == body && c == ':')
- {
- s_ = start;
- return name_value {"", "", c.line, c.column, c.line, c.column};
- }
-
- // Regardless of the state, what should come next is a name,
- // potentially the special empty one.
- //
- name_value r;
- parse_name (r);
-
- skip_spaces ();
- c = get ();
-
- if (eos (c))
- {
- // This is ok as long as the name is empty.
- //
- if (!r.name.empty ())
- throw parsing (name_, c.line, c.column, "':' expected after name");
-
- s_ = end;
-
- // The "end" pair.
- //
- r.value_line = r.name_line;
- r.value_column = r.name_column;
- return r;
- }
-
- if (c != ':')
- throw parsing (name_, c.line, c.column, "':' expected after name");
-
- skip_spaces ();
- parse_value (r);
-
- c = peek ();
-
- // The character after the value should be either a newline or eos.
- //
- assert (c == '\n' || eos (c));
-
- if (c == '\n')
- get ();
-
- // Now figure out whether what we've got makes sense, depending
- // on the state we are in.
- //
- if (s_ == start)
- {
- // Start of the (next) manifest. The first pair should be the
- // special empty name/format version.
- //
- if (!r.name.empty ())
- throw parsing (name_, r.name_line, r.name_column,
- "format version pair expected");
-
- // The version value is only mandatory for the first manifest in
- // a sequence.
- //
- if (r.value.empty ())
- {
- if (version_.empty ())
- throw parsing (name_, r.value_line, r.value_column,
- "format version value expected");
- r.value = version_;
- }
- else
- {
- version_ = r.value; // Update with the latest.
-
- if (version_ != "1")
- throw parsing (name_, r.value_line, r.value_column,
- "unsupported format version " + version_);
- }
-
- s_ = body;
- }
- else
- {
- // Parsing the body of the manifest.
- //
-
- // Should have been handled by the special case above.
- //
- assert (!r.name.empty ());
- }
-
- return r;
- }
-
- void manifest_parser::
- parse_name (name_value& r)
- {
- xchar c (peek ());
-
- r.name_line = c.line;
- r.name_column = c.column;
-
- for (; !eos (c); c = peek ())
- {
- if (c == ':' || c == ' ' || c == '\t' || c == '\n')
- break;
-
- r.name += c;
- get ();
- }
- }
-
- void manifest_parser::
- parse_value (name_value& r)
- {
- xchar c (peek ());
-
- r.value_line = c.line;
- r.value_column = c.column;
-
- string& v (r.value);
- string::size_type n (0); // Size of last non-space character (simple mode).
-
- // Detect the multi-line mode introductor.
- //
- bool ml (false);
- if (c == '\\')
- {
- get ();
- xchar p (peek ());
-
- if (p == '\n')
- {
- get (); // Newline is not part of the value so skip it.
- c = peek ();
- ml = true;
- }
- else if (eos (p))
- ml = true;
- else
- unget (c);
- }
-
- // The nl flag signals that the preceding character was a "special
- // newline", that is, a newline that was part of the milti-line mode
- // introductor or an escape sequence.
- //
- for (bool nl (ml); !eos (c); c = peek ())
- {
- // Detect the special "\n\\\n" sequence. In the multi-line mode,
- // this is a "terminator". In the simple mode, this is a way to
- // specify a newline.
- //
- // The key idea here is this: if we "swallowed" any characters
- // (i.e., called get() without a matching unget()), then we
- // have to restart the loop in order to do all the tests for
- // the next character. Also, for this to work, we can only
- // add one character to v, which limits us to maximum three
- // characters look-ahead: one in v, one "ungot", and one
- // peeked.
- //
- // The first block handles the special sequence that starts with
- // a special newline. In multi-line mode, this is an "immediate
- // termination" where we "use" the newline from the introductor.
- // Note also that in the simple mode the special sequence can
- // only start with a special (i.e., escaped) newline.
- //
- if (nl)
- {
- nl = false;
-
- if (c == '\\')
- {
- get ();
- xchar c1 (peek ());
-
- if (c1 == '\n' || eos (c1))
- {
- if (ml)
- break;
- else
- {
- if (c1 == '\n')
- get ();
-
- v += '\n'; // Literal newline.
- n = v.size ();
- continue; // Restart from the next character.
- }
- }
- else
- unget (c); // Fall through.
- }
- }
-
- if (c == '\n')
- {
- if (ml)
- {
- get ();
- xchar c1 (peek ());
-
- if (c1 == '\\')
- {
- get ();
- xchar c2 (peek ());
-
- if (c2 == '\n' || eos (c2))
- break;
- else
- {
- v += '\n';
- unget (c1);
- continue; // Restart from c1 (slash).
- }
- }
- else
- unget (c); // Fall through.
- }
- else
- break; // Simple value terminator.
- }
-
- // Detect the newline escape sequence. The same look-ahead
- // approach as above.
- //
- if (c == '\\')
- {
- get ();
- xchar c1 (peek ());
-
- if (c1 == '\n' || eos (c1))
- {
- if (c1 == '\n')
- {
- get ();
- nl = true; // This is a special newline.
- }
- continue; // Restart from the next character.
- }
- else if (c1 == '\\')
- {
- get ();
- xchar c2 (peek ());
-
- if (c2 == '\n' || eos (c1))
- {
- v += '\\';
- n = v.size ();
- // Restart from c2 (newline/eos).
- }
- else
- {
- v += '\\';
- n = v.size ();
- unget (c1); // Restart from c1 (second slash).
- }
-
- continue;
- }
- else
- unget (c); // Fall through.
- }
-
- get ();
- v += c;
-
- if (!ml && c != ' ' && c != '\t')
- n = v.size ();
- }
-
- // Cut off trailing whitespaces.
- //
- if (!ml)
- v.resize (n);
- }
-
- manifest_parser::xchar manifest_parser::
- skip_spaces ()
- {
- xchar c (peek ());
- bool start (c.column == 1);
-
- for (; !eos (c); c = peek ())
- {
- switch (c)
- {
- case ' ':
- case '\t':
- break;
- case '\n':
- {
- // Skip empty lines.
- //
- if (!start)
- return c;
-
- break;
- }
- case '#':
- {
- // We only recognize '#' as a start of a comment at the beginning
- // of the line (sans leading spaces).
- //
- if (!start)
- return c;
-
- get ();
-
- // Read until newline or eos.
- //
- for (c = peek (); !eos (c) && c != '\n'; c = peek ())
- get ();
-
- continue;
- }
- default:
- return c; // Not a space.
- }
-
- get ();
- }
-
- return c;
- }
-
- // manifest_parsing
- //
-
- static string
- format (const string& n, uint64_t l, uint64_t c, const string& d)
- {
- ostringstream os;
- if (!n.empty ())
- os << n << ':';
- os << l << ':' << c << ": error: " << d;
- return os.str ();
- }
-
- manifest_parsing::
- manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d)
- : runtime_error (format (n, l, c, d)),
- name (n), line (l), column (c), description (d)
- {
- }
-}
diff --git a/bpkg/manifest-serializer b/bpkg/manifest-serializer
deleted file mode 100644
index 501fd30..0000000
--- a/bpkg/manifest-serializer
+++ /dev/null
@@ -1,75 +0,0 @@
-// file : bpkg/manifest-serializer -*- C++ -*-
-// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#ifndef BPKG_MANIFEST_SERIALIZER
-#define BPKG_MANIFEST_SERIALIZER
-
-#include <string>
-#include <iosfwd>
-#include <cstddef> // size_t
-#include <stdexcept> // runtime_error
-
-#include <bpkg/export>
-
-namespace bpkg
-{
- class LIBBPKG_EXPORT manifest_serialization: public std::runtime_error
- {
- public:
- manifest_serialization (const std::string& name,
- const std::string& description);
-
- std::string name;
- std::string description;
- };
-
- class LIBBPKG_EXPORT manifest_serializer
- {
- public:
- manifest_serializer (std::ostream& os, const std::string& name)
- : os_ (os), name_ (name) {}
-
- const std::string&
- name () const {return name_;}
-
- // The first name-value pair should be the special "start-of-manifest"
- // with empty name and value being the format version. After that we
- // have a sequence of ordinary pairs which are the manifest. At the
- // end of the manifest we have the special "end-of-manifest" pair
- // with empty name and value. After that we can either have another
- // start-of-manifest pair (in which case the whole sequence repeats
- // from the beginning) or we get another end-of-manifest pair which
- // signals the end of stream.
- //
- void
- next (const std::string& name, const std::string& value);
-
- // Write a comment. The supplied text is prefixed with "# " and
- // terminated with a newline.
- //
- void
- comment (const std::string&);
-
- private:
- void
- check_name (const std::string&);
-
- // Write 'n' characters from 's' (assuming there are no newlines)
- // split into multiple lines at or near the 78 characters
- // boundary. The first line starts at the 'column' offset.
- //
- void
- write_value (std::size_t column, const char* s, std::size_t n);
-
- private:
- enum {start, body, end} s_ = start;
- std::string version_; // Current format version.
-
- private:
- std::ostream& os_;
- const std::string name_;
- };
-}
-
-#endif // BPKG_MANIFEST_SERIALIZER
diff --git a/bpkg/manifest-serializer.cxx b/bpkg/manifest-serializer.cxx
deleted file mode 100644
index 07b7db9..0000000
--- a/bpkg/manifest-serializer.cxx
+++ /dev/null
@@ -1,238 +0,0 @@
-// file : bpkg/manifest-serializer.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <bpkg/manifest-serializer>
-
-#include <ostream>
-#include <cassert>
-
-using namespace std;
-
-namespace bpkg
-{
- using serialization = manifest_serialization;
-
- void manifest_serializer::
- next (const string& n, const string& v)
- {
- switch (s_)
- {
- case start:
- {
- if (!n.empty ())
- throw serialization (name_, "format version pair expected");
-
- if (v.empty ())
- {
- // End of manifests.
- //
- os_.flush ();
- s_ = end;
- break;
- }
-
- if (v != "1")
- throw serialization (name_, "unsupported format version " + v);
-
- os_ << ':';
-
- if (v != version_)
- {
- os_ << ' ' << v;
- version_ = v;
- }
-
- os_ << endl;
- s_ = body;
- break;
- }
- case body:
- {
- if (n.empty ())
- {
- if (!v.empty ())
- throw serialization (name_, "non-empty value in end pair");
-
- s_ = start;
- break;
- }
-
- check_name (n);
-
- os_ << n << ':';
-
- if (!v.empty ())
- {
- os_ << ' ';
-
- // Use the multi-line mode in any of the following cases:
- //
- // - name is too long (say longer than 37 (78/2 - 2) characters;
- // we cannot start on the next line since that would start the
- // multi-line mode)
- // - value contains newlines
- // - value contains leading/trailing whitespaces
- //
- if (n.size () > 37 ||
- v.find ('\n') != string::npos ||
- v.front () == ' ' || v.front () == '\t' ||
- v.back () == ' ' || v.back () == '\t')
- {
- os_ << "\\" << endl; // Multi-line mode introductor.
-
- // Chunk the value into fragments separated by newlines.
- //
- for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i))
- {
- if (p == string::npos)
- {
- // Last chunk.
- //
- write_value (0, v.c_str () + i, v.size () - i);
- break;
- }
-
- write_value (0, v.c_str () + i, p - i);
- os_ << endl;
- i = p + 1;
- }
-
- os_ << endl << "\\"; // Multi-line mode terminator.
- }
- else
- write_value (n.size () + 2, v.c_str (), v.size ());
- }
-
- os_ << endl;
- break;
- }
- case end:
- {
- throw serialization (name_, "serialization after eos");
- }
- }
- }
-
- void manifest_serializer::
- comment (const string& t)
- {
- if (s_ == end)
- throw serialization (name_, "serialization after eos");
-
- os_ << '#';
-
- if (!t.empty ())
- os_ << ' ' << t;
-
- os_ << endl;
- }
-
- void manifest_serializer::
- check_name (const string& n)
- {
- if (n[0] == '#')
- throw serialization (name_, "name starts with '#'");
-
- for (char c: n)
- {
- switch (c)
- {
- case ' ':
- case '\t':
- case '\n': throw serialization (name_, "name contains whitespace");
- case ':': throw serialization (name_, "name contains ':'");
- default: break;
- }
- }
- }
-
- void manifest_serializer::
- write_value (size_t cl, const char* s, size_t n)
- {
- char c ('\0');
-
- // The idea is to break on the 77th character (i.e., write it
- // on the next line) which means we have written 76 characters
- // on this line plus 2 for '\' and '\n', which gives us 78.
- //
- for (const char* e (s + n); s != e; s++, cl++)
- {
- c = *s;
- bool br (false); // Break the line.
-
- // If this is a whitespace, see if it's a good place to break the
- // line.
- //
- if (c == ' ' || c == '\t')
- {
- // Find the next whitespace (or the end) and see if it is a better
- // place.
- //
- for (const char* w (s + 1); ; w++)
- {
- if (w == e || *w == ' ' || *w == '\t')
- {
- // Is this whitespace past where we need to break? Also see
- // below the "hard" break case for why we use 78 at the end.
- //
- if (cl + static_cast<size_t> (w - s) > (w != e ? 77 : 78))
- {
- // Only break if this whitespace is close enough to
- // the end of the line.
- //
- br = (cl > 57);
- }
-
- break;
- }
- }
- }
-
- // Do we have to do a "hard" break (i.e., without a whitespace)?
- // If there is just one character left, then instead of writing
- // '\' and then the character on the next line, we might as well
- // write it on this line.
- //
- if (cl == (s + 1 != e ? 77 : 78))
- br = true;
-
- if (br)
- {
- os_ << '\\' << endl;
- cl = 0;
- }
-
- os_ << c;
- }
-
- // What comes next is always a newline. I the last character that
- // we have written is a backslash, escape it.
- //
- if (c == '\\')
- os_ << '\\';
- }
-
- // manifest_serialization
- //
-
- static string
- format (const string& n, const string& d)
- {
- string r;
- if (!n.empty ())
- {
- r += n;
- r += ": ";
- }
- r += "error: ";
- r += d;
- return r;
- }
-
- manifest_serialization::
- manifest_serialization (const string& n, const string& d)
- : runtime_error (format (n, d)), name (n), description (d)
- {
- }
-}
diff --git a/bpkg/manifest.cxx b/bpkg/manifest.cxx
index b8b4ac7..246251d 100644
--- a/bpkg/manifest.cxx
+++ b/bpkg/manifest.cxx
@@ -18,9 +18,8 @@
#include <butl/path>
#include <butl/base64>
#include <butl/utility> // casecmp(), lcase(), alpha(), digit()
-
-#include <bpkg/manifest-parser>
-#include <bpkg/manifest-serializer>
+#include <butl/manifest-parser>
+#include <butl/manifest-serializer>
using namespace std;
using namespace butl;