diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2016-09-29 22:02:28 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2016-09-29 22:02:28 +0300 |
commit | f4a338ee1a5eb993fb7f9688588b77b12ad944c2 (patch) | |
tree | da7bea25e838d376eddb1263a839aca50571c4a4 /bpkg | |
parent | 4dc2c7b9fbbe7bec57524d71fd5330e2d1782c52 (diff) |
Move manifest_parser and manifest_serializer to libbutl
Diffstat (limited to 'bpkg')
-rw-r--r-- | bpkg/buildfile | 2 | ||||
-rw-r--r-- | bpkg/manifest | 41 | ||||
-rw-r--r-- | bpkg/manifest-parser | 94 | ||||
-rw-r--r-- | bpkg/manifest-parser.cxx | 379 | ||||
-rw-r--r-- | bpkg/manifest-serializer | 75 | ||||
-rw-r--r-- | bpkg/manifest-serializer.cxx | 238 | ||||
-rw-r--r-- | bpkg/manifest.cxx | 5 |
7 files changed, 21 insertions, 813 deletions
diff --git a/bpkg/buildfile b/bpkg/buildfile index 6d4e09e..22eeb98 100644 --- a/bpkg/buildfile +++ b/bpkg/buildfile @@ -7,8 +7,6 @@ import int_libs = libbutl%lib{butl} lib{bpkg}: \ {hxx }{ export } \ {hxx cxx}{ manifest } \ -{hxx cxx}{ manifest-parser } \ -{hxx cxx}{ manifest-serializer } \ {hxx }{ version } \ $int_libs diff --git a/bpkg/manifest b/bpkg/manifest index e0b8421..688cbf2 100644 --- a/bpkg/manifest +++ b/bpkg/manifest @@ -15,15 +15,12 @@ #include <butl/path> #include <butl/optional> +#include <butl/manifest-forward> #include <bpkg/export> namespace bpkg { - class manifest_parser; - class manifest_serializer; - class manifest_name_value; - using strings = std::vector<std::string>; class LIBBPKG_EXPORT version @@ -353,20 +350,20 @@ namespace bpkg // Create individual package manifest. // - package_manifest (manifest_parser&, bool ignore_unknown = false); + package_manifest (butl::manifest_parser&, bool ignore_unknown = false); // Create an element of the package list manifest. // - package_manifest (manifest_parser&, - manifest_name_value start, + package_manifest (butl::manifest_parser&, + butl::manifest_name_value start, bool ignore_unknown = false); void - serialize (manifest_serializer&) const; + serialize (butl::manifest_serializer&) const; private: - package_manifest (manifest_parser&, - manifest_name_value start, + package_manifest (butl::manifest_parser&, + butl::manifest_name_value start, bool in_list, bool ignore_unknown); }; @@ -384,10 +381,10 @@ namespace bpkg public: package_manifests () = default; - package_manifests (manifest_parser&, bool ignore_unknown = false); + package_manifests (butl::manifest_parser&, bool ignore_unknown = false); void - serialize (manifest_serializer&) const; + serialize (butl::manifest_serializer&) const; }; class LIBBPKG_EXPORT repository_location @@ -573,13 +570,13 @@ namespace bpkg public: repository_manifest () = default; // VC export. - repository_manifest (manifest_parser&, bool ignore_unknown = false); - repository_manifest (manifest_parser&, - manifest_name_value start, + repository_manifest (butl::manifest_parser&, bool ignore_unknown = false); + repository_manifest (butl::manifest_parser&, + butl::manifest_name_value start, bool ignore_unknown = false); void - serialize (manifest_serializer&) const; + serialize (butl::manifest_serializer&) const; }; class LIBBPKG_EXPORT repository_manifests: @@ -591,10 +588,10 @@ namespace bpkg using base_type::base_type; repository_manifests () = default; - repository_manifests (manifest_parser&, bool ignore_unknown = false); + repository_manifests (butl::manifest_parser&, bool ignore_unknown = false); void - serialize (manifest_serializer&) const; + serialize (butl::manifest_serializer&) const; }; class LIBBPKG_EXPORT signature_manifest @@ -612,12 +609,12 @@ namespace bpkg public: signature_manifest () = default; - signature_manifest (manifest_parser&, bool ignore_unknown = false); + signature_manifest (butl::manifest_parser&, bool ignore_unknown = false); // Serialize sha256sum and base64-encoded representation of the signature. // void - serialize (manifest_serializer&) const; + serialize (butl::manifest_serializer&) const; private: // Used for delegating in public constructor. Strictly speaking is not @@ -625,8 +622,8 @@ namespace bpkg // a manifest list, but kept for the consistency with other manifests // implementations. // - signature_manifest (manifest_parser&, - manifest_name_value start, + signature_manifest (butl::manifest_parser&, + butl::manifest_name_value start, bool ignore_unknown); }; } diff --git a/bpkg/manifest-parser b/bpkg/manifest-parser deleted file mode 100644 index 8cbb768..0000000 --- a/bpkg/manifest-parser +++ /dev/null @@ -1,94 +0,0 @@ -// file : bpkg/manifest-parser -*- C++ -*- -// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#ifndef BPKG_MANIFEST_PARSER -#define BPKG_MANIFEST_PARSER - -#include <string> -#include <iosfwd> -#include <cstdint> // uint64_t -#include <stdexcept> // runtime_error - -#include <butl/char-scanner> - -#include <bpkg/export> - -namespace bpkg -{ - class LIBBPKG_EXPORT manifest_parsing: public std::runtime_error - { - public: - manifest_parsing (const std::string& name, - std::uint64_t line, - std::uint64_t column, - const std::string& description); - - std::string name; - std::uint64_t line; - std::uint64_t column; - std::string description; - }; - - class manifest_name_value - { - public: - std::string name; - std::string value; - - std::uint64_t name_line; - std::uint64_t name_column; - - std::uint64_t value_line; - std::uint64_t value_column; - - bool - empty () const {return name.empty () && value.empty ();} - }; - - class LIBBPKG_EXPORT manifest_parser: protected butl::char_scanner - { - public: - manifest_parser (std::istream& is, const std::string& name) - : char_scanner (is), name_ (name) {} - - const std::string& - name () const {return name_;} - - // The first returned pair is special "start-of-manifest" with - // empty name and value being the format version: {"", "<ver>"}. - // After that we have a sequence of ordinary pairs which are - // the manifest. At the end of the manifest we have the special - // "end-of-manifest" pair with empty name and value: {"", ""}. - // After that we can either get another start-of-manifest pair - // (in which case the whole sequence repeats from the beginning) - // or we get another end-of-manifest pair which signals the end - // of stream (aka EOF). To put it another way, the parse sequence - // always has the following form: - // - // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""} - // - manifest_name_value - next (); - - private: - void - parse_name (manifest_name_value&); - - void - parse_value (manifest_name_value&); - - // Skip spaces and return the first peeked non-space character. - // - xchar - skip_spaces (); - - private: - const std::string name_; - - enum {start, body, end} s_ = start; - std::string version_; // Current format version. - }; -} - -#endif // BPKG_MANIFEST_PARSER diff --git a/bpkg/manifest-parser.cxx b/bpkg/manifest-parser.cxx deleted file mode 100644 index 58e920a..0000000 --- a/bpkg/manifest-parser.cxx +++ /dev/null @@ -1,379 +0,0 @@ -// file : bpkg/manifest-parser.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include <bpkg/manifest-parser> - -#include <cassert> -#include <sstream> - -using namespace std; - -namespace bpkg -{ - using parsing = manifest_parsing; - using name_value = manifest_name_value; - - name_value manifest_parser:: - next () - { - if (s_ == end) - return name_value {"", "", line, column, line, column}; - - xchar c (skip_spaces ()); - - // Here is the problem: if we are in the 'body' state (that is, - // we are parsing inside the manifest) and we see the special - // empty name, then before returning the "start" pair for the - // next manifest, we have to return the "end" pair. One way - // would be to cache the "start" pair and return it on the - // next call of next(). But that would require quite a bit - // of extra logic. The alternative is to detect the beginning - // of the empty name before parsing too far. This way, the - // next call to next() will start parsing where we left of - // and return the "start" pair naturally. - // - if (s_ == body && c == ':') - { - s_ = start; - return name_value {"", "", c.line, c.column, c.line, c.column}; - } - - // Regardless of the state, what should come next is a name, - // potentially the special empty one. - // - name_value r; - parse_name (r); - - skip_spaces (); - c = get (); - - if (eos (c)) - { - // This is ok as long as the name is empty. - // - if (!r.name.empty ()) - throw parsing (name_, c.line, c.column, "':' expected after name"); - - s_ = end; - - // The "end" pair. - // - r.value_line = r.name_line; - r.value_column = r.name_column; - return r; - } - - if (c != ':') - throw parsing (name_, c.line, c.column, "':' expected after name"); - - skip_spaces (); - parse_value (r); - - c = peek (); - - // The character after the value should be either a newline or eos. - // - assert (c == '\n' || eos (c)); - - if (c == '\n') - get (); - - // Now figure out whether what we've got makes sense, depending - // on the state we are in. - // - if (s_ == start) - { - // Start of the (next) manifest. The first pair should be the - // special empty name/format version. - // - if (!r.name.empty ()) - throw parsing (name_, r.name_line, r.name_column, - "format version pair expected"); - - // The version value is only mandatory for the first manifest in - // a sequence. - // - if (r.value.empty ()) - { - if (version_.empty ()) - throw parsing (name_, r.value_line, r.value_column, - "format version value expected"); - r.value = version_; - } - else - { - version_ = r.value; // Update with the latest. - - if (version_ != "1") - throw parsing (name_, r.value_line, r.value_column, - "unsupported format version " + version_); - } - - s_ = body; - } - else - { - // Parsing the body of the manifest. - // - - // Should have been handled by the special case above. - // - assert (!r.name.empty ()); - } - - return r; - } - - void manifest_parser:: - parse_name (name_value& r) - { - xchar c (peek ()); - - r.name_line = c.line; - r.name_column = c.column; - - for (; !eos (c); c = peek ()) - { - if (c == ':' || c == ' ' || c == '\t' || c == '\n') - break; - - r.name += c; - get (); - } - } - - void manifest_parser:: - parse_value (name_value& r) - { - xchar c (peek ()); - - r.value_line = c.line; - r.value_column = c.column; - - string& v (r.value); - string::size_type n (0); // Size of last non-space character (simple mode). - - // Detect the multi-line mode introductor. - // - bool ml (false); - if (c == '\\') - { - get (); - xchar p (peek ()); - - if (p == '\n') - { - get (); // Newline is not part of the value so skip it. - c = peek (); - ml = true; - } - else if (eos (p)) - ml = true; - else - unget (c); - } - - // The nl flag signals that the preceding character was a "special - // newline", that is, a newline that was part of the milti-line mode - // introductor or an escape sequence. - // - for (bool nl (ml); !eos (c); c = peek ()) - { - // Detect the special "\n\\\n" sequence. In the multi-line mode, - // this is a "terminator". In the simple mode, this is a way to - // specify a newline. - // - // The key idea here is this: if we "swallowed" any characters - // (i.e., called get() without a matching unget()), then we - // have to restart the loop in order to do all the tests for - // the next character. Also, for this to work, we can only - // add one character to v, which limits us to maximum three - // characters look-ahead: one in v, one "ungot", and one - // peeked. - // - // The first block handles the special sequence that starts with - // a special newline. In multi-line mode, this is an "immediate - // termination" where we "use" the newline from the introductor. - // Note also that in the simple mode the special sequence can - // only start with a special (i.e., escaped) newline. - // - if (nl) - { - nl = false; - - if (c == '\\') - { - get (); - xchar c1 (peek ()); - - if (c1 == '\n' || eos (c1)) - { - if (ml) - break; - else - { - if (c1 == '\n') - get (); - - v += '\n'; // Literal newline. - n = v.size (); - continue; // Restart from the next character. - } - } - else - unget (c); // Fall through. - } - } - - if (c == '\n') - { - if (ml) - { - get (); - xchar c1 (peek ()); - - if (c1 == '\\') - { - get (); - xchar c2 (peek ()); - - if (c2 == '\n' || eos (c2)) - break; - else - { - v += '\n'; - unget (c1); - continue; // Restart from c1 (slash). - } - } - else - unget (c); // Fall through. - } - else - break; // Simple value terminator. - } - - // Detect the newline escape sequence. The same look-ahead - // approach as above. - // - if (c == '\\') - { - get (); - xchar c1 (peek ()); - - if (c1 == '\n' || eos (c1)) - { - if (c1 == '\n') - { - get (); - nl = true; // This is a special newline. - } - continue; // Restart from the next character. - } - else if (c1 == '\\') - { - get (); - xchar c2 (peek ()); - - if (c2 == '\n' || eos (c1)) - { - v += '\\'; - n = v.size (); - // Restart from c2 (newline/eos). - } - else - { - v += '\\'; - n = v.size (); - unget (c1); // Restart from c1 (second slash). - } - - continue; - } - else - unget (c); // Fall through. - } - - get (); - v += c; - - if (!ml && c != ' ' && c != '\t') - n = v.size (); - } - - // Cut off trailing whitespaces. - // - if (!ml) - v.resize (n); - } - - manifest_parser::xchar manifest_parser:: - skip_spaces () - { - xchar c (peek ()); - bool start (c.column == 1); - - for (; !eos (c); c = peek ()) - { - switch (c) - { - case ' ': - case '\t': - break; - case '\n': - { - // Skip empty lines. - // - if (!start) - return c; - - break; - } - case '#': - { - // We only recognize '#' as a start of a comment at the beginning - // of the line (sans leading spaces). - // - if (!start) - return c; - - get (); - - // Read until newline or eos. - // - for (c = peek (); !eos (c) && c != '\n'; c = peek ()) - get (); - - continue; - } - default: - return c; // Not a space. - } - - get (); - } - - return c; - } - - // manifest_parsing - // - - static string - format (const string& n, uint64_t l, uint64_t c, const string& d) - { - ostringstream os; - if (!n.empty ()) - os << n << ':'; - os << l << ':' << c << ": error: " << d; - return os.str (); - } - - manifest_parsing:: - manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d) - : runtime_error (format (n, l, c, d)), - name (n), line (l), column (c), description (d) - { - } -} diff --git a/bpkg/manifest-serializer b/bpkg/manifest-serializer deleted file mode 100644 index 501fd30..0000000 --- a/bpkg/manifest-serializer +++ /dev/null @@ -1,75 +0,0 @@ -// file : bpkg/manifest-serializer -*- C++ -*- -// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#ifndef BPKG_MANIFEST_SERIALIZER -#define BPKG_MANIFEST_SERIALIZER - -#include <string> -#include <iosfwd> -#include <cstddef> // size_t -#include <stdexcept> // runtime_error - -#include <bpkg/export> - -namespace bpkg -{ - class LIBBPKG_EXPORT manifest_serialization: public std::runtime_error - { - public: - manifest_serialization (const std::string& name, - const std::string& description); - - std::string name; - std::string description; - }; - - class LIBBPKG_EXPORT manifest_serializer - { - public: - manifest_serializer (std::ostream& os, const std::string& name) - : os_ (os), name_ (name) {} - - const std::string& - name () const {return name_;} - - // The first name-value pair should be the special "start-of-manifest" - // with empty name and value being the format version. After that we - // have a sequence of ordinary pairs which are the manifest. At the - // end of the manifest we have the special "end-of-manifest" pair - // with empty name and value. After that we can either have another - // start-of-manifest pair (in which case the whole sequence repeats - // from the beginning) or we get another end-of-manifest pair which - // signals the end of stream. - // - void - next (const std::string& name, const std::string& value); - - // Write a comment. The supplied text is prefixed with "# " and - // terminated with a newline. - // - void - comment (const std::string&); - - private: - void - check_name (const std::string&); - - // Write 'n' characters from 's' (assuming there are no newlines) - // split into multiple lines at or near the 78 characters - // boundary. The first line starts at the 'column' offset. - // - void - write_value (std::size_t column, const char* s, std::size_t n); - - private: - enum {start, body, end} s_ = start; - std::string version_; // Current format version. - - private: - std::ostream& os_; - const std::string name_; - }; -} - -#endif // BPKG_MANIFEST_SERIALIZER diff --git a/bpkg/manifest-serializer.cxx b/bpkg/manifest-serializer.cxx deleted file mode 100644 index 07b7db9..0000000 --- a/bpkg/manifest-serializer.cxx +++ /dev/null @@ -1,238 +0,0 @@ -// file : bpkg/manifest-serializer.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#include <bpkg/manifest-serializer> - -#include <ostream> -#include <cassert> - -using namespace std; - -namespace bpkg -{ - using serialization = manifest_serialization; - - void manifest_serializer:: - next (const string& n, const string& v) - { - switch (s_) - { - case start: - { - if (!n.empty ()) - throw serialization (name_, "format version pair expected"); - - if (v.empty ()) - { - // End of manifests. - // - os_.flush (); - s_ = end; - break; - } - - if (v != "1") - throw serialization (name_, "unsupported format version " + v); - - os_ << ':'; - - if (v != version_) - { - os_ << ' ' << v; - version_ = v; - } - - os_ << endl; - s_ = body; - break; - } - case body: - { - if (n.empty ()) - { - if (!v.empty ()) - throw serialization (name_, "non-empty value in end pair"); - - s_ = start; - break; - } - - check_name (n); - - os_ << n << ':'; - - if (!v.empty ()) - { - os_ << ' '; - - // Use the multi-line mode in any of the following cases: - // - // - name is too long (say longer than 37 (78/2 - 2) characters; - // we cannot start on the next line since that would start the - // multi-line mode) - // - value contains newlines - // - value contains leading/trailing whitespaces - // - if (n.size () > 37 || - v.find ('\n') != string::npos || - v.front () == ' ' || v.front () == '\t' || - v.back () == ' ' || v.back () == '\t') - { - os_ << "\\" << endl; // Multi-line mode introductor. - - // Chunk the value into fragments separated by newlines. - // - for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i)) - { - if (p == string::npos) - { - // Last chunk. - // - write_value (0, v.c_str () + i, v.size () - i); - break; - } - - write_value (0, v.c_str () + i, p - i); - os_ << endl; - i = p + 1; - } - - os_ << endl << "\\"; // Multi-line mode terminator. - } - else - write_value (n.size () + 2, v.c_str (), v.size ()); - } - - os_ << endl; - break; - } - case end: - { - throw serialization (name_, "serialization after eos"); - } - } - } - - void manifest_serializer:: - comment (const string& t) - { - if (s_ == end) - throw serialization (name_, "serialization after eos"); - - os_ << '#'; - - if (!t.empty ()) - os_ << ' ' << t; - - os_ << endl; - } - - void manifest_serializer:: - check_name (const string& n) - { - if (n[0] == '#') - throw serialization (name_, "name starts with '#'"); - - for (char c: n) - { - switch (c) - { - case ' ': - case '\t': - case '\n': throw serialization (name_, "name contains whitespace"); - case ':': throw serialization (name_, "name contains ':'"); - default: break; - } - } - } - - void manifest_serializer:: - write_value (size_t cl, const char* s, size_t n) - { - char c ('\0'); - - // The idea is to break on the 77th character (i.e., write it - // on the next line) which means we have written 76 characters - // on this line plus 2 for '\' and '\n', which gives us 78. - // - for (const char* e (s + n); s != e; s++, cl++) - { - c = *s; - bool br (false); // Break the line. - - // If this is a whitespace, see if it's a good place to break the - // line. - // - if (c == ' ' || c == '\t') - { - // Find the next whitespace (or the end) and see if it is a better - // place. - // - for (const char* w (s + 1); ; w++) - { - if (w == e || *w == ' ' || *w == '\t') - { - // Is this whitespace past where we need to break? Also see - // below the "hard" break case for why we use 78 at the end. - // - if (cl + static_cast<size_t> (w - s) > (w != e ? 77 : 78)) - { - // Only break if this whitespace is close enough to - // the end of the line. - // - br = (cl > 57); - } - - break; - } - } - } - - // Do we have to do a "hard" break (i.e., without a whitespace)? - // If there is just one character left, then instead of writing - // '\' and then the character on the next line, we might as well - // write it on this line. - // - if (cl == (s + 1 != e ? 77 : 78)) - br = true; - - if (br) - { - os_ << '\\' << endl; - cl = 0; - } - - os_ << c; - } - - // What comes next is always a newline. I the last character that - // we have written is a backslash, escape it. - // - if (c == '\\') - os_ << '\\'; - } - - // manifest_serialization - // - - static string - format (const string& n, const string& d) - { - string r; - if (!n.empty ()) - { - r += n; - r += ": "; - } - r += "error: "; - r += d; - return r; - } - - manifest_serialization:: - manifest_serialization (const string& n, const string& d) - : runtime_error (format (n, d)), name (n), description (d) - { - } -} diff --git a/bpkg/manifest.cxx b/bpkg/manifest.cxx index b8b4ac7..246251d 100644 --- a/bpkg/manifest.cxx +++ b/bpkg/manifest.cxx @@ -18,9 +18,8 @@ #include <butl/path> #include <butl/base64> #include <butl/utility> // casecmp(), lcase(), alpha(), digit() - -#include <bpkg/manifest-parser> -#include <bpkg/manifest-serializer> +#include <butl/manifest-parser> +#include <butl/manifest-serializer> using namespace std; using namespace butl; |