aboutsummaryrefslogtreecommitdiff
path: root/bpkg/manifest-parser
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2015-06-09 19:50:58 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2015-06-09 19:50:58 +0200
commit 72648921ec28903615698a61aeff4799e1ca9a7d (patch)
tree 5c94e50453232a05561ed7967c0c4c19f86c83af /bpkg/manifest-parser
parent 718263310d93081d615e35301f3a55cd91c3b2ea (diff)
Implement low-level manifest parser
Diffstat (limited to 'bpkg/manifest-parser')
-rw-r--r-- bpkg/manifest-parser 141
1 files changed, 141 insertions, 0 deletions
diff --git a/bpkg/manifest-parser b/bpkg/manifest-parser
new file mode 100644
index 0000000..2b1e4a5
--- /dev/null
+++ b/bpkg/manifest-parser
@@ -0,0 +1,141 @@
+// file : bpkg/manifest-parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BPKG_MANIFEST_PARSER
+#define BPKG_MANIFEST_PARSER
+
+#include <string>
+#include <iosfwd>
+#include <cstdint> // uint64_t
+#include <stdexcept> // runtime_error
+
+namespace bpkg
+{
+ class manifest_parsing: public std::runtime_error // Exception type for manifest parsing errors.
+ {
+ public:
+ manifest_parsing (const std::string& name, // Only declared here; defined out of line.
+ std::uint64_t line,
+ std::uint64_t column,
+ const std::string& description); // NOTE(review): presumably also used to build what () -- confirm.
+
+ std::string name; // Public members mirroring the constructor parameters.
+ std::uint64_t line; // NOTE(review): presumably the position of the error -- confirm.
+ std::uint64_t column;
+ std::string description;
+ };
+
+ class manifest_parser // Low-level manifest parser: yields name/value pairs via next ().
+ {
+ public:
+ manifest_parser (std::istream& is, const std::string& name)
+ : is_ (is), name_ (name) {} // Keeps a reference to is (not a copy) and a copy of name.
+
+ const std::string&
+ name () const {return name_;} // The name passed to the constructor.
+
+ struct name_value_type // The pair type returned by next ().
+ {
+ std::string name;
+ std::string value;
+
+ std::uint64_t name_line; // NOTE(review): presumably where the name starts -- confirm.
+ std::uint64_t name_column;
+
+ std::uint64_t value_line; // NOTE(review): presumably where the value starts -- confirm.
+ std::uint64_t value_column;
+ };
+
+ // The first returned pair is the special "start-of-manifest" pair: its
+ // name is empty and its value is the format version: {"", "<ver>"}.
+ // It is followed by a sequence of ordinary pairs which make up the
+ // manifest. The manifest is terminated by the special
+ // "end-of-manifest" pair with empty name and value: {"", ""}.
+ // After that we get either another start-of-manifest pair
+ // (in which case the whole sequence repeats from the beginning)
+ // or another end-of-manifest pair, which signals the end
+ // of stream (aka EOF). To put it another way, the parse sequence
+ // always has the following form:
+ //
+ // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
+ //
+ name_value_type
+ next ();
+
+ private:
+ class xchar // A character value (int_type) together with a line and a column.
+ {
+ public:
+ typedef std::char_traits<char> traits_type;
+ typedef traits_type::int_type int_type; // Wide enough to also hold traits_type::eof ().
+ typedef traits_type::char_type char_type;
+
+ xchar (int_type v, std::uint64_t l, std::uint64_t c)
+ : v_ (v), l_ (l), c_ (c) {}
+
+ operator char_type () const {return static_cast<char_type> (v_);} // Implicit narrowing to char.
+
+ int_type
+ value () const {return v_;} // Unconverted value; this is what is_eos () examines.
+
+ std::uint64_t line () const {return l_;}
+ std::uint64_t column () const {return c_;}
+
+ private:
+ int_type v_;
+ std::uint64_t l_;
+ std::uint64_t c_;
+ };
+
+ private:
+ void
+ parse_name (name_value_type&); // Defined out of line; NOTE(review): presumably fills in the name part -- confirm.
+
+ void
+ parse_value (name_value_type&); // Defined out of line; NOTE(review): presumably fills in the value part -- confirm.
+
+ // Skip spaces and return the first peeked non-space character.
+ //
+ xchar
+ skip_spaces ();
+
+ // Character interface (declared here, defined out of line).
+ //
+ private:
+ xchar
+ peek ();
+
+ xchar
+ get ();
+
+ void
+ unget (const xchar&);
+
+ // Character tests.
+ //
+ bool
+ is_eos (const xchar& c) const
+ {
+ return c.value () == xchar::traits_type::eof (); // True if c holds the end-of-stream value.
+ }
+
+ private:
+ enum {start, body, eos} s_ = start; // Parser state; initially start.
+ std::string version_; // Current format version.
+
+ private:
+ std::istream& is_; // Input stream bound by the constructor.
+ const std::string name_; // Returned by name ().
+
+ std::uint64_t l_ {1}; // Initially 1; NOTE(review): presumably the current line -- confirm.
+ std::uint64_t c_ {1}; // Initially 1; NOTE(review): presumably the current column -- confirm.
+
+ bool unget_ {false}; // NOTE(review): presumably set by unget () while buf_ holds a character -- confirm.
+ xchar buf_ {0, 0, 0};
+
+ bool eos_ {false}; // NOTE(review): presumably set once end of stream has been seen -- confirm.
+ };
+}
+
+#endif // BPKG_MANIFEST_PARSER