// Provenance: this header was introduced by the following git patch
// (as rendered by cgit v1.1, view limited to 'bpkg/manifest-parser'):
//
//   From 72648921ec28903615698a61aeff4799e1ca9a7d Mon Sep 17 00:00:00 2001
//   From: Boris Kolpackov
//   Date: Tue, 9 Jun 2015 19:50:58 +0200
//   Subject: Implement low-level manifest parser
//
//   bpkg/manifest-parser | 141 +++++++++++++++++++++++++++++++++++++++++++++
//   1 file changed, 141 insertions(+)
//   create mode 100644 bpkg/manifest-parser
//   index 0000000..2b1e4a5
//

// file      : bpkg/manifest-parser -*- C++ -*-
// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BPKG_MANIFEST_PARSER
#define BPKG_MANIFEST_PARSER

#include <string>    // string, char_traits
#include <iosfwd>    // istream (forward declaration is sufficient here)
#include <cstdint>   // uint64_t
#include <stdexcept> // runtime_error

namespace bpkg
{
  // Exception describing a manifest parsing failure together with the
  // position it refers to.
  //
  // The constructor is only declared here; how the runtime_error
  // message (what()) is composed from the arguments is determined by
  // its out-of-line definition.
  //
  class manifest_parsing: public std::runtime_error
  {
  public:
    manifest_parsing (const std::string& name,
                      std::uint64_t line,
                      std::uint64_t column,
                      const std::string& description);

    std::string name;        // Presumably the input name (see
                             // manifest_parser::name ()) -- confirm.
    std::uint64_t line;
    std::uint64_t column;
    std::string description;
  };

  // Low-level manifest parser: reads the input stream and hands out its
  // contents as a sequence of name/value pairs via next ().
  //
  class manifest_parser
  {
  public:
    // Note that the stream is held by reference (not copied) so it must
    // outlive the parser. The name is copied and is available via
    // name ().
    //
    manifest_parser (std::istream& is, const std::string& name)
        : is_ (is), name_ (name) {}

    // Return the name passed to the constructor.
    //
    const std::string&
    name () const {return name_;}

    // A name/value pair along with the positions (line and column) of
    // the name and of the value.
    //
    struct name_value_type
    {
      std::string name;
      std::string value;

      std::uint64_t name_line;
      std::uint64_t name_column;

      std::uint64_t value_line;
      std::uint64_t value_column;
    };

    // The first returned pair is special "start-of-manifest" with
    // empty name and value being the format version: {"", "<ver>"}.
    // After that we have a sequence of ordinary pairs which are
    // the manifest. At the end of the manifest we have the special
    // "end-of-manifest" pair with empty name and value: {"", ""}.
    // After that we can either get another start-of-manifest pair
    // (in which case the whole sequence repeats from the beginning)
    // or we get another end-of-manifest pair which signals the end
    // of stream (aka EOF). To put it another way, the parse sequence
    // always has the following form:
    //
    // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
    //
    name_value_type
    next ();

  private:
    // Extended character: the value as extracted from the stream
    // (kept as int_type so that it can also represent traits_type::eof ())
    // plus the line and column associated with it.
    //
    class xchar
    {
    public:
      typedef std::char_traits<char> traits_type;
      typedef traits_type::int_type int_type;
      typedef traits_type::char_type char_type;

      xchar (int_type v, std::uint64_t l, std::uint64_t c)
          : v_ (v), l_ (l), c_ (c) {}

      // Implicit conversion to the plain character. Only meaningful if
      // the value is not eof (see is_eos () below).
      //
      operator char_type () const {return static_cast<char_type> (v_);}

      // The raw value, suitable for comparison with traits_type::eof ().
      //
      int_type
      value () const {return v_;}

      std::uint64_t line () const {return l_;}
      std::uint64_t column () const {return c_;}

    private:
      int_type v_;
      std::uint64_t l_;
      std::uint64_t c_;
    };

  private:
    // Parse the name/value part of a pair into the passed object. These
    // are only declared here; presumably they also fill in the
    // corresponding *_line/*_column members -- confirm against the
    // implementation.
    //
    void
    parse_name (name_value_type&);

    void
    parse_value (name_value_type&);

    // Skip spaces and return the first peeked non-space character.
    //
    xchar
    skip_spaces ();

    // Character interface.
    //
  private:
    xchar
    peek ();

    xchar
    get ();

    void
    unget (const xchar&);

    // Tests.
    //

    // Return true if the character is the end-of-stream marker, that
    // is, its value is traits_type::eof ().
    //
    bool
    is_eos (const xchar& c) const
    {
      return c.value () == xchar::traits_type::eof ();
    }

  private:
    // Parsing state; a newly constructed parser is in the start state.
    //
    enum {start, body, eos} s_ = start;
    std::string version_; // Current format version.

  private:
    std::istream& is_;
    const std::string name_;

    // Line and column; both are initialized to 1. Presumably the
    // position of the next character to be read -- confirm against the
    // implementation.
    //
    std::uint64_t l_ {1};
    std::uint64_t c_ {1};

    // Presumably the one-character buffer used by unget () and the flag
    // indicating that it holds a character -- confirm against the
    // implementation.
    //
    bool unget_ {false};
    xchar buf_ {0, 0, 0};

    // Initially false. Presumably set once the end of stream has been
    // seen -- confirm against the implementation.
    //
    bool eos_ {false};
  };
}

#endif // BPKG_MANIFEST_PARSER