From df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 28 Sep 2021 19:24:31 +0300 Subject: Get rid of C++ modules related code and rename *.mxx files to *.hxx --- libbutl/manifest-parser.hxx | 160 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 libbutl/manifest-parser.hxx (limited to 'libbutl/manifest-parser.hxx') diff --git a/libbutl/manifest-parser.hxx b/libbutl/manifest-parser.hxx new file mode 100644 index 0000000..d53eb42 --- /dev/null +++ b/libbutl/manifest-parser.hxx @@ -0,0 +1,160 @@ +// file : libbutl/manifest-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // uint64_t +#include // pair, move() +#include // runtime_error +#include + +#include +#include +#include +#include + +#include + +namespace butl +{ + class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error + { + public: + manifest_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + manifest_parsing (const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + class LIBBUTL_SYMEXPORT manifest_parser: + protected char_scanner + { + public: + // The filter, if specified, is called by next() prior to returning the + // pair to the caller. If the filter returns false, then the pair is + // discarded. + // + // Note that the filter should handle the end-of-manifest pairs (see + // below) carefully, so next() doesn't end up with an infinite cycle. + // + using filter_function = bool (manifest_name_value&); + + manifest_parser (std::istream& is, + const std::string& name, + std::function filter = {}) + : char_scanner (is, + utf8_validator (codepoint_types::graphic, U"\n\r\t")), + name_ (name), + filter_ (std::move (filter)) {} + + const std::string& + name () const {return name_;} + + // The first returned pair is special "start-of-manifest" with empty name + // and value being the format version: {"", ""}. After that we have a + // sequence of ordinary pairs which are the manifest. At the end of the + // manifest we have the special "end-of-manifest" pair with empty name and + // value: {"", ""}. After that we can either get another start-of-manifest + // pair (in which case the whole sequence repeats from the beginning) or + // we get another end-of-manifest-like pair which signals the end of + // stream (aka EOF) and which we will call the end-of-stream pair. To put + // it another way, the parse sequence always has the following form: + // + // ({"", ""} {"", ""}* {"", ""})* {"", ""} + // + manifest_name_value + next (); + + // Split the manifest value, optionally followed by ';' character and a + // comment into the value/comment pair. Note that ';' characters in the + // value must be escaped by the backslash. + // + static std::pair + split_comment (const std::string&); + + private: + using base = char_scanner; + + void + parse_next (manifest_name_value&); + + void + parse_name (manifest_name_value&); + + void + parse_value (manifest_name_value&); + + // Skip spaces and return the first peeked non-space character and the + // starting position of the line it belongs to. If the later is not + // available (skipped spaces are all in the middle of a line, we are at + // eos, etc.), then fallback to the first peeked character position. + // + std::pair + skip_spaces (); + + // As base::get() but in case of an invalid character throws + // manifest_parsing. + // + xchar + get (const char* what); + + // Get previously peeked character (faster). + // + void + get (const xchar&); + + // As base::peek() but in case of an invalid character throws + // manifest_parsing. + // + xchar + peek (const char* what); + + private: + const std::string name_; + const std::function filter_; + + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + // Buffer for a get()/peek() potential error. + // + std::string ebuf_; + }; + + // Parse and return a single manifest. Throw manifest_parsing in case of an + // error. + // + // Note that the returned manifest doesn't contain the format version nor + // the end-of-manifest/stream pairs. + // + LIBBUTL_SYMEXPORT std::vector + parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list. + // + LIBBUTL_SYMEXPORT void + parse_manifest (manifest_parser&, std::vector&); + + // As above but return nullopt if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT optional> + try_parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list returning + // false if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT bool + try_parse_manifest (manifest_parser&, std::vector&); +} + +#include -- cgit v1.1