// file : libbutl/manifest-parser.mxx -*- C++ -*- // copyright : Copyright (c) 2014-2019 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #ifndef __cpp_modules_ts #pragma once #endif // C includes. #ifndef __cpp_lib_modules_ts #include #include #include #include // uint64_t #include // pair, move() #include // runtime_error #include #endif // Other includes. #ifdef __cpp_modules_ts export module butl.manifest_parser; #ifdef __cpp_lib_modules_ts import std.core; import std.io; #endif import butl.optional; import butl.char_scanner; import butl.manifest_types; #else #include #include #include #endif #include LIBBUTL_MODEXPORT namespace butl { class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error { public: manifest_parsing (const std::string& name, std::uint64_t line, std::uint64_t column, const std::string& description); manifest_parsing (const std::string& description); std::string name; std::uint64_t line; std::uint64_t column; std::string description; }; class LIBBUTL_SYMEXPORT manifest_parser: protected butl::char_scanner { public: // The filter, if specified, is called by next() prior to returning the // pair to the caller. If the filter returns false, then the pair is // discarded. // // Note that the filter should handle the end-of-manifest pairs (see // below) carefully, so next() doesn't end up with an infinite cycle. // using filter_function = bool (manifest_name_value&); manifest_parser (std::istream& is, const std::string& name, std::function filter = {}) : char_scanner (is), name_ (name), filter_ (std::move (filter)) {} const std::string& name () const {return name_;} // The first returned pair is special "start-of-manifest" with empty name // and value being the format version: {"", ""}. After that we have a // sequence of ordinary pairs which are the manifest. At the end of the // manifest we have the special "end-of-manifest" pair with empty name and // value: {"", ""}. After that we can either get another start-of-manifest // pair (in which case the whole sequence repeats from the beginning) or // we get another end-of-manifest-like pair which signals the end of // stream (aka EOF) and which we will call the end-of-stream pair. To put // it another way, the parse sequence always has the following form: // // ({"", ""} {"", ""}* {"", ""})* {"", ""} // manifest_name_value next (); // Split the manifest value, optionally followed by ';' character and a // comment into the value/comment pair. Note that ';' characters in the // value must be escaped by the backslash. // static std::pair split_comment (const std::string&); private: void parse_next (manifest_name_value&); void parse_name (manifest_name_value&); void parse_value (manifest_name_value&); // Skip spaces and return the first peeked non-space character and the // starting position of the line it belongs to. If the later is not // available (skipped spaces are all in the middle of a line, we are at // eos, etc.), then fallback to the first peeked character position. // std::pair skip_spaces (); private: const std::string name_; const std::function filter_; enum {start, body, end} s_ = start; std::string version_; // Current format version. }; // Parse and return a single manifest. Throw manifest_parsing in case of an // error. // // Note that the returned manifest doesn't contain the format version nor // the end-of-manifest/stream pairs. // LIBBUTL_SYMEXPORT std::vector parse_manifest (manifest_parser&); // As above but append the manifest values to an existing list. // LIBBUTL_SYMEXPORT void parse_manifest (manifest_parser&, std::vector&); // As above but return nullopt if eos is reached before reading any values. // LIBBUTL_SYMEXPORT optional> try_parse_manifest (manifest_parser&); // As above but append the manifest values to an existing list returning // false if eos is reached before reading any values. // LIBBUTL_SYMEXPORT bool try_parse_manifest (manifest_parser&, std::vector&); } #include