aboutsummaryrefslogtreecommitdiff
path: root/libbutl/manifest-parser.mxx
blob: 77addff6b24721950b74ba2c626b957bee0acc74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// file      : libbutl/manifest-parser.mxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#ifndef __cpp_modules_ts
#pragma once
#endif

// C includes.

#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <iosfwd>
#include <cstdint>    // uint64_t
#include <utility>    // pair, move()
#include <stdexcept>  // runtime_error
#include <functional>
#endif

// Other includes.

#ifdef __cpp_modules_ts
export module butl.manifest_parser;
#ifdef __cpp_lib_modules_ts
import std.core;
import std.io;
#endif
import butl.utf8;
import butl.optional;
import butl.char_scanner;
import butl.manifest_types;
#else
#include <libbutl/utf8.mxx>
#include <libbutl/optional.mxx>
#include <libbutl/char-scanner.mxx>
#include <libbutl/manifest-types.mxx>
#endif

#include <libbutl/export.hxx>

LIBBUTL_MODEXPORT namespace butl
{
  // Exception describing a manifest parsing error. Derives from
  // std::runtime_error and additionally exposes the error details as public
  // data members.
  //
  class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error
  {
  public:
    // Create the exception from the full set of error details.
    //
    // NOTE(review): the name is presumably the manifest (stream) name as
    // passed to the manifest_parser constructor and line/column the position
    // of the error in that stream -- confirm against the implementation.
    //
    manifest_parsing (const std::string& name,
                      std::uint64_t line,
                      std::uint64_t column,
                      const std::string& description);

    // Create the exception from the description only.
    //
    // NOTE(review): what name, line, and column are set to in this case is
    // not visible here -- confirm against the implementation. Also note that
    // this constructor is not explicit and so allows an implicit conversion
    // from std::string.
    //
    manifest_parsing (const std::string& description);

    // Error details (see the constructors above).
    //
    std::string name;
    std::uint64_t line;
    std::uint64_t column;
    std::string description;
  };

  // Manifest parser that produces a sequence of name/value pairs from an
  // input stream, one pair per next() call.
  //
  // The character scanning is inherited (as a protected base) from
  // char_scanner<utf8_validator>, with get()/peek() wrapped below to report
  // invalid characters as manifest_parsing.
  //
  class LIBBUTL_SYMEXPORT manifest_parser:
    protected char_scanner<utf8_validator>
  {
  public:
    // The filter, if specified, is called by next() prior to returning the
    // pair to the caller. If the filter returns false, then the pair is
    // discarded.
    //
    // Note that the filter should handle the end-of-manifest pairs (see
    // below) carefully, so next() doesn't end up with an infinite cycle.
    //
    // Also note that the filter receives the pair by non-const reference.
    //
    using filter_function = bool (manifest_name_value&);

    // Create the parser reading from the specified stream. The name is
    // copied and is what name() returns (presumably it also identifies the
    // manifest in the manifest_parsing diagnostics -- confirm against the
    // implementation). The filter, if any, is stored for use by next().
    //
    // The base scanner is passed a utf8_validator constructed with
    // codepoint_types::graphic and the U"\n\r\t" character list (presumably
    // meaning: accept graphic codepoints plus newline, carriage return, and
    // tab -- see utf8_validator for the exact semantics).
    //
    manifest_parser (std::istream& is,
                     const std::string& name,
                     std::function<filter_function> filter = {})
      : char_scanner (is,
                      utf8_validator (codepoint_types::graphic, U"\n\r\t")),
        name_ (name),
        filter_ (std::move (filter)) {}

    // Return the name passed to the constructor.
    //
    const std::string&
    name () const {return name_;}

    // The first returned pair is special "start-of-manifest" with empty name
    // and value being the format version: {"", "<ver>"}. After that we have a
    // sequence of ordinary pairs which are the manifest. At the end of the
    // manifest we have the special "end-of-manifest" pair with empty name and
    // value: {"", ""}. After that we can either get another start-of-manifest
    // pair (in which case the whole sequence repeats from the beginning) or
    // we get another end-of-manifest-like pair which signals the end of
    // stream (aka EOF) and which we will call the end-of-stream pair. To put
    // it another way, the parse sequence always has the following form:
    //
    // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
    //
    manifest_name_value
    next ();

    // Split the manifest value, optionally followed by ';' character and a
    // comment into the value/comment pair. Note that ';' characters in the
    // value must be escaped by the backslash.
    //
    // Note that this is a static function and so can be called without a
    // parser instance.
    //
    static std::pair<std::string, std::string>
    split_comment (const std::string&);

  private:
    // Shortcut for the base scanner type.
    //
    using base = char_scanner<utf8_validator>;

    // Parsing helpers that fill in the passed pair. They are only declared
    // here.
    //
    // NOTE(review): presumably parse_next() is what next() calls (applying
    // the filter to the result) and it in turn uses parse_name() and
    // parse_value() -- confirm against the implementation.
    //
    void
    parse_next (manifest_name_value&);

    void
    parse_name (manifest_name_value&);

    void
    parse_value (manifest_name_value&);

    // Skip spaces and return the first peeked non-space character and the
    // starting position of the line it belongs to. If the latter is not
    // available (skipped spaces are all in the middle of a line, we are at
    // eos, etc.), then fallback to the first peeked character position.
    //
    std::pair<xchar, std::uint64_t>
    skip_spaces ();

    // As base::get() but in case of an invalid character throws
    // manifest_parsing.
    //
    xchar
    get (const char* what);

    // Get previously peeked character (faster).
    //
    void
    get (const xchar&);

    // As base::peek() but in case of an invalid character throws
    // manifest_parsing.
    //
    xchar
    peek (const char* what);

  private:
    // The name and the filter passed to the constructor.
    //
    const std::string name_;
    const std::function<filter_function> filter_;

    // Parsing state, initially start.
    //
    // NOTE(review): presumably tracks the position within the
    // start-of-manifest/body/end-of-manifest sequence described for next()
    // -- confirm against the implementation.
    //
    enum {start, body, end} s_ = start;
    std::string version_; // Current format version.

    // Buffer for a get()/peek() potential error.
    //
    std::string ebuf_;
  };

  // Parse and return a single manifest using the specified parser. Throw
  // manifest_parsing in case of an error.
  //
  // Note that the returned manifest contains neither the format version nor
  // the end-of-manifest/stream pairs.
  //
  LIBBUTL_SYMEXPORT std::vector<manifest_name_value>
  parse_manifest (manifest_parser&);

  // As the parse_manifest() overload that returns the manifest (parse a
  // single manifest, throw manifest_parsing in case of an error, omit the
  // format version and the end-of-manifest/stream pairs) but append the
  // manifest values to an existing list instead of returning them.
  //
  LIBBUTL_SYMEXPORT void
  parse_manifest (manifest_parser&, std::vector<manifest_name_value>&);

  // As the parse_manifest() overload that returns the manifest but return
  // nullopt if eos is reached before reading any values.
  //
  LIBBUTL_SYMEXPORT optional<std::vector<manifest_name_value>>
  try_parse_manifest (manifest_parser&);

  // As the parse_manifest() overload that appends the manifest values to an
  // existing list but return false if eos is reached before reading any
  // values.
  //
  // NOTE(review): presumably true is returned otherwise (that is, if a
  // manifest has been parsed) -- confirm against the implementation.
  //
  LIBBUTL_SYMEXPORT bool
  try_parse_manifest (manifest_parser&, std::vector<manifest_name_value>&);
}

#include <libbutl/manifest-parser.ixx>