aboutsummaryrefslogtreecommitdiff
path: root/bpkg/manifest-parser
blob: 2b1e4a55c56ed1d4afd112a799b2ffd46ddfb84d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// file      : bpkg/manifest-parser -*- C++ -*-
// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BPKG_MANIFEST_PARSER
#define BPKG_MANIFEST_PARSER

#include <string>
#include <iosfwd>
#include <cstdint>   // uint64_t
#include <stdexcept> // runtime_error

namespace bpkg
{
  // Exception type for manifest parsing errors. It derives from
  // std::runtime_error and, in addition, exposes four public data
  // members that mirror the constructor's parameters, so a handler can
  // inspect them individually instead of relying on what() alone.
  //
  // NOTE(review): the constructor is defined out of line. How the what()
  // message is composed from the arguments, and whether the members below
  // are exact copies of them, cannot be confirmed from this header.
  //
  class manifest_parsing: public std::runtime_error
  {
  public:
    manifest_parsing (const std::string& name,
                      std::uint64_t line,
                      std::uint64_t column,
                      const std::string& description);

    // Presumably the name of the manifest/stream being parsed (compare
    // manifest_parser::name ()) -- TODO confirm against the implementation.
    //
    std::string name;

    // Presumably the position in the input at which the error was
    // detected -- TODO confirm whether these are 1-based.
    //
    std::uint64_t line;
    std::uint64_t column;

    // Presumably the human-readable description of the problem.
    //
    std::string description;
  };

  // Pull-style manifest parser. It is bound to an input stream and a name
  // at construction and hands back the manifest contents one name/value
  // pair at a time through next ().
  //
  class manifest_parser
  {
  public:
    // One name/value pair plus the line and column recorded for the name
    // and for the value.
    //
    struct name_value_type
    {
      std::string name;
      std::string value;

      std::uint64_t name_line;
      std::uint64_t name_column;

      std::uint64_t value_line;
      std::uint64_t value_column;
    };

    // The stream is held by reference (so it has to outlive the parser)
    // while the name is copied.
    //
    manifest_parser (std::istream& is, const std::string& name)
        : is_ (is),
          name_ (name)
    {
    }

    // The name this parser was constructed with.
    //
    const std::string&
    name () const
    {
      return name_;
    }

    // Return the next pair of the parse sequence.
    //
    // Every manifest opens with a special "start-of-manifest" pair that
    // has an empty name and the format version as its value, that is,
    // {"", "<ver>"}. The ordinary pairs making up the manifest follow and
    // the manifest is closed by the special "end-of-manifest" pair with
    // both name and value empty: {"", ""}.
    //
    // What comes next is either the start-of-manifest pair of another
    // manifest (and the sequence repeats from the beginning) or one more
    // end-of-manifest pair, which signals the end of the stream (EOF).
    // In other words, the sequence always has this form:
    //
    // ({"", "<ver>"} {"<name>", "<value>"}* {"", ""})* {"", ""}
    //
    name_value_type
    next ();

  private:
    // A character value (kept as int_type so that EOF is representable)
    // bundled with the line and column it was constructed with.
    //
    class xchar
    {
    public:
      using traits_type = std::char_traits<char>;
      using int_type = traits_type::int_type;
      using char_type = traits_type::char_type;

      xchar (int_type val, std::uint64_t ln, std::uint64_t col)
          : v_ (val),
            l_ (ln),
            c_ (col)
      {
      }

      // Implicit conversion to the plain character.
      //
      operator char_type () const
      {
        return static_cast<char_type> (v_);
      }

      // The unconverted value (may be traits_type::eof ()).
      //
      int_type
      value () const
      {
        return v_;
      }

      std::uint64_t
      line () const
      {
        return l_;
      }

      std::uint64_t
      column () const
      {
        return c_;
      }

    private:
      int_type v_;
      std::uint64_t l_;
      std::uint64_t c_;
    };

    // Parsing helpers (defined out of line); each one takes the pair to
    // work on by reference.
    //
    void
    parse_name (name_value_type&);

    void
    parse_value (name_value_type&);

    // Skip over spaces; the result is the first non-space character,
    // which is peeked.
    //
    xchar
    skip_spaces ();

    // Character-level interface (defined out of line).
    //
    xchar
    peek ();

    xchar
    get ();

    void
    unget (const xchar&);

    // Test whether the character carries the end-of-stream value.
    //
    bool
    is_eos (const xchar& c) const
    {
      return xchar::traits_type::eq_int_type (c.value (),
                                              xchar::traits_type::eof ());
    }

    // NOTE: the data members below are kept in their original order on
    // purpose since it determines the initialization order.
    //
    // Parser state (initially start) and the current format version.
    //
    enum {start, body, eos} s_ = start;
    std::string version_;

    std::istream& is_;
    const std::string name_;

    // Line/column counters, both starting at 1 (presumably the current
    // position in the stream; maintained by the out-of-line code).
    //
    std::uint64_t l_ = 1;
    std::uint64_t c_ = 1;

    // Flag and one-character buffer, presumably backing unget ().
    //
    bool unget_ = false;
    xchar buf_ {0, 0, 0};

    // Initially false; presumably set once the end of stream is seen.
    //
    bool eos_ = false;
  };
}

#endif // BPKG_MANIFEST_PARSER