aboutsummaryrefslogtreecommitdiff
path: root/butl/tab-parser.cxx
blob: bae932711e48aeeb222556dbb086c996cc4ff2e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// file      : butl/tab-parser.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#include <butl/tab-parser>

#include <string>  // to_string()
#include <cassert>
#include <sstream>

using namespace std;

namespace butl
{
  using parsing = tab_parsing;

  // tab_parser
  //
  // Parse the next significant line of the input and return its fields.
  //
  // Blank lines, comment lines, and leading whitespace are skipped first (see
  // skip_spaces()). Fields are separated by spaces and/or tabs. Whitespace
  // inside a single- or double-quoted fragment does not end the field and the
  // quote characters themselves are kept in the field value. The result
  // records the line number, the starting column of every field, and the
  // column at which the line ended. If the end of the stream is reached
  // before any field is seen, the returned field list is empty.
  //
  // Throw tab_parsing if a quoted fragment is not closed before the end of
  // the line (or stream).
  //
  tab_fields tab_parser::
  next ()
  {
    tab_fields fields;

    // Position on the first significant character.
    //
    xchar c (skip_spaces ());

    // Helpers that operate on the current (peeked, not yet consumed)
    // character.
    //
    auto at_eol   = [&c] () -> bool {return eos (c) || c == '\n';};
    auto at_space = [&c] () -> bool {return c == ' ' || c == '\t';};
    auto advance  = [&c, this] () {get (); c = peek ();};

    fields.line = c.line;

    // Collect fields one at a time until the newline character or eos.
    //
    for (;;)
    {
      // Skip the whitespace that precedes the field.
      //
      while (!at_eol () && at_space ())
        advance ();

      if (at_eol ()) // Nothing but whitespace was left on the line.
        break;

      // Accumulate the field up to the first whitespace character that is
      // not enclosed in quotes.
      //
      tab_field field ({string (), c.column});
      char quote ('\0'); // Quote character we are inside of, '\0' if none.

      while (!at_eol ())
      {
        if (quote == '\0')
        {
          if (at_space ()) // Unquoted whitespace terminates the field.
            break;

          if (c == '"' || c == '\'') // Opening quote.
            quote = c;
        }
        else if (c == quote) // Matching closing quote.
          quote = '\0';

        // Quote characters are part of the value as well.
        //
        field.value += c;
        advance ();
      }

      if (quote != '\0')
        throw parsing (name_, c.line, c.column, "unterminated quoted string");

      fields.emplace_back (move (field));
    }

    fields.end_column = c.column;

    // Consume the newline (or eos) that ended the line. Note that "reading"
    // eos more than once is safe.
    //
    get ();
    return fields;
  }

  // Skip whitespace and return the first character (peeked, not consumed)
  // at which skipping stopped, or eos.
  //
  // If the scan begins in the first column, then blank lines and lines whose
  // first non-space character is '#' (comments) are skipped as well.
  // Otherwise only spaces and tabs are skipped and both '\n' and '#' stop the
  // scan.
  //
  tab_parser::xchar tab_parser::
  skip_spaces ()
  {
    xchar c (peek ());

    // Whether the scan started at the beginning of a line. This is
    // determined once, from the position of the first character.
    //
    const bool line_start (c.column == 1);

    while (!eos (c))
    {
      if (c == '#')
      {
        // '#' only introduces a comment at the beginning of the line (sans
        // leading spaces).
        //
        if (!line_start)
          return c;

        get (); // The '#' character itself.

        // Discard the rest of the comment, leaving the newline (or eos) to
        // be handled by the next iteration.
        //
        for (c = peek (); !(eos (c) || c == '\n'); c = peek ())
          get ();
      }
      else if (c == '\n')
      {
        // A newline is only skipped as part of an empty (or comment) line.
        //
        if (!line_start)
          return c;

        get ();
      }
      else if (c == ' ' || c == '\t')
        get ();
      else
        return c; // Not a space.

      c = peek ();
    }

    return c;
  }

  // tab_parsing
  //
  // Compose the message for a parsing error in the form:
  //
  // [<name>:]<line>:<column>: error: <description>
  //
  // The name and the colon that follows it are omitted if the name is empty.
  //
  // Note that the numbers are converted with std::to_string() rather than
  // inserted into an ostringstream. The stream would be imbued with the
  // global locale and so could insert digit group separators into the
  // position (for example, 1,234) if that locale specifies grouping.
  //
  static std::string
  format (const std::string& n, uint64_t l, uint64_t c, const std::string& d)
  {
    std::string r;

    if (!n.empty ())
    {
      r += n;
      r += ':';
    }

    r += std::to_string (l);
    r += ':';
    r += std::to_string (c);
    r += ": error: ";
    r += d;

    return r;
  }

  // Construct the exception from the input name (n), the line (l) and
  // column (c) of the error, and its description (d). The arguments are
  // stored in the corresponding members while the runtime_error base is
  // initialized with the message composed from them by format().
  //
  tab_parsing::
  tab_parsing (const string& n, uint64_t l, uint64_t c, const string& d)
      : runtime_error (format (n, l, c, d)),
        name (n), line (l), column (c), description (d)
  {
  }
}