From 5661b404b0104c3065a40ad622bdd3c11d748a99 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 20 Apr 2017 17:31:26 +0300 Subject: Implement string_parser --- butl/tab-parser.cxx | 116 ++++++++++++++-------------------------------------- 1 file changed, 31 insertions(+), 85 deletions(-) (limited to 'butl/tab-parser.cxx') diff --git a/butl/tab-parser.cxx b/butl/tab-parser.cxx index bae9327..4743e69 100644 --- a/butl/tab-parser.cxx +++ b/butl/tab-parser.cxx @@ -7,6 +7,8 @@ #include #include +#include + using namespace std; namespace butl @@ -19,106 +21,50 @@ namespace butl next () { tab_fields r; - xchar c (skip_spaces ()); // Skip empty lines and leading spaces. - - auto eol = [&c] () -> bool {return eos (c) || c == '\n';}; - auto space = [&c] () -> bool {return c == ' ' || c == '\t';}; - auto next = [&c, this] () {get (); c = peek ();}; - - r.line = c.line; - // Read line fields until eos or the newline character. + // Read lines until a non-empty one or EOF is encountered. In the first + // case parse the line and bail out. // - while (!eol ()) + // Note that we check for character presence in the stream prior to the + // getline() call, to prevent it from setting the failbit. + // + while (!is_.eof () && is_.peek () != istream::traits_type::eof ()) { - for (; !eol () && space (); next ()) ; // Skip space characters. + string s; + getline (is_, s); - if (eol ()) // No more fields. - break; + ++line_; - // Read the field. Here we scan until the first whitespace character that - // appears out of quotes. + // Skip empty line. // - tab_field tf ({string (), c.column}); - char quoting ('\0'); // Current quoting mode, can be used as bool. - - for (; !eol (); next ()) - { - if (!quoting) - { - if (space ()) // End of the field. - break; - else if (c == '"' || c == '\'') // Begin of quoted string. - quoting = c; - } - else if (c == quoting) // End of quoted string. - quoting = '\0'; - - tf.value += c; - } + auto i (s.begin ()); + auto e (s.end ()); + for (; i != e && (*i == ' ' || *i == '\t'); ++i) ; // Skip spaces. - if (quoting) - throw parsing (name_, c.line, c.column, "unterminated quoted string"); + if (i == e || *i == '#') + continue; - r.emplace_back (move (tf)); - } + r.line = line_; + r.end_column = s.size () + 1; // Newline position. - r.end_column = c.column; + vector> sp; - // Read out eof or newline character from the stream. Note that "reading" - // eof multiple times is safe. - // - get (); - return r; - } - - tab_parser::xchar tab_parser:: - skip_spaces () - { - xchar c (peek ()); - bool start (c.column == 1); - - for (; !eos (c); c = peek ()) - { - switch (c) + try { - case ' ': - case '\t': - break; - case '\n': - { - // Skip empty lines. - // - if (!start) - return c; - - break; - } - case '#': - { - // We only recognize '#' as a start of a comment at the beginning - // of the line (sans leading spaces). - // - if (!start) - return c; - - get (); - - // Read until newline or eos. - // - for (c = peek (); !eos (c) && c != '\n'; c = peek ()) - get (); - - continue; - } - default: - return c; // Not a space. + sp = string_parser::parse_quoted_position (s, false); + } + catch (const invalid_string& e) + { + throw parsing (name_, line_, e.position + 1, e.what ()); } - get (); + for (auto& s: sp) + r.emplace_back (tab_field ({move (s.first), s.second + 1})); + + break; } - return c; + return r; } // tab_parsing -- cgit v1.1