aboutsummaryrefslogtreecommitdiff
path: root/butl
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2017-03-31 23:29:49 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2017-04-04 13:28:18 +0300
commit d53c8a6ce3d868da66d97a9243365e88d0879343 (patch)
tree 7d1268f62808f706c3a4d5631456afb4cbe24fb2 /butl
parent f4f30ed51b9bcd84cf25b601fab0a0064aae7af8 (diff)
Add tab_parser
Diffstat (limited to 'butl')
-rw-r--r-- butl/buildfile 1
-rw-r--r-- butl/tab-parser 78
-rw-r--r-- butl/tab-parser.cxx 142
3 files changed, 221 insertions, 0 deletions
diff --git a/butl/buildfile b/butl/buildfile
index 0aa0aae..b54a887 100644
--- a/butl/buildfile
+++ b/butl/buildfile
@@ -25,6 +25,7 @@ lib{butl}: \
{hxx cxx}{ sha256 } \
{hxx }{ small-vector } \
{hxx txx }{ string-table } \
+ {hxx cxx}{ tab-parser } \
{hxx cxx}{ target-triplet } \
{hxx cxx}{ timestamp } \
{hxx ixx cxx}{ utility } \
diff --git a/butl/tab-parser b/butl/tab-parser
new file mode 100644
index 0000000..6aa0705
--- /dev/null
+++ b/butl/tab-parser
@@ -0,0 +1,78 @@
+// file : butl/tab-parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_TAB_PARSER
+#define BUTL_TAB_PARSER
+
+#include <string>
+#include <vector>
+#include <iosfwd>
+#include <cstdint> // uint64_t
+#include <stdexcept> // runtime_error
+
+#include <butl/export>
+
+#include <butl/char-scanner>
+
+namespace butl
+{
+ class LIBBUTL_EXPORT tab_parsing: public std::runtime_error // Thrown by tab_parser.
+ {
+ public:
+ tab_parsing (const std::string& name, // Input name (can be empty).
+ std::uint64_t line, // Line of the error position.
+ std::uint64_t column, // Column of the error position.
+ const std::string& description); // Error text without the location.
+ // what() is "<name>:<line>:<column>: error: <description>" (no "<name>:" if empty).
+ std::string name;
+ std::uint64_t line;
+ std::uint64_t column;
+ std::string description;
+ };
+
+ // Line and column numbers are useful for issuing diagnostics about invalid
+ // or missing fields.
+ //
+ struct tab_field
+ {
+ std::string value; // Field text as written (quote characters preserved).
+ std::uint64_t column; // Field start column number (one-based).
+ };
+
+ struct tab_fields: std::vector<tab_field> // Fields of one line, in order of appearance.
+ {
+ std::uint64_t line = 0; // Line number (one-based); 0 until set by the parser.
+ std::uint64_t end_column = 0; // Column of the newline/eos that ended the line; 0 until set.
+ };
+
+ // Read and parse lines consisting of fields separated by spaces and/or tabs.
+ // A field can contain single- or double-quoted substrings (with spaces) which
+ // are interpreted but preserved. No escaping of the quote characters is
+ // supported. Blank lines and lines whose first non-whitespace character is #
+ // (collectively called empty lines) are ignored.
+ //
+ class LIBBUTL_EXPORT tab_parser: protected butl::char_scanner
+ {
+ public:
+ tab_parser (std::istream& is, const std::string& name) // name: input name for diagnostics.
+ : char_scanner (is), name_ (name) {}
+
+ // Return next line of fields. Skip empty lines. Empty result denotes the
+ // end of stream. Throw tab_parsing if a quoted substring is not terminated.
+ //
+ tab_fields
+ next ();
+
+ private:
+ // Skip spaces and empty lines; return the peeked (unconsumed) character after them.
+ //
+ xchar
+ skip_spaces ();
+
+ private:
+ const std::string name_; // Input name passed to tab_parsing on errors.
+ };
+}
+
+#endif // BUTL_TAB_PARSER
diff --git a/butl/tab-parser.cxx b/butl/tab-parser.cxx
new file mode 100644
index 0000000..bae9327
--- /dev/null
+++ b/butl/tab-parser.cxx
@@ -0,0 +1,142 @@
+// file : butl/tab-parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <butl/tab-parser>
+
+#include <cassert>
+#include <sstream>
+
+using namespace std;
+
+namespace butl
+{
+ using parsing = tab_parsing;
+
+ // tab_parser
+ //
+ tab_fields tab_parser::
+ next ()
+ {
+   xchar ch (skip_spaces ()); // Leading spaces and empty lines are skipped.
+
+   auto at_eol   = [&ch] () -> bool {return eos (ch) || ch == '\n';};
+   auto at_space = [&ch] () -> bool {return ch == ' ' || ch == '\t';};
+   auto advance  = [&ch, this] () {get (); ch = peek ();};
+
+   tab_fields fields;
+   fields.line = ch.line;
+
+   for (;;) // Collect fields until the line (or the stream) ends.
+   {
+     while (!at_eol () && at_space ()) // Skip separators before the field.
+       advance ();
+
+     if (at_eol ()) // Only trailing spaces (or nothing at all) were left.
+       break;
+
+     // Accumulate the field up to the first space or tab that is not inside
+     // a quoted substring. The quote characters themselves are kept.
+     //
+     tab_field field ({string (), ch.column});
+     char quote ('\0'); // Opening quote character or '\0' if not in quotes.
+
+     while (!at_eol ())
+     {
+       if (quote != '\0')
+       {
+         if (ch == quote) // Closing quote.
+           quote = '\0';
+       }
+       else if (at_space ()) // Unquoted separator ends the field.
+         break;
+       else if (ch == '"' || ch == '\'') // Opening quote.
+         quote = ch;
+
+       field.value += ch;
+       advance ();
+     }
+
+     if (quote != '\0')
+       throw parsing (name_, ch.line, ch.column, "unterminated quoted string");
+
+     fields.emplace_back (move (field));
+   }
+
+   fields.end_column = ch.column;
+
+   // Consume the newline so that the next call starts on a fresh line. Doing
+   // this at eos is harmless since "reading" eof multiple times is safe.
+   //
+   get ();
+   return fields;
+ }
+
+ tab_parser::xchar tab_parser::
+ skip_spaces ()
+ {
+   // Work with peeked characters only: whatever is returned has not been
+   // consumed from the stream.
+   //
+   xchar ch (peek ());
+
+   // Newlines and comments are only skipped if we were called at the
+   // beginning of a line. Note that this flag is never updated afterwards.
+   //
+   const bool bol (ch.column == 1);
+
+   while (!eos (ch))
+   {
+     if (ch == ' ' || ch == '\t')
+     {
+       // Skip a space or a tab.
+       //
+       get ();
+     }
+     else if (bol && ch == '\n')
+     {
+       // Skip an empty line.
+       //
+       get ();
+     }
+     else if (bol && ch == '#')
+     {
+       // '#' is only recognized as the start of a comment at the beginning
+       // of the line (sans leading spaces). Consume it together with the
+       // rest of the line up to, but not including, the newline (or eos).
+       //
+       do
+       {
+         get ();
+         ch = peek ();
+       }
+       while (!eos (ch) && ch != '\n');
+     }
+     else
+       break; // Not a space (or a newline/'#' that must not be skipped).
+
+     ch = peek ();
+   }
+
+   return ch;
+ }
+
+ // tab_parsing
+ //
+ static string
+ format (const string& n, uint64_t l, uint64_t c, const string& d)
+ {
+   // Compose "<n>:<l>:<c>: error: <d>" leaving out "<n>:" if the name is empty.
+   //
+   ostringstream msg;
+   msg << (n.empty () ? string () : n + ':') << l << ':' << c << ": error: " << d;
+   return msg.str ();
+ }
+
+ tab_parsing::
+ tab_parsing (const string& n, uint64_t l, uint64_t c, const string& d)
+   : runtime_error (format (n, l, c, d)), // what(): location-prefixed text.
+     name (n), line (l),                  // Keep the components as well.
+     column (c), description (d)
+ {}
+}