aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2014-12-12 11:30:04 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2014-12-12 11:30:04 +0200
commit257ad3c2c5e633d2fd3f2228021ac3ae8d6d07cb (patch)
treeecfa5df6e8abca5bd483d5498bf84412ae58930e
parent0dcf07989b4b942f6ff872023b2886b7f698d711 (diff)
Initial buildfile parser implementation
g++-4.9 -std=c++14 -g -I../../.. -o driver driver.cxx ../../../build/lexer.cxx ../../../build/parser.cxx && ./driver
-rw-r--r--build/bd.cxx35
-rw-r--r--build/lexer16
-rw-r--r--build/lexer.cxx12
-rw-r--r--build/parser55
-rw-r--r--build/parser.cxx167
-rw-r--r--build/token6
-rw-r--r--tests/build/lexer/driver.cxx4
-rw-r--r--tests/build/parser/driver.cxx70
8 files changed, 327 insertions, 38 deletions
diff --git a/build/bd.cxx b/build/bd.cxx
index 33ee02f..82d283c 100644
--- a/build/bd.cxx
+++ b/build/bd.cxx
@@ -16,8 +16,8 @@
#include <build/process>
#include <build/diagnostics>
-#include <build/token>
#include <build/lexer>
+#include <build/parser>
using namespace std;
@@ -148,41 +148,20 @@ main (int argc, char* argv[])
}
ifs.exceptions (ifstream::failbit | ifstream::badbit);
- lexer l (ifs, bf.string ());
+ // The parser has no default constructor: it has to be told where to
+ // write its diagnostics. Send it to stderr like the other errors
+ // reported by this driver.
+ //
+ parser p (cerr);
try
{
- for (token t (l.next ());; t = l.next ())
- {
- cout << t.line () << ':' << t.column () << ": ";
-
- switch (t.type ())
- {
- case token_type::eos: cout << "<eos>"; break;
- case token_type::punctuation:
- {
- switch (t.punctuation ())
- {
- case token_punctuation::newline: cout << "\\n"; break;
- case token_punctuation::colon: cout << ':'; break;
- case token_punctuation::lcbrace: cout << '{'; break;
- case token_punctuation::rcbrace: cout << '}'; break;
- }
- break;
- }
- case token_type::name: cout << '\'' << t.name () << '\''; break;
- }
-
- cout << endl;
-
- if (t.type () == token_type::eos)
- break;
- }
+ p.parse (ifs, bf);
}
catch (const lexer_error&)
{
return 1; // Diagnostics has already been issued.
}
+ catch (const parser_error&)
+ {
+ return 1; // Diagnostics has already been issued.
+ }
catch (const std::ios_base::failure&)
{
cerr << "error: failed to read from " << bf << endl;
diff --git a/build/lexer b/build/lexer
index 34b6fcc..cf67eec 100644
--- a/build/lexer
+++ b/build/lexer
@@ -21,11 +21,8 @@ namespace build
class lexer
{
public:
- // If name is empty, then no diagnostics is issued, just lexer_error
- // is thrown (use for testing).
- //
- lexer (std::istream& is, const std::string& name)
- : is_ (is), name_ (name) {}
+ lexer (std::istream& is, const std::string& name, std::ostream& diag)
+ : is_ (is), name_ (name), diag_ (diag) {}
token
next ();
@@ -74,6 +71,8 @@ namespace build
return c.value () == xchar::traits_type::eof ();
}
+ // Scanner.
+ //
private:
xchar
escape ();
@@ -84,9 +83,16 @@ namespace build
token
name (xchar);
+ // Utilities.
+ //
+ private:
+ std::ostream&
+ error (const xchar&);
+
private:
std::istream& is_;
std::string name_;
+ std::ostream& diag_;
std::uint64_t l_ {1};
std::uint64_t c_ {1};
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 8566788..9e3521a 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -56,10 +56,7 @@ namespace build
if (!is_eos (c))
return c;
- if (!name_.empty ())
- cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
- "unterminated escape sequence" << endl;
-
+ error (c) << "unterminated escape sequence" << endl;
throw lexer_error ();
}
@@ -219,4 +216,11 @@ namespace build
buf_ = c;
unget_ = true;
}
+
+ ostream& lexer::
+ error (const xchar& c)
+ {
+ return diag_ << name_ << ':' << c.line () << ':' <<
+ c.column () << ": error: ";
+ }
}
diff --git a/build/parser b/build/parser
new file mode 100644
index 0000000..04ef00d
--- /dev/null
+++ b/build/parser
@@ -0,0 +1,55 @@
+// file : build/parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD_PARSER
+#define BUILD_PARSER
+
+#include <string>
+#include <iosfwd>
+#include <exception>
+
+#include <build/path>
+
+namespace build
+{
+ class token;
+ enum class token_type;
+ class lexer;
+
+ // Thrown by the parser when the input is not well-formed. By the time it
+ // is thrown the error description has already been written to the
+ // parser's diagnostics stream.
+ //
+ // The handler must assume the diagnostics has already been issued.
+ //
+ struct parser_error: std::exception {};
+
+ // Buildfile parser. Reads tokens from a lexer and checks that they form
+ // a valid sequence of name lists, reporting errors to a diagnostics
+ // stream and throwing parser_error.
+ //
+ class parser
+ {
+ public:
+   // All the diagnostics (both the parser's and the lexer's) is written
+   // to diag. The stream is held by reference so it must outlive the
+   // parser.
+   //
+   explicit
+   parser (std::ostream& diag): diag_ (diag) {}
+
+   // Parse the buildfile read from the stream. The path identifies the
+   // buildfile in diagnostics. Throw parser_error if the input is invalid.
+   //
+   void
+   parse (std::istream&, const path&);
+
+   // Recursive descent parser.
+   //
+ private:
+   // Parse one or more names and/or name groups starting with the
+   // current token.
+   //
+   void
+   names (token&, token_type&);
+
+   // Utilities.
+   //
+ private:
+   // Get the next token from the lexer and store its type alongside.
+   //
+   void
+   next (token&, token_type&);
+
+   // Write the '<path>:<line>:<column>: error: ' prefix for the token and
+   // return the diagnostics stream for the message to be appended.
+   //
+   std::ostream&
+   error (const token&);
+
+ private:
+   std::ostream& diag_;
+
+   // Set by parse() to its local lexer and to the path it was passed, so
+   // they are only meaningful while parse() is executing. Start out as
+   // null rather than indeterminate.
+   //
+   lexer* lexer_ {nullptr};
+   const path* path_ {nullptr};
+ };
+}
+
+#endif // BUILD_PARSER
diff --git a/build/parser.cxx b/build/parser.cxx
new file mode 100644
index 0000000..669ac8b
--- /dev/null
+++ b/build/parser.cxx
@@ -0,0 +1,167 @@
+// file : build/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <build/parser>
+
+#include <iostream>
+
+#include <build/token>
+#include <build/lexer>
+
+using namespace std;
+
+namespace build
+{
+ // Print a token (its type and, where applicable, value) in the form
+ // used in diagnostics. Defined at the end of this file.
+ //
+ ostream&
+ operator<< (ostream&, const token&);
+
+ // Short aliases used throughout the parser implementation.
+ //
+ using type = token_type;
+ using punc = token_punctuation;
+
+ // Parse the buildfile read from the specified stream. The path is only
+ // used to identify the buildfile in diagnostics.
+ //
+ // The input is a sequence of entries of the form:
+ //
+ //   <names> ':' [<names>] (<newline> | <end-of-stream>)
+ //
+ // On the first entry that does not match, the error is written to the
+ // diagnostics stream and parser_error is thrown.
+ //
+ void parser::
+ parse (istream& is, const path& p)
+ {
+   // Make the lexer and path available to next() and error(). Both
+   // pointers refer to objects that only live for the duration of this
+   // call.
+   //
+   lexer l (is, p.string (), diag_);
+   lexer_ = &l;
+   path_ = &p;
+
+   token t (0, 0); // eos
+   type tt;
+
+   for (next (t, tt); tt != type::eos; )
+   {
+     // We always start with one or more names.
+     //
+     names (t, tt);
+
+     if (t.is (punc::colon))
+     {
+       next (t, tt);
+
+       // The names after the colon are optional.
+       //
+       if (tt == type::name || t.is (punc::lcbrace))
+         names (t, tt);
+
+       // The entry ends with a newline or with the end of the stream.
+       //
+       if (t.is (punc::newline))
+         next (t, tt);
+       else if (tt != type::eos)
+       {
+         error (t) << "expected newline instead of " << t << endl;
+         throw parser_error ();
+       }
+
+       continue;
+     }
+
+     // Names that are not followed by a colon.
+     //
+     error (t) << "unexpected " << t << endl;
+     throw parser_error ();
+   }
+ }
+
+ // Parse one or more names starting with the current token. A name is a
+ // plain name, an untyped group ('{foo bar}'), or a typed group
+ // ('exe{foo bar}'); groups can be nested. On return the current token is
+ // the first one that cannot start a name. If there is not even one name,
+ // issue diagnostics and throw parser_error.
+ //
+ void parser::
+ names (token& t, type& tt)
+ {
+   // Parse a name group. On entry the current token is the opening '{'
+   // and on exit it is the token that follows the matching '}'.
+   //
+   auto group = [this, &t, &tt] ()
+   {
+     next (t, tt); // Skip '{'.
+     names (t, tt);
+
+     if (!t.is (punc::rcbrace))
+     {
+       error (t) << "expected '}' instead of " << t << endl;
+       throw parser_error ();
+     }
+
+     next (t, tt); // Skip '}'.
+   };
+
+   bool first (true);
+
+   while (true)
+   {
+     if (t.is (punc::lcbrace))
+     {
+       // Untyped name group, e.g., '{foo bar}'.
+       //
+       group ();
+     }
+     else if (tt == type::name)
+     {
+       string name (t.name ());
+
+       // See if this is a type name, that is, it is followed by '{'.
+       //
+       next (t, tt);
+
+       if (t.is (punc::lcbrace))
+       {
+         //cout << "type: " << name << endl;
+
+         //@@ TODO:
+         //
+         // - detect nested typed name groups, e.g., 'cxx{hxx{foo}}'.
+         //
+         group ();
+       }
+
+       // Otherwise this is a target, directory, or variable name.
+       //cout << "name: " << name << endl;
+     }
+     else if (first)
+     {
+       // Nothing that looks like a name where at least one is required.
+       //
+       error (t) << "expected name instead of " << t << endl;
+       throw parser_error ();
+     }
+     else
+       break;
+
+     first = false;
+   }
+ }
+
+ // Replace t with the next token from the lexer and store its type in tt.
+ //
+ void parser::
+ next (token& t, token_type& tt)
+ {
+   tt = (t = lexer_->next ()).type ();
+ }
+
+ ostream& parser::
+ error (const token& t)
+ {
+ return diag_ << path_->string () << ':' << t.line () << ':' <<
+ t.column () << ": error: ";
+ }
+
+ // Print a token in the form used in diagnostics: names and punctuation
+ // characters are quoted while tokens without a printable representation
+ // are shown as <end-of-stream> and <newline>.
+ //
+ ostream&
+ operator<< (ostream& os, const token& t)
+ {
+   const token_type tt (t.type ());
+
+   if (tt == token_type::eos)
+     return os << "<end-of-stream>";
+
+   if (tt == token_type::name)
+     return os << '\'' << t.name () << '\'';
+
+   if (tt == token_type::punctuation)
+   {
+     // Leave the stream untouched for an unrecognized punctuation value.
+     //
+     const char* s (nullptr);
+
+     switch (t.punctuation ())
+     {
+     case token_punctuation::newline: s = "<newline>"; break;
+     case token_punctuation::colon:   s = "':'";       break;
+     case token_punctuation::lcbrace: s = "'{'";       break;
+     case token_punctuation::rcbrace: s = "'}'";       break;
+     }
+
+     if (s != nullptr)
+       os << s;
+   }
+
+   return os;
+ }
+}
diff --git a/build/token b/build/token
index bade45c..6f4951c 100644
--- a/build/token
+++ b/build/token
@@ -28,6 +28,12 @@ namespace build
token_punctuation
punctuation () const {assert (t_ == token_type::punctuation); return p_;}
+ // Return true if this is a punctuation token of the specified kind.
+ // Unlike punctuation(), this can be called on a token of any type.
+ //
+ bool
+ is (token_punctuation p) const
+ {
+   if (t_ != token_type::punctuation)
+     return false;
+
+   return p_ == p;
+ }
+
std::uint64_t line () const {return l_;}
std::uint64_t column () const {return c_;}
diff --git a/tests/build/lexer/driver.cxx b/tests/build/lexer/driver.cxx
index b1af9d9..e329b1c 100644
--- a/tests/build/lexer/driver.cxx
+++ b/tests/build/lexer/driver.cxx
@@ -71,6 +71,8 @@ main ()
tokens ({"foo", ":", "\n", "bar", ""}));
}
+ostream cnull (nullptr);
+
static tokens
lex (const char* s)
{
@@ -78,7 +80,7 @@ lex (const char* s)
istringstream is (s);
is.exceptions (istream::failbit | istream::badbit);
- lexer l (is, "");
+ lexer l (is, "", cnull);
try
{
diff --git a/tests/build/parser/driver.cxx b/tests/build/parser/driver.cxx
new file mode 100644
index 0000000..4ba589d
--- /dev/null
+++ b/tests/build/parser/driver.cxx
@@ -0,0 +1,70 @@
+// file : tests/build/parser/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <build/path>
+#include <build/lexer>
+#include <build/parser>
+
+using namespace std;
+using namespace build;
+
+static bool
+parse (const char*);
+
+// Run the parser over a set of buildfile fragments. parse() returns true
+// if the fragment is accepted and false if parser_error is thrown, so the
+// first group of assertions lists inputs that must be accepted and the
+// last group lists inputs that must be rejected.
+//
+int
+main ()
+{
+ // Plain names before and, optionally, after the colon. This includes
+ // the empty input and multiple entries separated by a newline.
+ //
+ assert (parse (""));
+ assert (parse ("foo:"));
+ assert (parse ("foo bar:"));
+ assert (parse ("foo:\nbar:"));
+ assert (parse ("foo: bar"));
+ assert (parse ("foo: bar baz"));
+ assert (parse ("foo bar: baz biz"));
+
+ // Untyped name groups, including nested ones and groups mixed with
+ // plain names.
+ //
+ assert (parse ("{foo}:"));
+ assert (parse ("{foo bar}:"));
+ assert (parse ("{{foo bar}}:"));
+ assert (parse ("{{foo bar} {baz} {biz fox} fix}:"));
+
+ // Typed name groups (a name immediately followed by '{').
+ //
+ assert (parse ("exe{foo}:"));
+ assert (parse ("exe{foo bar}:"));
+ assert (parse ("{exe{foo bar}}:"));
+ assert (parse ("exe{{foo bar} fox}:"));
+ assert (parse ("exe{foo}: obj{bar baz} biz.o lib{fox}"));
+
+ // Invalid input: missing names, missing colon, unterminated groups, and
+ // a second colon in the same entry.
+ //
+ assert (!parse (":"));
+ assert (!parse ("foo"));
+ assert (!parse ("{"));
+ assert (!parse ("{foo:"));
+ assert (!parse ("{foo{:"));
+ assert (!parse ("foo: bar:"));
+ assert (!parse ("exe{foo:"));
+}
+
+// Stream without a buffer, passed to the parser as its diagnostics stream
+// so that the error messages produced by the negative tests are discarded.
+//
+ostream cnull (nullptr);
+
+// Parse the specified buildfile text with the diagnostics sent to cnull.
+// Return true if the text is accepted and false if the parser throws
+// parser_error. Any other exception propagates to the caller.
+//
+static bool
+parse (const char* s)
+{
+  istringstream is (s);
+  is.exceptions (istream::failbit | istream::badbit);
+
+  parser p (cnull);
+  bool ok (true);
+
+  try
+  {
+    p.parse (is, path ());
+  }
+  catch (const parser_error&)
+  {
+    ok = false;
+  }
+
+  return ok;
+}