From 257ad3c2c5e633d2fd3f2228021ac3ae8d6d07cb Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 12 Dec 2014 11:30:04 +0200 Subject: Initial buildfile parser implementation g++-4.9 -std=c++14 -g -I../../.. -o driver driver.cxx ../../../build/lexer.cxx ../../../build/parser.cxx && ./driver --- build/bd.cxx | 35 ++------- build/lexer | 16 ++-- build/lexer.cxx | 12 ++- build/parser | 55 ++++++++++++++ build/parser.cxx | 167 ++++++++++++++++++++++++++++++++++++++++++ build/token | 6 ++ tests/build/lexer/driver.cxx | 4 +- tests/build/parser/driver.cxx | 70 ++++++++++++++++++ 8 files changed, 327 insertions(+), 38 deletions(-) create mode 100644 build/parser create mode 100644 build/parser.cxx create mode 100644 tests/build/parser/driver.cxx diff --git a/build/bd.cxx b/build/bd.cxx index 33ee02f..82d283c 100644 --- a/build/bd.cxx +++ b/build/bd.cxx @@ -16,8 +16,8 @@ #include #include -#include #include +#include using namespace std; @@ -148,41 +148,20 @@ main (int argc, char* argv[]) } ifs.exceptions (ifstream::failbit | ifstream::badbit); - lexer l (ifs, bf.string ()); + parser p (cerr); try { - for (token t (l.next ());; t = l.next ()) - { - cout << t.line () << ':' << t.column () << ": "; - - switch (t.type ()) - { - case token_type::eos: cout << ""; break; - case token_type::punctuation: - { - switch (t.punctuation ()) - { - case token_punctuation::newline: cout << "\\n"; break; - case token_punctuation::colon: cout << ':'; break; - case token_punctuation::lcbrace: cout << '{'; break; - case token_punctuation::rcbrace: cout << '}'; break; - } - break; - } - case token_type::name: cout << '\'' << t.name () << '\''; break; - } - - cout << endl; - - if (t.type () == token_type::eos) - break; - } + p.parse (ifs, bf); } catch (const lexer_error&) { return 1; // Diagnostics has already been issued. } + catch (const parser_error&) + { + return 1; // Diagnostics has already been issued. 
+ } catch (const std::ios_base::failure&) { cerr << "error: failed to read from " << bf << endl; diff --git a/build/lexer b/build/lexer index 34b6fcc..cf67eec 100644 --- a/build/lexer +++ b/build/lexer @@ -21,11 +21,8 @@ namespace build class lexer { public: - // If name is empty, then no diagnostics is issued, just lexer_error - // is thrown (use for testing). - // - lexer (std::istream& is, const std::string& name) - : is_ (is), name_ (name) {} + lexer (std::istream& is, const std::string& name, std::ostream& diag) + : is_ (is), name_ (name), diag_ (diag) {} token next (); @@ -74,6 +71,8 @@ namespace build return c.value () == xchar::traits_type::eof (); } + // Scanner. + // private: xchar escape (); @@ -84,9 +83,16 @@ namespace build token name (xchar); + // Utilities. + // + private: + std::ostream& + error (const xchar&); + private: std::istream& is_; std::string name_; + std::ostream& diag_; std::uint64_t l_ {1}; std::uint64_t c_ {1}; diff --git a/build/lexer.cxx b/build/lexer.cxx index 8566788..9e3521a 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -56,10 +56,7 @@ namespace build if (!is_eos (c)) return c; - if (!name_.empty ()) - cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " << - "unterminated escape sequence" << endl; - + error (c) << "unterminated escape sequence" << endl; throw lexer_error (); } @@ -219,4 +216,11 @@ namespace build buf_ = c; unget_ = true; } + + ostream& lexer:: + error (const xchar& c) + { + return diag_ << name_ << ':' << c.line () << ':' << + c.column () << ": error: "; + } } diff --git a/build/parser b/build/parser new file mode 100644 index 0000000..04ef00d --- /dev/null +++ b/build/parser @@ -0,0 +1,55 @@ +// file : build/parser -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef BUILD_PARSER +#define BUILD_PARSER + +#include +#include +#include + +#include + +namespace build +{ + class token; + enum class token_type; 
+ class lexer; + + // The handler must assume the diagnostics has already been issued. + // + struct parser_error: std::exception {}; + + class parser + { + public: + parser (std::ostream& diag): diag_ (diag) {} + + void + parse (std::istream&, const path&); + + // Recursive descent parser. + // + private: + void + names (token&, token_type&); + + // Utilities. + // + private: + void + next (token&, token_type&); + + std::ostream& + error (const token&); + + private: + std::ostream& diag_; + + lexer* lexer_; + const path* path_; + }; +} + +#endif // BUILD_PARSER diff --git a/build/parser.cxx b/build/parser.cxx new file mode 100644 index 0000000..669ac8b --- /dev/null +++ b/build/parser.cxx @@ -0,0 +1,167 @@ +// file : build/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +#include + +#include +#include + +using namespace std; + +namespace build +{ + // Output the token type and value in a format suitable for diagnostics. + // + ostream& + operator<< (ostream&, const token&); + + typedef token_type type; + typedef token_punctuation punc; + + void parser:: + parse (istream& is, const path& p) + { + lexer l (is, p.string (), diag_); + lexer_ = &l; + path_ = &p; + + token t (0, 0); // eos + type tt; + + for (next (t, tt); tt != type::eos; ) + { + // We always start with one or more names. + // + names (t, tt); + + if (t.is (punc::colon)) + { + next (t, tt); + + if (tt == type::name || t.is (punc::lcbrace)) + names (t, tt); + + if (t.is (punc::newline)) + next (t, tt); + else if (tt != type::eos) + { + error (t) << "expected newline instead of " << t << endl; + throw parser_error (); + } + + continue; + } + + error (t) << "unexpected " << t << endl; + throw parser_error (); + } + } + + void parser:: + names (token& t, type& tt) + { + for (bool first (true);; first = false) + { + // Untyped name group, e.g., '{foo bar}'. 
+ // + if (t.is (punc::lcbrace)) + { + next (t, tt); + names (t, tt); + + if (!t.is (punc::rcbrace)) + { + error (t) << "expected '}' instead of " << t << endl; + throw parser_error (); + } + + next (t, tt); + continue; + } + + // Name. + // + if (tt == type::name) + { + string name (t.name ()); + + // See if this is a type name, that is, it is followed by '{'. + // + next (t, tt); + + if (t.is (punc::lcbrace)) + { + //cout << "type: " << name << endl; + + //@@ TODO: + // + // - detect nested typed name groups, e.g., 'cxx{hxx{foo}}'. + // + next (t, tt); + names (t, tt); + + if (!t.is (punc::rcbrace)) + { + error (t) << "expected '}' instead of " << t << endl; + throw parser_error (); + } + + next (t, tt); + continue; + } + + // This is a target, directory, or variable name. + //cout << "name: " << name << endl; + continue; + } + + if (!first) + break; + + error (t) << "expected name instead of " << t << endl; + throw parser_error (); + } + } + + void parser:: + next (token& t, token_type& tt) + { + t = lexer_->next (); + tt = t.type (); + } + + ostream& parser:: + error (const token& t) + { + return diag_ << path_->string () << ':' << t.line () << ':' << + t.column () << ": error: "; + } + + // Output the token type and value in a format suitable for diagnostics. 
+ // + ostream& + operator<< (ostream& os, const token& t) + { + switch (t.type ()) + { + case token_type::eos: os << "<end-of-stream>"; break; + case token_type::punctuation: + { + switch (t.punctuation ()) + { + case token_punctuation::newline: os << "<newline>"; break; + case token_punctuation::colon: os << "':'"; break; + case token_punctuation::lcbrace: os << "'{'"; break; + case token_punctuation::rcbrace: os << "'}'"; break; + } + break; + } + case token_type::name: os << '\'' << t.name () << '\''; break; + } + + return os; + } +} diff --git a/build/token b/build/token index bade45c..6f4951c 100644 --- a/build/token +++ b/build/token @@ -28,6 +28,12 @@ namespace build token_punctuation punctuation () const {assert (t_ == token_type::punctuation); return p_;} + bool + is (token_punctuation p) const + { + return t_ == token_type::punctuation && p_ == p; + } + std::uint64_t line () const {return l_;} std::uint64_t column () const {return c_;} diff --git a/tests/build/lexer/driver.cxx b/tests/build/lexer/driver.cxx index b1af9d9..e329b1c 100644 --- a/tests/build/lexer/driver.cxx +++ b/tests/build/lexer/driver.cxx @@ -71,6 +71,8 @@ main () tokens ({"foo", ":", "\n", "bar", ""})); } +ostream cnull (nullptr); + static tokens lex (const char* s) { @@ -78,7 +80,7 @@ lex (const char* s) istringstream is (s); is.exceptions (istream::failbit | istream::badbit); - lexer l (is, ""); + lexer l (is, "", cnull); try { diff --git a/tests/build/parser/driver.cxx b/tests/build/parser/driver.cxx new file mode 100644 index 0000000..4ba589d --- /dev/null +++ b/tests/build/parser/driver.cxx @@ -0,0 +1,70 @@ +// file : tests/build/parser/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include +#include +#include + +#include +#include +#include + +using namespace std; +using namespace build; + +static bool +parse (const char*); + +int +main () +{ + assert (parse ("")); + assert (parse ("foo:")); + assert (parse ("foo 
bar:")); + assert (parse ("foo:\nbar:")); + assert (parse ("foo: bar")); + assert (parse ("foo: bar baz")); + assert (parse ("foo bar: baz biz")); + + assert (parse ("{foo}:")); + assert (parse ("{foo bar}:")); + assert (parse ("{{foo bar}}:")); + assert (parse ("{{foo bar} {baz} {biz fox} fix}:")); + + assert (parse ("exe{foo}:")); + assert (parse ("exe{foo bar}:")); + assert (parse ("{exe{foo bar}}:")); + assert (parse ("exe{{foo bar} fox}:")); + assert (parse ("exe{foo}: obj{bar baz} biz.o lib{fox}")); + + assert (!parse (":")); + assert (!parse ("foo")); + assert (!parse ("{")); + assert (!parse ("{foo:")); + assert (!parse ("{foo{:")); + assert (!parse ("foo: bar:")); + assert (!parse ("exe{foo:")); +} + +ostream cnull (nullptr); + +static bool +parse (const char* s) +{ + istringstream is (s); + + is.exceptions (istream::failbit | istream::badbit); + parser p (cnull); + + try + { + p.parse (is, path ()); + } + catch (const parser_error&) + { + return false; + } + + return true; +} -- cgit v1.1