diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2016-10-26 11:30:31 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2016-11-04 09:26:35 +0200 |
commit | 8fabd86e687ef9455f1b31abbe0b44876afdecab (patch) | |
tree | 7602d4dae7e563d72af4443fed408d384d642a51 | |
parent | 94e717de9323864e8128b3de7ff213c4f609707f (diff) |
Rework testscript parser in preparation for scope support
-rw-r--r-- | build2/test/script/lexer.cxx | 9 | ||||
-rw-r--r-- | build2/test/script/parser | 12 | ||||
-rw-r--r-- | build2/test/script/parser.cxx | 239 |
3 files changed, 163 insertions, 97 deletions
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx index e5514bd..7442973 100644 --- a/build2/test/script/lexer.cxx +++ b/build2/test/script/lexer.cxx @@ -33,9 +33,10 @@ namespace build2 case lexer_mode::first_token: { // First token on the script line. Like script_line but recognizes - // leading plus/minus and variable assignments as separators. + // leading '+-{}' as tokens as well as variable assignments as + // separators. // - // Note that to recognize only leading plus/minus we shouldn't add + // Note that to recognize only leading '+-{}' we shouldn't add // them to the separator strings. // s1 = ";=+!|&<> $(#\t\n"; @@ -330,7 +331,7 @@ namespace build2 } } - // Plus/minus. + // Plus/minus and left/right curly braces // if (m == lexer_mode::first_token) { @@ -338,6 +339,8 @@ namespace build2 { case '+': return make_token (type::plus); case '-': return make_token (type::minus); + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); } } diff --git a/build2/test/script/parser b/build2/test/script/parser index 5538b62..0f215d1 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -48,17 +48,11 @@ namespace build2 void parse (scope&, const path& name, script&, runner&); - void - pre_parse_script (); - - void - parse_script (); - - pair<line_type, bool> - pre_parse_script_line (token&, token_type&); + token + pre_parse_scope_body (); void - parse_script_line (token&, token_type&, line_type, size_t); + parse_scope_body (); bool parse_variable_line (token&, token_type&); diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index 450b5ac..85a77ce 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -36,7 +36,22 @@ namespace build2 scope_ = nullptr; - pre_parse_script (); + // Start location of the implied script group is the beginning of the + // file. End location -- end of the file. + // + group_->start_loc_ = location (path_, 1, 1); + + token t (pre_parse_scope_body ()); + + if (t.type != type::eos) + fail (t) << "stray " << t; + + // Check that we don't expect more lines. + // + if (lines_ != nullptr) + fail (t) << "expected another line after semicolon"; + + group_->end_loc_ = get_location (t); } void parser:: @@ -57,50 +72,127 @@ namespace build2 scope_ = ≻ - parse_script (); + parse_scope_body (); } - void parser:: - pre_parse_script () + token parser:: + pre_parse_scope_body () { token t; type tt; + // Parse lines (including nested scopes) until we see '}' or eos. + // for (;;) { - // Start saving tokens for the next (logical) line. - // - replay_save (); - - // Start lexing each line recognizing leading plus/minus. + // Start lexing each line recognizing leading '+-{}'. // mode (lexer_mode::first_token); - next (t, tt); - if (group_->start_loc_.empty ()) - group_->start_loc_ = get_location (t); + // Determine the line type by peeking at the first token. + // + tt = peek (); + const location ll (get_location (peeked ())); - if (tt == type::eos) + line_type lt; + switch (tt) { - // Check that we don't expect more lines. - // - // @@ Also for explicit scope close. - // - if (lines_ != nullptr) - fail (t) << "expected another line after semicolon"; + case type::eos: + case type::rcbrace: + { + next (t, tt); + return t; + } + case type::lcbrace: + { + // @@ Nested scope. Get newlines after open/close. + + assert (false); + + /* + group_->start_loc_ = get_location (t); + + // Check that we don't expect more lines. + // + if (lines_ != nullptr) + fail (t) << "expected another line after semicolon"; + + group_->end_loc_ = get_location (t); + */ + + continue; + } + case type::plus: + case type::minus: + { + // This is a setup/teardown command. + // + lt = (tt == type::plus ? line_type::setup : line_type::tdown); + + next (t, tt); // Start saving tokens from the next one. + replay_save (); + next (t, tt); + break; + } + default: + { + // This is either a test command or a variable assignment. + // + replay_save (); // Start saving tokens from the current one. + next (t, tt); + + // Decide whether this is a variable assignment or a command. It + // is an assignment if the first token is an unquoted word and + // the next is an assign/append/prepend operator. Assignment to + // a computed variable name must use the set builtin. + // + if (tt == type::word && !t.quoted) + { + // Switch the recognition of leading variable assignments for + // the next token. This is safe to do because we know we + // cannot be in the quoted mode (since the current token is + // not quoted). + // + mode (lexer_mode::second_token); + type p (peek ()); + + if (p == type::assign || + p == type::prepend || + p == type::append) + { + lt = line_type::variable; + break; + } + } + + lt = line_type::test; + break; + } + } - group_->end_loc_ = get_location (t); - replay_stop (); // Discard replay of eos. + // Being here means we know the line type and token saving is on. + // First we pre-parse the line keeping track of whether it ends with + // a semicolon. + // + bool semi; + + switch (lt) + { + case line_type::variable: + semi = parse_variable_line (t, tt); + break; + case line_type::setup: + case line_type::tdown: + case line_type::test: + semi = parse_command_line (t, tt, lt, 0); break; } - const location ll (get_location (t)); - pair<line_type, bool> lt (pre_parse_script_line (t, tt)); assert (tt == type::newline); // Stop saving and get the tokens. // - line l {lt.first, replay_data ()}; + line l {lt, replay_data ()}; // Decide where it goes. // @@ -111,7 +203,7 @@ namespace build2 // if (lines_ != nullptr) { - switch (lt.first) + switch (lt) { case line_type::setup: fail (ll) << "setup command in test"; case line_type::tdown: fail (ll) << "teardown command in test"; @@ -122,7 +214,7 @@ namespace build2 } else { - switch (lt.first) + switch (lt) { case line_type::setup: { @@ -146,7 +238,7 @@ namespace build2 // it is part of a test (there is no reason to use semicolons // after variables in the group scope). // - if (!lt.second) + if (!semi) { // If we don't have any nested scopes or teardown commands, // then we assume this is a setup, otherwise -- teardown. @@ -198,35 +290,59 @@ namespace build2 // If this command ended with a semicolon, then the next one should // go to the same place. // - lines_ = lt.second ? ls : nullptr; + lines_ = semi ? ls : nullptr; } } void parser:: - parse_script () + parse_scope_body () { auto play = [this] (lines& ls) // Note: destructive to lines. { token t; type tt; - for (size_t i (0), n (ls.size ()); i != n; ++i) + for (size_t i (0), li (0), n (ls.size ()); i != n; ++i) { line& l (ls[i]); replay_data (move (l.tokens)); // Set the tokens and start playing. - // We don't really need the assign mode since we already know the - // line type. + // We don't really need to change the mode since we already know + // the line type. // next (t, tt); - // @@ The index counts variable assignment lines. This is probably - // not what we want. - // - parse_script_line (t, tt, l.type, n == 1 ? 0 : i + 1); - assert (tt == type::newline); + switch (l.type) + { + case line_type::variable: + { + parse_variable_line (t, tt); + break; + } + case line_type::setup: + case line_type::tdown: + case line_type::test: + { + // We use the 0 index to signal that this is the only command. + // + if (li == 0) + { + size_t j (i + 1); + for (; j != n && ls[j].type == line_type::variable; ++j) ; + + if (j != n) // We have another command. + ++li; + } + else + ++li; + + parse_command_line (t, tt, l.type, li); + break; + } + } + assert (tt == type::newline); replay_stop (); // Stop playing. } }; @@ -248,7 +364,7 @@ namespace build2 // // scope* os (scope_); // scope_ = s.get (); - // parse_script (); + // parse_scope_body (); // scope_ = os; // parser p; @@ -263,53 +379,6 @@ namespace build2 runner_->leave (*scope_, scope_->end_loc_); } - pair<line_type, bool> parser:: - pre_parse_script_line (token& t, type& tt) - { - // Decide whether this is a variable assignment or a command. It is a - // variable assignment if the first token is an unquoted word (name) - // and the next is an assign/append/prepend operator. Assignment to a - // computed variable name must use the set builtin. - // - if (tt == type::word && !t.quoted) - { - // Switch recognition of leading variable assignments for the next - // token. This is safe to do because we know we cannot be in the - // quoted mode (since the current token is not quoted). - // - mode (lexer_mode::second_token); - type p (peek ()); - - if (p == type::assign || p == type::prepend || p == type::append) - { - return make_pair (line_type::variable, - parse_variable_line (t, tt)); - } - } - - line_type lt (tt == type::plus ? line_type::setup : - tt == type::minus ? line_type::tdown : - line_type::test); - - if (lt != line_type::test) - next (t, tt); - - return make_pair (lt, parse_command_line (t, tt, lt, 0)); - } - - void parser:: - parse_script_line (token& t, type& tt, line_type lt, size_t li) - { - switch (lt) - { - case line_type::variable: parse_variable_line (t, tt); break; - - case line_type::setup: - case line_type::tdown: next (t, tt); // Skip plus/minus fallthrough. - case line_type::test: parse_command_line (t, tt, lt, li); break; - } - } - // Return true if the string contains only digit characters (used to // detect the special $NN variables). // |