diff options
-rw-r--r-- | build2/test/script/parser | 4 | ||||
-rw-r--r-- | build2/test/script/parser.cxx | 329 | ||||
-rw-r--r-- | build2/test/script/token | 4 |
3 files changed, 155 insertions, 182 deletions
diff --git a/build2/test/script/parser b/build2/test/script/parser index daaa953..f9fbd98 100644 --- a/build2/test/script/parser +++ b/build2/test/script/parser @@ -46,10 +46,10 @@ namespace build2 parse_script_line (token&, token_type&); void - parse_variable_line (token&, token_type&, string, location); + parse_variable_line (token&, token_type&); void - parse_test_line (token&, token_type&, names, location); + parse_test_line (token&, token_type&); command_exit parse_command_exit (token&, token_type&); diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index 25f6690..67585c6 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -52,56 +52,20 @@ namespace build2 void parser:: parse_script_line (token& t, token_type& tt) { - // Parse first chunk. Keep track of whether anything in it was quoted. + // Decide whether this is a variable assignment or a command. It is a + // variable assignment if the first token is a word and the next is an + // assign/append/prepend operator. Assignment to a computed variable + // name must use the set builtin. // - names ns; - location nl (get_location (t)); - lexer_->reset_quoted (t.quoted); - parse_names (t, tt, ns, true, "variable or program name"); - - // See if this is a variable assignment or a test command. - // - if (tt == type::assign || - tt == type::prepend || - tt == type::append) + auto assign = [] (type t) { - // We need to strike a balance between recognizing command lines - // that contain the assignment operator and variable assignments. - // - // If we choose to treat these tokens literally (for example, if we - // have several names on the LHS), then we have the reversibility - // problem: we need to restore original whitespaces before and after - // the assignment operator (e.g., foo=bar vs foo = bar). - // - // To keep things simple we will start with the following rule: if - // the token after the first chunk of input is assignment, then it - // must be a variable assignment. After all, command lines like this - // are not expected to be common: - // - // $* =x - // - // It will also be easy to get the desired behavior with quoting: - // - // $* "=x" - // - // The only issue here is if $* above expands to a single, simple - // name (e.g., an executable name) in which case it will be treated - // as a variable name. One way to resolve it would be to detect - // "funny" variable names and require that they be quoted (this - // won't help with built-in commands; maybe we could warn if it's - // the same as built-in). Note that currently we have no way of - // knowing it's quoted. - // - // Or perhaps we should just let people learn that first assignment - // needs to be quoted? - // - if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ()) - fail (nl) << "variable name expected instead of '" << ns << "'"; + return t == type::assign || t == type::prepend || t == type::append; + }; - parse_variable_line (t, tt, move (ns[0].value), move (nl)); - } + if (tt == type::word && assign (peek ())) + parse_variable_line (t, tt); else - parse_test_line (t, tt, move (ns), move (nl)); + parse_test_line (t, tt); } // Return true if the string contains only digit characters (used to @@ -118,17 +82,22 @@ namespace build2 } void parser:: - parse_variable_line (token& t, token_type& tt, string name, location nl) + parse_variable_line (token& t, token_type& tt) { + location nl (get_location (t)); + string name (move (t.value)); + // Check if we are trying to modify any of the special aliases ($*, // $~, $N). // if (name == "*" || name == "~" || digits (name)) fail (nl) << "attempt to set '" << name << "' variable directly"; - type kind (tt); // Assignment kind. const variable& var (script_->var_pool.insert (move (name))); + next (t, tt); + type kind (tt); // Assignment kind. + // We cannot reuse the value mode since it will recognize { which // we want to treat as a literal. // @@ -164,7 +133,7 @@ namespace build2 } void parser:: - parse_test_line (token& t, token_type& tt, names ns, location nl) + parse_test_line (token& t, token_type& tt) { // Stop recognizing variable assignments. // @@ -353,130 +322,11 @@ namespace build2 // Keep parsing chunks of the command line until we see the newline or // the exit status comparison. // - for (bool done (false); !done; ) - { - // Process words that we already have. - // - bool q (lexer_->quoted ()); - - for (name& n: ns) - { - string s; - - try - { - s = value_traits<string>::convert (move (n), nullptr); - } - catch (const invalid_argument&) - { - fail (nl) << "invalid string value '" << n << "'"; - } - - // If it is a quoted chunk, then we add the word as is. Otherwise - // we re-lex it. But if the word doesn't contain any interesting - // characters (operators plus quotes/escapes), then no need to - // re-lex. - // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) - add_word (move (s), nl); - else - { - // Come up with a "path" that contains both the original - // location as well as the expanded string. The resulting - // diagnostics will look like this: - // - // testscript:10:1 ('abc): unterminated single quote - // - path name; - { - string n (nl.file->string ()); - n += ':'; - n += to_string (nl.line); - n += ':'; - n += to_string (nl.column); - n += ": ("; - n += s; - n += ')'; - name = path (move (n)); - } - - istringstream is (s); - lexer lex (is, name, lexer_mode::command_line); - - // Treat the first "sub-token" as always separated from what we - // saw earlier. - // - // Note that this is not "our" token so we cannot do fail(t). - // Rather we should do fail(l). - // - token t (lex.next ()); - location l (build2::get_location (t, name)); - t.separated = true; - - string w; - bool f (t.type == type::eos); // If the whole thing is empty. + location l (get_location (t)); + names ns; // Reuse to reduce allocations. - for (; t.type != type::eos; t = lex.next ()) - { - type tt (t.type); - l = build2::get_location (t, name); - - // Re-lexing double-quotes will recognize $, ( inside as - // tokens so we have to reverse them back. Since we don't - // treat spaces as separators we can be sure we will get it - // right. - // - switch (tt) - { - case type::dollar: w += '$'; continue; - case type::lparen: w += '('; continue; - } - - // Retire the current word. We need to distinguish between - // empty and non-existent (e.g., > vs >""). - // - if (!w.empty () || f) - { - add_word (move (w), l); - f = false; - } - - if (tt == type::word) - { - w = move (t.value); - f = true; - continue; - } - - // If this is one of the operators/separators, check that we - // don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the outer loop below. - // - switch (tt) - { - case type::in_null: - case type::in_string: - case type::in_document: - case type::out_null: - case type::out_string: - case type::out_document: - parse_redirect (t, l); - break; - } - } - - // Don't forget the last word. - // - if (!w.empty () || f) - add_word (move (w), l); - } - } - - // See what is the next token. - // + for (bool done (false); !done; l = get_location (t)) + { switch (tt) { case type::equal: @@ -496,9 +346,9 @@ namespace build2 // If this is one of the operators/separators, check that we // don't have any pending locations to be filled. // - check_pending (nl); + check_pending (l); - // Note: there is another one in the inner loop above. + // Note: there is another one in the inner loop below. // switch (tt) { @@ -508,7 +358,7 @@ namespace build2 case type::out_null: case type::out_string: case type::out_document: - parse_redirect (t, get_location (t)); + parse_redirect (t, l); next (t, tt); break; } @@ -517,12 +367,133 @@ namespace build2 } default: { - // Parse the next chunk. + // Parse the next chunk as names to get variable expansion, etc. + // Note that we do it in the chunking mode to detect whether + // anything in each chunk is quoted. // - ns.clear (); lexer_->reset_quoted (t.quoted); - nl = get_location (t); parse_names (t, tt, ns, true, "command"); + + // Process what we got. + // + bool q (lexer_->quoted ()); + for (name& n: ns) + { + string s; + + try + { + s = value_traits<string>::convert (move (n), nullptr); + } + catch (const invalid_argument&) + { + fail (l) << "invalid string value '" << n << "'"; + } + + // If it is a quoted chunk, then we add the word as is. + // Otherwise we re-lex it. But if the word doesn't contain any + // interesting characters (operators plus quotes/escapes), + // then no need to re-lex. + // + if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + add_word (move (s), l); + else + { + // Come up with a "path" that contains both the original + // location as well as the expanded string. The resulting + // diagnostics will look like this: + // + // testscript:10:1 ('abc): unterminated single quote + // + path name; + { + string n (l.file->string ()); + n += ':'; + n += to_string (l.line); + n += ':'; + n += to_string (l.column); + n += ": ("; + n += s; + n += ')'; + name = path (move (n)); + } + + istringstream is (s); + lexer lex (is, name, lexer_mode::command_line); + + // Treat the first "sub-token" as always separated from what + // we saw earlier. + // + // Note that this is not "our" token so we cannot do + // fail(t). Rather we should do fail(l). + // + token t (lex.next ()); + location l (build2::get_location (t, name)); + t.separated = true; + + string w; + bool f (t.type == type::eos); // If the whole thing is empty. + + for (; t.type != type::eos; t = lex.next ()) + { + type tt (t.type); + l = build2::get_location (t, name); + + // Re-lexing double-quotes will recognize $, ( inside as + // tokens so we have to reverse them back. Since we don't + // treat spaces as separators we can be sure we will get + // it right. + // + switch (tt) + { + case type::dollar: w += '$'; continue; + case type::lparen: w += '('; continue; + } + + // Retire the current word. We need to distinguish between + // empty and non-existent (e.g., > vs >""). + // + if (!w.empty () || f) + { + add_word (move (w), l); + f = false; + } + + if (tt == type::word) + { + w = move (t.value); + f = true; + continue; + } + + // If this is one of the operators/separators, check that + // we don't have any pending locations to be filled. + // + check_pending (l); + + // Note: there is another one in the outer loop above. + // + switch (tt) + { + case type::in_null: + case type::in_string: + case type::in_document: + case type::out_null: + case type::out_string: + case type::out_document: + parse_redirect (t, l); + break; + } + } + + // Don't forget the last word. + // + if (!w.empty () || f) + add_word (move (w), l); + } + } + + ns.clear (); break; } } @@ -530,7 +501,7 @@ namespace build2 // Verify we don't have anything pending to be filled. // - check_pending (nl); + check_pending (l); // While we no longer need to recognize command line operators, we // also don't expect a valid test trailer to contain them. So we are diff --git a/build2/test/script/token b/build2/test/script/token index ac035f7..e952e00 100644 --- a/build2/test/script/token +++ b/build2/test/script/token @@ -38,7 +38,9 @@ namespace build2 out_document // << }; - using base_type::base_type; + token_type () = default; + token_type (value_type v): base_type (v) {} + token_type (base_type v): base_type (v) {} }; void |