From 7a2f5753a12a68e87f8556f6e833710f147533b2 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 9 Sep 2015 14:10:24 +0200 Subject: Add support for evaluation context For now it acts as just the value mode that can be enabled anywhere variable expansion is supported, for example: (foo=bar): And the primary use currently is to enable/test quoted and indirect variable expansion: "foo bar" = FOO BAR print $"foo bar" # Invalid. print $("foo bar") # Yeah, baby. foo = FOO FOO = foo print $($foo) Not that you should do something like this... --- build/lexer | 50 +++++----- build/lexer.cxx | 96 +++++++++++-------- build/parser | 3 + build/parser.cxx | 185 ++++++++++++++++++++++++------------- tests/eval/buildfile | 13 +++ tests/eval/test.out | 4 + tests/eval/test.sh | 3 + tests/lexer/driver.cxx | 2 +- tests/quote/buildfile | 10 ++ tests/quote/test.out | 6 ++ tests/variable/expansion/buildfile | 26 ++++++ tests/variable/expansion/test.out | 6 ++ tests/variable/expansion/test.sh | 3 + 13 files changed, 276 insertions(+), 131 deletions(-) create mode 100644 tests/eval/buildfile create mode 100644 tests/eval/test.out create mode 100755 tests/eval/test.sh create mode 100644 tests/variable/expansion/buildfile create mode 100644 tests/variable/expansion/test.out create mode 100755 tests/variable/expansion/test.sh diff --git a/build/lexer b/build/lexer index 37c7807..13f28cb 100644 --- a/build/lexer +++ b/build/lexer @@ -5,6 +5,7 @@ #ifndef BUILD_LEXER #define BUILD_LEXER +#include #include #include #include // size_t @@ -21,16 +22,20 @@ namespace build { // Context-dependent lexing mode. In the value mode we don't treat // certain characters (e.g., +, =) as special so that we can use - // them in the variable values, e.g., 'foo = g++'. In contrast, - // in the variable mode, we restrict certain character (e.g., /) - // from appearing in the name. The pairs mode is just like value - // except that we split names separated by the pair character. - // The alternnative modes must be set manually. The value and - // pairs modes are automatically reset after the end of the line. - // The variable mode is automatically reset after the name token. - // Quoted is an internal mode and should not be explicitly set. + // them in the variable values, e.g., 'foo = g++'. In contrast, in + // the variable mode, we restrict certain character (e.g., /) from + // appearing in the name. The pairs mode is just like value except + // that we split names separated by the pair character. The eval + // mode is used in the evaluation context. // - enum class lexer_mode {normal, quoted, variable, value, pairs}; + // The alternnative modes must be set manually. The value and pairs + // modes are automatically reset after the end of the line. The + // variable mode is reset after the name token. And the eval mode + // is reset after the closing ')'. + // + // Quoted is an internal mode and should not be set explicitly. + // + enum class lexer_mode {normal, variable, value, pairs, eval, quoted}; class lexer: protected butl::char_scanner { @@ -69,14 +74,17 @@ namespace build private: token + next_eval (); + + token next_quoted (); token name (bool separated); - // Return true we have seen any spaces. Skipped empty lines don't - // count. In other words, we are only interested in spaces that - // are on the same line as the following non-space character. + // Return true if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces + // that are on the same line as the following non-space character. // bool skip_spaces (); @@ -101,23 +109,7 @@ namespace build private: fail_mark fail; - // Currently, the maximum mode nesting is 4: {normal, value, quoted, - // variable}. - // - struct mode_stack - { - static const size_t max_size = 4; - - void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;} - void pop () {assert (n_ != 0); n_--;} - lexer_mode top () const {return d_[n_ - 1];} - - private: - size_t n_ = 0; - lexer_mode d_[max_size]; - }; - - mode_stack mode_; + std::stack mode_; char pair_separator_; }; } diff --git a/build/lexer.cxx b/build/lexer.cxx index 6da18eb..133375b 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -17,6 +17,7 @@ namespace build // switch (m) { + case lexer_mode::eval: return next_eval (); case lexer_mode::quoted: return next_quoted (); default: break; } @@ -31,7 +32,8 @@ namespace build switch (c) { - // NOTE: remember to update name() if adding new punctuations. + // NOTE: remember to update name(), next_eval() if adding new + // special characters. // case '\n': { @@ -42,26 +44,11 @@ namespace build return token (token_type::newline, sep, ln, cn); } - case '{': - { - return token (token_type::lcbrace, sep, ln, cn); - } - case '}': - { - return token (token_type::rcbrace, sep, ln, cn); - } - case '$': - { - return token (token_type::dollar, sep, ln, cn); - } - case '(': - { - return token (token_type::lparen, sep, ln, cn); - } - case ')': - { - return token (token_type::rparen, sep, ln, cn); - } + case '{': return token (token_type::lcbrace, sep, ln, cn); + case '}': return token (token_type::rcbrace, sep, ln, cn); + case '$': return token (token_type::dollar, sep, ln, cn); + case '(': return token (token_type::lparen, sep, ln, cn); + case ')': return token (token_type::rparen, sep, ln, cn); } // Handle pair separator. @@ -74,14 +61,13 @@ namespace build // if (m != lexer_mode::value && m != lexer_mode::pairs) { - // NOTE: remember to update name() if adding new punctuations. - // switch (c) { - case ':': - { - return token (token_type::colon, sep, ln, cn); - } + // NOTE: remember to update name(), next_eval() if adding new + // special characters. + // + case ':': return token (token_type::colon, sep, ln, cn); + case '=': return token (token_type::equal, sep, ln, cn); case '+': { if (get () != '=') @@ -89,10 +75,42 @@ namespace build return token (token_type::plus_equal, sep, ln, cn); } - case '=': - { - return token (token_type::equal, sep, ln, cn); - } + } + } + + // Otherwise it is a name. + // + unget (c); + return name (sep); + } + + token lexer:: + next_eval () + { + bool sep (skip_spaces ()); + xchar c (get ()); + + if (eos (c)) + fail (c) << "unterminated evaluation context"; + + uint64_t ln (c.line), cn (c.column); + + // This mode is quite a bit like the value mode when it comes + // to special characters. + // + switch (c) + { + // NOTE: remember to update name() if adding new special characters. + // + case '\n': fail (c) << "newline in evaluation context"; + case '{': return token (token_type::lcbrace, sep, ln, cn); + case '}': return token (token_type::rcbrace, sep, ln, cn); + case '$': return token (token_type::dollar, sep, ln, cn); + case '(': return token (token_type::lparen, sep, ln, cn); + case ')': + { + mode_.pop (); // Expire eval mode. + return token (token_type::rparen, sep, ln, cn); } } @@ -105,7 +123,7 @@ namespace build token lexer:: next_quoted () { - xchar c (peek ()); + xchar c (get ()); if (eos (c)) fail (c) << "unterminated double-quoted sequence"; @@ -114,9 +132,14 @@ namespace build switch (c) { - case '$': get (); return token (token_type::dollar, false, ln, cn); - default: return name (false); + case '$': return token (token_type::dollar, false, ln, cn); + case '(': return token (token_type::lparen, false, ln, cn); } + + // Otherwise it is a name. + // + unget (c); + return name (false); } token lexer:: @@ -140,10 +163,11 @@ namespace build break; // The following characters are not treated as special in the - // value/pairs and quoted modes. + // value/pairs, eval, and quoted modes. // if (m != lexer_mode::value && m != lexer_mode::pairs && + m != lexer_mode::eval && m != lexer_mode::quoted) { switch (c) @@ -192,7 +216,6 @@ namespace build case '#': case '{': case '}': - case '(': case ')': { done = true; @@ -236,6 +259,7 @@ namespace build switch (c) { case '$': + case '(': { done = true; break; diff --git a/build/parser b/build/parser index b5155be..0678a62 100644 --- a/build/parser +++ b/build/parser @@ -77,6 +77,9 @@ namespace build variable_name (names_type&&, const location&); names_type + eval (token&, token_type&); + + names_type names (token& t, token_type& tt) { names_type ns; diff --git a/build/parser.cxx b/build/parser.cxx index 2daf1ce..d68bcd9 100644 --- a/build/parser.cxx +++ b/build/parser.cxx @@ -86,6 +86,7 @@ namespace build if (tt != type::name && tt != type::lcbrace && // Untyped name group: '{foo ...' tt != type::dollar && // Variable expansion: '$foo ...' + tt != type::lparen && // Eval context: '(foo) ...' tt != type::colon) // Empty name: ': ...' break; // Something else. Let our caller handle that. @@ -236,6 +237,7 @@ namespace build if (tt == type::name || tt == type::lcbrace || tt == type::dollar || + tt == type::lparen || tt == type::newline || tt == type::eos) { @@ -796,6 +798,20 @@ namespace build } } + parser::names_type parser:: + eval (token& t, token_type& tt) + { + lexer_->mode (lexer_mode::eval); + next (t, tt); + + names_type ns (tt != type::rparen ? names (t, tt) : names_type ()); + + if (tt != type::rparen) + fail (t) << "expected ')' instead of " << t; + + return ns; + } + void parser:: names (token& t, type& tt, @@ -810,10 +826,11 @@ namespace build // a={b c d{e f} {}}. // - // Buffer that is used to collect the complete name in case of an - // unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The - // idea is to concatenate all the individual parts in this buffer - // and then re-inject it into the loop as a single token. + // Buffer that is used to collect the complete name in case of + // an unseparated variable expansion or eval context, e.g., + // 'foo$bar$(baz)fox'. The idea is to concatenate all the + // individual parts in this buffer and then re-inject it into + // the loop as a single token. // string concat; @@ -827,10 +844,12 @@ namespace build { // If the accumulating buffer is not empty, then we have two options: // continue accumulating or inject. We inject if the next token is - // not a name or var expansion or if it is separated. + // not a name, var expansion, or eval context or if it is separated. // if (!concat.empty () && - ((tt != type::name && tt != type::dollar) || peeked ().separated ())) + ((tt != type::name && + tt != type::dollar && + tt != type::lparen) || peeked ().separated ())) { tt = type::name; t = token (move (concat), true, t.line (), t.column ()); @@ -849,11 +868,12 @@ namespace build // Should we accumulate? If the buffer is not empty, then // we continue accumulating (the case where we are separated // should have been handled by the injection code above). If - // the next token is a var expansion and it is not separated, - // then we need to start accumulating. + // the next token is a var expansion or eval context and it + // is not separated, then we need to start accumulating. // - if (!concat.empty () || // Continue. - (tt == type::dollar && !peeked ().separated ())) // Start. + if (!concat.empty () || // Continue. + ((tt == type::dollar || + tt == type::lparen) && !peeked ().separated ())) // Start. { concat += name; continue; @@ -1006,60 +1026,97 @@ namespace build continue; } - // Variable expansion. + // Variable expansion/function call or eval context. // - if (tt == type::dollar) + if (tt == type::dollar || tt == type::lparen) { - // Switch to the variable name mode. We want to use this - // mode for $foo but not for $(foo). Since we don't know - // whether the next token is a paren or a name, we turn - // it on and turn it off if what we get next is a paren - // so that the following name is scanned in the normal - // mode. + // These two cases are pretty similar in that in both we + // pretty quickly end up with a list of names that we need + // to splice into the result. // - lexer_->mode (lexer_mode::variable); + names_type lv_eval; + const names_type* plv; - next (t, tt); + location loc; + const char* what; // Variable or evaluation context. - bool paren (tt == type::lparen); - if (paren) + if (tt == type::dollar) { - lexer_->expire_mode (); + // Switch to the variable name mode. We want to use this + // mode for $foo but not for $(foo). Since we don't know + // whether the next token is a paren or a name, we turn + // it on and switch to the eval mode if what we get next + // is a paren. + // + lexer_->mode (lexer_mode::variable); next (t, tt); - } + loc = get_location (t, &path_); - if (tt != type::name) - fail (t) << "variable name expected instead of " << t; + string n; + if (tt == type::name) + n = t.name (); + else if (tt == type::lparen) + { + lexer_->expire_mode (); + names_type ns (eval (t, tt)); - string n; - if (t.name ().front () == '.') // Fully qualified name. - n.assign (t.name (), 1, string::npos); - else - //@@ TODO: append namespace if any. - n = t.name (); + // Make sure the result of evaluation is a single, simple name. + // + if (ns.size () != 1 || !ns.front ().simple ()) + fail (loc) << "variable name expected instead of '" << ns << "'"; - const auto& var (variable_pool.find (move (n))); - auto l (target_ != nullptr ? (*target_)[var] : (*scope_)[var]); + n = move (ns.front ().value); + } + else + fail (t) << "variable name expected instead of " << t; - // Undefined/NULL namespace variables are not allowed. - // - if (!l && var.name.find ('.') != string::npos) - fail (t) << "undefined/null namespace variable " << var.name; + if (n.empty ()) + fail (loc) << "empty variable name"; - if (paren) - { - next (t, tt); + // Process variable name. + // + if (n.front () == '.') // Fully qualified name. + n.erase (0, 1); + else + { + //@@ TODO: append namespace if any. + } + + // Lookup. + // + const auto& var (variable_pool.find (move (n))); + auto l (target_ != nullptr ? (*target_)[var] : (*scope_)[var]); + + // Undefined/NULL namespace variables are not allowed. + // + if (!l && var.name.find ('.') != string::npos) + fail (loc) << "undefined/null namespace variable " << var.name; + + tt = peek (); + + if (!l || l->empty ()) + continue; - if (tt != type::rparen) - fail (t) << "expected ) instead of " << t; + plv = &l->data_; + what = "variable expansion"; } + else + { + loc = get_location (t, &path_); + lv_eval = eval (t, tt); - tt = peek (); + tt = peek (); - if (!l || l->empty ()) - continue; + if (lv_eval.empty ()) + continue; - const names_type& lv (l->data_); + plv = &lv_eval; + what = "context evaluation"; + } + + // @@ Could move if (lv == &lv_eval). + // + const names_type& lv (*plv); // Should we accumulate? If the buffer is not empty, then // we continue accumulating (the case where we are separated @@ -1068,31 +1125,29 @@ namespace build // separated, then we need to start accumulating. // if (!concat.empty () || // Continue. - ((tt == type::name || tt == type::dollar) // Start. - && !peeked ().separated ())) + ((tt == type::name || // Start. + tt == type::dollar || + tt == type::lparen) && !peeked ().separated ())) { // This should be a simple value or a simple directory. The // token still points to the name (or closing paren). // if (lv.size () > 1) - fail (t) << "concatenating expansion of " << var.name - << " contains multiple values"; + fail (loc) << "concatenating " << what << " contains multiple " + << "values"; const name& n (lv[0]); if (n.qualified ()) - fail (t) << "concatenating expansion of " << var.name - << " contains project name"; + fail (loc) << "concatenating " << what << " contains project name"; if (n.typed ()) - fail (t) << "concatenating expansion of " << var.name - << " contains type"; + fail (loc) << "concatenating " << what << " contains type"; if (!n.dir.empty ()) { if (!n.value.empty ()) - fail (t) << "concatenating expansion of " << var.name - << " contains directory"; + fail (loc) << "concatenating " << what << " contains directory"; concat += n.dir.string (); } @@ -1115,8 +1170,8 @@ namespace build if (pp == nullptr) pp1 = n.proj; else - fail (t) << "nested project name " << *n.proj << " in " - << "variable expansion"; + fail (loc) << "nested project name " << *n.proj << " in " + << what; } dir_path d1; @@ -1125,8 +1180,8 @@ namespace build if (dp != nullptr) { if (n.dir.absolute ()) - fail (t) << "nested absolute directory " << n.dir - << " in variable expansion"; + fail (loc) << "nested absolute directory " << n.dir + << " in " << what; d1 = *dp / n.dir; dp1 = &d1; @@ -1140,8 +1195,7 @@ namespace build if (tp == nullptr) tp1 = &n.type; else - fail (t) << "nested type name " << n.type << " in variable " - << "expansion"; + fail (loc) << "nested type name " << n.type << " in " << what; } // If we are a second half of a pair. @@ -1151,7 +1205,7 @@ namespace build // Check that there are no nested pairs. // if (n.pair != '\0') - fail (t) << "nested pair in variable expansion"; + fail (loc) << "nested pair in " << what; // And add another first half unless this is the first instance. // @@ -1304,7 +1358,8 @@ namespace build while (tt != tt_end) { - // We always start with one or more names. + // We always start with one or more names. No eval context + // support for the time being. // if (tt != type::name && tt != type::lcbrace && // Untyped name group: '{foo ...' diff --git a/tests/eval/buildfile b/tests/eval/buildfile new file mode 100644 index 0000000..c658d3b --- /dev/null +++ b/tests/eval/buildfile @@ -0,0 +1,13 @@ +(./): + +# Invalid. +# +#(foo +#(foo #comment + +print () +print ((foo)(bar)) +print ((foo) (bar)) + +print (foo\ +bar) diff --git a/tests/eval/test.out b/tests/eval/test.out new file mode 100644 index 0000000..5885c7d --- /dev/null +++ b/tests/eval/test.out @@ -0,0 +1,4 @@ + +foobar +foo bar +foobar diff --git a/tests/eval/test.sh b/tests/eval/test.sh new file mode 100755 index 0000000..b898b3c --- /dev/null +++ b/tests/eval/test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +valgrind -q b -q | diff -u test.out - diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx index a3819f5..ca27d39 100644 --- a/tests/lexer/driver.cxx +++ b/tests/lexer/driver.cxx @@ -89,7 +89,7 @@ main () assert (lex ("\"foo \"\"bar\"") == tokens ({"foo bar", ""})); assert (lex ("foo\" \"bar") == tokens ({"foo bar", ""})); assert (lex ("\"foo\nbar\"") == tokens ({"foo\nbar", ""})); - assert (lex ("\"#:{}()=+\n\"") == tokens ({"#:{}()=+\n", ""})); + assert (lex ("\"#:{})=+\n\"") == tokens ({"#:{})=+\n", ""})); assert (lex ("\"'\"") == tokens ({"'", ""})); assert (lex ("\"\\\"") == tokens ({"\\", ""})); diff --git a/tests/quote/buildfile b/tests/quote/buildfile index 6dd22b4..19c2bfc 100644 --- a/tests/quote/buildfile +++ b/tests/quote/buildfile @@ -19,4 +19,14 @@ print $foo'bar' print $foo"$bar" print "$foo"bar +# Quoting and evaluation context. +# +print ("x{foo bar}") +#print "(x{foo bar})" # multiple values in concatenating context expansion +print "({foo})" +print "('foo bar')" +print "("foo bar")" +print "("$foo bar")" +print "("$foo ($bar)")" + ./: diff --git a/tests/quote/test.out b/tests/quote/test.out index 216b1c8..f5d7a71 100644 --- a/tests/quote/test.out +++ b/tests/quote/test.out @@ -12,3 +12,9 @@ fo o bar fo obar fo o bar fo obar +x{foo bar} +foo +foo bar +foo bar +fo o bar +fo o bar diff --git a/tests/variable/expansion/buildfile b/tests/variable/expansion/buildfile new file mode 100644 index 0000000..3f28372 --- /dev/null +++ b/tests/variable/expansion/buildfile @@ -0,0 +1,26 @@ +foo = FOO + +print $foo +print $(foo) + +# Invalid. +# +#print $ +#print $() +#print $(foo bar) +#print $(foo{bar}) + +# Indirect. +# +FOO = foo +print $($FOO) +print $($(FOO)) +print $($($FOO)) + +# Quoted name. +# +"b a r" = BAR +print $("b a r") +#print $"b a r" + +./: diff --git a/tests/variable/expansion/test.out b/tests/variable/expansion/test.out new file mode 100644 index 0000000..5056f04 --- /dev/null +++ b/tests/variable/expansion/test.out @@ -0,0 +1,6 @@ +FOO +FOO +FOO +FOO +foo +BAR diff --git a/tests/variable/expansion/test.sh b/tests/variable/expansion/test.sh new file mode 100755 index 0000000..b898b3c --- /dev/null +++ b/tests/variable/expansion/test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +valgrind -q b -q | diff -u test.out - -- cgit v1.1