Add support for evaluation context

For now it acts as just the value mode that can be enabled anywhere variable
expansion is supported, for example:

    (foo=bar):

And the primary use currently is to enable/test quoted and indirect variable
expansion:

    "foo bar" = FOO BAR
    print $"foo bar"    # Invalid.
    print $("foo bar")  # Yeah, baby.

    foo = FOO
    FOO = foo
    print $($foo)

Not that you should do something like this...
author: Boris Kolpackov <boris@codesynthesis.com> 2015-09-09 14:10:24 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2015-09-09 14:10:24 +0200
commit: 7a2f5753a12a68e87f8556f6e833710f147533b2 (patch)
tree: 850bfc8e3b0a40671db5656e695d640488bdda0a
parent: e3b6dc455ab5c98606e38983bd19426ae346f469 (diff)
13 files changed, 276 insertions, 131 deletions
diff --git a/build/lexer b/build/lexer
index 37c7807..13f28cb 100644
--- a/build/lexer
+++ b/build/lexer
@@ -5,6 +5,7 @@
 #ifndef BUILD_LEXER
 #define BUILD_LEXER
 
+#include <stack>
 #include <string>
 #include <iosfwd>
 #include <cstddef> // size_t
@@ -21,16 +22,20 @@ namespace build
 {
   // Context-dependent lexing mode. In the value mode we don't treat
   // certain characters (e.g., +, =) as special so that we can use
-  // them in the variable values, e.g., 'foo = g++'. In contrast,
-  // in the variable mode, we restrict certain character (e.g., /)
-  // from appearing in the name. The pairs mode is just like value
-  // except that we split names separated by the pair character.
-  // The alternnative modes must be set manually. The value and
-  // pairs modes are automatically reset after the end of the line.
-  // The variable mode is automatically reset after the name token.
-  // Quoted is an internal mode and should not be explicitly set.
+  // them in the variable values, e.g., 'foo = g++'. In contrast, in
+  // the variable mode, we restrict certain character (e.g., /) from
+  // appearing in the name. The pairs mode is just like value except
+  // that we split names separated by the pair character. The eval
+  // mode is used in the evaluation context.
   //
-  enum class lexer_mode {normal, quoted, variable, value, pairs};
+  // The alternnative modes must be set manually. The value and pairs
+  // modes are automatically reset after the end of the line. The
+  // variable mode is reset after the name token. And the eval mode
+  // is reset after the closing ')'.
+  //
+  // Quoted is an internal mode and should not be set explicitly.
+  //
+  enum class lexer_mode {normal, variable, value, pairs, eval, quoted};
 
   class lexer: protected butl::char_scanner
   {
@@ -69,14 +74,17 @@ namespace build
 
   private:
     token
+    next_eval ();
+
+    token
     next_quoted ();
 
     token
     name (bool separated);
 
-    // Return true we have seen any spaces. Skipped empty lines don't
-    // count. In other words, we are only interested in spaces that
-    // are on the same line as the following non-space character.
+    // Return true if we have seen any spaces. Skipped empty lines
+    // don't count. In other words, we are only interested in spaces
+    // that are on the same line as the following non-space character.
     //
     bool
     skip_spaces ();
@@ -101,23 +109,7 @@ namespace build
   private:
     fail_mark fail;
 
-    // Currently, the maximum mode nesting is 4: {normal, value, quoted,
-    // variable}.
-    //
-    struct mode_stack
-    {
-      static const size_t max_size = 4;
-
-      void push (lexer_mode m) {assert (n_ != max_size); d_[n_++] = m;}
-      void pop () {assert (n_ != 0); n_--;}
-      lexer_mode top () const {return d_[n_ - 1];}
-
-    private:
-      size_t n_ = 0;
-      lexer_mode d_[max_size];
-    };
-
-    mode_stack mode_;
+    std::stack<lexer_mode> mode_;
     char pair_separator_;
   };
 }
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 6da18eb..133375b 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -17,6 +17,7 @@ namespace build
     //
     switch (m)
     {
+    case lexer_mode::eval: return next_eval ();
     case lexer_mode::quoted: return next_quoted ();
     default: break;
     }
@@ -31,7 +32,8 @@ namespace build
 
     switch (c)
     {
-      // NOTE: remember to update name() if adding new punctuations.
+      // NOTE: remember to update name(), next_eval() if adding new
+      // special characters.
       //
     case '\n':
       {
@@ -42,26 +44,11 @@ namespace build
 
         return token (token_type::newline, sep, ln, cn);
       }
-    case '{':
-      {
-        return token (token_type::lcbrace, sep, ln, cn);
-      }
-    case '}':
-      {
-        return token (token_type::rcbrace, sep, ln, cn);
-      }
-    case '$':
-      {
-        return token (token_type::dollar, sep, ln, cn);
-      }
-    case '(':
-      {
-        return token (token_type::lparen, sep, ln, cn);
-      }
-    case ')':
-      {
-        return token (token_type::rparen, sep, ln, cn);
-      }
+    case '{': return token (token_type::lcbrace, sep, ln, cn);
+    case '}': return token (token_type::rcbrace, sep, ln, cn);
+    case '$': return token (token_type::dollar, sep, ln, cn);
+    case '(': return token (token_type::lparen, sep, ln, cn);
+    case ')': return token (token_type::rparen, sep, ln, cn);
     }
 
     // Handle pair separator.
@@ -74,14 +61,13 @@ namespace build
     //
     if (m != lexer_mode::value && m != lexer_mode::pairs)
     {
-      // NOTE: remember to update name() if adding new punctuations.
-      //
       switch (c)
       {
-      case ':':
-        {
-          return token (token_type::colon, sep, ln, cn);
-        }
+        // NOTE: remember to update name(), next_eval() if adding new
+        // special characters.
+        //
+      case ':': return token (token_type::colon, sep, ln, cn);
+      case '=': return token (token_type::equal, sep, ln, cn);
       case '+':
         {
           if (get () != '=')
@@ -89,10 +75,42 @@ namespace build
 
           return token (token_type::plus_equal, sep, ln, cn);
         }
-      case '=':
-        {
-          return token (token_type::equal, sep, ln, cn);
-        }
+      }
+    }
+
+    // Otherwise it is a name.
+    //
+    unget (c);
+    return name (sep);
+  }
+
+  token lexer::
+  next_eval ()
+  {
+    bool sep (skip_spaces ());
+    xchar c (get ());
+
+    if (eos (c))
+      fail (c) << "unterminated evaluation context";
+
+    uint64_t ln (c.line), cn (c.column);
+
+    // This mode is quite a bit like the value mode when it comes
+    // to special characters.
+    //
+    switch (c)
+    {
+      // NOTE: remember to update name() if adding new special characters.
+      //
+    case '\n': fail (c) << "newline in evaluation context";
+    case '{': return token (token_type::lcbrace, sep, ln, cn);
+    case '}': return token (token_type::rcbrace, sep, ln, cn);
+    case '$': return token (token_type::dollar, sep, ln, cn);
+    case '(': return token (token_type::lparen, sep, ln, cn);
+    case ')':
+      {
+        mode_.pop (); // Expire eval mode.
+        return token (token_type::rparen, sep, ln, cn);
       }
     }
 
@@ -105,7 +123,7 @@ namespace build
   token lexer::
   next_quoted ()
   {
-    xchar c (peek ());
+    xchar c (get ());
 
     if (eos (c))
       fail (c) << "unterminated double-quoted sequence";
@@ -114,9 +132,14 @@ namespace build
 
     switch (c)
     {
-    case '$': get (); return token (token_type::dollar, false, ln, cn);
-    default:          return name (false);
+    case '$': return token (token_type::dollar, false, ln, cn);
+    case '(': return token (token_type::lparen, false, ln, cn);
     }
+
+    // Otherwise it is a name.
+    //
+    unget (c);
+    return name (false);
   }
 
   token lexer::
@@ -140,10 +163,11 @@ namespace build
         break;
 
       // The following characters are not treated as special in the
-      // value/pairs and quoted modes.
+      // value/pairs, eval, and quoted modes.
       //
       if (m != lexer_mode::value &&
           m != lexer_mode::pairs &&
+          m != lexer_mode::eval  &&
           m != lexer_mode::quoted)
       {
         switch (c)
@@ -192,7 +216,6 @@ namespace build
         case '#':
         case '{':
         case '}':
-        case '(':
         case ')':
           {
             done = true;
@@ -236,6 +259,7 @@ namespace build
       switch (c)
       {
       case '$':
+      case '(':
         {
           done = true;
           break;
diff --git a/build/parser b/build/parser
index b5155be..0678a62 100644
--- a/build/parser
+++ b/build/parser
@@ -77,6 +77,9 @@ namespace build
     variable_name (names_type&&, const location&);
 
     names_type
+    eval (token&, token_type&);
+
+    names_type
     names (token& t, token_type& tt)
     {
       names_type ns;
diff --git a/build/parser.cxx b/build/parser.cxx
index 2daf1ce..d68bcd9 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -86,6 +86,7 @@ namespace build
       if (tt != type::name    &&
           tt != type::lcbrace && // Untyped name group: '{foo ...'
           tt != type::dollar  && // Variable expansion: '$foo ...'
+          tt != type::lparen  && // Eval context: '(foo) ...'
           tt != type::colon)     // Empty name: ': ...'
         break; // Something else. Let our caller handle that.
 
@@ -236,6 +237,7 @@ namespace build
         if (tt == type::name    ||
             tt == type::lcbrace ||
             tt == type::dollar  ||
+            tt == type::lparen  ||
             tt == type::newline ||
             tt == type::eos)
         {
@@ -796,6 +798,20 @@ namespace build
     }
   }
 
+  parser::names_type parser::
+  eval (token& t, token_type& tt)
+  {
+    lexer_->mode (lexer_mode::eval);
+    next (t, tt);
+
+    names_type ns (tt != type::rparen ? names (t, tt) : names_type ());
+
+    if (tt != type::rparen)
+      fail (t) << "expected ')' instead of " << t;
+
+    return ns;
+  }
+
   void parser::
   names (token& t,
          type& tt,
@@ -810,10 +826,11 @@ namespace build
     // a={b c d{e f} {}}.
     //
 
-    // Buffer that is used to collect the complete name in case of an
-    // unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The
-    // idea is to concatenate all the individual parts in this buffer
-    // and then re-inject it into the loop as a single token.
+    // Buffer that is used to collect the complete name in case of
+    // an unseparated variable expansion or eval context, e.g.,
+    // 'foo$bar$(baz)fox'. The idea is to concatenate all the
+    // individual parts in this buffer and then re-inject it into
+    // the loop as a single token.
     //
     string concat;
 
@@ -827,10 +844,12 @@ namespace build
     {
       // If the accumulating buffer is not empty, then we have two options:
       // continue accumulating or inject. We inject if the next token is
-      // not a name or var expansion or if it is separated.
+      // not a name, var expansion, or eval context or if it is separated.
       //
       if (!concat.empty () &&
-          ((tt != type::name && tt != type::dollar) || peeked ().separated ()))
+          ((tt != type::name   &&
+            tt != type::dollar &&
+            tt != type::lparen) || peeked ().separated ()))
       {
         tt = type::name;
         t = token (move (concat), true, t.line (), t.column ());
@@ -849,11 +868,12 @@ namespace build
         // Should we accumulate? If the buffer is not empty, then
         // we continue accumulating (the case where we are separated
         // should have been handled by the injection code above). If
-        // the next token is a var expansion and it is not separated,
-        // then we need to start accumulating.
+        // the next token is a var expansion or eval context and it
+        // is not separated, then we need to start accumulating.
         //
-        if (!concat.empty () ||                              // Continue.
-            (tt == type::dollar && !peeked ().separated ())) // Start.
+        if (!concat.empty () ||                                // Continue.
+            ((tt == type::dollar ||
+              tt == type::lparen) && !peeked ().separated ())) // Start.
         {
           concat += name;
           continue;
@@ -1006,60 +1026,97 @@ namespace build
         continue;
       }
 
-      // Variable expansion.
+      // Variable expansion/function call or eval context.
       //
-      if (tt == type::dollar)
+      if (tt == type::dollar || tt == type::lparen)
       {
-        // Switch to the variable name mode. We want to use this
-        // mode for $foo but not for $(foo). Since we don't know
-        // whether the next token is a paren or a name, we turn
-        // it on and turn it off if what we get next is a paren
-        // so that the following name is scanned in the normal
-        // mode.
+        // These two cases are pretty similar in that in both we
+        // pretty quickly end up with a list of names that we need
+        // to splice into the result.
         //
-        lexer_->mode (lexer_mode::variable);
+        names_type lv_eval;
+        const names_type* plv;
 
-        next (t, tt);
+        location loc;
+        const char* what; // Variable or evaluation context.
 
-        bool paren (tt == type::lparen);
-        if (paren)
+        if (tt == type::dollar)
         {
-          lexer_->expire_mode ();
+          // Switch to the variable name mode. We want to use this
+          // mode for $foo but not for $(foo). Since we don't know
+          // whether the next token is a paren or a name, we turn
+          // it on and switch to the eval mode if what we get next
+          // is a paren.
+          //
+          lexer_->mode (lexer_mode::variable);
           next (t, tt);
-        }
+          loc = get_location (t, &path_);
 
-        if (tt != type::name)
-          fail (t) << "variable name expected instead of " << t;
+          string n;
+          if (tt == type::name)
+            n = t.name ();
+          else if (tt == type::lparen)
+          {
+            lexer_->expire_mode ();
+            names_type ns (eval (t, tt));
 
-        string n;
-        if (t.name ().front () == '.') // Fully qualified name.
-          n.assign (t.name (), 1, string::npos);
-        else
-          //@@ TODO: append namespace if any.
-          n = t.name ();
+            // Make sure the result of evaluation is a single, simple name.
+            //
+            if (ns.size () != 1 || !ns.front ().simple ())
+              fail (loc) << "variable name expected instead of '" << ns << "'";
 
-        const auto& var (variable_pool.find (move (n)));
-        auto l (target_ != nullptr ? (*target_)[var] : (*scope_)[var]);
+            n = move (ns.front ().value);
+          }
+          else
+            fail (t) << "variable name expected instead of " << t;
 
-        // Undefined/NULL namespace variables are not allowed.
-        //
-        if (!l && var.name.find ('.') != string::npos)
-          fail (t) << "undefined/null namespace variable " << var.name;
+          if (n.empty ())
+            fail (loc) << "empty variable name";
 
-        if (paren)
-        {
-          next (t, tt);
+          // Process variable name.
+          //
+          if (n.front () == '.') // Fully qualified name.
+            n.erase (0, 1);
+          else
+          {
+            //@@ TODO: append namespace if any.
+          }
+
+          // Lookup.
+          //
+          const auto& var (variable_pool.find (move (n)));
+          auto l (target_ != nullptr ? (*target_)[var] : (*scope_)[var]);
+
+          // Undefined/NULL namespace variables are not allowed.
+          //
+          if (!l && var.name.find ('.') != string::npos)
+            fail (loc) << "undefined/null namespace variable " << var.name;
+
+          tt = peek ();
+
+          if (!l || l->empty ())
+            continue;
 
-          if (tt != type::rparen)
-            fail (t) << "expected ) instead of " << t;
+          plv = &l->data_;
+          what = "variable expansion";
         }
+        else
+        {
+          loc = get_location (t, &path_);
+          lv_eval = eval (t, tt);
 
-        tt = peek ();
+          tt = peek ();
 
-        if (!l || l->empty ())
-          continue;
+          if (lv_eval.empty ())
+            continue;
 
-        const names_type& lv (l->data_);
+          plv = &lv_eval;
+          what = "context evaluation";
+        }
+
+        // @@ Could move if (lv == &lv_eval).
+        //
+        const names_type& lv (*plv);
 
         // Should we accumulate? If the buffer is not empty, then
         // we continue accumulating (the case where we are separated
@@ -1068,31 +1125,29 @@ namespace build
         // separated, then we need to start accumulating.
         //
         if (!concat.empty () ||                       // Continue.
-            ((tt == type::name || tt == type::dollar) // Start.
-             && !peeked ().separated ()))
+            ((tt == type::name   ||                   // Start.
+              tt == type::dollar ||
+              tt == type::lparen) && !peeked ().separated ()))
         {
           // This should be a simple value or a simple directory. The
           // token still points to the name (or closing paren).
           //
           if (lv.size () > 1)
-            fail (t) << "concatenating expansion of " << var.name
-                     << " contains multiple values";
+            fail (loc) << "concatenating " << what << " contains multiple "
+                       << "values";
 
           const name& n (lv[0]);
 
           if (n.qualified ())
-            fail (t) << "concatenating expansion of " << var.name
-                     << " contains project name";
+            fail (loc) << "concatenating " << what << " contains project name";
 
           if (n.typed ())
-            fail (t) << "concatenating expansion of " << var.name
-                     << " contains type";
+            fail (loc) << "concatenating " << what << " contains type";
 
           if (!n.dir.empty ())
           {
             if (!n.value.empty ())
-              fail (t) << "concatenating expansion of " << var.name
-                       << " contains directory";
+              fail (loc) << "concatenating " << what << " contains directory";
 
             concat += n.dir.string ();
           }
@@ -1115,8 +1170,8 @@ namespace build
               if (pp == nullptr)
                 pp1 = n.proj;
               else
-                fail (t) << "nested project name " << *n.proj << " in "
-                         << "variable expansion";
+                fail (loc) << "nested project name " << *n.proj << " in "
+                           << what;
             }
 
             dir_path d1;
@@ -1125,8 +1180,8 @@ namespace build
               if (dp != nullptr)
               {
                 if (n.dir.absolute ())
-                  fail (t) << "nested absolute directory " << n.dir
-                           << " in variable expansion";
+                  fail (loc) << "nested absolute directory " << n.dir
+                             << " in " << what;
 
                 d1 = *dp / n.dir;
                 dp1 = &d1;
@@ -1140,8 +1195,7 @@ namespace build
               if (tp == nullptr)
                 tp1 = &n.type;
               else
-                fail (t) << "nested type name " << n.type << " in variable "
-                         << "expansion";
+                fail (loc) << "nested type name " << n.type << " in " << what;
             }
 
             // If we are a second half of a pair.
@@ -1151,7 +1205,7 @@ namespace build
               // Check that there are no nested pairs.
               //
               if (n.pair != '\0')
-                fail (t) << "nested pair in variable expansion";
+                fail (loc) << "nested pair in " << what;
 
               // And add another first half unless this is the first instance.
               //
@@ -1304,7 +1358,8 @@ namespace build
 
     while (tt != tt_end)
     {
-      // We always start with one or more names.
+      // We always start with one or more names. No eval context
+      // support for the time being.
       //
       if (tt != type::name    &&
           tt != type::lcbrace &&      // Untyped name group: '{foo ...'
diff --git a/tests/eval/buildfile b/tests/eval/buildfile
new file mode 100644
index 0000000..c658d3b
--- /dev/null
+++ b/tests/eval/buildfile
@@ -0,0 +1,13 @@
+(./):
+
+# Invalid.
+#
+#(foo
+#(foo #comment
+
+print ()
+print ((foo)(bar))
+print ((foo)   (bar))
+
+print (foo\
+bar)
diff --git a/tests/eval/test.out b/tests/eval/test.out
new file mode 100644
index 0000000..5885c7d
--- /dev/null
+++ b/tests/eval/test.out
@@ -0,0 +1,4 @@
+
+foobar
+foo bar
+foobar
diff --git a/tests/eval/test.sh b/tests/eval/test.sh
new file mode 100755
index 0000000..b898b3c
--- /dev/null
+++ b/tests/eval/test.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+valgrind -q b -q | diff -u test.out -
diff --git a/tests/lexer/driver.cxx b/tests/lexer/driver.cxx
index a3819f5..ca27d39 100644
--- a/tests/lexer/driver.cxx
+++ b/tests/lexer/driver.cxx
@@ -89,7 +89,7 @@ main ()
   assert (lex ("\"foo \"\"bar\"") == tokens ({"foo bar", ""}));
   assert (lex ("foo\" \"bar") == tokens ({"foo bar", ""}));
   assert (lex ("\"foo\nbar\"") == tokens ({"foo\nbar", ""}));
-  assert (lex ("\"#:{}()=+\n\"") == tokens ({"#:{}()=+\n", ""}));
+  assert (lex ("\"#:{})=+\n\"") == tokens ({"#:{})=+\n", ""}));
   assert (lex ("\"'\"") == tokens ({"'", ""}));
   assert (lex ("\"\\\"") == tokens ({"\\", ""}));
 
diff --git a/tests/quote/buildfile b/tests/quote/buildfile
index 6dd22b4..19c2bfc 100644
--- a/tests/quote/buildfile
+++ b/tests/quote/buildfile
@@ -19,4 +19,14 @@ print $foo'bar'
 print $foo"$bar"
 print "$foo"bar
 
+# Quoting and evaluation context.
+#
+print ("x{foo bar}")
+#print "(x{foo bar})" # multiple values in concatenating context expansion
+print "({foo})"
+print "('foo bar')"
+print "("foo bar")"
+print "("$foo bar")"
+print "("$foo ($bar)")"
+
 ./:
diff --git a/tests/quote/test.out b/tests/quote/test.out
index 216b1c8..f5d7a71 100644
--- a/tests/quote/test.out
+++ b/tests/quote/test.out
@@ -12,3 +12,9 @@ fo o bar
 fo obar
 fo o bar 
 fo obar
+x{foo bar}
+foo
+foo bar
+foo bar
+fo o bar
+fo o  bar 
diff --git a/tests/variable/expansion/buildfile b/tests/variable/expansion/buildfile
new file mode 100644
index 0000000..3f28372
--- /dev/null
+++ b/tests/variable/expansion/buildfile
@@ -0,0 +1,26 @@
+foo = FOO
+
+print $foo
+print $(foo)
+
+# Invalid.
+#
+#print $
+#print $()
+#print $(foo bar)
+#print $(foo{bar})
+
+# Indirect.
+#
+FOO = foo
+print $($FOO)
+print $($(FOO))
+print $($($FOO))
+
+# Quoted name.
+#
+"b a r" = BAR
+print $("b a r")
+#print $"b a r"
+
+./:
diff --git a/tests/variable/expansion/test.out b/tests/variable/expansion/test.out
new file mode 100644
index 0000000..5056f04
--- /dev/null
+++ b/tests/variable/expansion/test.out
@@ -0,0 +1,6 @@
+FOO
+FOO
+FOO
+FOO
+foo
+BAR
diff --git a/tests/variable/expansion/test.sh b/tests/variable/expansion/test.sh
new file mode 100755
index 0000000..b898b3c
--- /dev/null
+++ b/tests/variable/expansion/test.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+valgrind -q b -q | diff -u test.out -
author	Boris Kolpackov <boris@codesynthesis.com>	2015-09-09 14:10:24 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2015-09-09 14:10:24 +0200
commit	7a2f5753a12a68e87f8556f6e833710f147533b2 (patch)
tree	850bfc8e3b0a40671db5656e695d640488bdda0a
parent	e3b6dc455ab5c98606e38983bd19426ae346f469 (diff)