From 12268f7741ba73c75a73fafb6063f1393e485aae Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Fri, 27 Sep 2019 13:55:07 +0200
Subject: Add support for custom match/extract functions in switch expression

---
 libbuild2/lexer.cxx     |  34 +++++++++++--
 libbuild2/lexer.hxx     |   6 ++-
 libbuild2/parser.cxx    | 128 +++++++++++++++++++++++++++++++-----------------
 libbuild2/parser.hxx    |  21 +++++---
 tests/switch/testscript | 113 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 244 insertions(+), 58 deletions(-)

diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index ac62996..17b0c7d 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -55,6 +55,12 @@ namespace build2
         s2 = "            ";
         break;
       }
+    case lexer_mode::switch_expressions:
+      {
+        s1 = " $(){}[],:#\t\n";
+        s2 = "             ";
+        break;
+      }
     case lexer_mode::case_patterns:
       {
         s1 = " $(){}[],|#\t\n";
@@ -119,6 +125,7 @@ namespace build2
     case lexer_mode::normal:
     case lexer_mode::value:
     case lexer_mode::values:
+    case lexer_mode::switch_expressions:
     case lexer_mode::case_patterns:
     case lexer_mode::attribute:
     case lexer_mode::variable:
@@ -157,8 +164,9 @@ namespace build2
       {
         // Expire value/values modes at the end of the line.
         //
-        if (m == lexer_mode::value  ||
-            m == lexer_mode::values ||
+        if (m == lexer_mode::value              ||
+            m == lexer_mode::values             ||
+            m == lexer_mode::switch_expressions ||
             m == lexer_mode::case_patterns)
           state_.pop ();
 
@@ -190,6 +198,22 @@ namespace build2
       }
     }
 
+    // The following characters are special in the normal, variable, and
+    // switch_expressions modes.
+    //
+    if (m == lexer_mode::normal   ||
+        m == lexer_mode::variable ||
+        m == lexer_mode::switch_expressions)
+    {
+      switch (c)
+      {
+        // NOTE: remember to update mode(), next_eval() if adding new special
+        // characters.
+        //
+      case ':': return make_token (type::colon);
+      }
+    }
+
     // The following characters are special in the normal and variable modes.
     //
     if (m == lexer_mode::normal || m == lexer_mode::variable)
@@ -199,7 +223,6 @@ namespace build2
         // NOTE: remember to update mode(), next_eval() if adding new special
         // characters.
         //
-      case ':': return make_token (type::colon);
       case '=':
         {
           if (peek () == '+')
@@ -236,8 +259,9 @@ namespace build2
 
     // The following characters are special in the values and buildspec mode.
     //
-    if (m == lexer_mode::buildspec ||
-        m == lexer_mode::values    ||
+    if (m == lexer_mode::buildspec          ||
+        m == lexer_mode::values             ||
+        m == lexer_mode::switch_expressions ||
         m == lexer_mode::case_patterns)
     {
       // NOTE: remember to update mode() if adding new special characters.
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index a629ba7..72ec050 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -30,7 +30,10 @@ namespace build2
   // groups in attributes). The eval mode is used in the evaluation context.
   //
   // A number of modes are "derived" from the value/values mode by recognizing
-  // a few extra characters: case_patterns (values plus '|').
+  // a few extra characters:
+  //
+  //   switch_expressions  values plus ':'
+  //   case_patterns       values plus '|'
   //
   // Note that the normal, value/values and derived, as well as eval modes
   // split words separated by the pair character (to disable pairs one can
@@ -61,6 +64,7 @@ namespace build2
       value,
       values,
       case_patterns,
+      switch_expressions,
       attribute,
       eval,
       single_quoted,
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 33b6d11..d457c68 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -879,10 +879,7 @@ namespace build2
       attributes_push (t, tt);
 
       location nloc (get_location (t));
-      names ns (parse_names (t, tt,
-                             pattern_mode::ignore,
-                             false /* chunk */,
-                             "variable name"));
+      names ns (parse_names (t, tt, pattern_mode::ignore, "variable name"));
 
       if (tt != type::assign  &&
           tt != type::prepend &&
@@ -1367,11 +1364,7 @@ namespace build2
     next (t, tt);
     const location l (get_location (t));
     names ns (tt != type::newline && tt != type::eos
-              ? parse_names (t, tt,
-                             pattern_mode::expand,
-                             false,
-                             "path",
-                             nullptr)
+              ? parse_names (t, tt, pattern_mode::expand, "path", nullptr)
               : names ());
 
     for (name& n: ns)
@@ -1425,11 +1418,7 @@ namespace build2
     next (t, tt);
     const location l (get_location (t));
     names ns (tt != type::newline && tt != type::eos
-              ? parse_names (t, tt,
-                             pattern_mode::expand,
-                             false,
-                             "path",
-                             nullptr)
+              ? parse_names (t, tt, pattern_mode::expand, "path", nullptr)
               : names ());
 
     for (name& n: ns)
@@ -1556,13 +1545,10 @@ namespace build2
     strings args;
     try
     {
-      args = convert<strings> (tt != type::newline && tt != type::eos
-                               ? parse_names (t, tt,
-                                              pattern_mode::ignore,
-                                              false,
-                                              "argument",
-                                              nullptr)
-                               : names ());
+      args = convert<strings> (
+        tt != type::newline && tt != type::eos
+        ? parse_names (t, tt, pattern_mode::ignore, "argument", nullptr)
+        : names ());
     }
     catch (const invalid_argument& e)
     {
@@ -1852,11 +1838,7 @@ namespace build2
     next (t, tt);
     const location l (get_location (t));
     names ns (tt != type::newline && tt != type::eos
-              ? parse_names (t, tt,
-                             pattern_mode::ignore,
-                             false,
-                             "module",
-                             nullptr)
+              ? parse_names (t, tt, pattern_mode::ignore, "module", nullptr)
               : names ());
 
     for (auto i (ns.begin ()); i != ns.end (); ++i)
@@ -2077,22 +2059,22 @@ namespace build2
   void parser::
   parse_switch (token& t, type& tt)
   {
-    // switch <value>[, <value>....]
+    // switch <value> [: <func> [<arg>]] [, <value>...]
     // {
-    //   case <pattern>[, <pattern>...]
+    //   case <pattern> [, <pattern>...]
     //     <line>
     //
-    //   case <pattern>[, <pattern>...]
+    //   case <pattern> [, <pattern>...]
     //   {
     //     <block>
     //   }
     //
-    //   case <pattern>[, <pattern>...]
+    //   case <pattern> [, <pattern>...]
     //   ...
-    //   case <pattern>[, <pattern>...]
+    //   case <pattern> [, <pattern>...]
     //     ...
     //
-    //   case <pattern>[|<pattern>]
+    //   case <pattern> [| <pattern>...]
     //
     //   default
     //     ...
@@ -2103,23 +2085,49 @@ namespace build2
     // Parse and evaluate the values we are matching. Similar to if-else, we
     // expand patterns.
     //
-    values vs;
+    struct expr
+    {
+      build2::value    value;
+      optional<string> func;
+      names            arg;
+    };
+    small_vector<expr, 1> exprs;
+
+    mode (lexer_mode::switch_expressions); // Recognize `:` and `,`.
+
+    do
     {
-      mode (lexer_mode::values); // Recognize `,`.
+      next (t, tt);
+      if (tt == type::newline || tt == type::eos)
+        fail (t) << "expected switch expression instead of " << t;
+
+      expr e;
+
+      e.value =
+        parse_value (t, tt, pattern_mode::expand, "expression", nullptr);
 
-      do
+      if (tt == type::colon)
       {
         next (t, tt);
-        if (tt == type::newline || tt == type::eos)
-          fail (t) << "expected switch expression instead of " << t;
+        const location l (get_location (t));
+        names ns (parse_names (t, tt, pattern_mode::ignore, "function name"));
+
+        if (ns.empty () || ns[0].empty ())
+          fail (l) << "function name expected after ':'";
+
+        if (!ns[0].simple ())
+          fail (l) << "function name expected instead of " << ns[0];
 
-        vs.push_back (
-          parse_value (t, tt, pattern_mode::expand, "expression", nullptr));
+        e.func = move (ns[0].value);
+        ns.erase (ns.begin ());
+        e.arg = move (ns);
       }
-      while (tt == type::comma);
 
-      next_after_newline (t, tt, "switch expression");
+      exprs.push_back (move (e));
     }
+    while (tt == type::comma);
+
+    next_after_newline (t, tt, "switch expression");
 
     // Next we should always have a block.
     //
@@ -2204,7 +2212,7 @@ namespace build2
             if (tt == type::newline || tt == type::eos)
               fail (t) << "expected case pattern instead of " << t;
 
-            if (i == vs.size ())
+            if (i == exprs.size ())
               fail (t) << "more patterns than switch expressions";
 
             // Handle pattern alternatives (<pattern>|<pattern>).
@@ -2213,7 +2221,37 @@ namespace build2
             {
               const location l (get_location (t));
               value p (parse_pattern (t, tt));
-              take = compare_values (type::equal, vs[i], p, l);
+              expr& e (exprs[i]); // Note: value might be modified (typified).
+
+              if (e.func)
+              {
+                // Call <func>(<value>, <pattern> [, <arg>]).
+                //
+                small_vector<value, 3> args {value (e.value), move (p)};
+
+                if (!e.arg.empty ())
+                  args.push_back (value (e.arg));
+
+                value r (ctx.functions.call (scope_, *e.func, args, l));
+
+                // We support two types of functions, matchers and extractors:
+                // a matcher returns a statically-typed bool value while an
+                // extractor returns NULL if there is no match and the
+                // extracted value otherwise.
+                //
+                if (r.type == &value_traits<bool>::value_type)
+                {
+                  if (r.null)
+                    fail (l) << "match function " << *e.func << " returned "
+                             << "null";
+
+                  take = r.as<bool> ();
+                }
+                else
+                  take = !r.null;
+              }
+              else
+                take = compare_values (type::equal, e.value, p, l);
 
               if (tt != type::bit_or)
                 break;
@@ -2515,7 +2553,6 @@ namespace build2
     names ns (tt != type::newline && tt != type::eos
               ? parse_names (t, tt,
                              pattern_mode::ignore,
-                             false,
                              "description",
                              nullptr)
               : names ());
@@ -3391,8 +3428,7 @@ namespace build2
     if (has)
     {
       names ns (
-        parse_names (
-          t, tt, pattern_mode::ignore, false, "attribute", nullptr));
+        parse_names (t, tt, pattern_mode::ignore, "attribute", nullptr));
 
       if (!pre_parse_)
       {
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index e199a9a..581ad1b 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -254,11 +254,6 @@ namespace build2
             bool enter,
             bool default_target);
 
-    // If chunk is true, then parse the smallest but complete, name-wise,
-    // chunk of input. Note that in this case you may still end up with
-    // multiple names, for example, {foo bar} or $foo. In the pre-parse mode
-    // always return empty list of names.
-    //
     // The what argument is used in diagnostics (e.g., "expected <what>
     // instead of ...".
     //
@@ -273,7 +268,21 @@ namespace build2
     names
     parse_names (token& t, token_type& tt,
                  pattern_mode pmode,
-                 bool chunk = false,
+                 const char* what = "name",
+                 const string* separators = &name_separators)
+    {
+      return parse_names (t, tt, pmode, false /* chunk */, what, separators);
+    }
+
+    // If chunk is true, then parse the smallest but complete, name-wise,
+    // chunk of input. Note that in this case you may still end up with
+    // multiple names, for example, {foo bar} or $foo. In the pre-parse mode
+    // always return an empty list of names.
+    //
+    names
+    parse_names (token& t, token_type& tt,
+                 pattern_mode pmode,
+                 bool chunk,
                  const char* what = "name",
                  const string* separators = &name_separators)
     {
diff --git a/tests/switch/testscript b/tests/switch/testscript
index 1399df0..c8adaf0 100644
--- a/tests/switch/testscript
+++ b/tests/switch/testscript
@@ -68,6 +68,83 @@ EOI
 d
 EOO
 
+: basics-matcher
+:
+$* <<EOI >>EOO
+for i: 123 abc
+{
+  switch $i: regex.match
+  {
+    case '[0-9]+'
+      print n
+    case '[a-z]+'
+      print a
+  }
+}
+EOI
+n
+a
+EOO
+
+: basics-matcher-arg
+:
+$* <<EOI >>EOO
+for i: abc ABC aBC
+{
+  switch $i: regex.match icase
+  {
+    case '[a-z]+'
+      print a
+  }
+}
+EOI
+a
+a
+a
+EOO
+
+: basics-matcher-multiple
+:
+$* <<EOI >>EOO
+for i: 123 abc
+{
+  switch $i: regex.match, $i: regex.match
+  {
+    case '[0-9]+', '[0-9]+'
+      print nn
+    case '[0-9]+', '[a-z]+'
+      print na
+    case '[a-z]+', '[0-9]+'
+      print an
+    case '[a-z]+', '[a-z]+'
+      print aa
+  }
+}
+EOI
+nn
+aa
+EOO
+
+#\
+: basics-extractor
+:
+$* <<EOI >>EOO
+for i: 123 abc
+{
+  switch $i: regex.extract
+  {
+    case '([0-9]+)'
+      print n
+    default
+      print d
+  }
+}
+EOI
+n
+d
+EOO
+#\
+
 : empty
 :
 $* <<EOI
@@ -199,3 +276,39 @@ switch 1
 EOI
 <stdin>:3:11: error: more patterns than switch expressions
 EOE
+
+: matcher-missing
+:
+$* <<EOI 2>>EOE != 0
+switch 1:
+{
+  case 1
+    x = 1
+}
+EOI
+<stdin>:1:10: error: expected function name instead of <newline>
+EOE
+
+: matcher-bad-name
+:
+$* <<EOI 2>>EOE != 0
+switch 1: file{x}
+{
+  case 1
+    x = 1
+}
+EOI
+<stdin>:1:11: error: function name expected instead of file{x}
+EOE
+
+: matcher-unknown
+:
+$* <<EOI 2>>EOE != 0
+switch 1: no_such_matcher
+{
+  case 1
+    x = 1
+}
+EOI
+<stdin>:3:8: error: unmatched call to no_such_matcher(<untyped>, <untyped>)
+EOE
-- 
cgit v1.1