Initial support for parsing and dumping recipes

author: Boris Kolpackov <boris@codesynthesis.com> 2020-05-08 06:34:13 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2020-05-27 08:35:29 +0200
commit: a5bf818ebb55e4e27eb0f067664fd4db70284267 (patch)
tree: 2c0699ba4827cee34161db3da7ec8f94a6175ea9 /libbuild2
parent: fce9782a330e8f701a8df0b5200e5b78e97ec4b5 (diff)
6 files changed, 413 insertions, 140 deletions
diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx
index 8ee68b7..919b921 100644
--- a/libbuild2/dump.cxx
+++ b/libbuild2/dump.cxx
@@ -213,7 +213,90 @@ namespace build2
 
     os << ind << t << ':';
 
-    // First print target/rule-specific variables, if any.
+    // First check if this is the simple case where we can print everything
+    // as a single declaration.
+    //
+    const prerequisites& ps (t.prerequisites ());
+    bool simple (true);
+    for (const prerequisite& p: ps)
+    {
+      if (!p.vars.empty ()) // Has prerequisite-specific vars.
+      {
+        simple = false;
+        break;
+      }
+    }
+
+    // If the target has been matched to a rule, we also print resolved
+    // prerequisite targets.
+    //
+    // Note: running serial and task_count is 0 before any operation has
+    // started.
+    //
+    const prerequisite_targets* pts (nullptr);
+    {
+      action inner; // @@ Only for the inner part of the action currently.
+
+      if (size_t c = t[inner].task_count.load (memory_order_relaxed))
+      {
+        if (c == t.ctx.count_applied () || c == t.ctx.count_executed ())
+        {
+          pts = &t.prerequisite_targets[inner];
+
+          bool f (false);
+          for (const target* pt: *pts)
+          {
+            if (pt != nullptr)
+            {
+              f = true;
+              break;
+            }
+          }
+
+          if (!f)
+            pts = nullptr;
+        }
+      }
+    }
+
+    auto print_pts = [&os, &ps, pts] ()
+    {
+      for (const target* pt: *pts)
+      {
+        if (pt != nullptr)
+          os << ' ' << *pt;
+      }
+
+      // Only omit '|' if we have no prerequisites nor targets.
+      //
+      if (!ps.empty ())
+      {
+        os << " |";
+        return true;
+      }
+
+      return false;
+    };
+
+    if (simple)
+    {
+      if (pts != nullptr)
+        print_pts ();
+
+      for (const prerequisite& p: ps)
+      {
+        // Print it as a target if one has been cached.
+        //
+        if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
+          os << ' ' << *t;
+        else
+          os << ' ' << p;
+      }
+    }
+
+    bool used (false); // Target header has been used.
+
+    // Print target/rule-specific variables, if any.
     //
     {
       bool tv (!t.vars.empty ());
@@ -258,87 +341,100 @@ namespace build2
         if (rel)
           stream_verb (os, nsv);
 
-        os << endl
-           << ind << t << ':';
+        used = true;
       }
     }
 
-    bool used (false); // Target header has been used to display prerequisites.
-
-    // If the target has been matched to a rule, first print resolved
-    // prerequisite targets.
-    //
-    // Note: running serial and task_count is 0 before any operation has
-    // started.
+    // Then ad hoc recipes, if any.
     //
-    action inner; // @@ Only for the inner part of the action currently.
-
-    if (size_t c = t[inner].task_count.load (memory_order_relaxed))
+    if (!t.adhoc_recipes.empty ())
     {
-      if (c == t.ctx.count_applied () || c == t.ctx.count_executed ())
+      for (const adhoc_recipe& r: t.adhoc_recipes)
       {
-        bool f (false);
-        for (const target* pt: t.prerequisite_targets[inner])
-        {
-          if (pt == nullptr) // Skipped.
-            continue;
-
-          os << ' ' << *pt;
-          f = true;
-        }
+        // @@ TODO: indentation in multi-line recipes is off (would need to
+        //          insert indentation after every newline).
+        //
+        os << endl;
 
-        // Only omit '|' if we have no prerequisites nor targets.
+        // Do we need the header?
         //
-        if (f || !t.prerequisites ().empty ())
+        if (r.diag)
         {
-          os << " |";
-          used = true;
+          os << ind << '%';
+
+          if (r.diag)
+          {
+            os << " [";
+            os << "diag="; to_stream (os, name (*r.diag), true /*quote*/, '@');
+            os << ']';
+          }
+
+          os << endl;
         }
+
+        os << ind << string (r.braces, '{') << endl
+           << ind << r.recipe
+           << ind << string (r.braces, '}');
       }
+
+      used = true;
     }
 
-    // Print prerequisites. Those that have prerequisite-specific variables
-    // have to be printed as a separate dependency.
-    //
-    const prerequisites& ps (t.prerequisites ());
-    for (auto i (ps.begin ()), e (ps.end ()); i != e; )
+    if (!simple)
     {
-      const prerequisite& p (*i++);
-      bool ps (!p.vars.empty ()); // Has prerequisite-specific vars.
-
-      if (ps && used) // If it has been used, get a new header.
+      if (used)
+      {
         os << endl
            << ind << t << ':';
 
-      // Print it as a target if one has been cached.
-      //
-      if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
-        os << ' ' << *t;
-      else
-        os << ' ' << p;
+        used = false;
+      }
 
-      if (ps)
+      if (pts != nullptr)
+        used = print_pts () || used;
+
+      // Print prerequisites. Those that have prerequisite-specific variables
+      // have to be printed as a separate dependency.
+      //
+      for (auto i (ps.begin ()), e (ps.end ()); i != e; )
       {
-        if (rel)
-          stream_verb (os, osv); // We want variable values in full.
+        const prerequisite& p (*i++);
+        bool ps (!p.vars.empty ()); // Has prerequisite-specific vars.
 
-        os << ':' << endl
-           << ind << '{';
-        ind += "  ";
-        dump_variables (os, ind, p.vars, s, variable_kind::prerequisite);
-        ind.resize (ind.size () - 2);
-        os << endl
-           << ind << '}';
+        if (ps && used) // If it has been used, get a new header.
+          os << endl
+             << ind << t << ':';
 
-        if (rel)
-          stream_verb (os, nsv);
+        // Print it as a target if one has been cached.
+        //
+        if (const target* t = p.target.load (memory_order_relaxed)) // Serial.
+          os << ' ' << *t;
+        else
+          os << ' ' << p;
 
-        if (i != e) // If we have another, get a new header.
+        if (ps)
+        {
+          if (rel)
+            stream_verb (os, osv); // We want variable values in full.
+
+          os << ':' << endl
+             << ind << '{';
+          ind += "  ";
+          dump_variables (os, ind, p.vars, s, variable_kind::prerequisite);
+          ind.resize (ind.size () - 2);
           os << endl
-             << ind << t << ':';
-      }
+             << ind << '}';
+
+          if (rel)
+            stream_verb (os, nsv);
 
-      used = !ps;
+          if (i != e) // If we have another, get a new header.
+            os << endl
+               << ind << t << ':';
+        }
+
+        used = !ps;
+      }
     }
 
     if (rel)
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 000670b..aa4ada9 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -28,6 +28,21 @@ namespace build2
 {
   using type = token_type;
 
+  // Print the attribute as name[=value] with quoting enabled for the value.
+  //
+  ostream&
+  operator<< (ostream& o, const parser::attribute& a)
+  {
+    o << a.name;
+
+    if (a.value.null) // No value, name only.
+      return o;
+
+    names ns;
+    to_stream (o << '=', reverse (a.value, ns), true /* quote */, '@');
+    return o;
+  }
+
   class parser::enter_scope
   {
   public:
@@ -506,7 +521,8 @@ namespace build2
           // exactly that would mean is unclear. One potentially useful
           // semantics would be the ability to specify attributes for ad hoc
           // members though the fact that the primary target is listed first
-          // would make it rather unintuitive.
+          // would make it rather unintuitive. Maybe attributes that change
+          // the group semantics itself?
           //
           next_with_attributes (t, tt);
 
@@ -598,9 +614,11 @@ namespace build2
         //
         // void (token& t, type& tt, const target_type* type, string pat)
         //
-        auto for_each = [this, &trace,
-                         &t, &tt,
-                         &ns, &nloc, &ans] (auto&& f)
+        // Note that the target and its ad hoc members are inserted implied
+        // but this flag can be cleared and default_target logic applied if
+        // appropriate.
+        //
+        auto for_each = [this, &trace, &t, &tt, &ns, &nloc, &ans] (auto&& f)
         {
           // Note: watch out for an out-qualified single target (two names).
           //
@@ -679,30 +697,64 @@ namespace build2
 
         if (tt == type::newline)
         {
-          // See if this is a target block.
+          // See if this is a target-specific variable and/or recipe block.
           //
           // Note that we cannot just let parse_dependency() handle this case
           // because we can have (a mixture of) target type/patterns.
           //
-          if (next (t, tt) == type::lcbrace && peek () == type::newline)
+          // @@ This might change once we support ad hoc rules (where we may
+          // have prerequisites for a pattern; but perhaps this should be
+          // handled separately since the parse_dependency() is already too
+          // complex and there will be no chains in this case).
+          //
+          next (t, tt);
+          if (tt == type::percent       ||
+              tt == type::multi_lcbrace ||
+              (tt == type::lcbrace && peek () == type::newline))
           {
-            next (t, tt); // Newline.
+            token st (t); // Save start token.
 
-            // Parse the block for each target.
+            // Parse the block(s) for each target.
+            //
+            // Note that because we have to peek past the closing brace(s) to
+            // see whether there is a/another recipe block, we have to make
+            // that token part of the replay (we cannot peek past the replay
+            // sequence).
             //
-            for_each ([this] (token& t, type& tt,
-                              const target_type* type, string pat)
-                      {
-                        next (t, tt); // First token inside the block.
+            auto parse = [this, &st] (token& t, type& tt,
+                                      const target_type* type, string pat)
+            {
+              token rt; // Recipe start token.
 
-                        parse_variable_block (t, tt, type, move (pat));
+              // The variable block, if any, should be first.
+              //
+              if (st.type == type::lcbrace)
+              {
+                next (t, tt); // Newline.
+                next (t, tt); // First token inside the variable block.
+                parse_variable_block (t, tt, type, move (pat));
+
+                if (tt != type::rcbrace)
+                  fail (t) << "expected '}' instead of " << t;
+
+                next (t, tt);                    // Newline.
+                next_after_newline (t, tt, '}'); // Should be on its own line.
 
-                        if (tt != type::rcbrace)
-                          fail (t) << "expected '}' instead of " << t;
-                      });
+                if (tt != type::percent && tt != type::multi_lcbrace)
+                  return;
+
+                rt = t;
+              }
+              else
+                rt = st;
 
-            next (t, tt);                    // Presumably newline after '}'.
-            next_after_newline (t, tt, '}'); // Should be on its own line.
+              if (type != nullptr)
+                fail (rt) << "recipe in target type/pattern";
+
+              parse_recipe (t, tt, rt);
+            };
+
+            for_each (parse);
           }
           else
           {
@@ -767,7 +819,7 @@ namespace build2
           next_after_newline (t, tt);
         }
         // Dependency declaration potentially followed by a chain and/or a
-        // prerequisite-specific variable assignment/block.
+        // target/prerequisite-specific variable assignment/block.
         //
         else
         {
@@ -959,6 +1011,101 @@ namespace build2
   }
 
   void parser::
+  parse_recipe (token& t, type& tt, const token& start)
+  {
+    // Parse a recipe chain.
+    //
+    // % [<attrs>]
+    // {{
+    //   ...
+    // }}
+    //
+    // enter: percent or opening multi-curly-brace
+    // leave: token past newline after last closing multi-curly-brace
+    //
+
+    // If we have a recipe, the target is not implied.
+    //
+    if (target_->implied)
+    {
+      for (target* m (target_); m != nullptr; m = m->adhoc_member)
+        m->implied = false;
+
+      if (default_target_ == nullptr)
+        default_target_ = target_;
+    }
+
+    for (token st (start);; st = t)
+    {
+      optional<string> diag;
+
+      if (st.type == type::percent)
+      {
+        next_with_attributes (t, tt);
+        attributes_push (t, tt, true /* standalone */);
+
+        // Handle the recipe attributes, if any. Currently the only one we
+        // recognize is diag (command name for low-verbosity diagnostics);
+        // anything else is an error.
+        //
+        attributes& as (attributes_top ());
+        for (attribute& a: as)
+        {
+          const string& n (a.name);
+
+          if (n == "diag")
+          {
+            try
+            {
+              diag = convert<string> (move (a.value));
+            }
+            catch (const invalid_argument& e)
+            {
+              fail (as.loc) << "invalid " << n << " attribute value: " << e;
+            }
+          }
+          else
+            fail (as.loc) << "unknown recipe attribute " << a;
+        }
+
+        attributes_pop ();
+
+        next_after_newline (t, tt, '%');
+
+        if (tt != type::multi_lcbrace)
+          fail (t) << "expected recipe block instead of " << t;
+
+        st = t; // And fall through.
+      }
+
+      next (t, tt);                   // Newline after {{.
+      mode (lexer_mode::foreign, '\0', st.value.size ());
+      next_after_newline (t, tt, st); // Should be on its own line.
+
+      if (tt != type::word)
+        fail (t) << "unterminated recipe block" <<
+          info (st) << "recipe block starts here" << endf;
+
+      action a (perform_id, update_id);
+
+      target_->adhoc_recipes.emplace_back (a,
+                                           move (t.value),
+                                           move (diag),
+                                           get_location (st),
+                                           st.value.size ());
+
+      next (t, tt);
+      assert (tt == type::multi_rcbrace);
+      token et (t); // Save the closing }} for the diagnostics below.
+      next (t, tt);                   // Newline.
+      next_after_newline (t, tt, et); // Should be on its own line.
+
+      if (tt != type::percent && tt != type::multi_lcbrace)
+        break;
+    }
+  }
+
+  void parser::
   enter_adhoc_members (adhoc_names_loc&& ans, bool implied)
   {
     tracer trace ("parser::enter_adhoc_members", &path_);
@@ -1653,7 +1800,7 @@ namespace build2
       //
       {
         auto df = make_diag_frame (
-          [&args, &l](const diag_record& dr)
+          [this, &args, &l](const diag_record& dr)
           {
             dr << info (l) << "while parsing " << args[0] << " output";
           });
@@ -1763,7 +1910,7 @@ namespace build2
         }
         catch (const invalid_argument& e)
         {
-          fail << "invalid " << i->name << " attribute value: " << e;
+          fail (as.loc) << "invalid " << i->name << " attribute value: " << e;
         }
       }
       else if (i->name == "config.report.variable")
@@ -1774,7 +1921,7 @@ namespace build2
         }
         catch (const invalid_argument& e)
         {
-          fail << "invalid " << i->name << " attribute value: " << e;
+          fail (as.loc) << "invalid " << i->name << " attribute value: " << e;
         }
       }
       else
@@ -3268,16 +3415,10 @@ namespace build2
     optional<variable_visibility> vis;
     optional<bool> ovr;
 
-    auto print = [storage = names ()] (diag_record& dr, const value& v) mutable
+    for (auto& a: as)
     {
-      storage.clear ();
-      to_stream (dr.os, reverse (v, storage), true /* quote */, '@');
-    };
-
-    for (auto& p: as)
-    {
-      string& n (p.name);
-      value& v (p.value);
+      string& n (a.name);
+      value& v (a.value);
 
       if (const value_type* t = map_type (n))
       {
@@ -3288,23 +3429,10 @@ namespace build2
         // Fall through.
       }
       else
-      {
-        diag_record dr (fail (l));
-        dr << "unknown variable attribute " << n;
-
-        if (!v.null)
-        {
-          dr << '=';
-          print (dr, v);
-        }
-      }
+        fail (l) << "unknown variable attribute " << a;
 
       if (!v.null)
-      {
-        diag_record dr (fail (l));
-        dr << "unexpected value for attribute " << n << ": ";
-        print (dr, v);
-      }
+        fail (l) << "unexpected value in attribute " << a;
     }
 
     if (type != nullptr && var.type != nullptr)
@@ -3341,16 +3469,10 @@ namespace build2
     bool null (false);
     const value_type* type (nullptr);
 
-    auto print = [storage = names ()] (diag_record& dr, const value& v) mutable
+    for (auto& a: as)
     {
-      storage.clear ();
-      to_stream (dr.os, reverse (v, storage), true /* quote */, '@');
-    };
-
-    for (auto& p: as)
-    {
-      string& n (p.name);
-      value& v (p.value);
+      string& n (a.name);
+      value& v (a.value);
 
       if (n == "null")
       {
@@ -3369,23 +3491,10 @@ namespace build2
         // Fall through.
       }
       else
-      {
-        diag_record dr (fail (l));
-        dr << "unknown value attribute " << n;
-
-        if (!v.null)
-        {
-          dr << '=';
-          print (dr, v);
-        }
-      }
+        fail (l) << "unknown value attribute " << a;
 
       if (!v.null)
-      {
-        diag_record dr (fail (l));
-        dr << "unexpected value for attribute " << n << ": ";
-        print (dr, v);
-      }
+        fail (l) << "unexpected value in attribute " << a;
     }
 
     // When do we set the type and when do we keep the original? This gets
@@ -4712,7 +4821,7 @@ namespace build2
           // Print the location information in case the function fails.
           //
           auto df = make_diag_frame (
-            [&loc, l, r] (const diag_record& dr)
+            [this, &loc, l, r] (const diag_record& dr)
             {
               dr << info (loc) << "while concatenating " << l << " to " << r;
               dr << info << "use quoting to force untyped concatenation";
@@ -5512,7 +5621,7 @@ namespace build2
               // Print the location information in case the function fails.
               //
               auto df = make_diag_frame (
-                [&loc, t] (const diag_record& dr)
+                [this, &loc, t] (const diag_record& dr)
                 {
                   dr << info (loc) << "while converting " << t << " to string";
                 });
@@ -6326,6 +6435,20 @@ namespace build2
     return tt;
   }
 
+  inline type parser::
+  next_after_newline (token& t, type& tt, const token& a)
+  {
+    // Anything other than newline or eos is an error.
+    //
+    if (tt != type::newline && tt != type::eos)
+      fail (t) << "expected newline instead of " << t << " after " << a;
+
+    if (tt == type::newline)
+      next (t, tt);
+
+    return tt;
+  }
+
   type parser::
   peek ()
   {
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index c55e14f..5b930c5 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -26,7 +26,9 @@ namespace build2
 
     explicit
     parser (context& c, stage s = stage::rest)
-      : fail ("error", &path_), ctx (c), stage_ (s) {}
+      : fail ("error", &path_), info ("info", &path_),
+        ctx (c),
+        stage_ (s) {}
 
     // Issue diagnostics and throw failed in case of an error.
     //
@@ -109,6 +111,9 @@ namespace build2
                           const target_type* = nullptr,
                           string = string ());
 
+    void
+    parse_recipe (token&, token_type&, const token&);
+
     // Ad hoc target names inside < ... >.
     //
     struct adhoc_names_loc
@@ -246,6 +251,9 @@ namespace build2
       build2::value value;
     };
 
+    friend ostream&
+    operator<< (ostream&, const attribute&);
+
     struct attributes: small_vector<attribute, 1>
     {
       location loc; // Start location.
@@ -525,8 +533,14 @@ namespace build2
 
     // If the current token is newline, then get the next token. Otherwise,
     // fail unless the current token is eos (i.e., optional newline at the end
-    // of stream). If the after argument is not \0, use it in diagnostics as
-    // the token after which the newline was expectd.
+    // of stream). Use the after token in diagnostics as the token after which
+    // the newline was expected.
+    //
+    token_type
+    next_after_newline (token&, token_type&, const token& after);
+
+    // As above but the after argument is a single-character token. If it is
+    // \0, then it is ignored.
     //
     token_type
     next_after_newline (token&, token_type&, char after = '\0');
@@ -568,10 +582,10 @@ namespace build2
     }
 
     void
-    mode (lexer_mode m, char ps = '\0')
+    mode (lexer_mode m, char ps = '\0', uintptr_t d = 0)
     {
       if (replay_ != replay::play)
-        lexer_->mode (m, ps);
+        lexer_->mode (m, ps, nullopt, d);
       else
         // As a sanity check, make sure the mode matches the next token. Note
         // that we don't check the attributes flags or the pair separator
@@ -612,8 +626,10 @@ namespace build2
     // with the lexer directly (e.g., the keyword() test). Replays also cannot
     // nest. For now we don't enforce any of this.
     //
-    // Note also that the peeked token is not part of the replay, until it
-    // is "got".
+    // Note also that the peeked token is not part of the replay until it is
+    // "got". In particular, this means that we cannot peek past the replay
+    // sequence (since we will get the peeked token as the first token of
+    // the replay).
     //
     void
     replay_save ()
@@ -628,6 +644,8 @@ namespace build2
       assert ((replay_ == replay::save && !replay_data_.empty ()) ||
               (replay_ == replay::play && replay_i_ == replay_data_.size ()));
 
+      assert (!peeked_);
+
       if (replay_ == replay::save)
         replay_path_ = path_; // Save old path.
 
@@ -638,6 +656,8 @@ namespace build2
     void
     replay_stop ()
     {
+      assert (!peeked_);
+
       if (replay_ == replay::play)
         path_ = replay_path_; // Restore old path.
 
@@ -726,6 +746,7 @@ namespace build2
     //
   protected:
     const fail_mark fail;
+    const basic_mark info;
 
     // Parser state.
     //
diff --git a/libbuild2/recipe.hxx b/libbuild2/recipe.hxx
index a4933dd..e73a8ea 100644
--- a/libbuild2/recipe.hxx
+++ b/libbuild2/recipe.hxx
@@ -53,9 +53,28 @@ namespace build2
   //
   struct adhoc_recipe
   {
-    build2::action action;
-  };
+    using action_type = build2::action;
+    using location_type = build2::location;
+
+    action_type      action;
+    string           recipe;
+    optional<string> diag;   // Command name for low-verbosity diagnostics.
 
+    // Diagnostics-related information.
+    //
+    path_name_value file;    // Buildfile of recipe.
+    location_type location;  // Buildfile location of recipe.
+    size_t braces;           // Number of braces in multi-brace tokens.
+
+    adhoc_recipe (action_type a,
+                  string r,
+                  optional<string> d,
+                  const location_type& l, size_t b)
+        : action (a),
+          recipe (move (r)),
+          diag (move (d)),
+          file (l.file), location (file, l.line, l.column), braces (b) {}
+  };
 }
 
 #endif // LIBBUILD2_RECIPE_HXX
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
index 06cefc7..86a73f1 100644
--- a/libbuild2/test/script/parser.cxx
+++ b/libbuild2/test/script/parser.cxx
@@ -2292,7 +2292,7 @@ namespace build2
                   //   testscript:2:5: info: while parsing string '1>&a'
                   //
                   auto df = make_diag_frame (
-                    [s, &l](const diag_record& dr)
+                    [this, s, &l](const diag_record& dr)
                     {
                       dr << info (l) << "while parsing string '" << s << "'";
                     });
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index e11b880..bd176b5 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -152,6 +152,20 @@ namespace build2
   inline ostream&
   operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;}
 
+  // Compare type and value only. Note: currently only used for sanity checks.
+  //
+  inline bool
+  operator== (const token& x, const token& y)
+  {
+    return x.type == y.type && x.value == y.value;
+  }
+
+  inline bool
+  operator!= (const token& x, const token& y)
+  {
+    return !(x == y);
+  }
+
   // Context-dependent lexing (see lexer_mode for details).
   //
   struct lexer_mode_base
author	Boris Kolpackov <boris@codesynthesis.com>	2020-05-08 06:34:13 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2020-05-27 08:35:29 +0200
commit	a5bf818ebb55e4e27eb0f067664fd4db70284267 (patch)
tree	2c0699ba4827cee34161db3da7ec8f94a6175ea9 /libbuild2
parent	fce9782a330e8f701a8df0b5200e5b78e97ec4b5 (diff)