Move config, dist, test, and install modules into library

author: Karen Arutyunov <karen@codesynthesis.com> 2019-07-04 19:12:15 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2019-07-05 14:24:43 +0300
commit: 57b10c06925d0bdf6ffb38488ee908f085109e95 (patch)
tree: f2103684d319650c3302aef9d7a70dd64ff2a347 /libbuild2/test/script/parser.cxx
parent: 30b4eda196e090aa820d312e6a9435a4ae84c303 (diff)
1 files changed, 3451 insertions, 0 deletions
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
new file mode 100644
index 0000000..260bc88
--- /dev/null
+++ b/libbuild2/test/script/parser.cxx
@@ -0,0 +1,3451 @@
+// file      : libbuild2/test/script/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/parser.hxx>
+
+#include <sstream>
+
+#include <libbuild2/context.hxx> // sched, keep_going
+
+#include <libbuild2/test/script/lexer.hxx>
+#include <libbuild2/test/script/runner.hxx>
+
+using namespace std;
+
+namespace build2
+{
+  namespace test
+  {
+    namespace script
+    {
+      using type = token_type;
+
+      // Return true if the string contains only a single digit characters
+      // (used to detect the special $N variables).
+      //
+      static inline bool
+      digit (const string& s)
+      {
+        return s.size () == 1 && butl::digit (s[0]);
+      }
+
+      //
+      // Pre-parse.
+      //
+
+      void parser::
+      pre_parse (script& s)
+      {
+        const path& p (s.script_target.path ());
+        assert (!p.empty ()); // Should have been assigned.
+
+        try
+        {
+          ifdstream ifs (p);
+          pre_parse (ifs, s);
+        }
+        catch (const io_error& e)
+        {
+          fail << "unable to read testscript " << p << ": " << e << endf;
+        }
+      }
+
+      void parser::
+      pre_parse (istream& is, script& s)
+      {
+        path_ = &*s.paths_.insert (s.script_target.path ()).first;
+
+        pre_parse_ = true;
+
+        lexer l (is, *path_, lexer_mode::command_line);
+        set_lexer (&l);
+
+        id_prefix_.clear ();
+
+        id_map idm;
+        include_set ins;
+
+        script_ = &s;
+        runner_ = nullptr;
+        group_ = script_;
+        id_map_ = &idm;
+        include_set_ = &ins;
+        scope_ = nullptr;
+
+        //@@ PAT TODO: set pbase_?
+
+        // Start location of the implied script group is the beginning of
+        // the file. End location -- end of the file.
+        //
+        group_->start_loc_ = location (path_, 1, 1);
+
+        token t (pre_parse_scope_body ());
+
+        if (t.type != type::eos)
+          fail (t) << "stray " << t;
+
+        group_->end_loc_ = get_location (t);
+      }
+
+      bool parser::
+      pre_parse_demote_group_scope (unique_ptr<scope>& s)
+      {
+        // See if this turned out to be an explicit test scope. An explicit
+        // test scope contains a single test, only variable assignments in
+        // setup and nothing in teardown. Plus only the group can have the
+        // description. Because we apply this recursively, also disqualify
+        // a test scope that has an if-condition.
+        //
+        // If we have a chain, then all the scopes must be demotable. So we
+        // first check if this scope is demotable and if so then recurse for
+        // the next in chain.
+        //
+        group& g (static_cast<group&> (*s));
+
+        auto& sc (g.scopes);
+        auto& su (g.setup_);
+        auto& td (g.tdown_);
+
+        test* t;
+        if (sc.size () == 1                                          &&
+            (t = dynamic_cast<test*> (sc.back ().get ())) != nullptr &&
+            find_if (
+              su.begin (), su.end (),
+              [] (const line& l) {
+                return l.type != line_type::var;
+              }) == su.end ()                                        &&
+
+            td.empty ()                                              &&
+            !t->desc                                                 &&
+            !t->if_cond_)
+        {
+          if (g.if_chain != nullptr &&
+              !pre_parse_demote_group_scope (g.if_chain))
+            return false;
+
+          // It would have been nice to reuse the test object and only throw
+          // away the group. However, the merged scope has to use id_path and
+          // wd_path of the group. So to keep things simple we are going to
+          // throw away both and create a new test object.
+          //
+          // We always use the group's id since the test cannot have a
+          // user-provided one.
+          //
+          unique_ptr<test> m (new test (g.id_path.leaf ().string (), *group_));
+
+          // Move the description, if-condition, and if-chain.
+          //
+          m->desc = move (g.desc);
+          m->if_cond_ = move (g.if_cond_);
+          m->if_chain = move (g.if_chain);
+
+          // Merge the lines of the group and the test.
+          //
+          if (su.empty ())
+            m->tests_ = move (t->tests_);
+          else
+          {
+            m->tests_ = move (su); // Should come first.
+            m->tests_.insert (m->tests_.end (),
+                              make_move_iterator (t->tests_.begin ()),
+                              make_move_iterator (t->tests_.end ()));
+          }
+
+          // Use start/end locations of the outer scope.
+          //
+          m->start_loc_ = g.start_loc_;
+          m->end_loc_ = g.end_loc_;
+
+          s = move (m);
+          return true;
+        }
+
+        return false;
+      }
+
+      token parser::
+      pre_parse_scope_body ()
+      {
+        // enter: next token is first token of scope body
+        // leave: rcbrace or eos (returned)
+
+        token t;
+        type tt;
+
+        // Parse lines (including nested scopes) until we see '}' or eos.
+        //
+        for (;;)
+        {
+          // Start lexing each line recognizing leading '.+-{}'.
+          //
+          tt = peek (lexer_mode::first_token);
+
+          // Handle description.
+          //
+          optional<description> d;
+          if (tt == type::colon)
+            d = pre_parse_leading_description (t, tt);
+
+          // Determine the line type by peeking at the first token.
+          //
+          switch (tt)
+          {
+          case type::eos:
+          case type::rcbrace:
+            {
+              next (t, tt);
+
+              if (d)
+                fail (t) << "description before " << t;
+
+              return t;
+            }
+          case type::lcbrace:
+            {
+              // Nested scope.
+              //
+              next (t, tt); // Get '{'.
+              const location sl (get_location (t));
+
+              // First check that we don't have any teardown commands yet.
+              // This will detect things like variable assignments between
+              // scopes.
+              //
+              if (!group_->tdown_.empty ())
+              {
+                location tl (
+                  group_->tdown_.back ().tokens.front ().location ());
+
+                fail (sl) << "scope after teardown" <<
+                  info (tl) << "last teardown line appears here";
+              }
+
+              // If there is no user-supplied id, use the line number
+              // (prefixed with include id) as the scope id.
+              //
+              const string& id (
+                d && !d->id.empty ()
+                ? d->id
+                : insert_id (id_prefix_ + to_string (sl.line), sl));
+
+              unique_ptr<scope> g (pre_parse_scope_block (t, tt, id));
+              g->desc = move (d);
+
+              pre_parse_demote_group_scope (g);
+              group_->scopes.push_back (move (g));
+              continue;
+            }
+          default:
+            {
+              pre_parse_line (t, tt, d);
+              assert (tt == type::newline);
+              break;
+            }
+          }
+        }
+      }
+
+      unique_ptr<group> parser::
+      pre_parse_scope_block (token& t, type& tt, const string& id)
+      {
+        // enter: lcbrace
+        // leave: newline after rcbrace
+
+        const location sl (get_location (t));
+
+        if (next (t, tt) != type::newline)
+          fail (t) << "expected newline after '{'";
+
+        // Push group.
+        //
+        id_map idm;
+        include_set ins;
+
+        unique_ptr<group> g (new group (id, *group_));
+
+        id_map* om (id_map_);
+        id_map_ = &idm;
+
+        include_set* os (include_set_);
+        include_set_ = &ins;
+
+        group* og (group_);
+        group_ = g.get ();
+
+        // Parse body.
+        //
+        group_->start_loc_ = sl;
+        token e (pre_parse_scope_body ());
+        group_->end_loc_ = get_location (e);
+
+        // Pop group.
+        //
+        group_ = og;
+        include_set_ = os;
+        id_map_ = om;
+
+        if (e.type != type::rcbrace)
+          fail (e) << "expected '}' at the end of the scope";
+
+        if (next (t, tt) != type::newline)
+          fail (t) << "expected newline after '}'";
+
+        return g;
+      }
+
+      // Parse a logical line (as well as scope-if since the only way to
+      // recognize it is to parse the if line).
+      //
+      // If one is true then only parse one line returning an indication of
+      // whether the line ended with a semicolon.
+      //
+      bool parser::
+      pre_parse_line (token& t, type& tt,
+                      optional<description>& d,
+                      lines* ls,
+                      bool one)
+      {
+        // enter: next token is peeked at (type in tt)
+        // leave: newline
+
+        // Note: token is only peeked at.
+        //
+        const location ll (get_location (peeked ()));
+
+        // Determine the line type/start token.
+        //
+        line_type lt;
+        type st (type::eos);
+
+        switch (tt)
+        {
+        case type::dot:
+          {
+            // Directive.
+            //
+            next (t, tt); // Skip dot.
+            next (t, tt); // Get the directive name.
+
+            if (tt != type::word || t.qtype != quote_type::unquoted)
+              fail (t) << "expected directive name instead of " << t;
+
+            // Make sure we are not inside a test (i.e., after semi).
+            //
+            if (ls != nullptr)
+              fail (ll) << "directive after ';'";
+
+            const string& n (t.value);
+
+            if (n == "include")
+              pre_parse_directive (t, tt);
+            else
+              fail (t) << "unknown directive '" << n << "'";
+
+            assert (tt == type::newline);
+            return false;
+          }
+        case type::plus:
+        case type::minus:
+          {
+            // Setup/teardown command.
+            //
+            st = tt;
+
+            next (t, tt);   // Start saving tokens from the next one.
+            replay_save ();
+            next (t, tt);
+
+            // See if this is a special command.
+            //
+            lt = line_type::cmd; // Default.
+
+            if (tt == type::word && t.qtype == quote_type::unquoted)
+            {
+              const string& n (t.value);
+
+              if      (n == "if")  lt = line_type::cmd_if;
+              else if (n == "if!") lt = line_type::cmd_ifn;
+            }
+
+            break;
+          }
+        default:
+          {
+            // Either variable assignment or test command.
+            //
+            replay_save (); // Start saving tokens from the current one.
+            next (t, tt);
+
+            // Decide whether this is a variable assignment or a command.
+            //
+            // It is an assignment if the first token is an unquoted name and
+            // the next token is an assign/append/prepend operator. Assignment
+            // to a computed variable name must use the set builtin.
+            //
+            // Note also thatspecial commands take precedence over variable
+            // assignments.
+            //
+            lt = line_type::cmd; // Default.
+
+            if (tt == type::word && t.qtype == quote_type::unquoted)
+            {
+              const string& n (t.value);
+
+              if      (n == "if")    lt = line_type::cmd_if;
+              else if (n == "if!")   lt = line_type::cmd_ifn;
+              else if (n == "elif")  lt = line_type::cmd_elif;
+              else if (n == "elif!") lt = line_type::cmd_elifn;
+              else if (n == "else")  lt = line_type::cmd_else;
+              else if (n == "end")   lt = line_type::cmd_end;
+              else
+              {
+                // Switch the recognition of leading variable assignments for
+                // the next token. This is safe to do because we know we
+                // cannot be in the quoted mode (since the current token is
+                // not quoted).
+                //
+                type p (peek (lexer_mode::second_token));
+
+                if (p == type::assign  ||
+                    p == type::prepend ||
+                    p == type::append)
+                {
+                  lt = line_type::var;
+                  st = p;
+                }
+              }
+            }
+
+            break;
+          }
+        }
+
+        // Pre-parse the line keeping track of whether it ends with a semi.
+        //
+        bool semi (false);
+
+        line ln;
+        switch (lt)
+        {
+        case line_type::var:
+          {
+            // Check if we are trying to modify any of the special aliases
+            // ($*, $N, $~, $@).
+            //
+            string& n (t.value);
+
+            if (n == "*" || n == "~" || n == "@" || digit (n))
+              fail (t) << "attempt to set '" << n << "' variable directly";
+
+            // Pre-enter the variables now while we are executing serially.
+            // Once parallel, it becomes a lot harder to do.
+            //
+            ln.var = &script_->var_pool.insert (move (n));
+
+            next (t, tt); // Assignment kind.
+            parse_variable_line (t, tt);
+
+            semi = (tt == type::semi);
+
+            if (tt == type::semi)
+              next (t, tt);
+
+            if (tt != type::newline)
+              fail (t) << "expected newline instead of " << t;
+
+            break;
+          }
+        case line_type::cmd_if:
+        case line_type::cmd_ifn:
+        case line_type::cmd_elif:
+        case line_type::cmd_elifn:
+        case line_type::cmd_else:
+        case line_type::cmd_end:
+          next (t, tt); // Skip to start of command.
+          // Fall through.
+        case line_type::cmd:
+          {
+            pair<command_expr, here_docs> p;
+
+            if (lt != line_type::cmd_else && lt != line_type::cmd_end)
+              p = parse_command_expr (t, tt);
+
+            // Colon and semicolon are only valid in test command lines and
+            // after 'end' in if-else. Note that we still recognize them
+            // lexically, they are just not valid tokens per the grammar.
+            //
+            if (tt != type::newline)
+            {
+              if (lt != line_type::cmd && lt != line_type::cmd_end)
+                fail (t) << "expected newline instead of " << t;
+
+              switch (st)
+              {
+              case type::plus:  fail (t) << t << " after setup command" << endf;
+              case type::minus: fail (t) << t << " after teardown command" << endf;
+              }
+            }
+
+            switch (tt)
+            {
+            case type::colon:
+              {
+                if (d)
+                  fail (ll) << "both leading and trailing descriptions";
+
+                d = parse_trailing_description (t, tt);
+                break;
+              }
+            case type::semi:
+              {
+                semi = true;
+                next (t, tt); // Get newline.
+                break;
+              }
+            }
+
+            if (tt != type::newline)
+              fail (t) << "expected newline instead of " << t;
+
+            parse_here_documents (t, tt, p);
+            break;
+          }
+        }
+
+        assert (tt == type::newline);
+
+        // Stop saving and get the tokens.
+        //
+        lines ls_data;
+
+        if (ls == nullptr)
+          ls = &ls_data;
+
+        ln.type = lt;
+        ln.tokens = replay_data ();
+        ls->push_back (move (ln));
+
+        if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
+        {
+          semi = pre_parse_if_else (t, tt, d, *ls);
+
+          // If this turned out to be scope-if, then ls is empty, semi is
+          // false, and none of the below logic applies.
+          //
+          if (ls->empty ())
+            return semi;
+        }
+
+        // Unless we were told where to put it, decide where it actually goes.
+        //
+        if (ls == &ls_data)
+        {
+          // First pre-check variable and variable-if: by themselves (i.e.,
+          // without a trailing semicolon) they are treated as either setup or
+          // teardown without plus/minus. Also handle illegal line types.
+          //
+          switch (lt)
+          {
+          case line_type::cmd_elif:
+          case line_type::cmd_elifn:
+          case line_type::cmd_else:
+          case line_type::cmd_end:
+            {
+              fail (ll) << lt << " without preceding 'if'" << endf;
+            }
+          case line_type::cmd_if:
+          case line_type::cmd_ifn:
+            {
+              // See if this is a variable-only command-if.
+              //
+              if (find_if (ls_data.begin (), ls_data.end (),
+                           [] (const line& l) {
+                             return l.type == line_type::cmd;
+                           }) != ls_data.end ())
+                break;
+            }
+            // Fall through.
+          case line_type::var:
+            {
+              // If there is a semicolon after the variable then we assume
+              // it is part of a test (there is no reason to use semicolons
+              // after variables in the group scope). Otherwise -- setup or
+              // teardown.
+              //
+              if (!semi)
+              {
+                if (d)
+                {
+                  if (lt == line_type::var)
+                    fail (ll) << "description before setup/teardown variable";
+                  else
+                    fail (ll) << "description before/after setup/teardown "
+                              << "variable-if";
+                }
+
+                // If we don't have any nested scopes or teardown commands,
+                // then we assume this is a setup, otherwise -- teardown.
+                //
+                ls = group_->scopes.empty () && group_->tdown_.empty ()
+                  ? &group_->setup_
+                  : &group_->tdown_;
+              }
+              break;
+            }
+          default:
+            break;
+          }
+
+          // If pre-check didn't change the destination, then it's a test.
+          //
+          if (ls == &ls_data)
+          {
+            switch (st)
+            {
+              // Setup.
+              //
+            case type::plus:
+              {
+                if (d)
+                  fail (ll) << "description before setup command";
+
+                if (!group_->scopes.empty ())
+                  fail (ll) << "setup command after tests";
+
+                if (!group_->tdown_.empty ())
+                  fail (ll) << "setup command after teardown";
+
+                ls = &group_->setup_;
+                break;
+              }
+
+              // Teardown.
+              //
+            case type::minus:
+              {
+                if (d)
+                  fail (ll) << "description before teardown command";
+
+                ls = &group_->tdown_;
+                break;
+              }
+
+              // Test command or variable.
+              //
+            default:
+              {
+                // First check that we don't have any teardown commands yet.
+                // This will detect things like variable assignments between
+                // tests.
+                //
+                if (!group_->tdown_.empty ())
+                {
+                  location tl (
+                    group_->tdown_.back ().tokens.front ().location ());
+
+                  fail (ll) << "test after teardown" <<
+                    info (tl) << "last teardown line appears here";
+                }
+                break;
+              }
+            }
+          }
+
+          // If the destination changed, then move the data over.
+          //
+          if (ls != &ls_data)
+            ls->insert (ls->end (),
+                        make_move_iterator (ls_data.begin ()),
+                        make_move_iterator (ls_data.end ()));
+        }
+
+        // If this command ended with a semicolon, then the next one should
+        // go to the same place.
+        //
+        if (semi && !one)
+        {
+          tt = peek (lexer_mode::first_token);
+          const location ll (get_location (peeked ()));
+
+          switch (tt)
+          {
+          case type::colon:
+            fail (ll) << "description inside test" << endf;
+          case type::eos:
+          case type::rcbrace:
+          case type::lcbrace:
+            fail (ll) << "expected another line after ';'" << endf;
+          case type::plus:
+            fail (ll) << "setup command in test" << endf;
+          case type::minus:
+            fail (ll) << "teardown command in test" << endf;
+          default:
+            semi = pre_parse_line (t, tt, d, ls);
+            assert (tt == type::newline); // End of last test line.
+          }
+        }
+
+        // If this is a test then create implicit test scope.
+        //
+        if (ls == &ls_data)
+        {
+          // If there is no user-supplied id, use the line number (prefixed
+          // with include id) as the scope id.
+          //
+          const string& id (
+            d && !d->id.empty ()
+            ? d->id
+            : insert_id (id_prefix_ + to_string (ll.line), ll));
+
+          unique_ptr<test> p (new test (id, *group_));
+
+          p->desc = move (d);
+
+          p->start_loc_ = ll;
+          p->tests_ = move (ls_data);
+          p->end_loc_ = get_location (t);
+
+          group_->scopes.push_back (move (p));
+        }
+
+        return semi;
+      }
+
+      bool parser::
+      pre_parse_if_else (token& t, type& tt,
+                         optional<description>& d,
+                         lines& ls)
+      {
+        // enter: <newline> (previous line)
+        // leave: <newline>
+
+        tt = peek (lexer_mode::first_token);
+
+        return tt == type::lcbrace
+          ? pre_parse_if_else_scope (t, tt, d, ls)
+          : pre_parse_if_else_command (t, tt, d, ls);
+      }
+
+      bool parser::
+      pre_parse_if_else_scope (token& t, type& tt,
+                               optional<description>& d,
+                               lines& ls)
+      {
+        // enter: peeked token of next line (lcbrace)
+        // leave: newline
+
+        assert (ls.size () == 1); // The if/if! line.
+
+        // Use if/if! as the entire scope chain location.
+        //
+        const location sl (ls.back ().tokens.front ().location ());
+
+        // First check that we don't have any teardown commands yet. This
+        // will detect things like variable assignments between scopes.
+        //
+        if (!group_->tdown_.empty ())
+        {
+          location tl (
+            group_->tdown_.back ().tokens.front ().location ());
+
+          fail (sl) << "scope after teardown" <<
+            info (tl) << "last teardown line appears here";
+        }
+
+        // If there is no user-supplied id, use the line number (prefixed with
+        // include id) as the scope id. Note that we use the same id for all
+        // scopes in the chain.
+        //
+        const string& id (
+          d && !d->id.empty ()
+          ? d->id
+          : insert_id (id_prefix_ + to_string (sl.line), sl));
+
+        unique_ptr<scope> root;
+
+        // Parse the if-else scope chain.
+        //
+        line_type bt (line_type::cmd_if); // Current block.
+
+        for (unique_ptr<scope>* ps (&root);; ps = &(*ps)->if_chain)
+        {
+          next (t, tt); // Get '{'.
+
+          {
+            unique_ptr<group> g (pre_parse_scope_block (t, tt, id));
+
+            // If-condition.
+            //
+            g->if_cond_ = move (ls.back ());
+            ls.clear ();
+
+            // Description. For now we just duplicate it through the entire
+            // chain.
+            //
+            g->desc = (ps == &root ? d : root->desc);
+
+            *ps = move (g);
+          }
+
+          // See if what comes next is another chain element.
+          //
+          line_type lt (line_type::cmd_end);
+
+          type pt (peek (lexer_mode::first_token));
+          const token& p (peeked ());
+          const location ll (get_location (p));
+
+          if (pt == type::word && p.qtype == quote_type::unquoted)
+          {
+            if      (p.value == "elif")  lt = line_type::cmd_elif;
+            else if (p.value == "elif!") lt = line_type::cmd_elifn;
+            else if (p.value == "else")  lt = line_type::cmd_else;
+          }
+
+          if (lt == line_type::cmd_end)
+            break;
+
+          // Check if-else block sequencing.
+          //
+          if (bt == line_type::cmd_else)
+          {
+            if (lt == line_type::cmd_else ||
+                lt == line_type::cmd_elif ||
+                lt == line_type::cmd_elifn)
+              fail (ll) << lt << " after " << bt;
+          }
+
+          // Parse just the condition line using pre_parse_line() in the "one"
+          // mode and into ls so that it is naturally picked up as if_cond_ on
+          // the next iteration.
+          //
+          optional<description> td;
+          bool semi (pre_parse_line (t, (tt = pt), td, &ls, true));
+          assert (ls.size () == 1 && ls.back ().type == lt);
+          assert (tt == type::newline);
+
+          // For any of these lines trailing semi or description is illegal.
+          //
+          // @@ Not the exact location of semi/colon.
+          //
+          if (semi)
+            fail (ll) << "';' after " << lt;
+
+          if (td)
+            fail (ll) << "description after " << lt;
+
+          // Make sure what comes next is another scope.
+          //
+          tt = peek (lexer_mode::first_token);
+
+          if (tt != type::lcbrace)
+            fail (ll) << "expected scope after " << lt;
+
+          // Update current if-else block.
+          //
+          switch (lt)
+          {
+          case line_type::cmd_elif:
+          case line_type::cmd_elifn: bt = line_type::cmd_elif; break;
+          case line_type::cmd_else:  bt = line_type::cmd_else; break;
+          default: break;
+          }
+        }
+
+        pre_parse_demote_group_scope (root);
+        group_->scopes.push_back (move (root));
+        return false; // We never end with a semi.
+      }
+
+      bool parser::
+      pre_parse_if_else_command (token& t, type& tt,
+                                 optional<description>& d,
+                                 lines& ls)
+      {
+        // enter: peeked first token of next line (type in tt)
+        // leave: newline
+
+        // Parse lines until we see closing 'end'. Nested if-else blocks are
+        // handled recursively.
+        //
+        for (line_type bt (line_type::cmd_if); // Current block.
+             ;
+             tt = peek (lexer_mode::first_token))
+        {
+          const location ll (get_location (peeked ()));
+
+          switch (tt)
+          {
+          case type::colon:
+            fail (ll) << "description inside " << bt << endf;
+          case type::eos:
+          case type::rcbrace:
+          case type::lcbrace:
+            fail (ll) << "expected closing 'end'" << endf;
+          case type::plus:
+            fail (ll) << "setup command inside " << bt << endf;
+          case type::minus:
+            fail (ll) << "teardown command inside " << bt << endf;
+          }
+
+          // Parse one line. Note that this one line can still be multiple
+          // lines in case of if-else. In this case we want to view it as
+          // cmd_if, not cmd_end. Thus remember the start position of the
+          // next logical line.
+          //
+          size_t i (ls.size ());
+
+          optional<description> td;
+          bool semi (pre_parse_line (t, tt, td, &ls, true));
+          assert (tt == type::newline);
+
+          line_type lt (ls[i].type);
+
+          // First take care of 'end'.
+          //
+          if (lt == line_type::cmd_end)
+          {
+            if (td)
+            {
+              if (d)
+                fail (ll) << "both leading and trailing descriptions";
+
+              d = move (td);
+            }
+
+            return semi;
+          }
+
+          // For any other line trailing semi or description is illegal.
+          //
+          // @@ Not the exact location of semi/colon.
+          //
+          if (semi)
+            fail (ll) << "';' inside " << bt;
+
+          if (td)
+            fail (ll) << "description inside " << bt;
+
+          // Check if-else block sequencing.
+          //
+          if (bt == line_type::cmd_else)
+          {
+            if (lt == line_type::cmd_else ||
+                lt == line_type::cmd_elif ||
+                lt == line_type::cmd_elifn)
+              fail (ll) << lt << " after " << bt;
+          }
+
+          // Update current if-else block.
+          //
+          switch (lt)
+          {
+          case line_type::cmd_elif:
+          case line_type::cmd_elifn: bt = line_type::cmd_elif; break;
+          case line_type::cmd_else:  bt = line_type::cmd_else; break;
+          default: break;
+          }
+        }
+      }
+
+      void parser::
+      pre_parse_directive (token& t, type& tt)
+      {
+        // enter: directive name
+        // leave: newline
+
+        string d (t.value);
+        location l (get_location (t));
+        next (t, tt);
+
+        // Suspend pre-parsing since we want to really parse the line, with
+        // expansion, etc. Also parse the whole line in one go.
+        //
+        names args;
+
+        if (tt != type::newline)
+        {
+          pre_parse_ = false;
+          args = parse_names (t, tt,
+                              pattern_mode::expand,
+                              false,
+                              "directive argument",
+                              nullptr);
+          pre_parse_ = true;
+        }
+
+        if (tt != type::newline)
+          fail (t) << t << " after directive";
+
+        if (d == "include")
+          pre_parse_include_line (move (args), move (l));
+        else
+          assert (false); // Unhandled directive.
+      }
+
+      void parser::
+      pre_parse_include_line (names args, location dl)
+      {
+        auto i (args.begin ());
+
+        // Process options.
+        //
+        bool once (false);
+        for (; i != args.end () && i->simple (); ++i)
+        {
+          if (i->value == "--once")
+            once = true;
+          else
+            break;
+        }
+
+        // Process arguments.
+        //
+        auto include = [&dl, once, this] (string n) // throw invalid_path
+        {
+          // It may be tempting to use relative paths in diagnostics but it
+          // most likely will be misguided.
+          //
+          auto enter_path = [this] (string n) -> const path&
+          {
+            path p (move (n));
+
+            if (p.relative ())
+              p = path_->directory () / p;
+
+            p.normalize ();
+
+            return *script_->paths_.insert (move (p)).first;
+          };
+
+          const path& p (enter_path (move (n)));
+
+          if (include_set_->insert (p).second || !once)
+          {
+            try
+            {
+              ifdstream ifs (p);
+              lexer l (ifs, p, lexer_mode::command_line);
+
+              const path* op (path_);
+              path_ = &p;
+
+              lexer* ol (lexer_);
+              set_lexer (&l);
+
+              string oip (id_prefix_);
+              id_prefix_ += to_string (dl.line);
+              id_prefix_ += '-';
+              id_prefix_ += p.leaf ().base ().string ();
+              id_prefix_ += '-';
+
+              token t (pre_parse_scope_body ());
+
+              if (t.type != type::eos)
+                fail (t) << "stray " << t;
+
+              id_prefix_ = oip;
+              set_lexer (ol);
+              path_ = op;
+            }
+            catch (const io_error& e)
+            {
+              fail (dl) << "unable to read testscript " << p << ": " << e;
+            }
+          }
+        };
+
+        for (; i != args.end (); ++i)
+        {
+          name& n (*i);
+
+          try
+          {
+            if (n.simple () && !n.empty ())
+            {
+              include (move (n.value));
+              continue;
+            }
+          }
+          catch (const invalid_path&) {} // Fall through.
+
+          diag_record dr (fail (dl));
+          dr << "invalid testscript include path ";
+          to_stream (dr.os, n, true); // Quote.
+        }
+      }
+
+      description parser::
+      pre_parse_leading_description (token& t, type& tt)
+      {
+        // enter: peeked at colon (type in tt)
+        // leave: peeked at in the first_token mode (type in tt)
+
+        assert (tt == type::colon);
+
+        description r;
+        location loc (get_location (peeked ()));
+
+        string sp;     // Strip prefix.
+        size_t sn (0); // Strip prefix length.
+
+        for (size_t ln (1); tt == type::colon; ++ln)
+        {
+          next (t, tt); // Get ':'.
+
+          mode (lexer_mode::description_line);
+          next (t, tt);
+
+          // If it is empty, then we get newline right away.
+          //
+          const string& l (tt == type::word ? t.value : string ());
+
+          if (tt == type::word)
+            next (t, tt); // Get newline.
+
+          assert (tt == type::newline);
+
+          // If this is the first line, then get the "strip prefix", i.e.,
+          // the beginning of the line that contains only whitespaces. If
+          // the subsequent lines start with the same prefix, then we strip
+          // it.
+          //
+          if (ln == 1)
+          {
+            sn = l.find_first_not_of (" \t");
+            sp.assign (l, 0, sn == string::npos ? (sn = 0) : sn);
+          }
+
+          // Apply strip prefix.
+          //
+          size_t i (l.compare (0, sn, sp) == 0 ? sn : 0);
+
+          // Strip trailing whitespaces, as a courtesy to the user.
+          //
+          size_t j (l.find_last_not_of (" \t"));
+          j = j != string::npos ? j + 1 : i;
+
+          size_t n (j - i); // [i, j) is our data.
+
+          if (ln == 1)
+          {
+            // First line. Ignore if it's blank.
+            //
+            if (n == 0)
+              --ln; // Stay as if on the first line.
+            else
+            {
+              // Otherwise, see if it is the id. Failed that we assume it is
+              // the summary until we see the next line.
+              //
+              (l.find_first_of (" \t.", i) >= j ? r.id : r.summary).
+                assign (l, i, n);
+
+              // If this is an id then validate it.
+              //
+              if (!r.id.empty ())
+              {
+                for (char c: r.id)
+                {
+                  if (!(alnum (c) || c == '_' || c == '-' || c == '+'))
+                    fail (loc) << "illegal character '" << c
+                               << "' in test id '" << r.id << "'";
+                }
+              }
+            }
+          }
+          else if (ln == 2)
+          {
+            // If this is a blank then whatever we have in id/summary is good.
+            // Otherwise, if we have id, then assume this is summary until we
+            // see the next line. And if not, then move what we (wrongly)
+            // assumed to be the summary to details.
+            //
+            if (n != 0)
+            {
+              if (!r.id.empty ())
+                r.summary.assign (l, i, n);
+              else
+              {
+                r.details = move (r.summary);
+                r.details += '\n';
+                r.details.append (l, i, n);
+
+                r.summary.clear ();
+              }
+            }
+          }
+          // Don't treat line 3 as special if we have given up on id/summary.
+          //
+          else if (ln == 3 && r.details.empty ())
+          {
+            // If this is a blank and we have id and/or summary, then we are
+            // good. Otherwise, if we have both, then move what we (wrongly)
+            // assumed to be id and summary to details.
+            //
+            if (n != 0)
+            {
+              if (!r.id.empty () && !r.summary.empty ())
+              {
+                r.details = move (r.id);
+                r.details += '\n';
+                r.details += r.summary;
+                r.details += '\n';
+
+                r.id.clear ();
+                r.summary.clear ();
+              }
+
+              r.details.append (l, i, n);
+            }
+          }
+          else
+          {
+            if (!r.details.empty ())
+              r.details += '\n';
+
+            r.details.append (l, i, n);
+          }
+
+          tt = peek (lexer_mode::first_token);
+        }
+
+        // Zap trailing newlines in the details.
+        //
+        size_t p (r.details.find_last_not_of ('\n'));
+        if (p != string::npos && ++p != r.details.size ())
+          r.details.resize (p);
+
+        if (r.empty ())
+          fail (loc) << "empty description";
+
+        // Insert id into the id map if we have one.
+        //
+        if (!r.id.empty ())
+          insert_id (r.id, loc);
+
+        return r;
+      }
+
+      description parser::
+      parse_trailing_description (token& t, type& tt)
+      {
+        // enter: colon
+        // leave: newline
+
+        // Parse one-line trailing description.
+        //
+        description r;
+
+        // @@ Would be nice to omit trailing description from replay.
+        //
+        const location loc (get_location (t));
+
+        mode (lexer_mode::description_line);
+        next (t, tt);
+
+        // If it is empty, then we will get newline right away.
+        //
+        if (tt == type::word)
+        {
+          string l (move (t.value));
+          trim (l); // Strip leading/trailing whitespaces.
+
+          // Decide whether this is id or summary.
+          //
+          (l.find_first_of (" \t") == string::npos ? r.id : r.summary) =
+            move (l);
+
+          next (t, tt); // Get newline.
+        }
+
+        assert (tt == type::newline); // Lexer mode invariant.
+
+        if (r.empty ())
+          fail (loc) << "empty description";
+
+        // Insert id into the id map if we have one.
+        //
+        if (pre_parse_ && !r.id.empty ())
+          insert_id (r.id, loc);
+
+        return r;
+      }
+
+      value parser::
+      parse_variable_line (token& t, type& tt)
+      {
+        // enter: assignment
+        // leave: newline or semi
+
+        // We cannot reuse the value mode since it will recognize { which we
+        // want to treat as a literal.
+        //
+        mode (lexer_mode::variable_line);
+        next (t, tt);
+
+        // Parse value attributes if any. Note that it's ok not to have
+        // anything after the attributes (e.g., foo=[null]).
+        //
+        attributes_push (t, tt, true);
+
+        // @@ PAT: Should we expand patterns? Note that it will only be
+        // simple ones since we have disabled {}. Also, what would be the
+        // pattern base directory?
+        //
+        return tt != type::newline && tt != type::semi
+          ? parse_value (t, tt,
+                         pattern_mode::ignore,
+                         "variable value",
+                         nullptr)
+          : value (names ());
+      }
+
+      command_expr parser::
+      parse_command_line (token& t, type& tt)
+      {
+        // enter: first token of the command line
+        // leave: <newline>
+
+        // Note: this one is only used during execution.
+
+        pair<command_expr, here_docs> p (parse_command_expr (t, tt));
+
+        switch (tt)
+        {
+        case type::colon: parse_trailing_description (t, tt); break;
+        case type::semi: next (t, tt); break; // Get newline.
+        }
+
+        assert (tt == type::newline);
+
+        parse_here_documents (t, tt, p);
+        assert (tt == type::newline);
+
+        return move (p.first);
+      }
+
+      // Parse the regular expression representation (non-empty string value
+      // framed with introducer characters and optionally followed by flag
+      // characters from the {di} set, for example '/foo/id') into
+      // components. Also return end-of-parsing position if requested,
+      // otherwise treat any unparsed characters left as an error.
+      //
+      struct regex_parts
+      {
+        string value;
+        char   intro;
+        string flags; // Combination of characters from {di} set.
+
+        // Create a special empty object.
+        //
+        regex_parts (): intro ('\0') {}
+
+        regex_parts (string v, char i, string f)
+            : value (move (v)), intro (i), flags (move (f)) {}
+      };
+
+      static regex_parts
+      parse_regex (const string& s,
+                   const location& l,
+                   const char* what,
+                   size_t* end = nullptr)
+      {
+        if (s.empty ())
+          fail (l) << "no introducer character in " << what;
+
+        size_t p (s.find (s[0], 1)); // Find terminating introducer.
+
+        if (p == string::npos)
+          fail (l) << "no closing introducer character in " << what;
+
+        size_t rn (p - 1); // Regex length.
+        if (rn == 0)
+          fail (l) << what << " is empty";
+
+        // Find end-of-flags position.
+        //
+        size_t fp (++p); // Save flags starting position.
+        for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ;
+
+        // If string end is not reached then report invalid flags, unless
+        // end-of-parsing position is requested (which means regex is just a
+        // prefix).
+        //
+        if (s[p] != '\0' && end == nullptr)
+          fail (l) << "junk at the end of " << what;
+
+        if (end != nullptr)
+          *end = p;
+
+        return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp));
+      }
+
+      pair<command_expr, parser::here_docs> parser::
+      parse_command_expr (token& t, type& tt)
+      {
+        // enter: first token of the command line
+        // leave: <newline>
+
+        command_expr expr;
+
+        // OR-ed to an implied false for the first term.
+        //
+        expr.push_back ({expr_operator::log_or, command_pipe ()});
+
+        command c; // Command being assembled.
+
+        // Make sure the command makes sense.
+        //
+        auto check_command = [&c, this] (const location& l, bool last)
+        {
+          if (c.out.type == redirect_type::merge &&
+              c.err.type == redirect_type::merge)
+            fail (l) << "stdout and stderr redirected to each other";
+
+          if (!last && c.out.type != redirect_type::none)
+            fail (l) << "stdout is both redirected and piped";
+        };
+
+        // Check that the introducer character differs from '/' if the
+        // portable path modifier is specified. Must be called before
+        // parse_regex() (see below) to make sure its diagnostics is
+        // meaningful.
+        //
+        // Note that the portable path modifier assumes '/' to be a valid
+        // regex character and so makes it indistinguishable from the
+        // terminating introducer.
+        //
+        auto check_regex_mod = [this] (const string& mod,
+                                       const string& re,
+                                       const location& l,
+                                       const char* what)
+        {
+          // Handles empty regex properly.
+          //
+          if (mod.find ('/') != string::npos && re[0] == '/')
+            fail (l) << "portable path modifier and '/' introducer in "
+                     << what;
+        };
+
+        // Pending positions where the next word should go.
+        //
+        enum class pending
+        {
+          none,
+          program,
+          in_string,
+          in_document,
+          in_file,
+          out_merge,
+          out_string,
+          out_str_regex,
+          out_document,
+          out_doc_regex,
+          out_file,
+          err_merge,
+          err_string,
+          err_str_regex,
+          err_document,
+          err_doc_regex,
+          err_file,
+          clean
+        };
+        pending p (pending::program);
+        string mod;   // Modifiers for pending in_* and out_* positions.
+        here_docs hd; // Expected here-documents.
+
+        // Add the next word to either one of the pending positions or to
+        // program arguments by default.
+        //
+        auto add_word = [&c, &p, &mod, &check_regex_mod, this] (
+          string&& w, const location& l)
+        {
+          auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
+          {
+            try
+            {
+              size_t n;
+              if (stoi (w, &n) == fd && n == w.size ())
+              {
+                r.fd = fd;
+                return;
+              }
+            }
+            catch (const exception&) {} // Fall through.
+
+            fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect "
+                     << "file descriptor must be " << fd;
+          };
+
+          auto add_here_str = [] (redirect& r, string&& w)
+          {
+            if (r.modifiers.find (':') == string::npos)
+              w += '\n';
+            r.str = move (w);
+          };
+
+          auto add_here_str_regex = [&l, &check_regex_mod] (
+            redirect& r, int fd, string&& w)
+          {
+            const char* what (nullptr);
+            switch (fd)
+            {
+            case 1: what = "stdout regex redirect"; break;
+            case 2: what = "stderr regex redirect"; break;
+            }
+
+            check_regex_mod (r.modifiers, w, l, what);
+
+            regex_parts rp (parse_regex (w, l, what));
+
+            regex_lines& re (r.regex);
+            re.intro = rp.intro;
+
+            re.lines.emplace_back (
+              l.line, l.column, move (rp.value), move (rp.flags));
+
+            // Add final blank line unless suppressed.
+            //
+            // Note that the position is synthetic, but that's ok as we don't
+            // expect any diagnostics to refer this line.
+            //
+            if (r.modifiers.find (':') == string::npos)
+              re.lines.emplace_back (l.line, l.column, string (), false);
+          };
+
+          auto parse_path = [&l, this] (string&& w, const char* what) -> path
+          {
+            try
+            {
+              path p (move (w));
+
+              if (!p.empty ())
+              {
+                p.normalize ();
+                return p;
+              }
+
+              fail (l) << "empty " << what << endf;
+            }
+            catch (const invalid_path& e)
+            {
+              fail (l) << "invalid " << what << " '" << e.path << "'" << endf;
+            }
+          };
+
+          auto add_file = [&parse_path] (redirect& r, int fd, string&& w)
+          {
+            const char* what (nullptr);
+            switch (fd)
+            {
+            case 0: what = "stdin redirect path";  break;
+            case 1: what = "stdout redirect path"; break;
+            case 2: what = "stderr redirect path"; break;
+            }
+
+            r.file.path = parse_path (move (w), what);
+          };
+
+          switch (p)
+          {
+          case pending::none: c.arguments.push_back (move (w)); break;
+          case pending::program:
+            c.program = parse_path (move (w), "program path");
+            break;
+
+          case pending::out_merge: add_merge (c.out, w, 2); break;
+          case pending::err_merge: add_merge (c.err, w, 1); break;
+
+          case pending::in_string:  add_here_str (c.in,  move (w)); break;
+          case pending::out_string: add_here_str (c.out, move (w)); break;
+          case pending::err_string: add_here_str (c.err, move (w)); break;
+
+          case pending::out_str_regex:
+            {
+              add_here_str_regex (c.out, 1, move (w));
+              break;
+            }
+          case pending::err_str_regex:
+            {
+              add_here_str_regex (c.err, 2, move (w));
+              break;
+            }
+
+            // These are handled specially below.
+            //
+          case pending::in_document:
+          case pending::out_document:
+          case pending::err_document:
+          case pending::out_doc_regex:
+          case pending::err_doc_regex: assert (false); break;
+
+          case pending::in_file:  add_file (c.in,  0, move (w)); break;
+          case pending::out_file: add_file (c.out, 1, move (w)); break;
+          case pending::err_file: add_file (c.err, 2, move (w)); break;
+
+          case pending::clean:
+            {
+              cleanup_type t;
+              switch (mod[0]) // Ok, if empty
+              {
+              case '!': t = cleanup_type::never;  break;
+              case '?': t = cleanup_type::maybe;  break;
+              default:  t = cleanup_type::always; break;
+              }
+
+              c.cleanups.push_back (
+                {t, parse_path (move (w), "cleanup path")});
+              break;
+            }
+          }
+
+          p = pending::none;
+          mod.clear ();
+        };
+
+        // Make sure we don't have any pending positions to fill.
+        //
+        auto check_pending = [&p, this] (const location& l)
+        {
+          const char* what (nullptr);
+
+          switch (p)
+          {
+          case pending::none:                                            break;
+          case pending::program:      what = "program";                  break;
+          case pending::in_string:    what = "stdin here-string";        break;
+          case pending::in_document:  what = "stdin here-document end";  break;
+          case pending::in_file:      what = "stdin file";               break;
+          case pending::out_merge:    what = "stdout file descriptor";   break;
+          case pending::out_string:   what = "stdout here-string";       break;
+          case pending::out_document: what = "stdout here-document end"; break;
+          case pending::out_file:     what = "stdout file";              break;
+          case pending::err_merge:    what = "stderr file descriptor";   break;
+          case pending::err_string:   what = "stderr here-string";       break;
+          case pending::err_document: what = "stderr here-document end"; break;
+          case pending::err_file:     what = "stderr file";              break;
+          case pending::clean:        what = "cleanup path";             break;
+
+          case pending::out_str_regex:
+            {
+              what = "stdout here-string regex";
+              break;
+            }
+          case pending::err_str_regex:
+            {
+              what = "stderr here-string regex";
+              break;
+            }
+          case pending::out_doc_regex:
+            {
+              what = "stdout here-document regex end";
+              break;
+            }
+          case pending::err_doc_regex:
+            {
+              what = "stderr here-document regex end";
+              break;
+            }
+          }
+
+          if (what != nullptr)
+            fail (l) << "missing " << what;
+        };
+
+        // Parse the redirect operator.
+        //
+        auto parse_redirect =
+          [&c, &expr, &p, &mod, this] (token& t, const location& l)
+        {
+          // Our semantics is the last redirect seen takes effect.
+          //
+          assert (p == pending::none && mod.empty ());
+
+          // See if we have the file descriptor.
+          //
+          unsigned long fd (3);
+          if (!t.separated)
+          {
+            if (c.arguments.empty ())
+              fail (l) << "missing redirect file descriptor";
+
+            const string& s (c.arguments.back ());
+
+            try
+            {
+              size_t n;
+              fd = stoul (s, &n);
+
+              if (n != s.size () || fd > 2)
+                throw invalid_argument (string ());
+            }
+            catch (const exception&)
+            {
+              fail (l) << "invalid redirect file descriptor '" << s << "'";
+            }
+
+            c.arguments.pop_back ();
+          }
+
+          type tt (t.type);
+
+          // Validate/set default file descriptor.
+          //
+          switch (tt)
+          {
+          case type::in_pass:
+          case type::in_null:
+          case type::in_str:
+          case type::in_doc:
+          case type::in_file:
+            {
+              if ((fd = fd == 3 ? 0 : fd) != 0)
+                fail (l) << "invalid in redirect file descriptor " << fd;
+
+              if (!expr.back ().pipe.empty ())
+                fail (l) << "stdin is both piped and redirected";
+
+              break;
+            }
+          case type::out_pass:
+          case type::out_null:
+          case type::out_trace:
+          case type::out_merge:
+          case type::out_str:
+          case type::out_doc:
+          case type::out_file_cmp:
+          case type::out_file_ovr:
+          case type::out_file_app:
+            {
+              if ((fd = fd == 3 ? 1 : fd) == 0)
+                fail (l) << "invalid out redirect file descriptor " << fd;
+
+              break;
+            }
+          }
+
+          mod = move (t.value);
+
+          redirect_type rt (redirect_type::none);
+          switch (tt)
+          {
+          case type::in_pass:
+          case type::out_pass:  rt = redirect_type::pass;  break;
+
+          case type::in_null:
+          case type::out_null:  rt = redirect_type::null;  break;
+
+          case type::out_trace: rt = redirect_type::trace; break;
+
+          case type::out_merge: rt = redirect_type::merge; break;
+
+          case type::in_str:
+          case type::out_str:
+            {
+              bool re (mod.find ('~') != string::npos);
+              assert (tt == type::out_str || !re);
+
+              rt = re
+                ? redirect_type::here_str_regex
+                : redirect_type::here_str_literal;
+
+              break;
+            }
+
+          case type::in_doc:
+          case type::out_doc:
+            {
+              bool re (mod.find ('~') != string::npos);
+              assert (tt == type::out_doc || !re);
+
+              rt = re
+                ? redirect_type::here_doc_regex
+                : redirect_type::here_doc_literal;
+
+              break;
+            }
+
+          case type::in_file:
+          case type::out_file_cmp:
+          case type::out_file_ovr:
+          case type::out_file_app: rt = redirect_type::file; break;
+          }
+
+          redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err);
+          r = redirect (rt);
+
+          // Don't move as still may be used for pending here-document end
+          // marker processing.
+          //
+          r.modifiers = mod;
+
+          switch (rt)
+          {
+          case redirect_type::none:
+          case redirect_type::pass:
+          case redirect_type::null:
+          case redirect_type::trace:
+            break;
+          case redirect_type::merge:
+            switch (fd)
+            {
+            case 0: assert (false);         break;
+            case 1: p = pending::out_merge; break;
+            case 2: p = pending::err_merge; break;
+            }
+            break;
+          case redirect_type::here_str_literal:
+            switch (fd)
+            {
+            case 0: p = pending::in_string;  break;
+            case 1: p = pending::out_string; break;
+            case 2: p = pending::err_string; break;
+            }
+            break;
+          case redirect_type::here_str_regex:
+            switch (fd)
+            {
+            case 0: assert (false);             break;
+            case 1: p = pending::out_str_regex; break;
+            case 2: p = pending::err_str_regex; break;
+            }
+            break;
+          case redirect_type::here_doc_literal:
+            switch (fd)
+            {
+            case 0: p = pending::in_document;  break;
+            case 1: p = pending::out_document; break;
+            case 2: p = pending::err_document; break;
+            }
+            break;
+          case redirect_type::here_doc_regex:
+            switch (fd)
+            {
+            case 0: assert (false);             break;
+            case 1: p = pending::out_doc_regex; break;
+            case 2: p = pending::err_doc_regex; break;
+            }
+            break;
+          case redirect_type::file:
+            switch (fd)
+            {
+            case 0: p = pending::in_file;  break;
+            case 1: p = pending::out_file; break;
+            case 2: p = pending::err_file; break;
+            }
+
+            // Also sets for stdin, but this is harmless.
+            //
+            r.file.mode = tt == type::out_file_ovr
+              ? redirect_fmode::overwrite
+              : (tt == type::out_file_app
+                 ? redirect_fmode::append
+                 : redirect_fmode::compare);
+
+            break;
+
+          case redirect_type::here_doc_ref: assert (false); break;
+          }
+        };
+
+        // Set pending cleanup type.
+        //
+        auto parse_clean = [&p, &mod] (token& t)
+        {
+          p = pending::clean;
+          mod = move (t.value);
+        };
+
+        const location ll (get_location (t)); // Line location.
+
+        // Keep parsing chunks of the command line until we see one of the
+        // "terminators" (newline, semicolon, exit status comparison, etc).
+        //
+        location l (ll);
+        names ns; // Reuse to reduce allocations.
+
+        for (bool done (false); !done; l = get_location (t))
+        {
+          switch (tt)
+          {
+          case type::semi:
+          case type::colon:
+          case type::newline:
+            {
+              done = true;
+              break;
+            }
+
+          case type::equal:
+          case type::not_equal:
+            {
+              if (!pre_parse_)
+                check_pending (l);
+
+              c.exit = parse_command_exit (t, tt);
+
+              // Only a limited set of things can appear after the exit status
+              // so we check this here.
+              //
+              switch (tt)
+              {
+              case type::semi:
+              case type::colon:
+              case type::newline:
+
+              case type::pipe:
+              case type::log_or:
+              case type::log_and:
+                break;
+              default:
+                fail (t) << "unexpected " << t << " after command exit status";
+              }
+
+              break;
+            }
+
+          case type::pipe:
+          case type::log_or:
+          case type::log_and:
+
+          case type::in_pass:
+          case type::out_pass:
+
+          case type::in_null:
+          case type::out_null:
+
+          case type::out_trace:
+
+          case type::out_merge:
+
+          case type::in_str:
+          case type::in_doc:
+          case type::out_str:
+          case type::out_doc:
+
+          case type::in_file:
+          case type::out_file_cmp:
+          case type::out_file_ovr:
+          case type::out_file_app:
+
+          case type::clean:
+            {
+              if (pre_parse_)
+              {
+                // The only things we need to handle here are the here-document
+                // and here-document regex end markers since we need to know
+                // how many of them to pre-parse after the command.
+                //
+                switch (tt)
+                {
+                case type::in_doc:
+                case type::out_doc:
+                  mod = move (t.value);
+
+                  bool re (mod.find ('~') != string::npos);
+                  const char* what (re
+                                    ? "here-document regex end marker"
+                                    : "here-document end marker");
+
+                  // We require the end marker to be a literal, unquoted word.
+                  // In particularm, we don't allow quoted because of cases
+                  // like foo"$bar" (where we will see word 'foo').
+                  //
+                  next (t, tt);
+
+                  // We require the end marker to be an unquoted or completely
+                  // quoted word. The complete quoting becomes important for
+                  // cases like foo"$bar" (where we will see word 'foo').
+                  //
+                  // For good measure we could have also required it to be
+                  // separated from the following token, but out grammar
+                  // allows one to write >>EOO;. The problematic sequence
+                  // would be >>FOO$bar -- on reparse it will be expanded
+                  // as a single word.
+                  //
+                  if (tt != type::word || t.value.empty ())
+                    fail (t) << "expected " << what;
+
+                  peek ();
+                  const token& p (peeked ());
+                  if (!p.separated)
+                  {
+                    switch (p.type)
+                    {
+                    case type::dollar:
+                    case type::lparen:
+                      fail (p) << what << " must be literal";
+                    }
+                  }
+
+                  quote_type qt (t.qtype);
+                  switch (qt)
+                  {
+                  case quote_type::unquoted:
+                    qt = quote_type::single; // Treat as single-quoted.
+                    break;
+                  case quote_type::single:
+                  case quote_type::double_:
+                    if (t.qcomp)
+                      break;
+                    // Fall through.
+                  case quote_type::mixed:
+                    fail (t) << "partially-quoted " << what;
+                  }
+
+                  regex_parts r;
+                  string end (move (t.value));
+
+                  if (re)
+                  {
+                    check_regex_mod (mod, end, l, what);
+
+                    r = parse_regex (end, l, what);
+                    end = move (r.value); // The "cleared" end marker.
+                  }
+
+                  bool literal (qt == quote_type::single);
+                  bool shared (false);
+
+                  for (const auto& d: hd)
+                  {
+                    if (d.end == end)
+                    {
+                      auto check = [&t, &end, &re, this] (bool c,
+                                                          const char* what)
+                      {
+                        if (!c)
+                          fail (t) << "different " << what
+                                   << " for shared here-document "
+                                   << (re ? "regex '" : "'") << end << "'";
+                      };
+
+                      check (d.modifiers == mod, "modifiers");
+                      check (d.literal == literal, "quoting");
+
+                      if (re)
+                      {
+                        check (d.regex == r.intro, "introducers");
+                        check (d.regex_flags == r.flags, "global flags");
+                      }
+
+                      shared = true;
+                      break;
+                    }
+                  }
+
+                  if (!shared)
+                    hd.push_back (
+                      here_doc {
+                        {},
+                        move (end),
+                        literal,
+                        move (mod),
+                        r.intro, move (r.flags)});
+
+                  break;
+                }
+
+                next (t, tt);
+                break;
+              }
+
+              // If this is one of the operators/separators, check that we
+              // don't have any pending locations to be filled.
+              //
+              check_pending (l);
+
+              // Note: there is another one in the inner loop below.
+              //
+              switch (tt)
+              {
+              case type::pipe:
+              case type::log_or:
+              case type::log_and:
+                {
+                  // Check that the previous command makes sense.
+                  //
+                  check_command (l, tt != type::pipe);
+                  expr.back ().pipe.push_back (move (c));
+
+                  c = command ();
+                  p = pending::program;
+
+                  if (tt != type::pipe)
+                  {
+                    expr_operator o (tt == type::log_or
+                                     ? expr_operator::log_or
+                                     : expr_operator::log_and);
+                    expr.push_back ({o, command_pipe ()});
+                  }
+
+                  break;
+                }
+
+              case type::in_pass:
+              case type::out_pass:
+
+              case type::in_null:
+              case type::out_null:
+
+              case type::out_trace:
+
+              case type::out_merge:
+
+              case type::in_str:
+              case type::in_doc:
+              case type::out_str:
+              case type::out_doc:
+
+              case type::in_file:
+              case type::out_file_cmp:
+              case type::out_file_ovr:
+              case type::out_file_app:
+                {
+                  parse_redirect (t, l);
+                  break;
+                }
+
+              case type::clean:
+                {
+                  parse_clean (t);
+                  break;
+                }
+
+              default: assert (false); break;
+              }
+
+              next (t, tt);
+              break;
+            }
+          default:
+            {
+              // Here-document end markers are literal (we verified that above
+              // during pre-parsing) and we need to know whether they were
+              // quoted. So handle this case specially.
+              //
+              {
+                int fd;
+                switch (p)
+                {
+                case pending::in_document:   fd =  0; break;
+                case pending::out_document:
+                case pending::out_doc_regex: fd =  1; break;
+                case pending::err_document:
+                case pending::err_doc_regex: fd =  2; break;
+                default:                     fd = -1; break;
+                }
+
+                if (fd != -1)
+                {
+                  here_redirect rd {
+                    expr.size () - 1, expr.back ().pipe.size (), fd};
+
+                  string end (move (t.value));
+
+                  regex_parts r;
+
+                  if (p == pending::out_doc_regex ||
+                      p == pending::err_doc_regex)
+                  {
+                    // We can't fail here as we already parsed all the end
+                    // markers during pre-parsing stage, and so no need in the
+                    // description.
+                    //
+                    r = parse_regex (end, l, "");
+                    end = move (r.value); // The "cleared" end marker.
+                  }
+
+                  bool shared (false);
+                  for (auto& d: hd)
+                  {
+                    // No need to check that redirects that share here-document
+                    // have the same modifiers, etc. That have been done during
+                    // pre-parsing.
+                    //
+                    if (d.end == end)
+                    {
+                      d.redirects.emplace_back (rd);
+                      shared = true;
+                      break;
+                    }
+                  }
+
+                  if (!shared)
+                    hd.push_back (
+                      here_doc {
+                        {rd},
+                        move (end),
+                        (t.qtype == quote_type::unquoted ||
+                         t.qtype == quote_type::single),
+                        move (mod),
+                        r.intro, move (r.flags)});
+
+                  p = pending::none;
+                  mod.clear ();
+
+                  next (t, tt);
+                  break;
+                }
+              }
+
+              // Parse the next chunk as simple names to get expansion, etc.
+              // Note that we do it in the chunking mode to detect whether
+              // anything in each chunk is quoted.
+              //
+              // @@ PAT: should we support pattern expansion? This is even
+              // fuzzier than the variable case above. Though this is the
+              // shell semantics. Think what happens when we do rm *.txt?
+              //
+              reset_quoted (t);
+              parse_names (t, tt,
+                           ns,
+                           pattern_mode::ignore,
+                           true,
+                           "command line",
+                           nullptr);
+
+              if (pre_parse_) // Nothing else to do if we are pre-parsing.
+                break;
+
+              // Process what we got. Determine whether anything inside was
+              // quoted (note that the current token is "next" and is not part
+              // of this).
+              //
+              bool q ((quoted () -
+                       (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
+
+              for (name& n: ns)
+              {
+                string s;
+
+                try
+                {
+                  s = value_traits<string>::convert (move (n), nullptr);
+                }
+                catch (const invalid_argument&)
+                {
+                  diag_record dr (fail (l));
+                  dr << "invalid string value ";
+                  to_stream (dr.os, n, true); // Quote.
+                }
+
+                // If it is a quoted chunk, then we add the word as is.
+                // Otherwise we re-lex it. But if the word doesn't contain any
+                // interesting characters (operators plus quotes/escapes),
+                // then no need to re-lex.
+                //
+                // NOTE: update quoting (script.cxx:to_stream_q()) if adding
+                // any new characters.
+                //
+                if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+                  add_word (move (s), l);
+                else
+                {
+                  // If the chunk re-parsing results in error, our diagnostics
+                  // will look like this:
+                  //
+                  // <string>:1:4: error: stdout merge redirect file descriptor must be 2
+                  //   testscript:2:5: info: while parsing string '1>&a'
+                  //
+                  auto df = make_diag_frame (
+                    [s, &l](const diag_record& dr)
+                    {
+                      dr << info (l) << "while parsing string '" << s << "'";
+                    });
+
+                  // When re-lexing we do "effective escaping" and only for
+                  // ['"\] (quotes plus the backslash itself). In particular,
+                  // there is no way to escape redirects, operators, etc. The
+                  // idea is to prefer quoting except for passing literal
+                  // quotes, for example:
+                  //
+                  // args = \"&foo\"
+                  // cmd $args               # cmd &foo
+                  //
+                  // args = 'x=\"foo bar\"'
+                  // cmd $args               # cmd x="foo bar"
+                  //
+
+                  path name ("<string>");
+                  istringstream is (s);
+                  lexer lex (is, name,
+                             lexer_mode::command_expansion,
+                             "\'\"\\");
+
+                  // Treat the first "sub-token" as always separated from what
+                  // we saw earlier.
+                  //
+                  // Note that this is not "our" token so we cannot do
+                  // fail(t). Rather we should do fail(l).
+                  //
+                  token t (lex.next ());
+                  location l (build2::get_location (t, name));
+                  t.separated = true;
+
+                  string w;
+                  bool f (t.type == type::eos); // If the whole thing is empty.
+
+                  for (; t.type != type::eos; t = lex.next ())
+                  {
+                    type tt (t.type);
+                    l = build2::get_location (t, name);
+
+                    // Re-lexing double-quotes will recognize $, ( inside as
+                    // tokens so we have to reverse them back. Since we don't
+                    // treat spaces as separators we can be sure we will get
+                    // it right.
+                    //
+                    switch (tt)
+                    {
+                    case type::dollar: w += '$'; continue;
+                    case type::lparen: w += '('; continue;
+                    }
+
+                    // Retire the current word. We need to distinguish between
+                    // empty and non-existent (e.g., > vs >"").
+                    //
+                    if (!w.empty () || f)
+                    {
+                      add_word (move (w), l);
+                      f = false;
+                    }
+
+                    if (tt == type::word)
+                    {
+                      w = move (t.value);
+                      f = true;
+                      continue;
+                    }
+
+                    // If this is one of the operators/separators, check that
+                    // we don't have any pending locations to be filled.
+                    //
+                    check_pending (l);
+
+                    // Note: there is another one in the outer loop above.
+                    //
+                    switch (tt)
+                    {
+                    case type::pipe:
+                    case type::log_or:
+                    case type::log_and:
+                      {
+                        // Check that the previous command makes sense.
+                        //
+                        check_command (l, tt != type::pipe);
+                        expr.back ().pipe.push_back (move (c));
+
+                        c = command ();
+                        p = pending::program;
+
+                        if (tt != type::pipe)
+                        {
+                          expr_operator o (tt == type::log_or
+                                           ? expr_operator::log_or
+                                           : expr_operator::log_and);
+                          expr.push_back ({o, command_pipe ()});
+                        }
+
+                        break;
+                      }
+
+                    case type::in_pass:
+                    case type::out_pass:
+
+                    case type::in_null:
+                    case type::out_null:
+
+                    case type::out_trace:
+
+                    case type::out_merge:
+
+                    case type::in_str:
+                    case type::out_str:
+
+                    case type::in_file:
+                    case type::out_file_cmp:
+                    case type::out_file_ovr:
+                    case type::out_file_app:
+                      {
+                        parse_redirect (t, l);
+                        break;
+                      }
+
+                    case type::clean:
+                      {
+                        parse_clean (t);
+                        break;
+                      }
+
+                    case type::in_doc:
+                    case type::out_doc:
+                      {
+                        fail (l) << "here-document redirect in expansion";
+                        break;
+                      }
+                    }
+                  }
+
+                  // Don't forget the last word.
+                  //
+                  if (!w.empty () || f)
+                    add_word (move (w), l);
+                }
+              }
+
+              ns.clear ();
+              break;
+            }
+          }
+        }
+
+        if (!pre_parse_)
+        {
+          // Verify we don't have anything pending to be filled and the
+          // command makes sense.
+          //
+          check_pending (l);
+          check_command (l, true);
+
+          expr.back ().pipe.push_back (move (c));
+        }
+
+        return make_pair (move (expr), move (hd));
+      }
+
+      command_exit parser::
+      parse_command_exit (token& t, type& tt)
+      {
+        // enter: equal/not_equal
+        // leave: token after exit status (one parse_names() chunk)
+
+        exit_comparison comp (tt == type::equal
+                              ? exit_comparison::eq
+                              : exit_comparison::ne);
+
+        // The next chunk should be the exit status.
+        //
+        next (t, tt);
+        location l (get_location (t));
+        names ns (parse_names (t, tt,
+                               pattern_mode::ignore,
+                               true,
+                               "exit status",
+                               nullptr));
+        unsigned long es (256);
+
+        if (!pre_parse_)
+        {
+          try
+          {
+            if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
+              es = stoul (ns[0].value);
+          }
+          catch (const exception&) {} // Fall through.
+
+          if (es > 255)
+          {
+            diag_record dr;
+
+            dr << fail (l) << "expected exit status instead of ";
+            to_stream (dr.os, ns, true); // Quote.
+
+            dr << info << "exit status is an unsigned integer less than 256";
+          }
+        }
+
+        return command_exit {comp, static_cast<uint8_t> (es)};
+      }
+
+      void parser::
+      parse_here_documents (token& t, type& tt,
+                            pair<command_expr, here_docs>& p)
+      {
+        // enter: newline
+        // leave: newline
+
+        // Parse here-document fragments in the order they were mentioned on
+        // the command line.
+        //
+        for (here_doc& h: p.second)
+        {
+          // Switch to the here-line mode which is like single/double-quoted
+          // string but recognized the newline as a separator.
+          //
+          mode (h.literal
+                ? lexer_mode::here_line_single
+                : lexer_mode::here_line_double);
+          next (t, tt);
+
+          parsed_doc v (
+            parse_here_document (t, tt, h.end, h.modifiers, h.regex));
+
+          if (!pre_parse_)
+          {
+            assert (!h.redirects.empty ());
+            auto i (h.redirects.cbegin ());
+
+            command& c (p.first[i->expr].pipe[i->pipe]);
+            redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err);
+
+            if (v.re)
+            {
+              r.regex = move (v.regex);
+              r.regex.flags = move (h.regex_flags);
+            }
+            else
+              r.str = move (v.str);
+
+            r.end        = move (h.end);
+            r.end_line   = v.end_line;
+            r.end_column = v.end_column;
+
+            // Note that our references cannot be invalidated because the
+            // command_expr/command-pipe vectors already contain all their
+            // elements.
+            //
+            for (++i; i != h.redirects.cend (); ++i)
+            {
+              command& c (p.first[i->expr].pipe[i->pipe]);
+
+              (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) =
+                redirect (redirect_type::here_doc_ref, r);
+            }
+          }
+
+          expire_mode ();
+        }
+      }
+
+      parser::parsed_doc parser::
+      parse_here_document (token& t, type& tt,
+                           const string& em,
+                           const string& mod,
+                           char re)
+      {
+        // enter: first token on first line
+        // leave: newline (after end marker)
+
+        // String literal. Note that when decide if to terminate the previously
+        // added line with a newline, we need to distinguish a yet empty result
+        // and the one that has a single blank line added.
+        //
+        optional<string> rs;
+
+        regex_lines rre;
+
+        // Here-documents can be indented. The leading whitespaces of the end
+        // marker line (called strip prefix) determine the indentation. Every
+        // other line in the here-document should start with this prefix which
+        // is automatically stripped. The only exception is a blank line.
+        //
+        // The fact that the strip prefix is only known at the end, after
+        // seeing all the lines, is rather inconvenient. As a result, the way
+        // we implement this is a bit hackish (though there is also something
+        // elegant about it): at the end of the pre-parse stage we are going
+        // re-examine the sequence of tokens that comprise this here-document
+        // and "fix up" the first token of each line by stripping the prefix.
+        //
+        string sp;
+
+        // Remember the position of the first token in this here-document.
+        //
+        size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
+
+        // We will use the location of the first token on the line for the
+        // regex diagnostics. At the end of the loop it will point to the
+        // beginning of the end marker.
+        //
+        location l;
+
+        while (tt != type::eos)
+        {
+          l = get_location (t);
+
+          // Check if this is the end marker. For starters, it should be a
+          // single, unquoted word followed by a newline.
+          //
+          if (tt == type::word &&
+              t.qtype == quote_type::unquoted &&
+              peek () == type::newline)
+          {
+            const string& v (t.value);
+
+            size_t vn (v.size ());
+            size_t en (em.size ());
+
+            // Then check that it ends with the end marker.
+            //
+            if (vn >= en && v.compare (vn - en, en, em) == 0)
+            {
+              // Now check that the prefix only contains whitespaces.
+              //
+              size_t n (vn - en);
+
+              if (v.find_first_not_of (" \t") >= n)
+              {
+                assert (pre_parse_ || n == 0); // Should have been stripped.
+
+                if (n != 0)
+                  sp.assign (v, 0, n); // Save the strip prefix.
+
+                next (t, tt); // Get the newline.
+                break;
+              }
+            }
+          }
+
+          // Expand the line (can be blank).
+          //
+          // @@ PAT: one could argue that if we do it in variables, then we
+          // should do it here as well. Though feels bizarre.
+          //
+          names ns (tt != type::newline
+                    ? parse_names (t, tt,
+                                   pattern_mode::ignore,
+                                   false,
+                                   "here-document line",
+                                   nullptr)
+                    : names ());
+
+          if (!pre_parse_)
+          {
+            // What shall we do if the expansion results in multiple names?
+            // For, example if the line contains just the variable expansion
+            // and it is of type strings. Adding all the elements space-
+            // separated seems like the natural thing to do.
+            //
+            string s;
+            for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
+            {
+              string n;
+
+              try
+              {
+                n = value_traits<string>::convert (move (*i), nullptr);
+              }
+              catch (const invalid_argument&)
+              {
+                fail (l) << "invalid string value '" << *i << "'";
+              }
+
+              if (i == b)
+                s = move (n);
+              else
+              {
+                s += ' ';
+                s += n;
+              }
+            }
+
+            if (!re)
+            {
+              // Add newline after previous line.
+              //
+              if (rs)
+              {
+                *rs += '\n';
+                *rs += s;
+              }
+              else
+                rs = move (s);
+            }
+            else
+            {
+              // Due to expansion we can end up with multiple lines. If empty
+              // then will add a blank textual literal.
+              //
+              for (size_t p (0); p != string::npos; )
+              {
+                string ln;
+                size_t np (s.find ('\n', p));
+
+                if (np != string::npos)
+                {
+                  ln = string (s, p, np - p);
+                  p = np + 1;
+                }
+                else
+                {
+                  ln = string (s, p);
+                  p = np;
+                }
+
+                if (ln[0] != re) // Line doesn't start with regex introducer.
+                {
+                  // This is a line-char literal (covers blank lines as well).
+                  //
+                  // Append textual literal.
+                  //
+                  rre.lines.emplace_back (l.line, l.column, move (ln), false);
+                }
+                else // Line starts with the regex introducer.
+                {
+                  // This is a char-regex, or a sequence of line-regex syntax
+                  // characters or both (in this specific order). So we will
+                  // add regex (with optional special characters) or special
+                  // literal.
+                  //
+                  size_t p (ln.find (re, 1));
+                  if (p == string::npos)
+                  {
+                    // No regex, just a sequence of syntax characters.
+                    //
+                    string spec (ln, 1);
+                    if (spec.empty ())
+                      fail (l) << "no syntax line characters";
+
+                    // Append special literal.
+                    //
+                    rre.lines.emplace_back (
+                      l.line, l.column, move (spec), true);
+                  }
+                  else
+                  {
+                    // Regex (probably with syntax characters).
+                    //
+                    regex_parts re;
+
+                    // Empty regex is a special case repesenting a blank line.
+                    //
+                    if (p == 1)
+                      // Position to optional specal characters of an empty
+                      // regex.
+                      //
+                      ++p;
+                    else
+                      // Can't fail as all the pre-conditions verified
+                      // (non-empty with both introducers in place), so no
+                      // description required.
+                      //
+                      re = parse_regex (ln, l, "", &p);
+
+                    // Append regex with optional special characters.
+                    //
+                    rre.lines.emplace_back (l.line, l.column,
+                                            move (re.value), move (re.flags),
+                                            string (ln, p));
+                  }
+                }
+              }
+            }
+          }
+
+          // We should expand the whole line at once so this would normally be
+          // a newline but can also be an end-of-stream.
+          //
+          if (tt == type::newline)
+            next (t, tt);
+          else
+            assert (tt == type::eos);
+        }
+
+        if (tt == type::eos)
+          fail (t) << "missing here-document end marker '" << em << "'";
+
+        if (pre_parse_)
+        {
+          // Strip the indentation prefix if there is one.
+          //
+          assert (replay_ == replay::save);
+
+          if (!sp.empty ())
+          {
+            size_t sn (sp.size ());
+
+            for (; ri != replay_data_.size (); ++ri)
+            {
+              token& rt (replay_data_[ri].token);
+
+              if (rt.type == type::newline) // Blank
+                continue;
+
+              if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
+                fail (rt) << "unindented here-document line";
+
+              // If the word is equal to the strip prefix then we have to drop
+              // the token. Note that simply making it an empty word won't
+              // have the same semantics. For instance, it would trigger
+              // concatenated expansion.
+              //
+              if (rt.value.size () == sn)
+                replay_data_.erase (replay_data_.begin () + ri);
+              else
+              {
+                rt.value.erase (0, sn);
+                rt.column += sn;
+                ++ri;
+              }
+
+              // Skip until next newline.
+              //
+              for (; replay_data_[ri].token.type != type::newline; ++ri) ;
+            }
+          }
+        }
+        else
+        {
+          // Add final newline unless suppressed.
+          //
+          if (mod.find (':') == string::npos)
+          {
+            if (re)
+              // Note that the position is synthetic, but that's ok as we don't
+              // expect any diagnostics to refer this line.
+              //
+              rre.lines.emplace_back (l.line, l.column, string (), false);
+            else if (rs)
+              *rs += '\n';
+            else
+              rs = "\n";
+          }
+
+          // Finalize regex lines.
+          //
+          if (re)
+          {
+            // Empty regex matches nothing, so not of much use.
+            //
+            if (rre.lines.empty ())
+              fail (l) << "empty here-document regex";
+
+            rre.intro  = re;
+          }
+        }
+
+        return re
+          ? parsed_doc (move (rre), l.line, l.column)
+          : parsed_doc (rs ? move (*rs) : string (), l.line, l.column);
+      }
+
+      //
+      // Execute.
+      //
+
+      void parser::
+      execute (script& s, runner& r)
+      {
+        assert (s.state == scope_state::unknown);
+
+        auto g (
+          make_exception_guard (
+            [&s] () {s.state = scope_state::failed;}));
+
+        if (!s.empty ())
+          execute (s, s, r);
+        else
+          s.state = scope_state::passed;
+      }
+
+      void parser::
+      execute (scope& sc, script& s, runner& r)
+      {
+        path_ = nullptr; // Set by replays.
+
+        pre_parse_ = false;
+
+        set_lexer (nullptr);
+
+        script_ = &s;
+        runner_ = &r;
+        group_ = nullptr;
+        id_map_ = nullptr;
+        include_set_ = nullptr;
+        scope_ = &sc;
+
+        //@@ PAT TODO: set pbase_?
+
+        exec_scope_body ();
+      }
+
+      static void
+      execute_impl (scope& s, script& scr, runner& r)
+      {
+        try
+        {
+          parser p;
+          p.execute (s, scr, r);
+        }
+        catch (const failed&)
+        {
+          s.state = scope_state::failed;
+        }
+      }
+
+      void parser::
+      exec_scope_body ()
+      {
+        size_t li (0);
+
+        runner_->enter (*scope_, scope_->start_loc_);
+
+        if (test* t = dynamic_cast<test*> (scope_))
+        {
+          exec_lines (
+            t->tests_.begin (), t->tests_.end (), li, command_type::test);
+        }
+        else if (group* g = dynamic_cast<group*> (scope_))
+        {
+          bool exec_scope (
+            exec_lines (
+              g->setup_.begin (), g->setup_.end (), li, command_type::setup));
+
+          if (exec_scope)
+          {
+            atomic_count task_count (0);
+            wait_guard wg (task_count);
+
+            // Start asynchronous execution of inner scopes keeping track of
+            // how many we have handled.
+            //
+            for (unique_ptr<scope>& chain: g->scopes)
+            {
+              // Check if this scope is ignored (e.g., via config.test).
+              //
+              if (!runner_->test (*chain) || !exec_scope)
+              {
+                chain = nullptr;
+                continue;
+              }
+
+              // Pick a scope from the if-else chain.
+              //
+              // In fact, we are going to drop all but the selected (if any)
+              // scope. This way we can re-examine the scope states later. It
+              // will also free some memory.
+              //
+              unique_ptr<scope>* ps;
+              for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain)
+              {
+                scope& s (**ps);
+
+                if (!s.if_cond_) // Unconditional.
+                {
+                  assert (s.if_chain == nullptr);
+                  break;
+                }
+
+                line l (move (*s.if_cond_));
+                line_type lt (l.type);
+
+                replay_data (move (l.tokens));
+
+                token t;
+                type tt;
+
+                next (t, tt);
+                const location ll (get_location (t));
+                next (t, tt); // Skip to start of command.
+
+                bool take;
+                if (lt != line_type::cmd_else)
+                {
+                  // Note: the line index count continues from setup.
+                  //
+                  command_expr ce (parse_command_line (t, tt));
+
+                  try
+                  {
+                    take = runner_->run_if (*scope_, ce, ++li, ll);
+                  }
+                  catch (const exit_scope& e)
+                  {
+                    // Bail out if the scope is exited with the failure status.
+                    // Otherwise leave the scope normally.
+                    //
+                    if (!e.status)
+                      throw failed ();
+
+                    // Stop iterating through if conditions, and stop executing
+                    // inner scopes.
+                    //
+                    exec_scope = false;
+                    replay_stop ();
+                    break;
+                  }
+
+                  if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
+                    take = !take;
+                }
+                else
+                {
+                  assert (tt == type::newline);
+                  take = true;
+                }
+
+                replay_stop ();
+
+                if (take)
+                {
+                  // Count the remaining conditions for the line index.
+                  //
+                  for (scope* r (s.if_chain.get ());
+                       r != nullptr &&
+                         r->if_cond_->type != line_type::cmd_else;
+                       r = r->if_chain.get ())
+                    ++li;
+
+                  s.if_chain.reset (); // Drop remaining scopes.
+                  break;
+                }
+              }
+
+              chain.reset (*ps == nullptr || (*ps)->empty () || !exec_scope
+                           ? nullptr
+                           : ps->release ());
+
+              if (chain != nullptr)
+              {
+                // Hand it off to a sub-parser potentially in another thread.
+                // But we could also have handled it serially in this parser:
+                //
+                // scope* os (scope_);
+                // scope_ = chain.get ();
+                // exec_scope_body ();
+                // scope_ = os;
+
+                // Pass our diagnostics stack (this is safe since we are going
+                // to wait for completion before unwinding the diag stack).
+                //
+                // If the scope was executed synchronously, check the status
+                // and bail out if we weren't asked to keep going.
+                //
+                // UBSan workaround.
+                //
+                const diag_frame* df (diag_frame::stack ());
+                if (!sched.async (task_count,
+                                  [] (const diag_frame* ds,
+                                      scope& s,
+                                      script& scr,
+                                      runner& r)
+                                  {
+                                    diag_frame::stack_guard dsg (ds);
+                                    execute_impl (s, scr, r);
+                                  },
+                                  df,
+                                  ref (*chain),
+                                  ref (*script_),
+                                  ref (*runner_)))
+                {
+                  // Bail out if the scope has failed and we weren't instructed
+                  // to keep going.
+                  //
+                  if (chain->state == scope_state::failed && !keep_going)
+                    throw failed ();
+                }
+              }
+            }
+
+            wg.wait ();
+
+            // Re-examine the scopes we have executed collecting their state.
+            //
+            for (const unique_ptr<scope>& chain: g->scopes)
+            {
+              if (chain == nullptr)
+                continue;
+
+              switch (chain->state)
+              {
+              case scope_state::passed: break;
+              case scope_state::failed: throw failed ();
+              default:                  assert (false);
+              }
+            }
+          }
+
+          exec_lines (
+            g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown);
+        }
+        else
+          assert (false);
+
+        runner_->leave (*scope_, scope_->end_loc_);
+
+        scope_->state = scope_state::passed;
+      }
+
+      bool parser::
+      exec_lines (lines::iterator i, lines::iterator e,
+                  size_t& li,
+                  command_type ct)
+      {
+        try
+        {
+          token t;
+          type tt;
+
+          for (; i != e; ++i)
+          {
+            line& ln (*i);
+            line_type lt (ln.type);
+
+            assert (path_ == nullptr);
+
+            // Set the tokens and start playing.
+            //
+            replay_data (move (ln.tokens));
+
+            // We don't really need to change the mode since we already know
+            // the line type.
+            //
+            next (t, tt);
+            const location ll (get_location (t));
+
+            switch (lt)
+            {
+            case line_type::var:
+              {
+                // Parse.
+                //
+                string name (move (t.value));
+
+                next (t, tt);
+                type kind (tt); // Assignment kind.
+
+                value rhs (parse_variable_line (t, tt));
+
+                if (tt == type::semi)
+                  next (t, tt);
+
+                assert (tt == type::newline);
+
+                // Assign.
+                //
+                const variable& var (*ln.var);
+
+                value& lhs (kind == type::assign
+                            ? scope_->assign (var)
+                            : scope_->append (var));
+
+                build2::parser::apply_value_attributes (
+                  &var, lhs, move (rhs), kind);
+
+                // If we changes any of the test.* values, then reset the $*,
+                // $N special aliases.
+                //
+                if (var.name == script_->test_var.name      ||
+                    var.name == script_->options_var.name   ||
+                    var.name == script_->arguments_var.name ||
+                    var.name == script_->redirects_var.name ||
+                    var.name == script_->cleanups_var.name)
+                {
+                  scope_->reset_special ();
+                }
+
+                replay_stop ();
+                break;
+              }
+            case line_type::cmd:
+              {
+                // We use the 0 index to signal that this is the only command.
+                // Note that we only do this for test commands.
+                //
+                if (ct == command_type::test && li == 0)
+                {
+                  lines::iterator j (i);
+                  for (++j; j != e && j->type == line_type::var; ++j) ;
+
+                  if (j != e) // We have another command.
+                    ++li;
+                }
+                else
+                  ++li;
+
+                command_expr ce (parse_command_line (t, tt));
+                runner_->run (*scope_, ce, ct, li, ll);
+
+                replay_stop ();
+                break;
+              }
+            case line_type::cmd_if:
+            case line_type::cmd_ifn:
+            case line_type::cmd_elif:
+            case line_type::cmd_elifn:
+            case line_type::cmd_else:
+              {
+                next (t, tt); // Skip to start of command.
+
+                bool take;
+                if (lt != line_type::cmd_else)
+                {
+                  // Assume if-else always involves multiple commands.
+                  //
+                  command_expr ce (parse_command_line (t, tt));
+                  take = runner_->run_if (*scope_, ce, ++li, ll);
+
+                  if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
+                    take = !take;
+                }
+                else
+                {
+                  assert (tt == type::newline);
+                  take = true;
+                }
+
+                replay_stop ();
+
+                // If end is true, then find the 'end' line. Otherwise, find
+                // the next if-else line. If skip is true then increment the
+                // command line index.
+                //
+                auto next = [e, &li]
+                  (lines::iterator j, bool end, bool skip) -> lines::iterator
+                  {
+                    // We need to be aware of nested if-else chains.
+                    //
+                    size_t n (0);
+
+                    for (++j; j != e; ++j)
+                    {
+                      line_type lt (j->type);
+
+                      if (lt == line_type::cmd_if ||
+                          lt == line_type::cmd_ifn)
+                        ++n;
+
+                      // If we are nested then we just wait until we get back
+                      // to the surface.
+                      //
+                      if (n == 0)
+                      {
+                        switch (lt)
+                        {
+                        case line_type::cmd_elif:
+                        case line_type::cmd_elifn:
+                        case line_type::cmd_else:
+                          if (end) break;
+                          // Fall through.
+                        case line_type::cmd_end:  return j;
+                        default: break;
+                        }
+                      }
+
+                      if (lt == line_type::cmd_end)
+                        --n;
+
+                      if (skip)
+                      {
+                        // Note that we don't count else and end as commands.
+                        //
+                        switch (lt)
+                        {
+                        case line_type::cmd:
+                        case line_type::cmd_if:
+                        case line_type::cmd_ifn:
+                        case line_type::cmd_elif:
+                        case line_type::cmd_elifn: ++li; break;
+                        default:                         break;
+                        }
+                      }
+                    }
+
+                    assert (false); // Missing end.
+                    return e;
+                  };
+
+                // If we are taking this branch then we need to parse all the
+                // lines until the next if-else line and then skip all the
+                // lines until the end (unless next is already end).
+                //
+                // Otherwise, we need to skip all the lines until the next
+                // if-else line and then continue parsing.
+                //
+                if (take)
+                {
+                  lines::iterator j (next (i, false, false)); // Next if-else.
+                  if (!exec_lines (i + 1, j, li, ct))
+                    return false;
+
+                  i = j->type == line_type::cmd_end ? j : next (j, true, true);
+                }
+                else
+                {
+                  i = next (i, false, true);
+                  if (i->type != line_type::cmd_end)
+                    --i; // Continue with this line (e.g., elif or else).
+                }
+
+                break;
+              }
+            case line_type::cmd_end:
+              {
+                assert (false);
+              }
+            }
+          }
+
+          return true;
+        }
+        catch (const exit_scope& e)
+        {
+          // Bail out if the scope is exited with the failure status. Otherwise
+          // leave the scope normally.
+          //
+          if (!e.status)
+            throw failed ();
+
+          replay_stop ();
+          return false;
+        }
+      }
+
+      //
+      // The rest.
+      //
+
+      lookup parser::
+      lookup_variable (name&& qual, string&& name, const location& loc)
+      {
+        assert (!pre_parse_);
+
+        if (!qual.empty ())
+          fail (loc) << "qualified variable name";
+
+        // If we have no scope (happens when pre-parsing directives), then we
+        // only look for buildfile variables.
+        //
+        // Otherwise, every variable that is ever set in a script has been
+        // pre-entered during pre-parse or introduced with the set builtin
+        // during test execution. Which means that if one is not found in the
+        // script pool then it can only possibly be set in the buildfile.
+        //
+        // Note that we need to acquire the variable pool lock. The pool can
+        // be changed from multiple threads by the set builtin. The obtained
+        // variable pointer can safelly be used with no locking as the variable
+        // pool is an associative container (underneath) and we are only adding
+        // new variables into it.
+        //
+        const variable* pvar (nullptr);
+
+        if (scope_ != nullptr)
+        {
+          slock sl (script_->var_pool_mutex);
+          pvar = script_->var_pool.find (name);
+        }
+
+        return pvar != nullptr
+          ? scope_->find (*pvar)
+          : script_->find_in_buildfile (name);
+      }
+
+      size_t parser::
+      quoted () const
+      {
+        size_t r (0);
+
+        if (replay_ != replay::play)
+          r = lexer_->quoted ();
+        else
+        {
+          // Examine tokens we have replayed since last reset.
+          //
+          for (size_t i (replay_quoted_); i != replay_i_; ++i)
+            if (replay_data_[i].token.qtype != quote_type::unquoted)
+              ++r;
+        }
+
+        return r;
+      }
+
+      void parser::
+      reset_quoted (token& cur)
+      {
+        if (replay_ != replay::play)
+          lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0);
+        else
+        {
+          replay_quoted_ = replay_i_ - 1;
+
+          // Must be the same token.
+          //
+          assert (replay_data_[replay_quoted_].token.qtype == cur.qtype);
+        }
+      }
+
+      const string& parser::
+      insert_id (string id, location l)
+      {
+        auto p (id_map_->emplace (move (id), move (l)));
+
+        if (!p.second)
+          fail (l) << "duplicate id " << p.first->first <<
+            info (p.first->second) << "previously used here";
+
+        return p.first->first;
+      }
+
+      void parser::
+      set_lexer (lexer* l)
+      {
+        lexer_ = l;
+        base_parser::lexer_ = l;
+      }
+
+      void parser::
+      apply_value_attributes (const variable* var,
+                              value& lhs,
+                              value&& rhs,
+                              const string& attributes,
+                              token_type kind,
+                              const path& name)
+      {
+        path_ = &name;
+
+        istringstream is (attributes);
+        lexer l (is, name, lexer_mode::attribute);
+        set_lexer (&l);
+
+        token t;
+        type tt;
+        next (t, tt);
+
+        if (tt != type::lsbrace && tt != type::eos)
+          fail (t) << "expected '[' instead of " << t;
+
+        attributes_push (t, tt, true);
+
+        if (tt != type::eos)
+          fail (t) << "trailing junk after ']'";
+
+        build2::parser::apply_value_attributes (var, lhs, move (rhs), kind);
+      }
+
+      // parser::parsed_doc
+      //
+      parser::parsed_doc::
+      parsed_doc (string s, uint64_t l, uint64_t c)
+          : str (move (s)), re (false), end_line (l), end_column (c)
+      {
+      }
+
+      parser::parsed_doc::
+      parsed_doc (regex_lines&& r, uint64_t l, uint64_t c)
+          : regex (move (r)), re (true), end_line (l), end_column (c)
+      {
+      }
+
+      parser::parsed_doc::
+      parsed_doc (parsed_doc&& d)
+          : re (d.re), end_line (d.end_line), end_column (d.end_column)
+      {
+        if (re)
+          new (&regex) regex_lines (move (d.regex));
+        else
+          new (&str) string (move (d.str));
+      }
+
+      parser::parsed_doc::
+      ~parsed_doc ()
+      {
+        if (re)
+          regex.~regex_lines ();
+        else
+          str.~string ();
+      }
+    }
+  }
+}
author	Karen Arutyunov <karen@codesynthesis.com>	2019-07-04 19:12:15 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2019-07-05 14:24:43 +0300
commit	57b10c06925d0bdf6ffb38488ee908f085109e95 (patch)
tree	f2103684d319650c3302aef9d7a70dd64ff2a347 /libbuild2/test/script/parser.cxx
parent	30b4eda196e090aa820d312e6a9435a4ae84c303 (diff)