// file : libbuild2/test/script/parser.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/test/script/parser.hxx> #include <libbuild2/context.hxx> // sched, keep_going #include <libbuild2/test/script/lexer.hxx> #include <libbuild2/test/script/runner.hxx> using namespace std; namespace build2 { namespace test { namespace script { using type = token_type; // Return true if the string contains only a single digit characters // (used to detect the special $N variables). // static inline bool digit (const string& s) { return s.size () == 1 && butl::digit (s[0]); } // // Pre-parse. // void parser:: pre_parse (script& s) { const path& p (s.script_target.path ()); assert (!p.empty ()); // Should have been assigned. try { ifdstream ifs (p); pre_parse (ifs, s); } catch (const io_error& e) { fail << "unable to read testscript " << p << ": " << e << endf; } } void parser:: pre_parse (istream& is, script& s) { path_ = &*s.paths_.insert ( path_name_value (s.script_target.path ())).first; pre_parse_ = true; lexer l (is, *path_, lexer_mode::command_line); set_lexer (&l); id_prefix_.clear (); id_map idm; include_set ins; script_ = &s; runner_ = nullptr; group_ = script_; id_map_ = &idm; include_set_ = &ins; scope_ = nullptr; //@@ PAT TODO: set pbase_? // Start location of the implied script group is the beginning of // the file. End location -- end of the file. // group_->start_loc_ = location (*path_, 1, 1); token t (pre_parse_scope_body ()); if (t.type != type::eos) fail (t) << "stray " << t; group_->end_loc_ = get_location (t); } bool parser:: pre_parse_demote_group_scope (unique_ptr<scope>& s) { // See if this turned out to be an explicit test scope. An explicit // test scope contains a single test, only variable assignments in // setup and nothing in teardown. Plus only the group can have the // description. Because we apply this recursively, also disqualify // a test scope that has an if-condition. // // If we have a chain, then all the scopes must be demotable. So we // first check if this scope is demotable and if so then recurse for // the next in chain. // group& g (static_cast<group&> (*s)); auto& sc (g.scopes); auto& su (g.setup_); auto& td (g.tdown_); test* t; if (sc.size () == 1 && (t = dynamic_cast<test*> (sc.back ().get ())) != nullptr && find_if ( su.begin (), su.end (), [] (const line& l) { return l.type != line_type::var; }) == su.end () && td.empty () && !t->desc && !t->if_cond_) { if (g.if_chain != nullptr && !pre_parse_demote_group_scope (g.if_chain)) return false; // It would have been nice to reuse the test object and only throw // away the group. However, the merged scope has to use id_path and // wd_path of the group. So to keep things simple we are going to // throw away both and create a new test object. // // We always use the group's id since the test cannot have a // user-provided one. // unique_ptr<test> m (new test (g.id_path.leaf ().string (), *group_)); // Move the description, if-condition, and if-chain. // m->desc = move (g.desc); m->if_cond_ = move (g.if_cond_); m->if_chain = move (g.if_chain); // Merge the lines of the group and the test. // if (su.empty ()) m->tests_ = move (t->tests_); else { m->tests_ = move (su); // Should come first. m->tests_.insert (m->tests_.end (), make_move_iterator (t->tests_.begin ()), make_move_iterator (t->tests_.end ())); } // Use start/end locations of the outer scope. // m->start_loc_ = g.start_loc_; m->end_loc_ = g.end_loc_; s = move (m); return true; } return false; } token parser:: pre_parse_scope_body () { // enter: next token is first token of scope body // leave: rcbrace or eos (returned) token t; type tt; // Parse lines (including nested scopes) until we see '}' or eos. // for (;;) { // Start lexing each line recognizing leading '.+-{}'. // tt = peek (lexer_mode::first_token); // Handle description. // optional<description> d; if (tt == type::colon) d = pre_parse_leading_description (t, tt); // Determine the line type by peeking at the first token. // switch (tt) { case type::eos: case type::rcbrace: { next (t, tt); if (d) fail (t) << "description before " << t; return t; } case type::lcbrace: { // Nested scope. // next (t, tt); // Get '{'. const location sl (get_location (t)); // First check that we don't have any teardown commands yet. // This will detect things like variable assignments between // scopes. // if (!group_->tdown_.empty ()) { location tl ( group_->tdown_.back ().tokens.front ().location ()); fail (sl) << "scope after teardown" << info (tl) << "last teardown line appears here"; } // If there is no user-supplied id, use the line number // (prefixed with include id) as the scope id. // const string& id ( d && !d->id.empty () ? d->id : insert_id (id_prefix_ + to_string (sl.line), sl)); unique_ptr<scope> g (pre_parse_scope_block (t, tt, id)); g->desc = move (d); pre_parse_demote_group_scope (g); group_->scopes.push_back (move (g)); continue; } default: { pre_parse_line (t, tt, d); assert (tt == type::newline); break; } } } } unique_ptr<group> parser:: pre_parse_scope_block (token& t, type& tt, const string& id) { // enter: lcbrace // leave: newline after rcbrace const location sl (get_location (t)); if (next (t, tt) != type::newline) fail (t) << "expected newline after '{'"; // Push group. // id_map idm; include_set ins; unique_ptr<group> g (new group (id, *group_)); id_map* om (id_map_); id_map_ = &idm; include_set* os (include_set_); include_set_ = &ins; group* og (group_); group_ = g.get (); // Parse body. // group_->start_loc_ = sl; token e (pre_parse_scope_body ()); group_->end_loc_ = get_location (e); // Pop group. // group_ = og; include_set_ = os; id_map_ = om; if (e.type != type::rcbrace) fail (e) << "expected '}' at the end of the scope"; if (next (t, tt) != type::newline) fail (t) << "expected newline after '}'"; return g; } // Parse a logical line (as well as scope-if since the only way to // recognize it is to parse the if line), handling the flow control // constructs recursively. // // If one is true then only parse one line returning an indication of // whether the line ended with a semicolon. If the flow control // construct type is specified, then this line is assumed to belong to // such construct. // bool parser:: pre_parse_line (token& t, type& tt, optional<description>& d, lines* ls, bool one, optional<line_type> fct) { // enter: next token is peeked at (type in tt) // leave: newline assert (!fct || *fct == line_type::cmd_if || *fct == line_type::cmd_while || *fct == line_type::cmd_for_stream || *fct == line_type::cmd_for_args); // Note: token is only peeked at. // const location ll (get_location (peeked ())); // Determine the line type/start token. // line_type lt; type st (type::eos); // Later, can only be set to plus or minus. bool semi (false); // Parse the command line tail, starting from the newline or the // potential colon/semicolon token. // // Note that colon and semicolon are only valid in test command lines // and after 'end' in flow control constructs. Note that we always // recognize them lexically, even when they are not valid tokens per // the grammar. // auto parse_command_tail = [&t, &tt, &st, <, &d, &semi, &ll, this] () { if (tt != type::newline) { if (lt != line_type::cmd && lt != line_type::cmd_end) fail (t) << "expected newline instead of " << t; switch (st) { case type::plus: fail (t) << t << " after setup command" << endf; case type::minus: fail (t) << t << " after teardown command" << endf; } } switch (tt) { case type::colon: { if (d) fail (ll) << "both leading and trailing descriptions"; d = parse_trailing_description (t, tt); break; } case type::semi: { semi = true; replay_pop (); // See above for the reasoning. next (t, tt); // Get newline. break; } } if (tt != type::newline) fail (t) << "expected newline instead of " << t; }; switch (tt) { case type::dot: { // Directive. // next (t, tt); // Skip dot. next (t, tt); // Get the directive name. if (tt != type::word || t.qtype != quote_type::unquoted) fail (t) << "expected directive name instead of " << t; // Make sure we are not inside a test (i.e., after semi). // if (ls != nullptr) fail (ll) << "directive after ';'"; const string& n (t.value); if (n == "include") pre_parse_directive (t, tt); else fail (t) << "unknown directive '" << n << "'"; assert (tt == type::newline); return false; } case type::plus: case type::minus: { // Setup/teardown command. // st = tt; next (t, tt); // Start saving tokens from the next one. replay_save (); next (t, tt); // See if this is a special command. // lt = line_type::cmd; // Default. if (tt == type::word && t.qtype == quote_type::unquoted) { const string& n (t.value); // Handle the for-loop consistently with pre_parse_line_start(). // if (n == "if") lt = line_type::cmd_if; else if (n == "if!") lt = line_type::cmd_ifn; else if (n == "while") lt = line_type::cmd_while; else if (n == "for") lt = line_type::cmd_for_stream; } break; } default: { lt = pre_parse_line_start (t, tt, lexer_mode::second_token); break; } } // Pre-parse the line keeping track of whether it ends with a semi. // line ln; switch (lt) { case line_type::var: { // Check if we are trying to modify any of the special aliases // ($*, $N, $~, $@). // string& n (t.value); if (special_variable (n)) fail (t) << "attempt to set '" << n << "' variable directly"; // Pre-enter the variables now while we are executing serially. // Once parallel, it becomes a lot harder to do. // ln.var = &script_->var_pool.insert (move (n)); next (t, tt); // Assignment kind. // We cannot reuse the value mode since it will recognize `{` // which we want to treat as a literal. // mode (lexer_mode::variable_line); parse_variable_line (t, tt); // Note that the semicolon token is only required during // pre-parsing to decide which line list the current line should // go to and provides no additional semantics during the // execution. Moreover, build2::script::parser::exec_lines() // doesn't expect this token to be present. Thus, we just drop // this token from the saved tokens. // semi = (tt == type::semi); if (semi) { replay_pop (); next (t, tt); } if (tt != type::newline) fail (t) << "expected newline instead of " << t; break; } // // See pre_parse_line_start() for details. // case line_type::cmd_for_args: assert (false); break; case line_type::cmd_for_stream: { // First we need to sense the next few tokens and detect which // form of the for-loop that actually is (see // libbuild2/build/script/parser.cxx for details). // token pt (t); assert (pt.type == type::word && pt.value == "for"); mode (lexer_mode::for_loop); next (t, tt); string& n (t.value); if (tt == type::word && t.qtype == quote_type::unquoted && (n[0] == '_' || alpha (n[0]) || // Variable. n == "*" || n == "~" || n == "@")) // Special variable. { // Detect patterns analogous to parse_variable_name() (so we // diagnose `for x[string]: ...`). // if (n.find_first_of ("[*?") != string::npos) fail (t) << "expected variable name instead of " << n; if (special_variable (n)) fail (t) << "attempt to set '" << n << "' variable directly"; if (lexer_->peek_char ().first == '[') { token vt (move (t)); next_with_attributes (t, tt); attributes_push (t, tt, true /* standalone */, false /* next_token */); t = move (vt); tt = t.type; } if (lexer_->peek_char ().first == ':') lt = line_type::cmd_for_args; } if (lt == line_type::cmd_for_stream) // for x <... { ln.var = nullptr; expire_mode (); parse_command_expr_result r ( parse_command_expr (t, tt, lexer::redirect_aliases, move (pt))); assert (r.for_loop); parse_command_tail (); parse_here_documents (t, tt, r); } else // for x: ... { ln.var = &script_->var_pool.insert (move (n)); next (t, tt); assert (tt == type::colon); expire_mode (); // Parse the value similar to the var line type (see above), // except for the fact that we don't expect a trailing semicolon. // mode (lexer_mode::variable_line); parse_variable_line (t, tt); if (tt != type::newline) fail (t) << "expected newline instead of " << t << " after for"; } break; } case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: { if (!fct || *fct != line_type::cmd_if) fail (t) << lt << " without preceding 'if'"; } // Fall through. case line_type::cmd_end: { if (!fct) fail (t) << lt << " without preceding 'if', 'for', or 'while'"; } // Fall through. case line_type::cmd_if: case line_type::cmd_ifn: case line_type::cmd_while: next (t, tt); // Skip to start of command. // Fall through. case line_type::cmd: { parse_command_expr_result r; if (lt != line_type::cmd_else && lt != line_type::cmd_end) r = parse_command_expr (t, tt, lexer::redirect_aliases); if (r.for_loop) { lt = line_type::cmd_for_stream; ln.var = nullptr; } parse_command_tail (); parse_here_documents (t, tt, r); break; } } assert (tt == type::newline); // Stop saving and get the tokens. // lines ls_data; if (ls == nullptr) ls = &ls_data; ln.type = lt; ln.tokens = replay_data (); ls->push_back (move (ln)); switch (lt) { case line_type::cmd_if: case line_type::cmd_ifn: { semi = pre_parse_if_else (t, tt, d, *ls); // If this turned out to be scope-if, then ls is empty, semi is // false, and none of the below logic applies. // if (ls->empty ()) return semi; break; } case line_type::cmd_while: case line_type::cmd_for_stream: case line_type::cmd_for_args: { semi = pre_parse_loop (t, tt, lt, d, *ls); break; } default: break; } // Unless we were told where to put it, decide where it actually goes. // if (ls == &ls_data) { // First pre-check variables and variable-only flow control // constructs: by themselves (i.e., without a trailing semicolon) // they are treated as either setup or teardown without // plus/minus. Also handle illegal line types. // switch (lt) { case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: case line_type::cmd_end: { assert (false); // Should have been failed earlier. } case line_type::cmd_if: case line_type::cmd_ifn: case line_type::cmd_while: case line_type::cmd_for_stream: case line_type::cmd_for_args: { // See if this is a variable-only flow control construct. // if (find_if (ls_data.begin (), ls_data.end (), [] (const line& l) { return l.type == line_type::cmd; }) != ls_data.end ()) break; } // Fall through. case line_type::var: { // If there is a semicolon after the variable then we assume // it is part of a test (there is no reason to use semicolons // after variables in the group scope). Otherwise -- setup or // teardown. // if (!semi) { if (d) { if (lt == line_type::var) fail (ll) << "description before setup/teardown variable"; else fail (ll) << "description before/after setup/teardown " << "variable-only " << lt; } // If we don't have any nested scopes or teardown commands, // then we assume this is a setup, otherwise -- teardown. // ls = group_->scopes.empty () && group_->tdown_.empty () ? &group_->setup_ : &group_->tdown_; } break; } default: break; } // If pre-check didn't change the destination, then it's a test. // if (ls == &ls_data) { switch (st) { // Setup. // case type::plus: { if (d) fail (ll) << "description before setup command"; if (!group_->scopes.empty ()) fail (ll) << "setup command after tests"; if (!group_->tdown_.empty ()) fail (ll) << "setup command after teardown"; ls = &group_->setup_; break; } // Teardown. // case type::minus: { if (d) fail (ll) << "description before teardown command"; ls = &group_->tdown_; break; } // Test command or variable. // default: { // First check that we don't have any teardown commands yet. // This will detect things like variable assignments between // tests. // if (!group_->tdown_.empty ()) { location tl ( group_->tdown_.back ().tokens.front ().location ()); fail (ll) << "test after teardown" << info (tl) << "last teardown line appears here"; } break; } } } // If the destination changed, then move the data over. // if (ls != &ls_data) ls->insert (ls->end (), make_move_iterator (ls_data.begin ()), make_move_iterator (ls_data.end ())); } // If this command ended with a semicolon, then the next one should // go to the same place. // if (semi && !one) { tt = peek (lexer_mode::first_token); const location ll (get_location (peeked ())); switch (tt) { case type::colon: fail (ll) << "description inside test" << endf; case type::eos: case type::rcbrace: case type::lcbrace: fail (ll) << "expected another line after ';'" << endf; case type::plus: fail (ll) << "setup command in test" << endf; case type::minus: fail (ll) << "teardown command in test" << endf; default: semi = pre_parse_line (t, tt, d, ls); assert (tt == type::newline); // End of last test line. } } // If this is a test then create implicit test scope. // if (ls == &ls_data) { // If there is no user-supplied id, use the line number (prefixed // with include id) as the scope id. // const string& id ( d && !d->id.empty () ? d->id : insert_id (id_prefix_ + to_string (ll.line), ll)); unique_ptr<test> p (new test (id, *group_)); p->desc = move (d); p->start_loc_ = ll; p->tests_ = move (ls_data); p->end_loc_ = get_location (t); group_->scopes.push_back (move (p)); } return semi; } bool parser:: pre_parse_if_else (token& t, type& tt, optional<description>& d, lines& ls) { // enter: <newline> (previous line) // leave: <newline> tt = peek (lexer_mode::first_token); return tt == type::lcbrace ? pre_parse_if_else_scope (t, tt, d, ls) : pre_parse_if_else_command (t, tt, d, ls); } bool parser:: pre_parse_if_else_scope (token& t, type& tt, optional<description>& d, lines& ls) { // enter: peeked token of next line (lcbrace) // leave: newline assert (ls.size () == 1); // The if/if! line. // Use if/if! as the entire scope chain location. // const location sl (ls.back ().tokens.front ().location ()); // First check that we don't have any teardown commands yet. This // will detect things like variable assignments between scopes. // if (!group_->tdown_.empty ()) { location tl ( group_->tdown_.back ().tokens.front ().location ()); fail (sl) << "scope after teardown" << info (tl) << "last teardown line appears here"; } // If there is no user-supplied id, use the line number (prefixed with // include id) as the scope id. Note that we use the same id for all // scopes in the chain. // const string& id ( d && !d->id.empty () ? d->id : insert_id (id_prefix_ + to_string (sl.line), sl)); unique_ptr<scope> root; // Parse the if-else scope chain. // line_type bt (line_type::cmd_if); // Current block. for (unique_ptr<scope>* ps (&root);; ps = &(*ps)->if_chain) { next (t, tt); // Get '{'. { unique_ptr<group> g (pre_parse_scope_block (t, tt, id)); // If-condition. // g->if_cond_ = move (ls.back ()); ls.clear (); // Description. For now we just duplicate it through the entire // chain. // g->desc = (ps == &root ? d : root->desc); *ps = move (g); } // See if what comes next is another chain element. // line_type lt (line_type::cmd_end); type pt (peek (lexer_mode::first_token)); const token& p (peeked ()); const location ll (get_location (p)); if (pt == type::word && p.qtype == quote_type::unquoted) { if (p.value == "elif") lt = line_type::cmd_elif; else if (p.value == "elif!") lt = line_type::cmd_elifn; else if (p.value == "else") lt = line_type::cmd_else; } if (lt == line_type::cmd_end) break; // Check if-else block sequencing. // if (bt == line_type::cmd_else) { if (lt == line_type::cmd_else || lt == line_type::cmd_elif || lt == line_type::cmd_elifn) fail (ll) << lt << " after " << bt; } // Parse just the condition line using pre_parse_line() in the "one" // mode and into ls so that it is naturally picked up as if_cond_ on // the next iteration. // optional<description> td; bool semi (pre_parse_line (t, (tt = pt), td, &ls, true /* one */, line_type::cmd_if)); assert (ls.size () == 1 && ls.back ().type == lt); assert (tt == type::newline); // For any of these lines trailing semi or description is illegal. // // @@ Not the exact location of semi/colon. // if (semi) fail (ll) << "';' after " << lt; if (td) fail (ll) << "description after " << lt; // Make sure what comes next is another scope. // tt = peek (lexer_mode::first_token); if (tt != type::lcbrace) fail (ll) << "expected scope after " << lt; // Update current if-else block. // switch (lt) { case line_type::cmd_elif: case line_type::cmd_elifn: bt = line_type::cmd_elif; break; case line_type::cmd_else: bt = line_type::cmd_else; break; default: break; } } pre_parse_demote_group_scope (root); group_->scopes.push_back (move (root)); return false; // We never end with a semi. } // Pre-parse the flow control construct block line. Fail if the line is // unexpectedly followed with a semicolon or test description. // bool parser:: pre_parse_block_line (token& t, type& tt, line_type bt, optional<description>& d, lines& ls) { // enter: peeked first token of the line (type in tt) // leave: newline const location ll (get_location (peeked ())); switch (tt) { case type::colon: fail (ll) << "description inside " << bt << endf; case type::eos: case type::rcbrace: case type::lcbrace: fail (ll) << "expected closing 'end'" << endf; case type::plus: fail (ll) << "setup command inside " << bt << endf; case type::minus: fail (ll) << "teardown command inside " << bt << endf; } // Parse one line. Note that this one line can still be multiple lines // in case of a flow control construct. In this case we want to view // it as, for example, cmd_if, not cmd_end. Thus remember the start // position of the next logical line. // size_t i (ls.size ()); line_type fct; // Flow control construct type the block type relates to. switch (bt) { case line_type::cmd_if: case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: { fct = line_type::cmd_if; break; } case line_type::cmd_while: case line_type::cmd_for_stream: case line_type::cmd_for_args: { fct = bt; break; } default: assert(false); } optional<description> td; bool semi (pre_parse_line (t, tt, td, &ls, true /* one */, fct)); assert (tt == type::newline); line_type lt (ls[i].type); // First take care of 'end'. // if (lt == line_type::cmd_end) { if (td) { if (d) fail (ll) << "both leading and trailing descriptions"; d = move (td); } return semi; } // For any other line trailing semi or description is illegal. // // @@ Not the exact location of semi/colon. // if (semi) fail (ll) << "';' inside " << bt; if (td) fail (ll) << "description inside " << bt; return false; } bool parser:: pre_parse_if_else_command (token& t, type& tt, optional<description>& d, lines& ls) { // enter: peeked first token of next line (type in tt) // leave: newline // Parse lines until we see closing 'end'. // for (line_type bt (line_type::cmd_if); // Current block. ; tt = peek (lexer_mode::first_token)) { const location ll (get_location (peeked ())); size_t i (ls.size ()); bool semi (pre_parse_block_line (t, tt, bt, d, ls)); line_type lt (ls[i].type); // First take care of 'end'. // if (lt == line_type::cmd_end) return semi; // Check if-else block sequencing. // if (bt == line_type::cmd_else) { if (lt == line_type::cmd_else || lt == line_type::cmd_elif || lt == line_type::cmd_elifn) fail (ll) << lt << " after " << bt; } // Update current if-else block. // switch (lt) { case line_type::cmd_elif: case line_type::cmd_elifn: bt = line_type::cmd_elif; break; case line_type::cmd_else: bt = line_type::cmd_else; break; default: break; } } assert (false); // Can't be here. return false; } bool parser:: pre_parse_loop (token& t, type& tt, line_type lt, optional<description>& d, lines& ls) { // enter: <newline> (previous line) // leave: <newline> assert (lt == line_type::cmd_while || lt == line_type::cmd_for_stream || lt == line_type::cmd_for_args); tt = peek (lexer_mode::first_token); // Parse lines until we see closing 'end'. // for (;; tt = peek (lexer_mode::first_token)) { size_t i (ls.size ()); bool semi (pre_parse_block_line (t, tt, lt, d, ls)); if (ls[i].type == line_type::cmd_end) return semi; } assert (false); // Can't be here. return false; } void parser:: pre_parse_directive (token& t, type& tt) { // enter: directive name // leave: newline string d (t.value); location l (get_location (t)); next (t, tt); // Suspend pre-parsing since we want to really parse the line, with // expansion, etc. Also parse the whole line in one go. // names args; if (tt != type::newline) { pre_parse_ = false; args = parse_names (t, tt, pattern_mode::ignore, false, "directive argument", nullptr); pre_parse_ = true; } if (tt != type::newline) fail (t) << t << " after directive"; if (d == "include") pre_parse_include_line (move (args), move (l)); else assert (false); // Unhandled directive. } void parser:: pre_parse_include_line (names args, location dl) { auto i (args.begin ()); // Process options. // bool once (false); for (; i != args.end () && i->simple (); ++i) { if (i->value == "--once") once = true; else break; } // Process arguments. // auto include = [&dl, once, this] (string n) // throw invalid_path { // It may be tempting to use relative paths in diagnostics but it // most likely will be misguided. // auto enter_path = [this] (string n) -> const path_name_value& { path p (move (n)); if (p.relative ()) { // There is always the testscript path (path_ refers to an // object in the script::paths_ set). // assert (path_->path != nullptr); p = path_->path->directory () / p; } p.normalize (); return *script_->paths_.insert (path_name_value (move (p))).first; }; const path_name_value& pn (enter_path (move (n))); const path& p (pn.path); if (include_set_->insert (p).second || !once) { try { ifdstream ifs (p); lexer l (ifs, pn, lexer_mode::command_line); const path_name* op (path_); path_ = &pn; build2::script::lexer* ol (lexer_); set_lexer (&l); string oip (id_prefix_); id_prefix_ += to_string (dl.line); id_prefix_ += '-'; id_prefix_ += p.leaf ().base ().string (); id_prefix_ += '-'; token t (pre_parse_scope_body ()); if (t.type != type::eos) fail (t) << "stray " << t; id_prefix_ = oip; set_lexer (ol); path_ = op; } catch (const io_error& e) { fail (dl) << "unable to read testscript " << p << ": " << e; } } }; for (; i != args.end (); ++i) { name& n (*i); try { if (n.simple () && !n.empty ()) { include (move (n.value)); continue; } } catch (const invalid_path&) {} // Fall through. diag_record dr (fail (dl)); dr << "invalid testscript include path "; to_stream (dr.os, n, quote_mode::normal); } } description parser:: pre_parse_leading_description (token& t, type& tt) { // enter: peeked at colon (type in tt) // leave: peeked at in the first_token mode (type in tt) assert (tt == type::colon); description r; location loc (get_location (peeked ())); string sp; // Strip prefix. size_t sn (0); // Strip prefix length. for (size_t ln (1); tt == type::colon; ++ln) { next (t, tt); // Get ':'. mode (lexer_mode::description_line); next (t, tt); // If it is empty, then we get newline right away. // const string& l (tt == type::word ? t.value : string ()); if (tt == type::word) next (t, tt); // Get newline. assert (tt == type::newline); // If this is the first line, then get the "strip prefix", i.e., // the beginning of the line that contains only whitespaces. If // the subsequent lines start with the same prefix, then we strip // it. // if (ln == 1) { sn = l.find_first_not_of (" \t"); sp.assign (l, 0, sn == string::npos ? (sn = 0) : sn); } // Apply strip prefix. // size_t i (l.compare (0, sn, sp) == 0 ? sn : 0); // Strip trailing whitespaces, as a courtesy to the user. // size_t j (l.find_last_not_of (" \t")); j = j != string::npos ? j + 1 : i; size_t n (j - i); // [i, j) is our data. if (ln == 1) { // First line. Ignore if it's blank. // if (n == 0) --ln; // Stay as if on the first line. else { // Otherwise, see if it is the id. Failed that we assume it is // the summary until we see the next line. // (l.find_first_of (" \t.", i) >= j ? r.id : r.summary). assign (l, i, n); // If this is an id then validate it. // if (!r.id.empty ()) { for (char c: r.id) { if (!(alnum (c) || c == '_' || c == '-' || c == '+')) fail (loc) << "illegal character '" << c << "' in test id '" << r.id << "'"; } } } } else if (ln == 2) { // If this is a blank then whatever we have in id/summary is good. // Otherwise, if we have id, then assume this is summary until we // see the next line. And if not, then move what we (wrongly) // assumed to be the summary to details. // if (n != 0) { if (!r.id.empty ()) r.summary.assign (l, i, n); else { r.details = move (r.summary); r.details += '\n'; r.details.append (l, i, n); r.summary.clear (); } } } // Don't treat line 3 as special if we have given up on id/summary. // else if (ln == 3 && r.details.empty ()) { // If this is a blank and we have id and/or summary, then we are // good. Otherwise, if we have both, then move what we (wrongly) // assumed to be id and summary to details. // if (n != 0) { if (!r.id.empty () && !r.summary.empty ()) { r.details = move (r.id); r.details += '\n'; r.details += r.summary; r.details += '\n'; r.id.clear (); r.summary.clear (); } r.details.append (l, i, n); } } else { if (!r.details.empty ()) r.details += '\n'; r.details.append (l, i, n); } tt = peek (lexer_mode::first_token); } // Zap trailing newlines in the details. // size_t p (r.details.find_last_not_of ('\n')); if (p != string::npos && ++p != r.details.size ()) r.details.resize (p); if (r.empty ()) fail (loc) << "empty description"; // Insert id into the id map if we have one. // if (!r.id.empty ()) insert_id (r.id, loc); return r; } description parser:: parse_trailing_description (token& t, type& tt) { // enter: colon // leave: newline // Parse one-line trailing description. // description r; // @@ Would be nice to omit trailing description from replay. // const location loc (get_location (t)); mode (lexer_mode::description_line); next (t, tt); // If it is empty, then we will get newline right away. // if (tt == type::word) { string l (move (t.value)); trim (l); // Strip leading/trailing whitespaces. // Decide whether this is id or summary. // (l.find_first_of (" \t") == string::npos ? r.id : r.summary) = move (l); next (t, tt); // Get newline. } assert (tt == type::newline); // Lexer mode invariant. if (r.empty ()) fail (loc) << "empty description"; // Insert id into the id map if we have one. // if (pre_parse_ && !r.id.empty ()) insert_id (r.id, loc); return r; } command_expr parser:: parse_command_line (token& t, type& tt) { // enter: first token of the command line // leave: <newline> // Note: this one is only used during execution. parse_command_expr_result pr ( parse_command_expr (t, tt, lexer::redirect_aliases)); if (tt == type::colon) parse_trailing_description (t, tt); assert (tt == type::newline); parse_here_documents (t, tt, pr); assert (tt == type::newline); command_expr r (move (pr.expr)); // If the test program runner is specified, then adjust the // expressions to run test programs via this runner. // pair<const process_path*, const strings*> tr ( runner_->test_runner ()); if (tr.first != nullptr) { for (expr_term& t: r) { for (command& c: t.pipe) { if (scope_->test_program (c.program.recall)) { // Append the runner options and the test program path to the // the arguments list and rotate the list to the left, so that // it starts from the runner options. This should probably be // not less efficient than inserting the program path and then // the runner options at the beginning of the list. // strings& args (c.arguments); size_t n (args.size ()); args.insert (args.end (), tr.second->begin (), tr.second->end ()); args.push_back (c.program.recall.string ()); rotate (args.begin (), args.begin () + n, args.end ()); c.program = process_path (*tr.first, false /* init */); } } } } return r; } // // Execute. // void parser:: execute (script& s, runner& r) { assert (s.state == scope_state::unknown); auto g ( make_exception_guard ( [&s] () {s.state = scope_state::failed;})); if (!s.empty ()) execute (s, s, r); else s.state = scope_state::passed; } void parser:: execute (scope& sc, script& s, runner& r) { path_ = nullptr; // Set by replays. pre_parse_ = false; set_lexer (nullptr); script_ = &s; runner_ = &r; group_ = nullptr; id_map_ = nullptr; include_set_ = nullptr; scope_ = ≻ // The script shouldn't be able to modify the scope. // // Note that we need it for calling functions which require the // current scope, such as $target.path(). // build2::parser::scope_ = const_cast<build2::scope*> (&s.target_scope); //@@ PAT TODO: set pbase_? exec_scope_body (); } static void execute_impl (scope& s, script& scr, runner& r) { try { parser p (scr.test_target.ctx); p.execute (s, scr, r); } catch (const failed&) { s.state = scope_state::failed; } } void parser:: exec_scope_body () { runner_->enter (*scope_, scope_->start_loc_); // Note that we rely on "small function object" optimization for the // exec_*() lambdas. // auto exec_set = [this] (const variable& var, token& t, build2::script::token_type& tt, const location&) { next (t, tt); type kind (tt); // Assignment kind. // We cannot reuse the value mode (see above for details). // mode (lexer_mode::variable_line); value rhs (parse_variable_line (t, tt)); assert (tt == type::newline); // Assign. // value& lhs (kind == type::assign ? scope_->assign (var) : scope_->append (var)); apply_value_attributes (&var, lhs, move (rhs), kind); if (script_->test_command_var (var.name)) scope_->reset_special (); }; // Is set later, right before the exec_lines() call. // command_type ct; auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt, const iteration_index* ii, size_t li, bool single, const function<command_function>& cf, const location& ll) { // We use the 0 index to signal that this is the only command. // Note that we only do this for test commands. // if (ct == command_type::test && single) li = 0; command_expr ce ( parse_command_line (t, static_cast<token_type&> (tt))); runner_->run (*scope_, ce, ct, ii, li, cf, ll); }; auto exec_cond = [this] (token& t, build2::script::token_type& tt, const iteration_index* ii, size_t li, const location& ll) { command_expr ce ( parse_command_line (t, static_cast<token_type&> (tt))); // Assume a flow control construct always involves multiple // commands. // return runner_->run_cond (*scope_, ce, ii, li, ll); }; auto exec_for = [this] (const variable& var, value&& val, const attributes& val_attrs, const location&) { value& lhs (scope_->assign (var)); attributes_.push_back (val_attrs); apply_value_attributes (&var, lhs, move (val), type::assign); if (script_->test_command_var (var.name)) scope_->reset_special (); }; size_t li (1); if (test* t = dynamic_cast<test*> (scope_)) { ct = command_type::test; exec_lines (t->tests_.begin (), t->tests_.end (), exec_set, exec_cmd, exec_cond, exec_for, nullptr /* iteration_index */, li); } else if (group* g = dynamic_cast<group*> (scope_)) { ct = command_type::setup; bool exec_scope ( exec_lines (g->setup_.begin (), g->setup_.end (), exec_set, exec_cmd, exec_cond, exec_for, nullptr /* iteration_index */, li)); if (exec_scope) { atomic_count task_count (0); wait_guard wg (g->root.test_target.ctx, task_count); // Start asynchronous execution of inner scopes keeping track of // how many we have handled. // for (unique_ptr<scope>& chain: g->scopes) { // Check if this scope is ignored (e.g., via config.test). // if (!runner_->test (*chain) || !exec_scope) { chain = nullptr; continue; } // Pick a scope from the if-else chain. // // In fact, we are going to drop all but the selected (if any) // scope. This way we can re-examine the scope states later. It // will also free some memory. // unique_ptr<scope>* ps; for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain) { scope& s (**ps); if (!s.if_cond_) // Unconditional. { assert (s.if_chain == nullptr); break; } line l (move (*s.if_cond_)); line_type lt (l.type); replay_data (move (l.tokens)); token t; type tt; next (t, tt); const location ll (get_location (t)); next (t, tt); // Skip to start of command. bool take; if (lt != line_type::cmd_else) { // Note: the line index count continues from setup. // command_expr ce (parse_command_line (t, tt)); try { take = runner_->run_cond ( *scope_, ce, nullptr /* iteration_index */, li++, ll); } catch (const exit_scope& e) { // Bail out if the scope is exited with the failure status. // Otherwise leave the scope normally. // if (!e.status) throw failed (); // Stop iterating through if conditions, and stop executing // inner scopes. // exec_scope = false; replay_stop (); break; } if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) take = !take; } else { assert (tt == type::newline); take = true; } replay_stop (); if (take) { // Count the remaining conditions for the line index. // for (scope* r (s.if_chain.get ()); r != nullptr && r->if_cond_->type != line_type::cmd_else; r = r->if_chain.get ()) ++li; s.if_chain.reset (); // Drop remaining scopes. break; } } chain.reset (*ps == nullptr || (*ps)->empty () || !exec_scope ? nullptr : ps->release ()); if (chain != nullptr) { // Hand it off to a sub-parser potentially in another thread. // But we could also have handled it serially in this parser: // // scope* os (scope_); // scope_ = chain.get (); // exec_scope_body (); // scope_ = os; // Pass our diagnostics stack (this is safe since we are going // to wait for completion before unwinding the diag stack). // // If the scope was executed synchronously, check the status // and bail out if we weren't asked to keep going. // // UBSan workaround. // const diag_frame* df (diag_frame::stack ()); if (!ctx->sched.async (task_count, [] (const diag_frame* ds, scope& s, script& scr, runner& r) { diag_frame::stack_guard dsg (ds); execute_impl (s, scr, r); }, df, ref (*chain), ref (*script_), ref (*runner_))) { // Bail out if the scope has failed and we weren't instructed // to keep going. // if (chain->state == scope_state::failed && !ctx->keep_going) throw failed (); } } } wg.wait (); // Re-examine the scopes we have executed collecting their state. // for (const unique_ptr<scope>& chain: g->scopes) { if (chain == nullptr) continue; switch (chain->state) { case scope_state::passed: break; case scope_state::failed: throw failed (); default: assert (false); } } } ct = command_type::teardown; exec_lines (g->tdown_.begin (), g->tdown_.end (), exec_set, exec_cmd, exec_cond, exec_for, nullptr /* iteration_index */, li); } else assert (false); runner_->leave (*scope_, scope_->end_loc_); scope_->state = scope_state::passed; } // // The rest. // // When add a special variable don't forget to update lexer::word() and // for-loop parsing in pre_parse_line(). // bool parser:: special_variable (const string& n) noexcept { return n == "*" || n == "~" || n == "@" || digit (n); } lookup parser:: lookup_variable (names&& qual, string&& name, const location& loc) { if (pre_parse_) return lookup (); if (!qual.empty ()) fail (loc) << "qualified variable name"; // If we have no scope (happens when pre-parsing directives), then we // only look for buildfile variables. // // Otherwise, every variable that is ever set in a script has been // pre-entered during pre-parse or introduced with the set builtin // during test execution. Which means that if one is not found in the // script pool then it can only possibly be set in the buildfile. // // Note that we need to acquire the variable pool lock. The pool can // be changed from multiple threads by the set builtin. The obtained // variable pointer can safelly be used with no locking as the variable // pool is an associative container (underneath) and we are only adding // new variables into it. // const variable* pvar (nullptr); if (scope_ != nullptr) { slock sl (script_->var_pool_mutex); pvar = script_->var_pool.find (name); } return pvar != nullptr ? scope_->lookup (*pvar) : script_->lookup_in_buildfile (name); } const string& parser:: insert_id (string id, location l) { auto p (id_map_->emplace (move (id), move (l))); if (!p.second) fail (l) << "duplicate id " << p.first->first << info (p.first->second) << "previously used here"; return p.first->first; } } } }