From f59d82eb8fda3ddcf790556c6c3615e40ae8b15b Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 3 Oct 2022 21:23:22 +0300 Subject: Add support for 'for' loop second (... | for x) and third (for x <...) forms in script --- libbuild2/script/builtin-options.cxx | 291 ++++++++++++++++++++++++++ libbuild2/script/builtin-options.hxx | 84 ++++++++ libbuild2/script/builtin-options.ixx | 57 +++++ libbuild2/script/builtin.cli | 7 + libbuild2/script/parser.cxx | 373 +++++++++++++++++++++++++++------ libbuild2/script/parser.hxx | 31 ++- libbuild2/script/run.cxx | 391 ++++++++++++++++++++++------------- libbuild2/script/run.hxx | 66 +++++- libbuild2/script/script.cxx | 24 ++- libbuild2/script/script.hxx | 18 +- 10 files changed, 1107 insertions(+), 235 deletions(-) (limited to 'libbuild2/script') diff --git a/libbuild2/script/builtin-options.cxx b/libbuild2/script/builtin-options.cxx index 8e15ddd..9b4067b 100644 --- a/libbuild2/script/builtin-options.cxx +++ b/libbuild2/script/builtin-options.cxx @@ -1076,6 +1076,297 @@ namespace build2 return r; } + + // for_options + // + + for_options:: + for_options () + : exact_ (), + newline_ (), + whitespace_ () + { + } + + for_options:: + for_options (int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + } + + for_options:: + for_options (int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ 
() + { + ::build2::build::cli::argv_scanner s (argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + ::build2::build::cli::argv_scanner s (start, argc, argv, erase); + _parse (s, opt, arg); + end = s.end (); + } + + for_options:: + for_options (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt, + ::build2::build::cli::unknown_mode arg) + : exact_ (), + newline_ (), + whitespace_ () + { + _parse (s, opt, arg); + } + + typedef + std::map + _cli_for_options_map; + + static _cli_for_options_map _cli_for_options_map_; + + struct _cli_for_options_map_init + { + _cli_for_options_map_init () + { + _cli_for_options_map_["--exact"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["-e"] = + &::build2::build::cli::thunk< for_options, &for_options::exact_ >; + _cli_for_options_map_["--newline"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["-n"] = + &::build2::build::cli::thunk< for_options, &for_options::newline_ >; + _cli_for_options_map_["--whitespace"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + _cli_for_options_map_["-w"] = + &::build2::build::cli::thunk< for_options, &for_options::whitespace_ >; + } + }; + + static _cli_for_options_map_init _cli_for_options_map_init_; + + bool for_options:: + _parse (const char* o, ::build2::build::cli::scanner& s) + { + _cli_for_options_map::const_iterator i (_cli_for_options_map_.find (o)); + + if (i != _cli_for_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool for_options:: + _parse (::build2::build::cli::scanner& s, + ::build2::build::cli::unknown_mode opt_mode, + 
::build2::build::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::build2::build::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + s.skip (); + r = true; + continue; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast (co.c_str ()), + const_cast (v) + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::build2::build::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::build2::build::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. 
+ // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::build2::build::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::cli::unknown_mode::fail: + { + throw ::build2::build::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } } } diff --git a/libbuild2/script/builtin-options.hxx b/libbuild2/script/builtin-options.hxx index c7cebbc..9361d18 100644 --- a/libbuild2/script/builtin-options.hxx +++ b/libbuild2/script/builtin-options.hxx @@ -253,6 +253,90 @@ namespace build2 vector clear_; bool clear_specified_; }; + + class for_options + { + public: + for_options (); + + for_options (int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + for_options (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = 
::build2::build::cli::unknown_mode::stop); + + for_options (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option = ::build2::build::cli::unknown_mode::fail, + ::build2::build::cli::unknown_mode argument = ::build2::build::cli::unknown_mode::stop); + + // Option accessors and modifiers. + // + const bool& + exact () const; + + bool& + exact (); + + void + exact (const bool&); + + const bool& + newline () const; + + bool& + newline (); + + void + newline (const bool&); + + const bool& + whitespace () const; + + bool& + whitespace (); + + void + whitespace (const bool&); + + // Implementation details. + // + protected: + bool + _parse (const char*, ::build2::build::cli::scanner&); + + private: + bool + _parse (::build2::build::cli::scanner&, + ::build2::build::cli::unknown_mode option, + ::build2::build::cli::unknown_mode argument); + + public: + bool exact_; + bool newline_; + bool whitespace_; + }; } } diff --git a/libbuild2/script/builtin-options.ixx b/libbuild2/script/builtin-options.ixx index 8f84177..575eb95 100644 --- a/libbuild2/script/builtin-options.ixx +++ b/libbuild2/script/builtin-options.ixx @@ -153,6 +153,63 @@ namespace build2 { this->clear_specified_ = x; } + + // for_options + // + + inline const bool& for_options:: + exact () const + { + return this->exact_; + } + + inline bool& for_options:: + exact () + { + return this->exact_; + } + + inline void for_options:: + exact (const bool& x) + { + this->exact_ = x; + } + + inline const bool& for_options:: + newline () const + { + return this->newline_; + } + + inline bool& for_options:: + newline () + { + return this->newline_; + } + + inline void for_options:: + newline (const bool& x) + { + this->newline_ = x; + } + + inline const bool& for_options:: + whitespace () const + { + return this->whitespace_; + } + + inline bool& for_options:: + whitespace () + { + return this->whitespace_; + } + + inline void for_options:: + whitespace (const bool& x) + { + this->whitespace_ = x; + } 
} } diff --git a/libbuild2/script/builtin.cli b/libbuild2/script/builtin.cli index 50dd3a0..c993983 100644 --- a/libbuild2/script/builtin.cli +++ b/libbuild2/script/builtin.cli @@ -30,5 +30,12 @@ namespace build2 vector --unset|-u; vector --clear|-c; }; + + class for_options + { + bool --exact|-e; + bool --newline|-n; + bool --whitespace|-w; + }; } } diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 536821b..7989c20 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -4,10 +4,13 @@ #include #include // strchr() +#include #include -#include // exit + +#include // exit, stream_reader #include +#include using namespace std; @@ -140,18 +143,20 @@ namespace build2 return nullopt; } - pair parser:: + parser::parse_command_expr_result parser:: parse_command_expr (token& t, type& tt, - const redirect_aliases& ra) + const redirect_aliases& ra, + optional&& program) { - // enter: first token of the command line + // enter: first (or second, if program) token of the command line // leave: or unknown token command_expr expr; // OR-ed to an implied false for the first term. // - expr.push_back ({expr_operator::log_or, command_pipe ()}); + if (!pre_parse_) + expr.push_back ({expr_operator::log_or, command_pipe ()}); command c; // Command being assembled. @@ -218,8 +223,8 @@ namespace build2 // Add the next word to either one of the pending positions or to // program arguments by default. // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) + auto add_word = [&c, &p, &mod, &check_regex_mod, this] + (string&& w, const location& l) { auto add_merge = [&l, this] (optional& r, const string& w, @@ -697,11 +702,30 @@ namespace build2 const location ll (get_location (t)); // Line location. // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, exit status comparison, etc). + // "terminators" (newline or unknown/unexpected token). 
// location l (ll); names ns; // Reuse to reduce allocations. + bool for_loop (false); + + if (program) + { + assert (program->type == type::word); + + // Note that here we skip all the parse_program() business since the + // program can only be one of the specially-recognized names. + // + if (program->value == "for") + for_loop = true; + else + assert (false); // Must be specially-recognized program. + + // Save the program name and continue parsing as a command. + // + add_word (move (program->value), get_location (*program)); + } + for (bool done (false); !done; l = get_location (t)) { tt = ra.resolve (tt); @@ -717,6 +741,9 @@ namespace build2 case type::equal: case type::not_equal: { + if (for_loop) + fail (l) << "for-loop exit code cannot be checked"; + if (!pre_parse_) check_pending (l); @@ -747,30 +774,39 @@ namespace build2 } case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Fall through. - case type::in_pass: - case type::out_pass: + case type::clean: + if (for_loop) + fail (l) << "cleanup in for-loop"; + // Fall through. - case type::in_null: + case type::out_pass: case type::out_null: - case type::out_trace: - case type::out_merge: - - case type::in_str: - case type::in_doc: case type::out_str: case type::out_doc: - - case type::in_file: case type::out_file_cmp: case type::out_file_ovr: case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. - case type::clean: + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: { if (pre_parse_) { @@ -968,6 +1004,42 @@ namespace build2 next (t, tt); break; } + case type::lsbrace: + { + // Recompose the attributes into a single command argument. 
+ // + assert (!pre_parse_); + + attributes_push (t, tt, true /* standalone */); + + attributes as (attributes_pop ()); + assert (!as.empty ()); + + ostringstream os; + names storage; + char c ('['); + for (const attribute& a: as) + { + os << c << a.name; + + if (!a.value.null) + { + os << '='; + + storage.clear (); + to_stream (os, + reverse (a.value, storage), + quote_mode::normal, + '@'); + } + + c = ','; + } + os << ']'; + + add_word (os.str (), l); + break; + } default: { // Bail out if this is one of the unknown tokens. @@ -1053,16 +1125,33 @@ namespace build2 bool prog (p == pending::program_first || p == pending::program_next); - // Check if this is the env pseudo-builtin. + // Check if this is the env pseudo-builtin or the for-loop. // bool env (false); - if (prog && tt == type::word && t.value == "env") + if (prog && tt == type::word) { - parsed_env r (parse_env_builtin (t, tt)); - c.cwd = move (r.cwd); - c.variables = move (r.variables); - c.timeout = r.timeout; - env = true; + if (t.value == "env") + { + parsed_env r (parse_env_builtin (t, tt)); + c.cwd = move (r.cwd); + c.variables = move (r.variables); + c.timeout = r.timeout; + env = true; + } + else if (t.value == "for") + { + if (expr.size () > 1) + fail (l) << "command expression involving for-loop"; + + for_loop = true; + + // Save 'for' as a program name and continue parsing as a + // command. + // + add_word (move (t.value), l); + next (t, tt); + continue; + } } // Parse the next chunk as names to get expansion, etc. Note that @@ -1243,9 +1332,16 @@ namespace build2 switch (tt) { case type::pipe: + if (for_loop) + fail (l) << "for-loop must be last command in a pipe"; + // Fall through. + case type::log_or: case type::log_and: { + if (for_loop) + fail (l) << "command expression involving for-loop"; + // Check that the previous command makes sense. 
// check_command (l, tt != type::pipe); @@ -1265,30 +1361,11 @@ namespace build2 break; } - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (move (t), tt, l); - break; - } - case type::clean: { + if (for_loop) + fail (l) << "cleanup in for-loop"; + parse_clean (t); break; } @@ -1299,6 +1376,27 @@ namespace build2 fail (l) << "here-document redirect in expansion"; break; } + + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + if (for_loop) + fail (l) << "output redirect in for-loop"; + // Fall through. + + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_file: + { + parse_redirect (move (t), tt, l); + break; + } } } @@ -1326,7 +1424,7 @@ namespace build2 expr.back ().pipe.push_back (move (c)); } - return make_pair (move (expr), move (hd)); + return parse_command_expr_result {move (expr), move (hd), for_loop}; } parser::parsed_env parser:: @@ -1575,7 +1673,7 @@ namespace build2 void parser:: parse_here_documents (token& t, type& tt, - pair& p) + parse_command_expr_result& pr) { // enter: newline // leave: newline @@ -1583,7 +1681,7 @@ namespace build2 // Parse here-document fragments in the order they were mentioned on // the command line. // - for (here_doc& h: p.second) + for (here_doc& h: pr.docs) { // Switch to the here-line mode which is like single/double-quoted // string but recognized the newline as a separator. @@ -1603,7 +1701,7 @@ namespace build2 { auto i (h.redirects.cbegin ()); - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional& r (i->fd == 0 ? c.in : i->fd == 1 ? 
c.out : @@ -1635,7 +1733,7 @@ namespace build2 // for (++i; i != h.redirects.cend (); ++i) { - command& c (p.first[i->expr].pipe[i->pipe]); + command& c (pr.expr[i->expr].pipe[i->pipe]); optional& ir (i->fd == 0 ? c.in : i->fd == 1 ? c.out : @@ -2062,7 +2160,7 @@ namespace build2 else if (n == "elif!") r = line_type::cmd_elifn; else if (n == "else") r = line_type::cmd_else; else if (n == "while") r = line_type::cmd_while; - else if (n == "for") r = line_type::cmd_for; + else if (n == "for") r = line_type::cmd_for_stream; else if (n == "end") r = line_type::cmd_end; else { @@ -2136,10 +2234,11 @@ namespace build2 { line_type lt (j->type); - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn || - lt == line_type::cmd_while || - lt == line_type::cmd_for) + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn || + lt == line_type::cmd_while || + lt == line_type::cmd_for_stream || + lt == line_type::cmd_for_args) ++n; // If we are nested then we just wait until we get back @@ -2164,10 +2263,8 @@ namespace build2 if (skip) { - // Note that we don't count else and end as commands. - // - // @@ Note that for the for-loop's second and third forms - // will probably need to increment li. + // Note that we don't count else, end, and 'for x: ...' as + // commands. // switch (lt) { @@ -2176,8 +2273,9 @@ namespace build2 case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: - case line_type::cmd_while: ++li; break; - default: break; + case line_type::cmd_for_stream: + case line_type::cmd_while: ++li; break; + default: break; } } } @@ -2221,7 +2319,10 @@ namespace build2 single = true; } - exec_cmd (t, tt, ii, li++, single, ll); + exec_cmd (t, tt, + ii, li++, single, + nullptr /* command_function */, + ll); replay_stop (); break; @@ -2339,7 +2440,147 @@ namespace build2 break; } - case line_type::cmd_for: + case line_type::cmd_for_stream: + { + // The for-loop construct end. Set on the first iteration. 
+ // + lines::const_iterator fe (e); + + // Let's "wrap up" all the required data into the single object + // to rely on the "small function object" optimization. + // + struct + { + lines::const_iterator i; + lines::const_iterator e; + const function& exec_set; + const function& exec_cmd; + const function& exec_cond; + const function& exec_for; + const iteration_index* ii; + size_t& li; + variable_pool* var_pool; + decltype (fcend)& fce; + lines::const_iterator& fe; + } d {i, e, + exec_set, exec_cmd, exec_cond, exec_for, + ii, li, + var_pool, + fcend, + fe}; + + function cf ( + [&d, this] + (environment& env, + const strings& args, + auto_fd in, + bool pipe, + const optional& dl, + const command& deadline_cmd, + const location& ll) + { + namespace cli = build2::build::cli; + + try + { + // Parse arguments. + // + cli::vector_scanner scan (args); + for_options ops (scan); + + // Note: diagnostics consistent with the set builtin. + // + if (ops.whitespace () && ops.newline ()) + fail (ll) << "for: both -n|--newline and " + << "-w|--whitespace specified"; + + if (!scan.more ()) + fail (ll) << "for: missing variable name"; + + // Either attributes or variable name. + // + string a (scan.next ()); + const string* ats (!scan.more () ? nullptr : &a); + string vname (!scan.more () ? move (a) : scan.next ()); + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; + + if (ats != nullptr && ats->empty ()) + fail (ll) << "for: empty variable attributes"; + + if (vname.empty ()) + fail (ll) << "for: empty variable name"; + + // Let's also diagnose the `... | for x:...` misuse which + // can probably be quite common. + // + if (vname.find (':') != string::npos) + fail (ll) << "for: ':' after variable name"; + + stream_reader sr ( + move (in), pipe, + ops.whitespace (), ops.newline (), ops.exact (), + dl, deadline_cmd, + ll); + + // Since the command pipe is parsed, we can stop + // replaying. 
Note that we should do this before calling + // exec_lines() for the loop body. Also note that we + // should increment the line index before that. + // + replay_stop (); + + size_t fli (++d.li); + iteration_index fi {1, d.ii}; + + for (optional s; (s = sr.next ()); ) + { + d.li = fli; + + // Don't move from the variable name since it is used on + // each iteration. + // + env.set_variable (vname, + names {name (move (*s))}, + ats != nullptr ? *ats : empty_string, + ll); + + // Find the construct end, if it is not found yet. + // + if (d.fe == d.e) + d.fe = d.fce (d.i, true, false); + + if (!exec_lines (d.i + 1, d.fe, + d.exec_set, + d.exec_cmd, + d.exec_cond, + d.exec_for, + &fi, d.li, + d.var_pool)) + { + throw exit (true); + } + + fi.index++; + } + } + catch (const cli::exception& e) + { + fail (ll) << "for: " << e; + } + }); + + exec_cmd (t, tt, ii, li, false /* single */, cf, ll); + + // Position to construct end. + // + i = (fe != e ? fe : fcend (i, true, true)); + + break; + } + case line_type::cmd_for_args: { // Parse the variable name with the potential attributes. // diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx index 3a4c46f..c402d3e 100644 --- a/libbuild2/script/parser.hxx +++ b/libbuild2/script/parser.hxx @@ -97,15 +97,34 @@ namespace build2 }; using here_docs = vector; - pair - parse_command_expr (token&, token_type&, const redirect_aliases&); + struct parse_command_expr_result + { + command_expr expr; // Single pipe for the for-loop. + here_docs docs; + bool for_loop = false; + + parse_command_expr_result () = default; + + parse_command_expr_result (command_expr&& e, + here_docs&& h, + bool f) + : expr (move (e)), docs (move (h)), for_loop (f) {} + }; + + // Pass the first special command program name (token_type::word) if it + // is already pre-parsed. 
+ // + parse_command_expr_result + parse_command_expr (token&, token_type&, + const redirect_aliases&, + optional&& program = nullopt); command_exit parse_command_exit (token&, token_type&); void parse_here_documents (token&, token_type&, - pair&); + parse_command_expr_result&); struct parsed_doc { @@ -135,6 +154,11 @@ namespace build2 // the first two tokens. Use the specified lexer mode to peek the second // token. // + // Always return the cmd_for_stream line type for the for-loop. Note + // that the for-loop form cannot be detected easily, based on the first + // two tokens. Also note that the detection can be specific for the + // script implementation (custom lexing mode, special variables, etc). + // line_type pre_parse_line_start (token&, token_type&, lexer_mode); @@ -169,6 +193,7 @@ namespace build2 using exec_cmd_function = void (token&, token_type&, const iteration_index*, size_t li, bool single, + const function&, const location&); using exec_cond_function = bool (token&, token_type&, diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index 81abdab..b7f3314 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -9,7 +9,8 @@ # include // DBG_TERMINATE_PROCESS #endif -#include // streamsize +#include // streamsize +#include // strchr() #include #include @@ -971,81 +972,201 @@ namespace build2 : path (c.program.recall_string ()); } - // Read out the stream content into a string. Throw io_error on the - // underlying OS error. - // - // If the execution deadline is specified, then turn the stream into the - // non-blocking mode reading its content in chunks and with a single - // operation otherwise. If the specified deadline is reached while - // reading the stream, then bail out for the successful deadline and - // fail otherwise. Note that in the former case the result will be - // incomplete, but we leave it to the caller to handle that. 
- // - // Note that on Windows we can only turn pipe file descriptors into the - // non-blocking mode. Thus, we have no choice but to read from - // descriptors of other types synchronously there. That implies that we - // can potentially block indefinitely reading a file and missing the - // deadline on Windows. Note though, that the user can normally rewrite - // the command, for example, `set foo <<& dl, - const command& deadline_cmd, - const location& ll) + bool ws, bool nl, bool ex, + const optional& dl, + const command& dc, + const location& l) + : whitespace_ (ws), + newline_ (nl), + exact_ (ex), + deadline_cmd_ (dc), + location_ (l) { - string r; - ifdstream cin; - #ifndef _WIN32 if (dl) #else if (dl && pipe) #endif { - fdselect_set fds {in.get ()}; - cin.open (move (in), fdstream_mode::non_blocking); + is_.open (move (in), fdstream_mode::non_blocking); + deadline_ = dl; + } + else + is_.open (move (in)); + } - const timestamp& dlt (dl->value); + optional stream_reader:: + next () + { + if (!is_.is_open ()) + return nullopt; + + // If eos is not reached, then read and return a character. Otherwise + // close the stream and return nullopt. If the deadline is specified and + // is reached, then return nullopt for the successful deadline (as if + // eof is reached) and fail otherwise. + // + // Set the empty_ flag to false after the first character is read. + // + auto get = [this] () -> optional + { + char r; - for (char buf[4096];; ) + if (deadline_) // Reading a character in the non-blocking mode. { - timestamp now (system_clock::now ()); + fdselect_set fds {is_.fd ()}; - if (dlt <= now || ifdselect (fds, dlt - now) == 0) + // Only fallback to ifdselect() if there is no character immediately + // available. 
+ // + for (;;) { - if (!dl->success) - fail (ll) << cmd_path (deadline_cmd) - << " terminated: execution timeout expired"; - else + streamsize n (is_.readsome (&r, 1)); + + if (n == 1) break; + + if (is_.eof ()) + { + is_.close (); + return nullopt; + } + + const timestamp& dlt (deadline_->value); + timestamp now (system_clock::now ()); + + if (dlt <= now || ifdselect (fds, dlt - now) == 0) + { + is_.close (); + + if (!deadline_->success) + fail (location_) << cmd_path (deadline_cmd_) + << " terminated: execution timeout expired"; + else + return nullopt; + } + } + } + else // Reading a character in the blocking mode. + { + if (is_.peek () == ifdstream::traits_type::eof ()) + { + is_.close (); + return nullopt; } - streamsize n (cin.readsome (buf, sizeof (buf))); + is_.get (r); + } - // Bail out if eos is reached. - // - if (n == 0) - break; + empty_ = false; + return r; + }; + + if (whitespace_) // The whitespace mode. + { + const char* sep (" \n\r\t"); + + // Note that we collapse multiple consecutive whitespaces. + // + optional c; + + // Skip the whitespaces. + // + while ((c = get ()) && strchr (sep, *c) != nullptr) ; - r.append (buf, n); + // Bail out for the trailing whitespace(s) or an empty stream. + // + if (!c) + { + // Return the trailing "blank" after the trailing whitespaces in the + // exact mode, unless the stream is empty. + // + return exact_ && !empty_ ? empty_string : optional (); } + + // Read the word until eof or a whitespace character is encountered. + // + string r (1, *c); + while ((c = get ()) && strchr (sep, *c) == nullptr) + r += *c; + + return optional (move (r)); } - else + else // The newline or no-split mode. { - cin.open (move (in)); - r = cin.read_text (); - } + // Note that we don't collapse multiple consecutive newlines. + // + // Note also that we always sanitize CRs, so in the no-split mode we + // need to loop rather than read the whole text at once. 
+ // + optional r; - cin.close (); + do + { + string l; + optional c; - return r; + // Read the line until eof or newline character is encountered. + // + while ((c = get ()) && *c != '\n') + l += *c; + + // Strip the trailing CRs that can appear while, for example, + // cross-testing Windows target or as a part of msvcrt junk + // production (see above). + // + while (!l.empty () && l.back () == '\r') + l.pop_back (); + + // Append the line. + // + if (!l.empty () || // Non-empty. + c || // Empty, non-trailing. + (exact_ && // Empty, trailing, in the exact mode for + !empty_)) // non-empty stream. + { + if (newline_ || !r) + { + r = move (l); + } + else + { + *r += '\n'; + *r += l; + } + } + } + while (!newline_ && is_.is_open ()); + + return r; + } + } + + string + stream_read (auto_fd&& in, + bool pipe, + const optional& dl, + const command& dc, + const location& ll) + { + stream_reader sr (move (in), + pipe, + false /* whitespace */, + false /* newline */, + true /* exact */, + dl, + dc, + ll); + + optional s (sr.next ()); + return s ? move (*s) : empty_string; } // The set pseudo-builtin: set variable from the stdin input. @@ -1087,87 +1208,17 @@ namespace build2 if (vname.empty ()) fail (ll) << "set: empty variable name"; - // Read out the stream content into a string while keeping an eye on - // the deadline. - // - string s (read (move (in), pipe, dl, deadline_cmd, ll)); + stream_reader sr (move (in), pipe, + ops.whitespace (), ops.newline (), ops.exact (), + dl, deadline_cmd, + ll); // Parse the stream content into the variable value. // names ns; - if (!s.empty ()) - { - if (ops.whitespace ()) // The whitespace mode. - { - // Note that we collapse multiple consecutive whitespaces. - // - for (size_t p (0); p != string::npos; ) - { - // Skip the whitespaces. - // - const char* sep (" \n\r\t"); - size_t b (s.find_first_not_of (sep, p)); - - if (b != string::npos) // Word beginning. - { - size_t e (s.find_first_of (sep, b)); // Find the word end. 
- ns.emplace_back (string (s, b, e != string::npos ? e - b : e)); - - p = e; - } - else // Trailings whitespaces. - { - // Append the trailing "blank" after the trailing whitespaces - // in the exact mode. - // - if (ops.exact ()) - ns.emplace_back (empty_string); - - // Bail out since the end of the string is reached. - // - break; - } - } - } - else // The newline or no-split mode. - { - // Note that we don't collapse multiple consecutive newlines. - // - // Note also that we always sanitize CRs so this loop is always - // needed. - // - for (size_t p (0); p != string::npos; ) - { - size_t e (s.find ('\n', p)); - string l (s, p, e != string::npos ? e - p : e); - - // Strip the trailing CRs that can appear while, for example, - // cross-testing Windows target or as a part of msvcrt junk - // production (see above). - // - while (!l.empty () && l.back () == '\r') - l.pop_back (); - - // Append the line. - // - if (!l.empty () || // Non-empty. - e != string::npos || // Empty, non-trailing. - ops.exact ()) // Empty, trailing, in the exact mode. - { - if (ops.newline () || ns.empty ()) - ns.emplace_back (move (l)); - else - { - ns[0].value += '\n'; - ns[0].value += l; - } - } - - p = e != string::npos ? e + 1 : e; - } - } - } + for (optional s; (s = sr.next ()); ) + ns.emplace_back (move (*s)); env.set_variable (move (vname), move (ns), @@ -1242,7 +1293,7 @@ namespace build2 const iteration_index* ii, size_t li, size_t ci, const location& ll, bool diag, - string* output, + const function& cf, bool last_cmd, optional dl = nullopt, const command* dl_cmd = nullptr, // env -t pipe_command* prev_cmd = nullptr) @@ -1253,8 +1304,10 @@ namespace build2 // if (bc == ec) { - if (output != nullptr) + if (cf != nullptr) { + assert (!last_cmd); // Otherwise we wouldn't be here. + // The pipeline can't be empty. 
// assert (ifd != nullfd && prev_cmd != nullptr); @@ -1263,15 +1316,14 @@ namespace build2 try { - *output = read (move (ifd), - true /* pipe */, - dl, - dl_cmd != nullptr ? *dl_cmd : c, - ll); + cf (env, strings () /* arguments */, + move (ifd), true /* pipe */, + dl, dl_cmd != nullptr ? *dl_cmd : c, + ll); } catch (const io_error& e) { - fail (ll) << "io error reading " << cmd_path (c) << " output: " + fail (ll) << "unable to read from " << cmd_path (c) << " output: " << e; } } @@ -1329,9 +1381,10 @@ namespace build2 command_pipe::const_iterator nc (bc + 1); bool last (nc == ec); - // Make sure that stdout is not redirected if meant to be read. + // Make sure that stdout is not redirected if meant to be read (last_cmd + // is false) or cannot not be produced (last_cmd is true). // - if (last && output != nullptr && c.out) + if (last && c.out && cf != nullptr) fail (ll) << "stdout cannot be redirected"; // True if the process path is not pre-searched and the program path @@ -1345,7 +1398,7 @@ namespace build2 const redirect& in ((c.in ? *c.in : env.in).effective ()); - const redirect* out (!last || output != nullptr + const redirect* out (!last || (cf != nullptr && !last_cmd) ? nullptr // stdout is piped. : &(c.out ? *c.out : env.out).effective ()); @@ -1413,7 +1466,7 @@ namespace build2 if (c.out) fail (ll) << program << " builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << program << " builtin stdout cannot be read"; if (c.err) @@ -1620,7 +1673,7 @@ namespace build2 if (c.out) fail (ll) << "set builtin stdout cannot be redirected"; - if (output != nullptr) + if (cf != nullptr && !last_cmd) fail (ll) << "set builtin stdout cannot be read"; if (c.err) @@ -1640,6 +1693,39 @@ namespace build2 return true; } + // If this is the last command in the pipe and the command function is + // specified for it, then call it. + // + if (last && cf != nullptr && last_cmd) + { + // Must be enforced by the caller. 
+ // + assert (!c.out && !c.err && !c.exit); + + try + { + cf (env, c.arguments, + move (ifd), !first, + dl, dl_cmd != nullptr ? *dl_cmd : c, + ll); + } + catch (const io_error& e) + { + diag_record dr (fail (ll)); + + dr << cmd_path (c) << ": unable to read from "; + + if (prev_cmd != nullptr) + dr << cmd_path (prev_cmd->cmd) << " output"; + else + dr << "stdin"; + + dr << ": " << e; + } + + return true; + } + // Open a file for command output redirect if requested explicitly // (file overwrite/append redirects) or for the purpose of the output // validation (none, here_*, file comparison redirects), register the @@ -2220,7 +2306,7 @@ namespace build2 nc, ec, move (ofd.in), ii, li, ci + 1, ll, diag, - output, + cf, last_cmd, dl, dl_cmd, &pc); @@ -2347,7 +2433,7 @@ namespace build2 nc, ec, move (ofd.in), ii, li, ci + 1, ll, diag, - output, + cf, last_cmd, dl, dl_cmd, &pc); @@ -2487,7 +2573,7 @@ namespace build2 const iteration_index* ii, size_t li, const location& ll, bool diag, - string* output) + const function& cf, bool last_cmd) { // Commands are numbered sequentially throughout the expression // starting with 1. Number 0 means the command is a single one. @@ -2532,7 +2618,7 @@ namespace build2 p.begin (), p.end (), auto_fd (), ii, li, ci, ll, print, - output); + cf, last_cmd); } ci += p.size (); @@ -2546,13 +2632,18 @@ namespace build2 const command_expr& expr, const iteration_index* ii, size_t li, const location& ll, - string* output) + const function& cf, + bool last_cmd) { // Note that we don't print the expression at any verbosity level // assuming that the caller does this, potentially providing some // additional information (command type, etc). // - if (!run_expr (env, expr, ii, li, ll, true /* diag */, output)) + if (!run_expr (env, + expr, + ii, li, ll, + true /* diag */, + cf, last_cmd)) throw failed (); // Assume diagnostics is already printed. 
} @@ -2561,11 +2652,15 @@ namespace build2 const command_expr& expr, const iteration_index* ii, size_t li, const location& ll, - string* output) + const function& cf, bool last_cmd) { // Note that we don't print the expression here (see above). // - return run_expr (env, expr, ii, li, ll, false /* diag */, output); + return run_expr (env, + expr, + ii, li, ll, + false /* diag */, + cf, last_cmd); } void diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx index 01b010c..5d46d21 100644 --- a/libbuild2/script/run.hxx +++ b/libbuild2/script/run.hxx @@ -38,22 +38,24 @@ namespace build2 // Location is the start position of this command line in the script. It // can be used in diagnostics. // - // Optionally, save the command output into the referenced variable. In - // this case assume that the expression contains a single pipline. + // Optionally, execute the specified function at the end of the pipe, + // either after the last command or instead of it. // void run (environment&, const command_expr&, const iteration_index*, size_t index, const location&, - string* output = nullptr); + const function& = nullptr, + bool last_cmd = true); bool run_cond (environment&, const command_expr&, const iteration_index*, size_t index, const location&, - string* output = nullptr); + const function& = nullptr, + bool last_cmd = true); // Perform the registered special file cleanups in the direct order and // then the regular cleanups in the reverse order. @@ -80,6 +82,62 @@ namespace build2 // string diag_path (const dir_name_view&); + + // Read out the stream content into a string, optionally splitting the + // input data at whitespaces or newlines in which case return one + // sub-string at a time (see the set builtin options for the splitting + // semantics). Throw io_error on the underlying OS error. + // + // If the execution deadline is specified, then turn the stream into the + // non-blocking mode. 
If the specified deadline is reached while reading + // the stream, then bail out for the successful deadline and fail + // otherwise. Note that in the former case the result will be incomplete, + // but we leave it to the caller to handle that. + // + // Note that on Windows we can only turn pipe file descriptors into the + // non-blocking mode. Thus, we have no choice but to read from descriptors + // of other types synchronously there. That implies that we can + // potentially block indefinitely reading a file and missing the deadline + // on Windows. Note though, that the user can normally rewrite the + // command, for example, `set foo <<<file` with `cat file | set foo` to + // avoid this problem. + // + class stream_reader + { + public: + stream_reader (auto_fd&&, + bool pipe, + bool whitespace, bool newline, bool exact, + const optional<deadline>&, + const command& deadline_cmd, + const location&); + + // Return nullopt if eos is reached. + // + optional + next (); + + private: + ifdstream is_; + bool whitespace_; + bool newline_; + bool exact_; + optional deadline_; + const command& deadline_cmd_; + const location& location_; + + bool empty_ = true; // Set to false after the first character is read. + }; + + // Read the stream content using the stream reader in the no-split exact + // mode.
+ // + string + stream_read (auto_fd&&, + bool pipe, + const optional&, + const command& deadline_cmd, + const location&); } } diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx index 33c4c30..b8dfc68 100644 --- a/libbuild2/script/script.cxx +++ b/libbuild2/script/script.cxx @@ -20,16 +20,17 @@ namespace build2 switch (lt) { - case line_type::var: s = "variable"; break; - case line_type::cmd: s = "command"; break; - case line_type::cmd_if: s = "'if'"; break; - case line_type::cmd_ifn: s = "'if!'"; break; - case line_type::cmd_elif: s = "'elif'"; break; - case line_type::cmd_elifn: s = "'elif!'"; break; - case line_type::cmd_else: s = "'else'"; break; - case line_type::cmd_while: s = "'while'"; break; - case line_type::cmd_for: s = "'for'"; break; - case line_type::cmd_end: s = "'end'"; break; + case line_type::var: s = "variable"; break; + case line_type::cmd: s = "command"; break; + case line_type::cmd_if: s = "'if'"; break; + case line_type::cmd_ifn: s = "'if!'"; break; + case line_type::cmd_elif: s = "'elif'"; break; + case line_type::cmd_elifn: s = "'elif!'"; break; + case line_type::cmd_else: s = "'else'"; break; + case line_type::cmd_while: s = "'while'"; break; + case line_type::cmd_for_args: s = "'for'"; break; + case line_type::cmd_for_stream: s = "'for'"; break; + case line_type::cmd_end: s = "'end'"; break; } return o << s; @@ -227,7 +228,8 @@ namespace build2 case line_type::cmd_elifn: case line_type::cmd_else: case line_type::cmd_while: - case line_type::cmd_for: fc_ind += " "; break; + case line_type::cmd_for_args: + case line_type::cmd_for_stream: fc_ind += " "; break; default: break; } diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index 5eb4ee9..aa96b7f 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -28,7 +28,8 @@ namespace build2 cmd_elifn, cmd_else, cmd_while, - cmd_for, + cmd_for_args, // `for x: ...` + cmd_for_stream, // `... 
| for x` and `for x <...` cmd_end }; @@ -42,7 +43,7 @@ namespace build2 union { - const variable* var; // Pre-entered for line_type::var. + const variable* var; // Pre-entered for line_type::{var,cmd_for_*}. }; }; @@ -547,7 +548,7 @@ namespace build2 // Set variable value with optional (non-empty) attributes. // virtual void - set_variable (string&& name, + set_variable (string name, names&&, const string& attrs, const location&) = 0; @@ -580,6 +581,17 @@ namespace build2 ~environment () = default; }; + // Custom command function that can be executed at the end of the pipe. + // Should throw io_error on the underlying OS error. + // + using command_function = void (environment&, + const strings& args, + auto_fd in, + bool pipe, + const optional&, + const command& deadline_cmd, + const location&); + // Helpers. // // Issue diagnostics with the specified prefix and fail if the string -- cgit v1.1