// file : libbuild2/functions-process.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbutl/regex.hxx> #include <libbutl/builtin.hxx> #include <libbuild2/scope.hxx> #include <libbuild2/context.hxx> #include <libbuild2/function.hxx> #include <libbuild2/variable.hxx> using namespace std; using namespace butl; namespace build2 { // Ideas for potential further improvements: // // - Use scope to query environment. // - Mode to ignore error/suppress diagnostics and return NULL? // - Similar regex flags to regex.* functions (icase, etc)? // Convert the program (builtin or process) arguments from names to strings. // The function name is only used for diagnostics. // static inline strings program_args (names&& args, const char* fn) { try { return convert<strings> (move (args)); } catch (const invalid_argument& e) { fail << "invalid process." << fn << "() argument: " << e << endf; } } // Read text from a stream, trim it and return as a value. Throw io_error on // the stream reading error. // static value read (auto_fd&& fd) { string v; ifdstream is (move (fd)); // Note that getline() will fail if there is no data. // if (is.peek () != ifdstream::traits_type::eof ()) getline (is, v, '\0'); is.close (); // Detect errors. names r; r.push_back (to_name (move (trim (v)))); return value (move (r)); } regex parse_regex (const string&, regex::flag_type); // functions-regex.cxx // Read lines from a stream, match them against a regular expression, and // return the list of matched lines or their replacements, if the format is // specified. Throw invalid_argument on the regex parsing error and io_error // on the stream reading error. // static value read_regex (auto_fd&& fd, const string& pat, const optional<string>& fmt) { names r; ifdstream is (move (fd), fdstream_mode::skip, ifdstream::badbit); // Note that the stream is read out (and is silently closed) if // invalid_argument is thrown, which is probably ok since this is not a // common case. // regex re (parse_regex (pat, regex::ECMAScript)); for (string l; !eof (getline (is, l)); ) { if (fmt) { pair<string, bool> p (regex_replace_match (l, re, *fmt)); if (p.second) r.push_back (to_name (move (p.first))); } else if (regex_match (l, re)) r.push_back (to_name (move (l))); } is.close (); // Detect errors. return value (move (r)); } // Return the builtin function pointer if this is a call to an internal // builtin and NULL otherwise. // static builtin_function* builtin (const names& args) { if (args.empty ()) return nullptr; const name& nm (args[0]); if (!nm.simple () || nm.pair) return nullptr; const builtin_info* r (builtins.find (nm.value)); return r != nullptr ? r->function : nullptr; } // Return the builtin name and its arguments. The builtin function is only // used to make sure that args have been checked with the builtin() // predicate. // static pair<string, strings> builtin_args (builtin_function*, names&& args, const char* fn) { string bn (move (args[0].value)); args.erase (args.begin (), args.begin () + 1); return pair<string, strings> (move (bn), program_args (move (args), fn)); } // Read data from a stream, optionally processing it and returning the // result as a value. // using read_function = function<value (auto_fd&&)>; // Run a builtin. The builtin name is only used for diagnostics. // static value run_builtin_impl (builtin_function* bf, const strings& args, const string& bn, const read_function& read) { try { dir_path cwd; builtin_callbacks cb; fdpipe ofd (open_pipe ()); if (verb >= 3) print_process (process_args (bn, args)); uint8_t rs; // Storage. butl::builtin b (bf (rs, args, nullfd /* stdin */, move (ofd.out) /* stdout */, nullfd /* stderr */, cwd, cb)); try { value r (read (move (ofd.in))); if (b.wait () == 0) return r; // Fall through. // } catch (const io_error& e) { // If the builtin has failed then assume the io error was caused by // that and so fall through. // if (b.wait () == 0) fail << "io error reading " << bn << " builtin output: " << e; } // While assuming that the builtin has issued the diagnostics on failure // we still print the error message (see process_finish() for details). // diag_record dr; dr << fail << "builtin " << bn << " " << process_exit (rs); if (verb >= 1 && verb <= 2) { dr << info << "command line: "; print_process (dr, process_args (bn, args)); } dr << endf; } catch (const system_error& e) { fail << "unable to execute " << bn << " builtin: " << e << endf; } } static inline value run_builtin (const scope* s, builtin_function* bf, const strings& args, const string& bn) { // See below. // if (s != nullptr && s->ctx.phase != run_phase::load) fail << "process.run() called during " << s->ctx.phase << " phase"; return run_builtin_impl (bf, args, bn, read); } static inline value run_builtin_regex (const scope* s, builtin_function* bf, const strings& args, const string& bn, const string& pat, const optional<string>& fmt) { // See below. // if (s != nullptr && s->ctx.phase != run_phase::load) fail << "process.run_regex() called during " << s->ctx.phase << " phase"; // Note that we rely on the "small function object" optimization here. // return run_builtin_impl (bf, args, bn, [&pat, &fmt] (auto_fd&& fd) { return read_regex (move (fd), pat, fmt); }); } // Return the process path and its arguments. // static pair<process_path, strings> process_args (names&& args, const char* fn) { if (args.empty () || args[0].empty ()) fail << "executable name expected in process." << fn << "()"; optional<process_path> pp; try { size_t erase (0); // This can be a process_path (pair), process_path_ex (process_path // optionally followed by the name@, checksum@, and env-checksum@ // pairs), or just a path. // // First, check if the arguments begin with a process_path[_ex] and, if // that's the case, only use the leading name/pair to create the process // path, discarding the metadata. // if (args[0].file ()) { // Find the end of the process_path[_ex] value. // auto b (args.begin ()); auto i (value_traits<process_path_ex>::find_end (args)); if (b->pair || i != b + 1) // First is a pair or pairs after. { pp = convert<process_path> ( names (make_move_iterator (b), make_move_iterator (b + (b->pair ? 2 : 1)))); erase = i - b; } } // Fallback to a path, if this is not a process path. // if (!pp) { // Strip the builtin-escaping '^' character, if present. // path p (convert<path> (move (args[0]))); if (p.simple ()) try { const string& s (p.string ()); // Don't end up with an empty path. // if (s.size () > 1 && s[0] == '^') p = path (s, 1, s.size () - 1); } catch (const invalid_path& e) { throw invalid_argument (e.path); } pp = run_search (p); erase = 1; } args.erase (args.begin (), args.begin () + erase); } catch (const invalid_argument& e) { fail << "invalid process." << fn << "() executable path: " << e; } return pair<process_path, strings> (move (*pp), program_args (move (args), fn)); } static process process_start (const scope*, const process_path& pp, const strings& args, cstrings& cargs) { cargs.reserve (args.size () + 2); cargs.push_back (pp.recall_string ()); transform (args.begin (), args.end (), back_inserter (cargs), [] (const string& s) {return s.c_str ();}); cargs.push_back (nullptr); // Note that for now these functions can only be called during the load // phase (see below) and so no diagnostics buffering is needed. // return run_start (3 /* verbosity */, pp, cargs, 0 /* stdin */, -1 /* stdout */); } // Always issue diagnostics on process failure, regardless if the process // exited abnormally or normally with non-zero exit code. // // Note that the diagnostics stack is only printed if a diagnostics record // is created, which is not always the case for run_finish(). // void process_finish (const scope*, const cstrings& args, process& pr) { run_finish (args, pr, 2 /* verbosity */); } // Run a process. // static value run_process_impl (const scope* s, const process_path& pp, const strings& args, const read_function& read) { cstrings cargs; process pr (process_start (s, pp, args, cargs)); value r; try { r = read (move (pr.in_ofd)); } catch (const io_error& e) { if (run_wait (cargs, pr)) fail << "io error reading " << cargs[0] << " output: " << e; // If the child process has failed then assume the io error was // caused by that and let process_finish() deal with it. } process_finish (s, cargs, pr); return r; } static inline value run_process (const scope* s, const process_path& pp, const strings& args) { // The only plausible place where these functions can be called outside // the load phase are scripts and there it doesn't make much sense to use // them (the same can be achieved with commands in a uniform manner). Note // that if there is no scope, then this is most likely (certainly?) the // load phase (for example, command line). // if (s != nullptr && s->ctx.phase != run_phase::load) fail << "process.run() called during " << s->ctx.phase << " phase"; return run_process_impl (s, pp, args, read); } static inline value run_process_regex (const scope* s, const process_path& pp, const strings& args, const string& pat, const optional<string>& fmt) { // See above. // if (s != nullptr && s->ctx.phase != run_phase::load) fail << "process.run_regex() called during " << s->ctx.phase << " phase"; // Note that we rely on the "small function object" optimization here. // return run_process_impl (s, pp, args, [&pat, &fmt] (auto_fd&& fd) { return read_regex (move (fd), pat, fmt); }); } static inline value run (const scope* s, names&& args) { if (builtin_function* bf = builtin (args)) { pair<string, strings> ba (builtin_args (bf, move (args), "run")); return run_builtin (s, bf, ba.second, ba.first); } else { pair<process_path, strings> pa (process_args (move (args), "run")); return run_process (s, pa.first, pa.second); } } static inline value run_regex (const scope* s, names&& args, const string& pat, const optional<string>& fmt) { if (builtin_function* bf = builtin (args)) { pair<string, strings> ba (builtin_args (bf, move (args), "run_regex")); return run_builtin_regex (s, bf, ba.second, ba.first, pat, fmt); } else { pair<process_path, strings> pa (process_args (move (args), "run_regex")); return run_process_regex (s, pa.first, pa.second, pat, fmt); } } void process_functions (function_map& m) { function_family f (m, "process"); // $process.run(<prog>[ <args>...]) // // Run builtin or external program and return trimmed `stdout` output. // // Note that if the result of executing the program can be affected by // environment variables and this result can in turn affect the build // result, then such variables should be reported with the // `config.environment` directive. // // Note that this function is not pure and can only be called during the // load phase. // f.insert (".run", false) += [](const scope* s, names args) { return run (s, move (args)); }; f.insert ("run", false) += [](const scope* s, process_path pp) { return run_process (s, pp, strings ()); }; // $process.run_regex(<prog>[ <args>...], <pat>[, <fmt>]) // // Run builtin or external program and return `stdout` output lines // matched and optionally processed with a regular expression. // // Each line of stdout (including the customary trailing blank) is matched // (as a whole) against <pat> and, if successful, returned, optionally // processed with <fmt>, as an element of a list. See the `$regex.*()` // function family for details on regular expressions and format strings. // // Note that if the result of executing the program can be affected by // environment variables and this result can in turn affect the build // result, then such variables should be reported with the // `config.environment` directive. // // Note that this function is not pure and can only be called during the // load phase. // { auto e (f.insert (".run_regex", false)); e += [](const scope* s, names a, string p, optional<string> f) { return run_regex (s, move (a), p, f); }; e += [] (const scope* s, names a, names p, optional<names> f) { return run_regex (s, move (a), convert<string> (move (p)), f ? convert<string> (move (*f)) : nullopt_string); }; } { auto e (f.insert ("run_regex", false)); e += [](const scope* s, process_path pp, string p, optional<string> f) { return run_process_regex (s, pp, strings (), p, f); }; e += [](const scope* s, process_path pp, names p, optional<names> f) { return run_process_regex (s, pp, strings (), convert<string> (move (p)), (f ? convert<string> (move (*f)) : nullopt_string)); }; } } }