// file : libbuild2/script/script.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/script/script.hxx> #include <chrono> #include <sstream> #include <cstring> // strchr() using namespace std; namespace build2 { namespace script { ostream& operator<< (ostream& o, line_type lt) { const char* s (nullptr); switch (lt) { case line_type::var: s = "variable"; break; case line_type::cmd: s = "command"; break; case line_type::cmd_if: s = "'if'"; break; case line_type::cmd_ifn: s = "'if!'"; break; case line_type::cmd_elif: s = "'elif'"; break; case line_type::cmd_elifn: s = "'elif!'"; break; case line_type::cmd_else: s = "'else'"; break; case line_type::cmd_while: s = "'while'"; break; case line_type::cmd_for_args: s = "'for'"; break; case line_type::cmd_for_stream: s = "'for'"; break; case line_type::cmd_end: s = "'end'"; break; } return o << s; } void dump (ostream& os, const line& ln, bool newline) { // Print the line's tokens literal representation trying to reproduce // the quoting. Consider mixed quoting as double quoting since the // information is lost. // const replay_tokens& rts (ln.tokens); assert (!rts.empty ()); // ... <newline> const token& ft (rts[0].token); // If true, this is a special builtin line. // // Note that special characters set differs for such lines since they // are parsed in the value lexer mode. // bool builtin (ln.type == line_type::cmd && ft.type == token_type::word && (ft.value == "diag" || ft.value == "depdb")); // '"' or '\'' if we are inside the quoted token sequence and '\0' // otherwise. Thus, can be used as bool. // char qseq ('\0'); optional<token_type> prev_tt; for (const replay_token& rt: rts) { const token& t (rt.token); // '"' or '\'' if the token is quoted and '\0' otherwise. Thus, can be // used as bool. // char qtok ('\0'); switch (t.qtype) { case quote_type::unquoted: qtok = '\0'; break; case quote_type::single: qtok = '\''; break; case quote_type::mixed: case quote_type::double_: qtok = '"'; break; } // If being inside a quoted token sequence we have reached a token // quoted differently or the newline, then we probably made a mistake // misinterpreting some previous partially quoted token, for example // f"oo" as "foo. If that's the case, all we can do is to end the // sequence adding the trailing quote. // // Note that a token inside the quoted sequence may well be unquoted, // so for example "$foo" is lexed as: // // token quoting complete notes // '' " no // $ " yes // 'foo' Unquoted since lexed in variable mode. // '' " no // \n // if (qseq && ((qtok && qtok != qseq) || t.type == token_type::newline)) { os << qseq; qseq = '\0'; } // Left and right token quotes (can be used as bool). // char lq ('\0'); char rq ('\0'); // If the token is quoted, then determine if/which quotes should be // present on its sides and track the quoted token sequence. // if (qtok) { if (t.qcomp) // Complete token quoting. { // If we are inside a quoted token sequence then do noting. // Otherwise just quote the current token not starting a sequence. // if (!qseq) { lq = qtok; rq = qtok; } } else // Partial token quoting. { // Note that we can not always reproduce the original tokens // representation for partial quoting. For example, the two // following tokens are lexed into the identical token objects: // // "foo // f"oo" // // We will always assume that the partially quoted token either // starts or ends the quoted token sequence. Sometimes this ends // up unexpectedly, but seems there is not much we can do: // // f"oo" "ba"r -> "foo bar" // if (!qseq) // Start quoted sequence. { lq = qtok; qseq = qtok; } else // End quoted sequence. { rq = qtok; qseq = '\0'; } } } // Print the space character prior to the separated token, unless it // is a first like token or the newline. // if (t.separated && t.type != token_type::newline && &rt != &rts[0]) os << ' '; if (lq) os << lq; // Print the left quote, if required. // Escape the special characters, unless the token in not a word, is a // variable name, or is single-quoted. Note that the special // characters set depends on whether the word is double-quoted or // unquoted and whether this is a special builtin line or not. // if (t.type == token_type::word && qtok != '\'' && (!prev_tt || *prev_tt != token_type::dollar)) { for (char c: t.value) { if (strchr (qtok || builtin ? "\\\"" : "|&<>=\\\"", c) != nullptr) os << '\\'; os << c; } } else if (t.type != token_type::newline || newline) t.printer (os, t, print_mode::raw); if (rq) os << rq; // Print the right quote, if required. prev_tt = t.type; } } void dump (ostream& os, const string& ind, const lines& ls) { // Additionally indent the flow control construct block lines. // string fc_ind; for (const line& l: ls) { // Before printing indentation, decrease it if the else, end, etc line // is reached. // switch (l.type) { case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: case line_type::cmd_end: { size_t n (fc_ind.size ()); assert (n >= 2); fc_ind.resize (n - 2); break; } default: break; } // Print indentations. // os << ind << fc_ind; // After printing indentation, increase it for the flow control // construct block lines. // switch (l.type) { case line_type::cmd_if: case line_type::cmd_ifn: case line_type::cmd_elif: case line_type::cmd_elifn: case line_type::cmd_else: case line_type::cmd_while: case line_type::cmd_for_args: case line_type::cmd_for_stream: fc_ind += " "; break; default: break; } dump (os, l, true /* newline */); } } // Quote a string unconditionally, assuming it contains some special // characters. // // If the quote character is present in the string then it is double // quoted rather than single quoted. In this case the following characters // are escaped: // // \" // static void to_stream_quoted (ostream& o, const char* s) { if (strchr (s, '\'') != nullptr) { o << '"'; for (; *s != '\0'; ++s) { // Escape characters special inside double quotes. // if (strchr ("\\\"", *s) != nullptr) o << '\\'; o << *s; } o << '"'; } else o << '\'' << s << '\''; } static inline void to_stream_quoted (ostream& o, const string& s) { to_stream_quoted (o, s.c_str ()); } // Quote if empty or contains spaces or any of the command line special // characters. // static void to_stream_q (ostream& o, const string& s) { // NOTE: update dump(line) if adding any new special character. // if (s.empty () || s.find_first_of (" |&<>=\\\"'") != string::npos) to_stream_quoted (o, s); else o << s; } void to_stream (ostream& o, const command& c, command_to_stream m) { auto print_path = [&o] (const path& p) { using build2::operator<<; ostringstream s; stream_verb (s, stream_verb (o)); s << p; to_stream_q (o, s.str ()); }; auto print_redirect = [&o, print_path] (const redirect& r, int fd) { const redirect& er (r.effective ()); // Print the none redirect (no data allowed) if/when the respective // syntax is invented. // if (er.type == redirect_type::none) return; o << ' '; // Print the redirect file descriptor. // if (fd == 2) o << fd; // Print the redirect original representation and the modifiers, if // present. // r.token.printer (o, r.token, print_mode::raw); // Print the rest of the redirect (file path, etc). // switch (er.type) { case redirect_type::none: assert (false); break; case redirect_type::here_doc_ref: assert (false); break; case redirect_type::pass: case redirect_type::null: case redirect_type::trace: break; case redirect_type::merge: o << er.fd; break; case redirect_type::file: { print_path (er.file.path); break; } case redirect_type::here_str_literal: case redirect_type::here_doc_literal: { if (er.type == redirect_type::here_doc_literal) o << er.end; else { const string& v (er.str); to_stream_q (o, er.modifiers ().find (':') == string::npos ? string (v, 0, v.size () - 1) // Strip newline. : v); } break; } case redirect_type::here_str_regex: case redirect_type::here_doc_regex: { const regex_lines& re (er.regex); if (er.type == redirect_type::here_doc_regex) o << re.intro + er.end + re.intro + re.flags; else { assert (!re.lines.empty ()); // Regex can't be empty. regex_line l (re.lines[0]); to_stream_q (o, re.intro + l.value + re.intro + l.flags); } break; } } }; auto print_doc = [&o] (const redirect& r) { o << endl; if (r.type == redirect_type::here_doc_literal) o << r.str; else { assert (r.type == redirect_type::here_doc_regex); const regex_lines& rl (r.regex); for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); i != e; ++i) { if (i != b) o << endl; const regex_line& l (*i); if (l.regex) // Regex (possibly empty), o << rl.intro << l.value << rl.intro << l.flags; else if (!l.special.empty ()) // Special literal. o << rl.intro; else // Textual literal. o << l.value; o << l.special; } } o << (r.modifiers ().find (':') == string::npos ? "" : "\n") << r.end; }; if ((m & command_to_stream::header) == command_to_stream::header) { // Print the env builtin if any of its options/arguments are present. // if (c.timeout || c.cwd || !c.variables.empty ()) { o << "env"; // Timeout. // if (c.timeout) o << " -t " << chrono::duration_cast<chrono::seconds> (*c.timeout).count (); // CWD. // if (c.cwd) { o << " -c "; print_path (*c.cwd); } // Variable unsets/sets. // auto b (c.variables.begin ()), i (b), e (c.variables.end ()); // Print a variable name or assignment to the stream, quoting it if // necessary. // auto print = [&o] (const string& v, bool name) { size_t p (v.find_first_of (" \\\"'")); // Print the variable name/assignment as is if it doesn't contain // any special characters. // if (p == string::npos) { o << v; return; } // If the variable name contains any special characters, then // quote the name/assignment as a whole. // size_t eq; if (name || (eq = v.find ('=')) > p) { to_stream_quoted (o, v); return; } // Finally, if the variable value contains any special characters, // then we quote only the value. // assert (eq != string::npos); o.write (v.c_str (), eq + 1); // Includes '='. to_stream_quoted (o, v.c_str () + eq + 1); }; // Variable unsets. // // Print the variable unsets as the -u options until a variable set // is encountered (contains '=') or the end of the variable list is // reached. // // Note that we rely on the fact that unsets come first, which is // guaranteed by parser::parse_env_builtin(). // for (; i != e; ++i) { const string& v (*i); if (v.find ('=') == string::npos) // Variable unset. { o << " -u "; print (v, true /* name*/); } else // Variable set. break; } // Variable sets. // // Note that we don't add the '-' separator since we always use the // `-* <value>` option notation and so there can't be any ambiguity // with a variable set. // for (; i != e; ++i) { o << ' '; print (*i, false /* name */); } o << " -- "; } // Program. // to_stream_q (o, c.program.recall_string ()); // Arguments. // for (const string& a: c.arguments) { o << ' '; to_stream_q (o, a); } // Redirects. // if (c.in) print_redirect (*c.in, 0); if (c.out) print_redirect (*c.out, 1); if (c.err) print_redirect (*c.err, 2); for (const auto& p: c.cleanups) { o << " &"; if (p.type != cleanup_type::always) o << (p.type == cleanup_type::maybe ? '?' : '!'); print_path (p.path); } if (c.exit) { switch (c.exit->comparison) { case exit_comparison::eq: o << " == "; break; case exit_comparison::ne: o << " != "; break; } o << static_cast<uint16_t> (c.exit->code); } } if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) { // Here-documents. // if (c.in && (c.in->type == redirect_type::here_doc_literal || c.in->type == redirect_type::here_doc_regex)) print_doc (*c.in); if (c.out && (c.out->type == redirect_type::here_doc_literal || c.out->type == redirect_type::here_doc_regex)) print_doc (*c.out); if (c.err && (c.err->type == redirect_type::here_doc_literal || c.err->type == redirect_type::here_doc_regex)) print_doc (*c.err); } } void to_stream (ostream& o, const command_pipe& p, command_to_stream m) { if ((m & command_to_stream::header) == command_to_stream::header) { for (auto b (p.begin ()), i (b); i != p.end (); ++i) { if (i != b) o << " | "; to_stream (o, *i, command_to_stream::header); } } if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) { for (const command& c: p) to_stream (o, c, command_to_stream::here_doc); } } void to_stream (ostream& o, const command_expr& e, command_to_stream m) { if ((m & command_to_stream::header) == command_to_stream::header) { for (auto b (e.begin ()), i (b); i != e.end (); ++i) { if (i != b) { switch (i->op) { case expr_operator::log_or: o << " || "; break; case expr_operator::log_and: o << " && "; break; } } to_stream (o, i->pipe, command_to_stream::header); } } if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) { for (const expr_term& t: e) to_stream (o, t.pipe, command_to_stream::here_doc); } } // environment_vars // environment_vars::iterator environment_vars:: find (const string& var) { size_t n (var.find ('=')); if (n == string::npos) n = var.size (); return find_if (begin (), end (), [&var, n] (const string& v) { return v.compare (0, n, var, 0, n) == 0 && (v[n] == '=' || v[n] == '\0'); }); } void environment_vars:: add (string var) { iterator i (find (var)); if (i != end ()) *i = move (var); else push_back (move (var)); } // redirect // redirect:: redirect (redirect_type t) : type (t) { switch (type) { case redirect_type::none: case redirect_type::pass: case redirect_type::null: case redirect_type::trace: case redirect_type::merge: break; case redirect_type::here_str_literal: case redirect_type::here_doc_literal: new (&str) string (); break; case redirect_type::here_str_regex: case redirect_type::here_doc_regex: { new (®ex) regex_lines (); break; } case redirect_type::file: new (&file) file_type (); break; case redirect_type::here_doc_ref: assert (false); break; } } redirect:: redirect (redirect&& r) noexcept : type (r.type), token (move (r.token)), end (move (r.end)), end_line (r.end_line), end_column (r.end_column) { switch (type) { case redirect_type::none: case redirect_type::pass: case redirect_type::null: case redirect_type::trace: break; case redirect_type::merge: fd = r.fd; break; case redirect_type::here_str_literal: case redirect_type::here_doc_literal: { new (&str) string (move (r.str)); break; } case redirect_type::here_str_regex: case redirect_type::here_doc_regex: { new (®ex) regex_lines (move (r.regex)); break; } case redirect_type::file: { new (&file) file_type (move (r.file)); break; } case redirect_type::here_doc_ref: { new (&ref) reference_wrapper<const redirect> (r.ref); break; } } } redirect& redirect:: operator= (redirect&& r) noexcept { if (this != &r) { this->~redirect (); new (this) redirect (move (r)); // Assume noexcept move-constructor. } return *this; } redirect:: ~redirect () { switch (type) { case redirect_type::none: case redirect_type::pass: case redirect_type::null: case redirect_type::trace: case redirect_type::merge: break; case redirect_type::here_str_literal: case redirect_type::here_doc_literal: str.~string (); break; case redirect_type::here_str_regex: case redirect_type::here_doc_regex: regex.~regex_lines (); break; case redirect_type::file: file.~file_type (); break; case redirect_type::here_doc_ref: { ref.~reference_wrapper<const redirect> (); break; } } } // environment // void environment:: clean (script::cleanup c, bool implicit) { using script::cleanup; // Implicit never-cleanup doesn't make sense. // assert (!implicit || c.type != cleanup_type::never); const path& p (c.path); if (sandbox_dir.path != nullptr && !p.sub (*sandbox_dir.path)) { if (implicit) return; else assert (false); // Error so should have been checked. } auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;}; auto i (find_if (cleanups.begin (), cleanups.end (), pr)); if (i == cleanups.end ()) cleanups.emplace_back (move (c)); else if (!implicit) i->type = c.type; } void environment:: clean_special (path p) { special_cleanups.emplace_back (move (p)); } const environment_vars& environment:: exported_variables (environment_vars&) { return exported_vars; } const environment_vars& environment:: merge_exported_variables (const environment_vars& vars, environment_vars& storage) { const environment_vars& own (exported_variables (storage)); // If both, the own and the specified variable (un)sets are present, // then merge them. Otherwise, return the own (un)sets, if present, or // the specified (un)sets otherwise. // if (!own.empty () && !vars.empty ()) { // Copy the own (un)sets into the storage, if they are not there yet. // if (&storage != &own) storage = own; for (const string& v: vars) storage.add (v); return storage; } else if (!own.empty ()) return own; else return vars; } // Helpers. // void verify_environment_var_name (const string& name, const char* prefix, const location& l, const char* opt) { if (name.empty ()) { diag_record dr (fail (l)); dr << prefix << "empty "; if (opt == nullptr) dr << "variable name"; else dr << "value for option " << opt; } if (name.find ('=') != string::npos) { diag_record dr (fail (l)); dr << prefix << "invalid "; if (opt == nullptr) dr << "variable name '" << name << "'"; else dr << "value '" << name << "' for option " << opt; dr << ": contains '='"; } } void verify_environment_var_assignment (const string& var, const char* prefix, const location& l) { size_t p (var.find ('=')); if (p == 0) fail (l) << prefix << "empty variable name"; if (p == string::npos) fail (l) << prefix << "expected variable assignment instead of '" << var << "'"; } } }