From a473abe80f4c42a366f0573bbbc762fa440b7fe6 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Tue, 26 Apr 2022 10:39:03 +0200
Subject: Use new cmdline type for canned command lines in {Build,Test}script

---
 libbuild2/script/parser.cxx | 47 +++++++++++++++++++++++++++++++++++----------
 libbuild2/script/parser.hxx | 19 ++++++++++++++++--
 2 files changed, 54 insertions(+), 12 deletions(-)

(limited to 'libbuild2/script')

diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx
index 82eb9c8..d5cabe2 100644
--- a/libbuild2/script/parser.cxx
+++ b/libbuild2/script/parser.cxx
@@ -3,6 +3,8 @@
 
 #include
 
+#include <cstring> // strchr()
+
 #include
 #include // exit
 #include
@@ -15,6 +17,33 @@ namespace build2
   {
     using type = token_type;
 
+    bool parser::
+    need_cmdline_relex (const string& s)
+    {
+      for (auto i (s.begin ()), e (s.end ()); i != e; ++i)
+      {
+        char c (*i);
+
+        if (c == '\\')
+        {
+          if (++i == e) // Trailing backslash: nothing left to escape.
+            return false;
+
+          c = *i;
+
+          if (c == '\\' || c == '\'' || c == '\"')
+            return true;
+
+          // Fall through.
+        }
+
+        if (strchr ("|<>&\"'", c) != nullptr)
+          return true;
+      }
+
+      return false;
+    }
+
     value parser::
     parse_variable_line (token& t, type& tt)
     {
@@ -1092,16 +1121,17 @@ namespace build2
 
               // Process what we got.
               //
-              // First see if this is a value that should not be re-lexed. The
-              // long term plan is to only re-lex values of a special type
-              // representing a canned command line.
+              // First see if this is a value that should not be re-lexed. We
+              // only re-lex values of the special `cmdline` type that
+              // represents a canned command line.
               //
               // Otherwise, determine whether anything inside was quoted (note
               // that the current token is "next" and is not part of this).
               //
-              bool q (
-                (pr.value && !relex_) ||
-                (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
+              bool lex (
+                pr.value
+                ? pr.type != nullptr && pr.type->is_a<cmdline> ()
+                : (quoted () - (t.qtype != quote_type::unquoted ? 1 : 0)) == 0);
 
               for (name& n: ns)
               {
@@ -1123,10 +1153,7 @@ namespace build2
                   // interesting characters (operators plus quotes/escapes),
                   // then no need to re-lex.
                   //
-                  // NOTE: update quoting (script.cxx:to_stream_q()) if adding
-                  // any new characters.
-                  //
-                  if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+                  if (!lex || !need_cmdline_relex (s))
                     add_word (move (s), l);
                   else
                   {
diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx
index 6e24d37..d8e5dbf 100644
--- a/libbuild2/script/parser.hxx
+++ b/libbuild2/script/parser.hxx
@@ -25,7 +25,7 @@ namespace build2
     class parser: protected build2::parser
     {
     public:
-      parser (context& c, bool relex): build2::parser (c), relex_ (relex) {}
+      parser (context& c): build2::parser (c) {}
 
       // Helpers.
       //
@@ -42,6 +42,15 @@ namespace build2
 
       using build2::parser::apply_value_attributes;
 
+      // Return true if a command line element needs to be re-lexed.
+      //
+      // Specifically, it needs to be re-lexed if it contains any of the
+      // special characters (|<>&), quotes ("') or effective escape sequences
+      // (\", \', \\).
+      //
+      static bool
+      need_cmdline_relex (const string&);
+
       // Commonly used parsing functions. Issue diagnostics and throw failed
       // in case of an error.
       //
@@ -200,6 +209,13 @@ namespace build2
       // something that requires re-lexing, for example `foo|bar`, which won't
       // be easy to translate but which are handled by the parser.
       //
+      // Note that the chunk could be of the special cmdline type in which
+      // case the names may need to be "preprocessed" (at least unquoted or
+      // potentially fully re-lexed) before being analyzed/consumed. Note also
+      // that in this case any names left unconsumed must remain of the
+      // cmdline type.
+      //
+      //
       // During the pre-parsing phase the returned process path and names
       // (that must still be parsed) are discarded. The main purpose of the
       // call is to allow implementations to perform static script analysis,
@@ -229,7 +245,6 @@ namespace build2
       size_t replay_quoted_;
 
     protected:
-      bool relex_;
       lexer* lexer_ = nullptr;
     };
   }
-- 
cgit v1.1