From ad7d4bd0722aa70ba634900cebf93a1b1814fed9 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sat, 23 Jul 2016 16:09:49 +0200 Subject: Only do "effective escaping" (['"\$(]) on the command line This will make things more convenient on Windows provided we use "sane" paths (no spaces, no (), etc). --- build2/b.cli | 15 +++++++++++++++ build2/b.cxx | 2 -- build2/context.cxx | 9 +++++++-- build2/lexer | 16 ++++++++++++---- build2/lexer.cxx | 34 +++++++++++++++++++--------------- build2/parser.cxx | 5 ++++- 6 files changed, 57 insertions(+), 24 deletions(-) diff --git a/build2/b.cli b/build2/b.cli index 013a756..f0cd48e 100644 --- a/build2/b.cli +++ b/build2/b.cli @@ -178,6 +178,21 @@ namespace build2 config.install.root=/usr/local config.install.root.sudo=sudo \ configure \ + + Note also that buildspec and command line variable values are treated as + \cb{buildfile} fragments and so can use quoting and escaping as well as + contain variable expansions and evaluation contexts. However, to be more + usable on various platforms, escaping in these two situations is limited + to the \i{effective sequences} of \cb{\\'}, \cb{\\\"}, \cb{\\\\}, + \cb{\\$}, and \cb{\\(} with all other sequences interpreted as is. + Together with double-quoting this is sufficient to represent any value. 
+ For example: + + \ + b config.install.root=c:\projects\install + b \"config.install.root='c:\Program Files (x86)\test\'\" + b 'config.cxx.poptions=-DFOO_STR=\"foo\"' + \ " } diff --git a/build2/b.cxx b/build2/b.cxx index 748eda7..b2ba2c1 100644 --- a/build2/b.cxx +++ b/build2/b.cxx @@ -30,8 +30,6 @@ #include #include -#include -#include #include #include diff --git a/build2/context.cxx b/build2/context.cxx index e08fb52..f756bfc 100644 --- a/build2/context.cxx +++ b/build2/context.cxx @@ -97,7 +97,12 @@ namespace build2 { istringstream is (s); is.exceptions (istringstream::failbit | istringstream::badbit); - lexer l (is, path ("")); + + // Similar to buildspec we do "effective escaping" and only for ['"\$(] + // (basically what's necessary inside a double-quoted literal plus the + // single quote). + // + lexer l (is, path (""), "\'\"\\$("); // The first token should be a name, either the variable name or the // scope qualification. @@ -143,7 +148,7 @@ namespace build2 if (c == '!' && !dir.empty ()) fail << "scope-qualified global override of variable " << n; - variable_visibility v (c == '/' ? variable_visibility::scope : + variable_visibility v (c == '/' ? variable_visibility::scope : c == '%' ? variable_visibility::project : variable_visibility::normal); diff --git a/build2/lexer b/build2/lexer index c856344..42222e6 100644 --- a/build2/lexer +++ b/build2/lexer @@ -37,10 +37,19 @@ namespace build2 class lexer: protected butl::char_scanner { public: + // If escapes is not NULL then only escape sequences with characters from + // this string are considered "effective escapes" with all others passed + // through as is. Note that the escape string is not copied. 
+ // lexer (istream& is, const path& name, + const char* escapes = nullptr, void (*processor) (token&, const lexer&) = nullptr) - : char_scanner (is), fail (name), processor_ (processor), sep_ (false) + : char_scanner (is), + fail (name), + escapes_ (escapes), + processor_ (processor), + sep_ (false) { mode (lexer_mode::normal); } @@ -100,9 +109,6 @@ namespace build2 bool skip_spaces (); - xchar - escape (); - // Diagnostics. // private: @@ -120,8 +126,10 @@ namespace build2 private: fail_mark fail; + const char* escapes_; void (*processor_) (token&, const lexer&); + struct state { lexer_mode mode; diff --git a/build2/lexer.cxx b/build2/lexer.cxx index 773cd88..84e972e 100644 --- a/build2/lexer.cxx +++ b/build2/lexer.cxx @@ -4,6 +4,8 @@ #include +#include // strchr() + using namespace std; namespace build2 @@ -309,10 +311,23 @@ namespace build2 if (c == '\\') { get (); - c = escape (); - if (c != '\n') // Ignore. - lexeme += c; - continue; + xchar e (peek ()); + + if (escapes_ == nullptr || + (!eos (e) && strchr (escapes_, e) != nullptr)) + { + get (); + + if (eos (e)) + fail (e) << "unterminated escape sequence"; + + if (e != '\n') // Ignore. + lexeme += e; + + continue; + } + else + unget (c); // Treat as a normal character. } // If we are quoted, these are ordinary characters. @@ -484,17 +499,6 @@ namespace build2 return r; } - lexer::xchar lexer:: - escape () - { - xchar c (get ()); - - if (eos (c)) - fail (c) << "unterminated escape sequence"; - - return c; - } - location_prologue lexer::fail_mark_base:: operator() (const xchar& c) const { diff --git a/build2/parser.cxx b/build2/parser.cxx index 99e4227..a64a94e 100644 --- a/build2/parser.cxx +++ b/build2/parser.cxx @@ -2680,7 +2680,10 @@ namespace build2 { path_ = &name; - lexer l (is, *path_, &paren_processor); + // We do "effective escaping" and only for ['"\$(] (basically what's + // necessary inside a double-quoted literal plus the single quote). 
+ // + lexer l (is, *path_, "\'\"\\$(", &paren_processor); lexer_ = &l; target_ = nullptr; scope_ = root_ = global_scope; -- cgit v1.1