From 897a0e4fdf9ca90ee8d236a38e138a8ae6bc3627 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 6 Mar 2015 09:15:40 +0200 Subject: Add support for lexing and parsing name pairs We will need it for the buildspec and also if/when we support map variable types. --- build/lexer | 30 +++++++++---- build/lexer.cxx | 77 +++++++++++++++++++++++++------- build/name | 4 ++ build/name.cxx | 14 +++++- build/parser | 5 ++- build/parser.cxx | 134 ++++++++++++++++++++++++++++++++++++++++++++++++------- 6 files changed, 218 insertions(+), 46 deletions(-) diff --git a/build/lexer b/build/lexer index d6817f2..787ba72 100644 --- a/build/lexer +++ b/build/lexer @@ -15,11 +15,29 @@ namespace build { + // Context-dependent lexing mode. In the value mode we don't treat + // certain characters (e.g., +, =) as special so that we can use + // them in the variable values, e.g., 'foo = g++'. In contrast, + // in the variable mode, we restrict certain character (e.g., /) + // from appearing in the name. The pairs mode is just like value + // except that we split names separated by '='. The pairs mode must + // be set manually. + // + enum class lexer_mode {normal, value, variable, pairs}; + class lexer { public: lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {} + // Note: sets mode for the next token. + // + void + mode (lexer_mode m) {next_mode_ = m;} + + lexer_mode + mode () const {return mode_;} + // Scanner. // token @@ -108,15 +126,9 @@ namespace build xchar buf_ {0, 0, 0}; bool eos_ {false}; - - // Context-dependent lexing mode. In the value mode we don't treat - // certain characters (e.g., +, =) as special so that we can use - // them in the variable values, e.g., 'foo = g++'. In contrast, - // in the variable mode, we restrict certain character (e.g., /) - // from appearing in the name. - // - enum class mode {normal, value, variable}; - mode mode_ {mode::normal}; + lexer_mode mode_ {lexer_mode::normal}; + lexer_mode next_mode_; // Mode to switch to for the next token. + lexer_mode prev_mode_; // Mode to return to after this mode expires. }; } diff --git a/build/lexer.cxx b/build/lexer.cxx index 5f394fc..9683567 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -11,6 +11,12 @@ namespace build token lexer:: next () { + if (mode_ != next_mode_) + { + prev_mode_ = mode_; + mode_ = next_mode_; + } + bool sep (skip_spaces ()); xchar c (get ()); @@ -27,8 +33,8 @@ namespace build { // Restore the normal mode at the end of the line. // - if (mode_ == mode::value) - mode_ = mode::normal; + if (mode_ == lexer_mode::value || mode_ == lexer_mode::pairs) + mode_ = next_mode_ = lexer_mode::normal; return token (token_type::newline, sep, ln, cn); } @@ -42,7 +48,10 @@ namespace build } case '$': { - mode_ = mode::variable; // The next name is lexed in the var mode. + // The following name is lexed in the variable mode. + // + next_mode_ = lexer_mode::variable; + return token (token_type::dollar, sep, ln, cn); } case '(': @@ -56,9 +65,9 @@ namespace build } // The following characters are not treated as special in the - // value mode. + // value or pairs mode. // - if (mode_ != mode::value) + if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs) { // NOTE: remember to update name() if adding new punctuations. // @@ -68,22 +77,39 @@ namespace build { return token (token_type::colon, sep, ln, cn); } - case '=': - { - mode_ = mode::value; - return token (token_type::equal, sep, ln, cn); - } case '+': { if (get () != '=') fail (c) << "expected = after +"; - mode_ = mode::value; + next_mode_ = lexer_mode::value; return token (token_type::plus_equal, sep, ln, cn); } } } + // The following characters are not treated as special in the + // value mode. + // + if (mode_ != lexer_mode::value) + { + // NOTE: remember to update name() if adding new punctuations. + // + switch (c) + { + case '=': + { + // Unless we are already in the pairs mode, switch to the + // value mode. + // + if (next_mode_ != lexer_mode::pairs) + next_mode_ = lexer_mode::value; + + return token (token_type::equal, sep, ln, cn); + } + } + } + // Otherwise it is a name. // return name (c, sep); @@ -101,14 +127,13 @@ namespace build bool done (false); // The following characters are not treated as special in the - // value mode. + // value or pairs mode. // - if (mode_ != mode::value) + if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs) { switch (c) { case ':': - case '=': case '+': { done = true; @@ -120,10 +145,28 @@ namespace build break; } + // The following characters are not treated as special in the + // value mode. + // + if (mode_ != lexer_mode::value) + { + switch (c) + { + case '=': + { + done = true; + break; + } + } + + if (done) + break; + } + // While these extra characters are treated as the name end in // the variable mode. // - if (mode_ == mode::variable) + if (mode_ == lexer_mode::variable) { switch (c) { @@ -171,8 +214,8 @@ namespace build break; } - if (mode_ == mode::variable) - mode_ = mode::normal; + if (mode_ == lexer_mode::variable) + next_mode_ = prev_mode_; return token (lexeme, sep, ln, cn); } diff --git a/build/name b/build/name index 00f0c00..fc89f44 100644 --- a/build/name +++ b/build/name @@ -19,6 +19,9 @@ namespace build // without a type and directory can be used to represent any text. // A name with directory and empty value represents a directory. // + // If pair is true, then this name and the next in the list form + // a pair. + // struct name { explicit @@ -33,6 +36,7 @@ namespace build std::string type; path dir; std::string value; + bool pair {false}; }; typedef std::vector names; diff --git a/build/name.cxx b/build/name.cxx index 6280676..46e2440 100644 --- a/build/name.cxx +++ b/build/name.cxx @@ -17,6 +17,7 @@ namespace build { bool ht (!n.type.empty ()); bool hv (!n.value.empty ()); + bool hd (false); if (ht) os << n.type << '{'; @@ -37,6 +38,8 @@ namespace build // if (s.back () != path::traits::directory_separator && (hv || !ht)) os << path::traits::directory_separator; + + hd = true; } } @@ -45,14 +48,21 @@ namespace build if (ht) os << '}'; + if (!ht && !hv && !hd) + os << "{}"; // Nothing got printed. + return os; } ostream& operator<< (ostream& os, const names& ns) { - for (auto b (ns.begin ()), i (b), e (ns.end ()); i != e; ++i) - os << (i != b ? " " : "") << *i; + for (auto i (ns.begin ()), e (ns.end ()); i != e; ) + { + const name& n (*i); + ++i; + os << n << (n.pair ? "=" : (i != e ? " " : "")); + } return os; } diff --git a/build/parser b/build/parser index 6e0be34..4375b27 100644 --- a/build/parser +++ b/build/parser @@ -51,12 +51,13 @@ namespace build names (token& t, token_type& tt) { names_type ns; - names (t, tt, ns, nullptr, nullptr); + names (t, tt, ns, 0, nullptr, nullptr); return ns; } void - names (token&, token_type&, names_type&, + names (token&, token_type&, + names_type&, std::size_t pair, const path* dir, const std::string* type); // Utilities. diff --git a/build/parser.cxx b/build/parser.cxx index 8f4f81d..56a61c5 100644 --- a/build/parser.cxx +++ b/build/parser.cxx @@ -191,6 +191,12 @@ namespace build if (tt == type::colon) { + // While '{}:' means empty name, '{$x}:' where x is empty list + // means empty list. + // + if (ns.empty ()) + fail (t) << "target expected before :"; + next (t, tt); if (tt == type::newline) @@ -625,8 +631,18 @@ namespace build } void parser:: - names (token& t, type& tt, names_type& ns, const path* dp, const string* tp) + names (token& t, + type& tt, + names_type& ns, + size_t pair, + const path* dp, + const string* tp) { + // If pair is not 0, then it is an index + 1 of the first half of + // the pair for which we are parsing the second halves, e.g., + // a={b c d{e f} {}}. + // + // Buffer that is used to collect the complete name in case of an // unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The // idea is to concatenate all the individual parts in this buffer @@ -634,6 +650,12 @@ namespace build // string concat; + // Number of names in the last group. This is used to detect when + // we need to add an empty first pair element (e.g., {=y}) or when + // we have a for now unsupported multi-name LHS (e.g., {x y}=z). + // + size_t count (0); + for (bool first (true);; first = false) { // If the accumulating buffer is not empty, then we have two options: @@ -714,7 +736,14 @@ namespace build } next (t, tt); - names (t, tt, ns, dp1, tp1); + count = ns.size (); + names (t, tt, + ns, + (pair != 0 + ? pair + : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())), + dp1, tp1); + count = ns.size () - count; if (tt != type::rcbrace) fail (t) << "expected } instead of " << t; @@ -723,6 +752,12 @@ namespace build continue; } + // If we are a second half of a pair, add another first half + // unless this is the first instance. + // + if (pair != 0 && pair != ns.size ()) + ns.push_back (ns[pair - 1]); + // If it ends with a directory separator, then it is a directory. // Note that at this stage we don't treat '.' and '..' as special // (unless they are specified with a directory separator) because @@ -753,20 +788,7 @@ namespace build (dp != nullptr ? *dp : path ()), move (name)); - continue; - } - - // Untyped name group without a directory prefix, e.g., '{foo bar}'. - // - if (tt == type::lcbrace) - { - next (t, tt); - names (t, tt, ns, dp, tp); - - if (tt != type::rcbrace) - fail (t) << "expected } instead of " << t; - - tt = peek (); + count = 1; continue; } @@ -887,20 +909,91 @@ namespace build << "expansion"; } + // If we are a second half of a pair. + // + if (pair != 0) + { + // Check that there are no nested pairs. + // + if (n.pair) + fail (t) << "nested pair in variable expansion"; + + // And add another first half unless this is the first instance. + // + if (pair != ns.size ()) + ns.push_back (ns[pair - 1]); + } + ns.emplace_back ((tp1 != nullptr ? *tp1 : string ()), (dp1 != nullptr ? *dp1 : path ()), n.value); } + + count = lv.data.size (); } continue; } + // Untyped name group without a directory prefix, e.g., '{foo bar}'. + // + if (tt == type::lcbrace) + { + next (t, tt); + count = ns.size (); + names (t, tt, + ns, + (pair != 0 + ? pair + : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())), + dp, tp); + count = ns.size () - count; + + if (tt != type::rcbrace) + fail (t) << "expected } instead of " << t; + + tt = peek (); + continue; + } + + // A pair separator (only in the pair mode). + // + if (tt == type::equal && lexer_->mode () == lexer_mode::pairs) + { + if (pair != 0) + fail (t) << "nested pair on the right hand side of a pair"; + + if (count > 1) + fail (t) << "multiple names on the left hand side of a pair"; + + if (count == 0) + { + // Empty LHS, (e.g., {=y}), create an empty name. + // + ns.emplace_back ((tp != nullptr ? *tp : string ()), + (dp != nullptr ? *dp : path ()), + ""); + count = 1; + } + + ns.back ().pair = true; + tt = peek (); + continue; + } + if (!first) break; + // Our caller expected this to be a name. + // if (tt == type::rcbrace) // Empty name, e.g., dir{}. { + // If we are a second half of a pair, add another first half + // unless this is the first instance. + // + if (pair != 0 && pair != ns.size ()) + ns.push_back (ns[pair - 1]); + ns.emplace_back ((tp != nullptr ? *tp : string ()), (dp != nullptr ? *dp : path ()), ""); @@ -909,6 +1002,15 @@ namespace build else fail (t) << "expected name instead of " << t; } + + // Handle the empty RHS in a pair, (e.g., {y=}). + // + if (!ns.empty () && ns.back ().pair) + { + ns.emplace_back ((tp != nullptr ? *tp : string ()), + (dp != nullptr ? *dp : path ()), + ""); + } } void parser:: -- cgit v1.1