From a54abb2f4e5e66877619097bfd281261f99c5103 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 4 May 2020 07:27:47 +0200 Subject: Add recognition for line-leading `%` as token --- libbuild2/context.cxx | 4 ++- libbuild2/lexer+normal.test.testscript | 36 ++++++++++++++++++++++++++ libbuild2/lexer.cxx | 46 ++++++++++++++++++++++++++-------- libbuild2/lexer.hxx | 37 ++++++++++++++++----------- libbuild2/parser.cxx | 5 ++++ libbuild2/test/script/lexer.cxx | 2 +- libbuild2/token.cxx | 1 + libbuild2/token.hxx | 1 + 8 files changed, 104 insertions(+), 28 deletions(-) diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx index 0be0046..fe046ae 100644 --- a/libbuild2/context.cxx +++ b/libbuild2/context.cxx @@ -340,8 +340,10 @@ namespace build2 // And so the first token should be a word which can be either a // variable name (potentially with the directory qualification) or just // the directory, in which case it should be followed by another word - // (unqualified variable name). + // (unqualified variable name). To avoid treating any of the visibility + // modifiers as special we use the cmdvar mode. // + l.mode (lexer_mode::cmdvar); token t (l.next ()); optional dir; diff --git a/libbuild2/lexer+normal.test.testscript b/libbuild2/lexer+normal.test.testscript index c9448c3..e66b81e 100644 --- a/libbuild2/lexer+normal.test.testscript +++ b/libbuild2/lexer+normal.test.testscript @@ -34,3 +34,39 @@ $* <:'x?=y' >>EOO ?= 'y' EOO + +: percent +: Leading percent sign recognition. 
+: +{ + : first + : + $* <:'%%' >>EOO + % + '%' + EOO + + : space + : + $* <:' %%' >>EOO + % + '%' + EOO + + : newline + : + $* <>EOO + + %% + EOI + % + '%' + + EOO + + : non-token + : + $* <:'x%' >>EOO + 'x%' + EOO +} diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index c0cadd3..1e400e3 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -14,7 +14,10 @@ namespace build2 pair, bool> lexer:: peek_chars () { - sep_ = skip_spaces (); + auto p (skip_spaces ()); + assert (!p.second); + sep_ = p.first; + char r[2] = {'\0', '\0'}; xchar c0 (peek ()); @@ -54,7 +57,11 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: { + // Note: `%` is only recognized at the beginning of the line so it + // should not be included here. + // a = true; s1 = ":<>=+? $(){}#\t\n"; s2 = " == "; @@ -148,6 +155,7 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: case lexer_mode::value: case lexer_mode::values: case lexer_mode::switch_expressions: @@ -161,7 +169,9 @@ namespace build2 default: assert (false); // Unhandled custom mode. } - bool sep (skip_spaces ()); + pair skip (skip_spaces ()); + bool sep (skip.first); // Separated from a previous character. + bool first (skip.second); // First non-whitespace character of a line. xchar c (get ()); uint64_t ln (c.line), cn (c.column); @@ -209,7 +219,8 @@ namespace build2 m == lexer_mode::case_patterns) state_.pop (); - // Re-enable attributes in the normal mode. + // Re-enable attributes in the normal mode (should never be needed in + // cmdvar). // if (state_.top ().mode == lexer_mode::normal) state_.top ().attributes = true; @@ -230,6 +241,14 @@ namespace build2 } } + if (m == lexer_mode::normal && first) + { + switch (c) + { + case '%': return make_token (type::percent); + } + } + // The following characters are special in all modes except attributes. 
// if (m != lexer_mode::attributes && m != lexer_mode::attribute_value) @@ -267,6 +286,7 @@ namespace build2 // switch_expressions modes. // if (m == lexer_mode::normal || + m == lexer_mode::cmdvar || m == lexer_mode::switch_expressions || m == lexer_mode::case_patterns) { @@ -278,7 +298,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -315,7 +336,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -361,7 +383,7 @@ namespace build2 // This mode is quite a bit like the value mode when it comes to special // characters, except that we have some of our own. - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); if (eos (c)) @@ -728,7 +750,7 @@ namespace build2 return token (move (lexeme), sep, qtype, qcomp, ln, cn); } - bool lexer:: + pair lexer:: skip_spaces () { bool r (sep_); @@ -739,7 +761,7 @@ namespace build2 // In some special modes we don't skip spaces. // if (!s.sep_space) - return r; + return make_pair (r, false); xchar c (peek ()); bool start (c.column == 1); @@ -758,6 +780,8 @@ namespace build2 { // In some modes we treat newlines as ordinary spaces. // + // Note that in this case we don't adjust start. + // if (!s.sep_newline) { r = true; @@ -772,7 +796,7 @@ namespace build2 break; } - return r; + return make_pair (r, start); } case '#': { @@ -833,12 +857,12 @@ namespace build2 } // Fall through. default: - return r; // Not a space. + return make_pair (r, start); // Not a space. } get (); } - return r; + return make_pair (r, start); } } diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx index 02112cb..c7e96fb 100644 --- a/libbuild2/lexer.hxx +++ b/libbuild2/lexer.hxx @@ -20,17 +20,18 @@ namespace build2 { - // Context-dependent lexing mode. 
Quoted modes are internal and should not - // be set explicitly. In the value mode we don't treat certain characters - // (e.g., `+`, `=`) as special so that we can use them in the variable - // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict - // certain character (e.g., `/`) from appearing in the name. The values mode - // is like value but recogizes `,` as special (used in contexts where we - // need to list multiple values). The attributes/attribute_value modes are - // like values where each value is potentially a variable assignment; they - // don't treat `{` and `}` as special (so we cannot have name groups in - // attributes) as well as recognizes `=` and `]`. The eval mode is used in - // the evaluation context. + // Context-dependent lexing mode. + // + // Quoted modes are internal and should not be set explicitly. In the value + // mode we don't treat certain characters (e.g., `+`, `=`) as special so + // that we can use them in the variable values, e.g., `foo = g++`. In + // contrast, in the variable mode, we restrict certain characters (e.g., `/`) + // from appearing in the name. The values mode is like value but recognizes + // `,` as special (used in contexts where we need to list multiple + // values). The attributes/attribute_value modes are like values where each + // value is potentially a variable assignment; they don't treat `{` and `}` + // as special (so we cannot have name groups in attributes) as well as + // recognizes `=` and `]`. The eval mode is used in the evaluation context. // // A number of modes are "derived" from the value/values mode by recognizing // a few extra characters: @@ -42,6 +43,9 @@ namespace build2 // split words separated by the pair character (to disable pairs one can // pass `\0` as a pair character). // + // The normal mode recognizes `%` at the beginning of the line as special. + // The cmdvar mode is like normal but does not treat `%` as special. 
+ // // The alternative modes must be set manually. The value/values and derived // modes automatically expires after the end of the line. The attribute mode // expires after the closing `]`. The variable mode expires after the word @@ -70,6 +74,7 @@ namespace build2 enum { normal = base_type::value_next, + cmdvar, variable, value, values, @@ -189,11 +194,13 @@ namespace build2 virtual token word (state current, bool separated); - // Return true if we have seen any spaces. Skipped empty lines - // don't count. In other words, we are only interested in spaces - // that are on the same line as the following non-space character. + // Return true in first if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces that are + // on the same line as the following non-space character. Return true in + // second if we have started skipping spaces from column 1 (note that + // if this mode does not skip spaces, then second will always be false). // - bool + pair skip_spaces (); // Diagnostics. diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index e87ca95..000670b 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -335,6 +335,11 @@ namespace build2 while (tt != type::eos && !(one && parsed)) { + // Issue better diagnostics for stray `%`. + // + if (tt == type::percent) + fail (t) << "recipe without target"; + // Extract attributes if any. 
// assert (attributes_.empty ()); diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index 26d77b5..4683bc7 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -174,7 +174,7 @@ namespace build2 token lexer:: next_line () { - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); uint64_t ln (c.line), cn (c.column); diff --git a/libbuild2/token.cxx b/libbuild2/token.cxx index 4975a02..11b080e 100644 --- a/libbuild2/token.cxx +++ b/libbuild2/token.cxx @@ -24,6 +24,7 @@ namespace build2 case token_type::colon: os << q << ':' << q; break; case token_type::dollar: os << q << '$' << q; break; case token_type::question: os << q << '?' << q; break; + case token_type::percent: os << q << '%' << q; break; case token_type::comma: os << q << ',' << q; break; case token_type::lparen: os << q << '(' << q; break; diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx index e48c088..8dad4ba 100644 --- a/libbuild2/token.hxx +++ b/libbuild2/token.hxx @@ -36,6 +36,7 @@ namespace build2 colon, // : dollar, // $ question, // ? + percent, // % comma, // , lparen, // ( -- cgit v1.1