From 818dd4a4e743bc8c93d1be67685b1f2e5db6dcf5 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 17 Nov 2020 11:23:36 +0200 Subject: Implement modules pseudo-directive parsing (p1703, p1857) --- libbuild2/cc/lexer+first.test.testscript | 25 ++++++++ libbuild2/cc/lexer.cxx | 34 ++++++++--- libbuild2/cc/lexer.hxx | 8 ++- libbuild2/cc/lexer.test.cxx | 14 ++++- libbuild2/cc/parser+module.test.testscript | 23 ++++--- libbuild2/cc/parser.cxx | 96 ++++++++++++++++++++---------- libbuild2/cc/parser.hxx | 2 +- 7 files changed, 148 insertions(+), 54 deletions(-) create mode 100644 libbuild2/cc/lexer+first.test.testscript (limited to 'libbuild2/cc') diff --git a/libbuild2/cc/lexer+first.test.testscript b/libbuild2/cc/lexer+first.test.testscript new file mode 100644 index 0000000..5c55030 --- /dev/null +++ b/libbuild2/cc/lexer+first.test.testscript @@ -0,0 +1,25 @@ +# file : libbuild2/cc/lexer+first.test.testscript +# license : MIT; see accompanying LICENSE file + +# Test the first token of a logical line logic. +# + +: basics +: +$* -f <>EOO +; . + ; . +; // Hello +; +; /* Hello +World */ . +EOI +';' t +'.' f +';' t +'.' f +';' t +';' t +';' t +'.' f +EOO diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx index d2be3d8..123a41e 100644 --- a/libbuild2/cc/lexer.cxx +++ b/libbuild2/cc/lexer.cxx @@ -138,10 +138,13 @@ namespace build2 using type = token_type; void lexer:: - next (token& t, xchar c, bool ignore_pp) + next (token& t, pair cf, bool ignore_pp) { - for (;; c = skip_spaces ()) + for (;; cf = skip_spaces ()) { + xchar c (cf.first); + + t.first = cf.second; t.file = &log_file_; t.line = log_line_ ? *log_line_ : c.line; t.column = c.column; @@ -197,7 +200,7 @@ namespace build2 { // Note that we keep using the passed token for buffers. // - c = skip_spaces (false); // Stop at newline. + c = skip_spaces (false).first; // Stop at newline. 
if (eos (c) || c == '\n') break; @@ -215,7 +218,7 @@ namespace build2 // if (!(c >= '0' && c <= '9')) { - next (t, c, false); + next (t, make_pair (c, false), false); if (t.type == type::identifier) { @@ -230,7 +233,7 @@ namespace build2 if (t.type != type::identifier || t.value != "line") continue; - c = skip_spaces (false); + c = skip_spaces (false).first; if (!(c >= '0' && c <= '9')) fail (c) << "line number expected after #line directive"; @@ -242,7 +245,7 @@ namespace build2 continue; // Parse the tail, if any. } - next (t, c, false); + next (t, make_pair (c, false), false); } break; } @@ -823,7 +826,7 @@ namespace build2 // See if we have the file. // - c = skip_spaces (false); + c = skip_spaces (false).first; if (c == '\"') { @@ -1007,16 +1010,24 @@ namespace build2 } auto lexer:: - skip_spaces (bool nl) -> xchar + skip_spaces (bool nl) -> pair<xchar, bool> { xchar c (get ()); + // Besides the first character, we also need to take into account any + // newlines that we are skipping. For example, the first character may + // be a space at the end of the line which we will skip along with the + // following newline. + // + bool first (c.column == 1); + for (; !eos (c); c = get ()) { switch (c) { case '\n': if (!nl) break; + first = true; // Fall through. case ' ': case '\t': @@ -1072,11 +1083,16 @@ namespace build2 if (!nl) break; + first = true; continue; } // C comment. // + // Note that for the first logic we consider a C comment to be + // entirely part of the same logical line even if there are + // newlines inside. + // if (p == '*') { get (p); @@ -1132,7 +1148,7 @@ namespace build2 break; } - return c; + return make_pair (c, first); } ostream& diff --git a/libbuild2/cc/lexer.hxx b/libbuild2/cc/lexer.hxx index d3fe807..b4e1045 100644 --- a/libbuild2/cc/lexer.hxx +++ b/libbuild2/cc/lexer.hxx @@ -25,7 +25,8 @@ namespace build2 // as #line, #pragma, but not #include (which is diagnosed). 
Currently, // all preprocessor directives except #line are ignored and no values are // saved from literals. The #line directive (and its shorthand notation) - // is recognized to provide the logical token location. + // is recognized to provide the logical token location. Note that the + // modules-related pseudo-directives are not recognized or handled. // // While at it we also calculate the checksum of the input ignoring // comments, whitespaces, etc. This is used to detect changes that do not @@ -58,6 +59,7 @@ namespace build2 struct token { token_type type = token_type::eos; + bool first = false; // First token of a logical line. string value; // Logical position. @@ -121,7 +123,7 @@ namespace build2 private: void - next (token&, xchar, bool); + next (token&, pair<xchar, bool>, bool); void number_literal (token&, xchar); @@ -141,7 +143,7 @@ namespace build2 void line_directive (token&, xchar); - xchar + pair<xchar, bool> skip_spaces (bool newline = true); // The char_scanner adaptation for newline escape sequence processing. diff --git a/libbuild2/cc/lexer.test.cxx b/libbuild2/cc/lexer.test.cxx index 852d8b2..284d592 100644 --- a/libbuild2/cc/lexer.test.cxx +++ b/libbuild2/cc/lexer.test.cxx @@ -16,12 +16,19 @@ namespace build2 { namespace cc { - // Usage: argv[0] [-l] [<file>] + // Usage: argv[0] [-l] [-f] [<file>] + // + // -l + // Print location. + // + // -f + // Print first flag. // int main (int argc, char* argv[]) { bool loc (false); + bool first (false); path file; for (int i (1); i != argc; ++i) @@ -30,6 +37,8 @@ namespace build2 if (a == "-l") loc = true; + else if (a == "-f") + first = true; else { file = path (argv[i]); @@ -61,6 +70,9 @@ namespace build2 { cout << t; + if (first) + cout << ' ' << (t.first ? 
't' : 'f'); + if (loc) cout << ' ' << *t.file << ':' << t.line << ':' << t.column; diff --git a/libbuild2/cc/parser+module.test.testscript b/libbuild2/cc/parser+module.test.testscript index b92f80b..e4ec139 100644 --- a/libbuild2/cc/parser+module.test.testscript +++ b/libbuild2/cc/parser+module.test.testscript @@ -45,6 +45,17 @@ EOI export import foo; EOO +: non-import +: +$* <; +class import; +EOI + : non-module : $* <:6:1: warning: extraneous '}' EOE -: import-missing-name -: -$* <>EOE != 0 -import ; -EOI -:1:8: error: module or header name expected instead of ';' -EOE - : module-missing-name : $* <>EOE != 0 diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx index 55be8b7..fbf076c 100644 --- a/libbuild2/cc/parser.cxx +++ b/libbuild2/cc/parser.cxx @@ -43,8 +43,8 @@ namespace build2 token t; for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) { - // Break to stop, continue to continue, set n to false if the - // next token already extracted. + // Break to stop, continue to continue, and set n to false if the + // next token is already extracted. // n = true; @@ -71,37 +71,63 @@ namespace build2 // [export] import [] ; // [export] import [] ; // + // The leading module/export/import keyword should be the first + // token of a logical line and only if certain characters appear + // after module/import and all the tokens are on the same line, + // then the line is recognized as a pseudo-directive; see p1857 + // for details. + // // Additionally, when include is translated to an import, it's // normally replaced with the special __import keyword since it // may appear in C context. // - const string& id (t.value); - - if (bb == 0) + if (bb == 0 && t.first) { - if (id == "import" || id == "__import") + const string& id (t.value); // Note: tracks t. + + // Handle the export prefix which can appear for both module + // and import. 
+ // + bool ex (false); + if (id == "export") { - parse_import (t, false); + if (l_->next (t) != type::identifier || t.first) + { + n = false; // Could be module/import on next line. + continue; + } + + ex = true; + // Fall through. } - else if (id == "module") + + if (id == "module") { - parse_module (t, false); + location_value l (get_location (t)); + l_->next (t); + + if ((t.type == type::semi || + t.type == type::identifier) && !t.first) + parse_module (t, ex, move (l)); + else + n = false; } - else if (id == "export") + else if (id == "import" || id == "__import") { - if (l_->next (t) == type::identifier) - { - if (id == "module") parse_module (t, true); - else if (id == "import") parse_import (t, true); - else n = false; // Something else (e.g., export namespace). - } + l_->next (t); + + if ((t.type == type::less || + t.type == type::string || + t.type == type::identifier) && !t.first) + parse_import (t, ex); else n = false; } } continue; } - default: continue; + default: + continue; } break; @@ -120,6 +146,8 @@ namespace build2 // if anything in between fails (probably by having it sitting in a // diag_frame). So let's keep it simple for now. // + // @@ We now do that for missing include, so could do here as well. + // if (bb != 0) warn (t) << (bb > 0 ? "missing '}'" : "extraneous '}'"); @@ -134,12 +162,12 @@ namespace build2 void parser:: parse_import (token& t, bool ex) { - // enter: import keyword + // enter: token after import keyword // leave: semi string un; unit_type ut; - switch (l_->next (t)) // Start of module/header name. + switch (t.type) // Start of module/header name. { case type::less: case type::string: @@ -155,15 +183,19 @@ namespace build2 break; } default: - fail (t) << "module or header name expected instead of " << t << endf; + assert (false); } // Should be {}-balanced. 
// - for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + for (; + t.type != type::eos && t.type != type::semi && !t.first; + l_->next (t)) ; if (t.type != type::semi) fail (t) << "';' expected instead of " << t; + else if (t.first) + fail (t) << "';' must be on the same line"; // For now we skip header units (see a comment on module type/info // string serialization in compile rule for details). Note that @@ -191,21 +223,17 @@ namespace build2 } void parser:: - parse_module (token& t, bool ex) + parse_module (token& t, bool ex, location_value l) { - // enter: module keyword + // enter: token after module keyword (l is the module keyword location) // leave: semi - location_value l (get_location (t)); - - l_->next (t); - // Handle the leading 'module;' marker (p0713). // // Note that we don't bother diagnosing invalid/duplicate markers // leaving that to the compiler. // - if (!ex && t.type == type::semi) + if (!ex && t.type == type::semi && !t.first) { module_marker_ = move (l); return; @@ -217,10 +245,14 @@ namespace build2 // Should be {}-balanced. 
// - for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + for (; + t.type != type::eos && t.type != type::semi && !t.first; + l_->next (t)) ; if (t.type != type::semi) fail (t) << "';' expected instead of " << t; + else if (t.first) + fail (t) << "';' must be on the same line"; if (!u_->module_info.name.empty ()) fail (l) << "multiple module declarations"; @@ -241,12 +273,12 @@ namespace build2 // for (;; l_->next (t)) { - if (t.type != type::identifier) + if (t.type != type::identifier || t.first) fail (t) << "module name expected instead of " << t; n += t.value; - if (l_->next (t) != type::dot) + if (l_->next (t) != type::dot || t.first) break; n += '.'; @@ -271,7 +303,7 @@ namespace build2 { while (l_->next (t) != type::greater) { - if (t.type == type::eos) + if (t.type == type::eos || t.first) fail (t) << "closing '>' expected after header name" << endf; } } diff --git a/libbuild2/cc/parser.hxx b/libbuild2/cc/parser.hxx index 7b33ef9..7c893b5 100644 --- a/libbuild2/cc/parser.hxx +++ b/libbuild2/cc/parser.hxx @@ -31,7 +31,7 @@ namespace build2 parse_import (token&, bool); void - parse_module (token&, bool); + parse_module (token&, bool, location_value); string parse_module_name (token&); -- cgit v1.1