// file : libbuild2/cc/parser.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/cc/parser.hxx> #include <libbuild2/cc/lexer.hxx> using namespace std; using namespace butl; namespace build2 { namespace cc { using type = token_type; unit parser:: parse (ifdstream& is, const path_name& in) { lexer l (is, in); l_ = &l; unit u; u_ = &u; // If the source has errors then we want the compiler to issues the // diagnostics. However, the errors could as likely be because we are // mis-parsing things. Initially, as a middle ground, we were going to // issue warnings. But the problem with this approach is that they are // easy to miss. So for now we fail. And it turns out we don't mis- // parse much. // We keep a {}-balance and skip everything at depth 1 and greater. // While after P1703 and P1857 everything that we are interested in // (module and import declarations) are treated as pseudo-pp-directives // and recognized everywhere, they are illegal everywhere execept at // depth 0. So we are going to skip for performance reasons and expect // the compiler to complain about the syntax rather than, say, module // BMI not being found. // int64_t bb (0); token t; for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) { // Break to stop, continue to continue, set n to false if the // next token already extracted. // n = true; switch (t.type) { case type::lcbrace: { ++bb; continue; } case type::rcbrace: { if (--bb < 0) break; // Imbalance. continue; } case type::identifier: { // Constructs we need to recognize: // // module ; // [export] module <module-name> [<attributes>] ; // [export] import <module-name> [<attributes>] ; // [export] import <header-name> [<attributes>] ; // // Additionally, when include is translated to an import, it's // normally replaced with the special __import keyword since it // may appear in C context. // const string& id (t.value); if (bb == 0) { if (id == "import" || id == "__import") { parse_import (t, false); } else if (id == "module") { parse_module (t, false); } else if (id == "export") { if (l_->next (t) == type::identifier) { if (id == "module") parse_module (t, true); else if (id == "import") parse_import (t, true); else n = false; // Something else (e.g., export namespace). } else n = false; } } continue; } default: continue; } break; } // We used to issue an error here but the diagnostics and, especially, // the location are not very helpful. While the compilers don't do a // much better job at it, there are often other semantic errors that are // more helpful and which we cannot match. So now we warn and let the // compiler fail. // // Another option would have been to postpone this diagnostics until // after the compiler fails (and thus also confirming that it indeed has // failed) but that would require propagating this state from apply() to // perform_update() and also making sure we issue this diagnostics even // if anything in between fails (probably by having it sitting in a // diag_frame). So let's keep it simple for now. // if (bb != 0) warn (t) << (bb > 0 ? "missing '}'" : "extraneous '}'"); if (module_marker_ && u.module_info.name.empty ()) fail (*module_marker_) << "module declaration expected after " << "leading module marker"; checksum = l.checksum (); return u; } void parser:: parse_import (token& t, bool ex) { // enter: import keyword // leave: semi string un; unit_type ut; switch (l_->next (t)) // Start of module/header name. { case type::less: case type::string: { un = parse_header_name (t); ut = unit_type::module_header; break; } case type::identifier: { un = parse_module_name (t); ut = unit_type::module_iface; break; } default: fail (t) << "module or header name expected instead of " << t << endf; } // Should be {}-balanced. // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) fail (t) << "';' expected instead of " << t; // For now we skip header units (see a comment on module type/info // string serialization in compile rule for details). Note that // currently parse_header_name() always returns empty name. // if (ut == unit_type::module_header) return; // Ignore duplicates. We don't expect a large numbers of (direct) // imports so vector/linear search is probably more efficient than a // set. // auto& is (u_->module_info.imports); auto i (find_if (is.begin (), is.end (), [&un] (const module_import& i) { return i.name == un; })); if (i == is.end ()) is.push_back (module_import {ut, move (un), ex, 0}); else i->exported = i->exported || ex; } void parser:: parse_module (token& t, bool ex) { // enter: module keyword // leave: semi location_value l (get_location (t)); l_->next (t); // Handle the leading 'module;' marker (p0713). // // Note that we don't bother diagnosing invalid/duplicate markers // leaving that to the compiler. // if (!ex && t.type == type::semi) { module_marker_ = move (l); return; } // Otherwise it should be the start of the module name. // string n (parse_module_name (t)); // Should be {}-balanced. // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) fail (t) << "';' expected instead of " << t; if (!u_->module_info.name.empty ()) fail (l) << "multiple module declarations"; u_->type = ex ? unit_type::module_iface : unit_type::module_impl; u_->module_info.name = move (n); } string parser:: parse_module_name (token& t) { // enter: first token of module name // leave: token after module name string n; // <identifier>[ . <identifier>]* // for (;; l_->next (t)) { if (t.type != type::identifier) fail (t) << "module name expected instead of " << t; n += t.value; if (l_->next (t) != type::dot) break; n += '.'; } return n; } string parser:: parse_header_name (token& t) { // enter: first token of module name, either string or less // leave: token after module name string n; // NOTE: actual name is a TODO if/when we need it. // if (t.type == type::string) /*n = move (t.value)*/; else { while (l_->next (t) != type::greater) { if (t.type == type::eos) fail (t) << "closing '>' expected after header name" << endf; } } l_->next (t); return n; } } }