diff options
Diffstat (limited to 'libbuild2/script/lexer.cxx')
-rw-r--r-- | libbuild2/script/lexer.cxx | 431 |
1 files changed, 431 insertions, 0 deletions
diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx new file mode 100644 index 0000000..d78e999 --- /dev/null +++ b/libbuild2/script/lexer.cxx @@ -0,0 +1,431 @@ +// file : libbuild2/script/lexer.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/script/lexer.hxx> + +#include <cstring> // strchr() + +using namespace std; + +namespace build2 +{ + namespace script + { + using type = token_type; + + void lexer:: + mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data) + { + bool a (false); // attributes + + const char* s1 (nullptr); + const char* s2 (nullptr); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_expansion: + { + // Note that whitespaces are not word separators in this mode. + // + s1 = "|&<>"; + s2 = " "; + s = false; + break; + } + case lexer_mode::here_line_single: + { + // This one is like a single-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + // Note that it might be tempting to enable line continuation + // escapes. However, we will then have to also enable escaping of + // the backslash, which makes it a lot less tempting. + // + s1 = "\n"; + s2 = " "; + esc = ""; // Disable escape sequences. + s = false; + q = false; + break; + } + case lexer_mode::here_line_double: + { + // This one is like a double-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + s1 = "$(\n"; + s2 = " "; + s = false; + q = false; + break; + } + default: + { + // Make sure pair separators are only enabled where we expect + // them. + // + // @@ Should we disable pair separators in the eval mode? + // + assert (ps == '\0' || + m == lexer_mode::eval || + m == lexer_mode::attribute_value); + + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_expansion: + case lexer_mode::here_line_single: + case lexer_mode::here_line_double: + r = next_line (); + break; + default: + r = base_lexer::next (); + break; + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ().first); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + const state& st (state_.top ()); + lexer_mode m (st.mode); + + auto make_token = [&sep, &m, ln, cn] (type t) + { + bool q (m == lexer_mode::here_line_double); + + return token (t, string (), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + ln, cn, + token_printer); + }; + + if (eos (c)) + return make_token (type::eos); + + // NOTE: remember to update mode() if adding new special characters. + + if (m != lexer_mode::command_expansion) + { + switch (c) + { + case '\n': + { + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + } + } + + if (m != lexer_mode::here_line_single) + { + switch (c) + { + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + } + + // Command operators. + // + if (m == lexer_mode::command_expansion) + { + if (optional<token> t = next_cmd_op (c, sep)) + return move (*t); + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + optional<token> lexer:: + next_cmd_op (const xchar& c, bool sep) + { + auto make_token = [&sep, &c] (type t, string v = string ()) + { + return token (t, move (v), sep, + quote_type::unquoted, false, + c.line, c.column, + token_printer); + }; + + auto make_token_with_modifiers = + [&make_token, this] (type t, + const char* mods, // To recorgnize. + const char* stop = nullptr) // To stop after. + { + string v; + if (mods != nullptr) + { + for (xchar p (peek ()); + (strchr (mods, p) != nullptr && // Modifier. + strchr (v.c_str (), p) == nullptr); // Not already seen. + p = peek ()) + { + get (); + v += p; + + if (stop != nullptr && strchr (stop, p) != nullptr) + break; + } + } + + return make_token (t, move (v)); + }; + + switch (c) + { + // |, || + // + case '|': + { + if (peek () == '|') + { + get (); + return make_token (type::log_or); + } + else + return make_token (type::pipe); + } + // &, && + // + case '&': + { + xchar p (peek ()); + + if (p == '&') + { + get (); + return make_token (type::log_and); + } + + // These modifiers are mutually exclusive so stop after seeing + // either one. + // + return make_token_with_modifiers (type::clean, "!?", "!?"); + } + // < + // + case '<': + { + optional<type> r; + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '=' || p == '<') // <| <- <= << + { + xchar c (get ()); + + switch (p) + { + case '|': return make_token (type::in_pass); // <| + case '-': return make_token (type::in_null); // <- + case '=': return make_token (type::in_file); // <= + case '<': // << + { + p = peek (); + + if (p == '=' || p == '<') // <<= <<< + { + xchar c (get ()); + + switch (p) + { + case '=': + { + r = type::in_doc; // <<= + break; + } + case '<': + { + p = peek (); + + if (p == '=') + { + get (); + r = type::in_str; // <<<= + } + + if (!r && redirect_aliases.lll) + r = type::in_lll; // <<< + + // We can still end up with the << or < redirect alias, + // if any of them is present. + // + if (!r) + unget (c); + } + + break; + } + } + + if (!r && redirect_aliases.ll) + r = type::in_ll; // << + + // We can still end up with the < redirect alias, if it is + // present. + // + if (!r) + unget (c); + + break; + } + } + } + + if (!r && redirect_aliases.l) + r = type::in_l; // < + + if (!r) + return nullopt; + + // Handle modifiers. + // + const char* mods (nullptr); + + switch (redirect_aliases.resolve (*r)) + { + case type::in_str: + case type::in_doc: mods = ":/"; break; + } + + token t (make_token_with_modifiers (*r, mods)); + + return t; + } + // > + // + case '>': + { + optional<type> r; + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '!' || p == '&' || // >| >- >! >& + p == '=' || p == '+' || p == '?' || p == '>') // >= >+ >? >> + { + xchar c (get ()); + + switch (p) + { + case '|': return make_token (type::out_pass); // >| + case '-': return make_token (type::out_null); // >- + case '!': return make_token (type::out_trace); // >! + case '&': return make_token (type::out_merge); // >& + case '=': return make_token (type::out_file_ovr); // >= + case '+': return make_token (type::out_file_app); // >+ + case '?': return make_token (type::out_file_cmp); // >? + case '>': // >> + { + p = peek (); + + if (p == '?' || p == '>') // >>? >>> + { + xchar c (get ()); + + switch (p) + { + case '?': + { + r = type::out_doc; // >>? + break; + } + case '>': + { + p = peek (); + + if (p == '?') + { + get (); + r = type::out_str; // >>>? + } + + if (!r && redirect_aliases.ggg) + r = type::out_ggg; // >>> + + // We can still end up with the >> or > redirect alias, + // if any of themis present. + // + if (!r) + unget (c); + } + + break; + } + } + + if (!r && redirect_aliases.gg) + r = type::out_gg; // >> + + // We can still end up with the > redirect alias, if it is + // present. + // + if (!r) + unget (c); + + break; + } + } + } + + if (!r && redirect_aliases.g) + r = type::out_g; // > + + if (!r) + return nullopt; + + // Handle modifiers. + // + const char* mods (nullptr); + const char* stop (nullptr); + + switch (redirect_aliases.resolve (*r)) + { + case type::out_str: + case type::out_doc: mods = ":/~"; stop = "~"; break; + } + + return make_token_with_modifiers (*r, mods, stop); + } + } + + return nullopt; + } + } +} |