// file : build/lexer -*- C++ -*- // copyright : Copyright (c) 2014-2015 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #ifndef BUILD_LEXER #define BUILD_LEXER #include <stack> #include <string> #include <iosfwd> #include <cstddef> // size_t #include <cstdint> // uint64_t #include <cassert> #include <exception> #include <butl/char-scanner> #include <build/types> #include <build/utility> #include <build/token> #include <build/diagnostics> namespace build { // Context-dependent lexing mode. In the value mode we don't treat // certain characters (e.g., +, =) as special so that we can use // them in the variable values, e.g., 'foo = g++'. In contrast, in // the variable mode, we restrict certain character (e.g., /) from // appearing in the name. The pairs mode is just like value except // that we split names separated by the pair character. The eval // mode is used in the evaluation context. // // The alternnative modes must be set manually. The value and pairs // modes are automatically reset after the end of the line. The // variable mode is reset after the name token. And the eval mode // is reset after the closing ')'. // // Quoted is an internal mode and should not be set explicitly. // enum class lexer_mode {normal, variable, value, pairs, eval, quoted}; class lexer: protected butl::char_scanner { public: lexer (std::istream& is, const std::string& name, void (*processor) (token&, const lexer&) = nullptr) : char_scanner (is), fail (name), processor_ (processor), sep_ (false) { mode_.push (lexer_mode::normal); } const std::string& name () const {return fail.name_;} // Note: sets mode for the next token. If mode is pairs, then // the second argument specifies the separator character. // void mode (lexer_mode m, char pair_separator = '=') { mode_.push (m); pair_separator_ = pair_separator; } // Expire the current mode early. // void expire_mode () {mode_.pop ();} lexer_mode mode () const {return mode_.top ();} char pair_separator () const {return pair_separator_;} // Scanner. // token next (); // Peek at the first character of the next token. Return the character // or 0 if the next token will be eos. Also return an indicator of // whether the next token will be separated. // pair<char, bool> peek_char (); private: token next_impl (); token next_eval (); token next_quoted (); token name (bool separated); // Return true if we have seen any spaces. Skipped empty lines // don't count. In other words, we are only interested in spaces // that are on the same line as the following non-space character. // bool skip_spaces (); xchar escape (); // Diagnostics. // private: struct fail_mark_base: build::fail_mark_base<failed> { fail_mark_base (const std::string& n): name_ (n) {} location_prologue operator() (const xchar&) const; std::string name_; }; typedef diag_mark<fail_mark_base> fail_mark; private: fail_mark fail; void (*processor_) (token&, const lexer&); std::stack<lexer_mode> mode_; char pair_separator_; bool sep_; // True if we skipped spaces in peek(). }; } #endif // BUILD_LEXER