Split build system into library and driver

author: Boris Kolpackov <boris@codesynthesis.com> 2019-06-24 12:01:19 +0200
committer: Karen Arutyunov <karen@codesynthesis.com> 2019-07-01 18:13:55 +0300
commit: 977d07a3ae47ef204665d1eda2d642e5064724f3 (patch)
tree: 525a3d6421f61ce789b690191d3c30fc09be3517 /libbuild2/lexer.hxx
parent: 7161b24963dd9da4d218f92c736b77c35c328a2d (diff)
1 files changed, 207 insertions, 0 deletions
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
new file mode 100644
index 0000000..f987071
--- /dev/null
+++ b/libbuild2/lexer.hxx
@@ -0,0 +1,207 @@
+// file      : libbuild2/lexer.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_LEXER_HXX
+#define LIBBUILD2_LEXER_HXX
+
+#include <stack>
+
+#include <libbutl/char-scanner.mxx>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/token.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/export.hxx>
+
+namespace build2
+{
+  // Context-dependent lexing mode. In the value mode we don't treat certain
+  // characters (e.g., '+', '=') as special so that we can use them in the
+  // variable values, e.g., 'foo = g++'. In contrast, in the variable mode, we
+  // restrict certain characters (e.g., '/') from appearing in the name. The
+  // attribute mode is like value except it doesn't treat '{' and '}' as
+  // special (so we cannot have name groups in attributes). The eval mode is
+  // used in the evaluation context. Quoted modes are internal and should not
+  // be set explicitly.
+  //
+  // Note that the normal, value, and eval modes split words separated by the
+  // pair character (to disable pairs one can pass '\0' as a pair character).
+  //
+  // The alternative modes must be set manually. The value mode automatically
+  // expires after the end of the line. The attribute mode expires after the
+  // closing ']'. The variable mode expires after the word token. And the eval
+  // mode expires after the closing ')'.
+  //
+  // Note that normally it is only safe to switch mode when the current token
+  // is not quoted (or, more generally, when you are not in the double-quoted
+  // mode) unless the mode treats the double-quote as a separator (e.g.,
+  // variable name mode). Failing that, your mode (which now will be the top of
+  // the mode stack) will prevent proper recognition of the closing quote.
+  //
+
+  // Extendable/inheritable enum-like class.
+  //
+  struct lexer_mode: lexer_mode_base
+  {
+    using base_type = lexer_mode_base;
+
+    enum
+    {
+      normal = base_type::value_next, // Continue numbering after the base.
+      variable,
+      value,
+      attribute,
+      eval,
+      single_quoted,
+      double_quoted,
+      buildspec,
+
+      value_next // Presumably where a derived mode enum continues from.
+    };
+
+    lexer_mode () = default;
+    lexer_mode (value_type v): base_type (v) {}
+    lexer_mode (base_type v): base_type (v) {}
+  };
+
+  class LIBBUILD2_SYMEXPORT lexer: public butl::char_scanner
+  {
+  public:
+    // If escape is not NULL then only escape sequences with characters from
+    // this string are considered "effective escapes" with all others passed
+    // through as is. Note that the escape string is not copied.
+    //
+    lexer (istream& is,
+           const path& name,
+           uint64_t line = 1, // Start line in the stream.
+           const char* escapes = nullptr)
+        : lexer (is, name, line, escapes, true /* set_mode */) {}
+
+    const path&
+    name () const {return name_;}
+
+    // Note: sets mode for the next token. The second argument can be used to
+    // specify the pair separator character (if the mode supports pairs). If
+    // escapes not specified, then inherit the current mode's (though a mode
+    // can also override it).
+    //
+    virtual void
+    mode (lexer_mode,
+          char pair_separator = '\0',
+          optional<const char*> escapes = nullopt);
+
+    // Expire the current mode early.
+    //
+    void
+    expire_mode () {state_.pop ();} // Note: the mode stack must not be empty.
+
+    lexer_mode
+    mode () const {return state_.top ().mode;} // Mode stack must not be empty.
+
+    char
+    pair_separator () const {return state_.top ().sep_pair;}
+
+    // Scanner. Note that it is ok to call next() again after getting eos.
+    //
+    // If you extend the lexer and add a custom lexer mode, then you must
+    // override next() and handle the custom mode there.
+    //
+    virtual token
+    next ();
+
+    // Peek at the first character of the next token. Return the character
+    // or '\0' if the next token will be eos. Also return an indicator of
+    // whether the next token will be separated.
+    //
+    pair<char, bool>
+    peek_char ();
+
+  protected:
+    struct state
+    {
+      lexer_mode mode;
+
+      char sep_pair;
+      bool sep_space;    // Are whitespaces separators (see skip_spaces())?
+      bool sep_newline;  // Is newline special (see skip_spaces())?
+      bool quotes;       // Recognize quoted fragments.
+
+      const char* escapes; // Effective escape sequences to recognize.
+
+      // Word separator characters. For two-character sequence put the first
+      // one in sep_first and the second one in the corresponding position of
+      // sep_second. If it's a single-character sequence, then put space in
+      // sep_second. If there are multiple sequences that start with the same
+      // character, then repeat the first character in sep_first.
+      //
+      const char* sep_first;
+      const char* sep_second;
+    };
+
+    token
+    next_eval ();
+
+    token
+    next_quoted ();
+
+    // Lex a word assuming current is the top state (which may already have
+    // been "expired" from the top).
+    //
+    virtual token
+    word (state current, bool separated);
+
+    // Return true if we have seen any spaces. Skipped empty lines
+    // don't count. In other words, we are only interested in spaces
+    // that are on the same line as the following non-space character.
+    //
+    bool
+    skip_spaces ();
+
+    // Diagnostics.
+    //
+  protected:
+    fail_mark fail;
+
+    // Lexer state.
+    //
+  protected:
+    lexer (istream& is,
+           const path& name,
+           uint64_t line,
+           const char* escapes,
+           bool set_mode)
+        : char_scanner (is, true /* crlf */, line),
+          fail ("error", &name_), // Address only: name_ is constructed later.
+          name_ (name),
+          sep_ (false)
+    {
+      if (set_mode)
+        mode (lexer_mode::normal, '@', escapes);
+    }
+
+    const path name_;         // Copy of the name passed to the constructor.
+    std::stack<state> state_; // Mode stack; the top is the current mode.
+
+    bool sep_; // True if we skipped spaces in peek().
+  };
+}
+
+// Diagnostics plumbing: build a location from a scanner character.
+//
+namespace butl // ADL
+{
+  inline build2::location
+  get_location (const butl::char_scanner::xchar& c, const void* data)
+  {
+    using namespace build2; // For the unqualified location and path below.
+
+    assert (data != nullptr); // E.g., must be &lexer::name_.
+    return location (static_cast<const path*> (data), c.line, c.column);
+  }
+}
+
+#endif // LIBBUILD2_LEXER_HXX
author	Boris Kolpackov <boris@codesynthesis.com>	2019-06-24 12:01:19 +0200
committer	Karen Arutyunov <karen@codesynthesis.com>	2019-07-01 18:13:55 +0300
commit	977d07a3ae47ef204665d1eda2d642e5064724f3 (patch)
tree	525a3d6421f61ce789b690191d3c30fc09be3517 /libbuild2/lexer.hxx
parent	7161b24963dd9da4d218f92c736b77c35c328a2d (diff)