aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/parser.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/parser.hxx')
-rw-r--r--libbuild2/parser.hxx673
1 files changed, 673 insertions, 0 deletions
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
new file mode 100644
index 0000000..658f266
--- /dev/null
+++ b/libbuild2/parser.hxx
@@ -0,0 +1,673 @@
+// file : libbuild2/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_PARSER_HXX
+#define LIBBUILD2_PARSER_HXX
+
+#include <stack>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/spec.hxx>
+#include <libbuild2/lexer.hxx>
+#include <libbuild2/token.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/export.hxx>
+
+namespace build2
+{
+ class scope;
+ class target;
+ class prerequisite;
+
+ class LIBBUILD2_SYMEXPORT parser
+ {
+ public:
+ // If boot is true, then we are parsing bootstrap.build and modules
+ // should only be bootstrapped.
+ //
+ explicit
+ parser (bool boot = false): fail ("error", &path_), boot_ (boot) {}
+
+ // Issue diagnostics and throw failed in case of an error.
+ //
+ void
+ parse_buildfile (istream&, const path& name, scope& root, scope& base);
+
+ buildspec
+ parse_buildspec (istream&, const path& name);
+
+ token
+ parse_variable (lexer&, scope&, const variable&, token_type kind);
+
+ pair<value, token>
+ parse_variable_value (lexer&, scope&, const dir_path*, const variable&);
+
+ names
+ parse_export_stub (istream& is, const path& p, scope& r, scope& b)
+ {
+ parse_buildfile (is, p, r, b);
+ return move (export_value_);
+ }
+
+ // Recursive descent parser.
+ //
+ protected:
+
+ // Pattern expansion mode.
+ //
+ enum class pattern_mode
+ {
+ ignore, // Treat as ordinary names.
+ detect, // Ignore pair/dir/type if the first name is a pattern.
+ expand // Expand to ordinary names.
+ };
+
+ // If one is true then parse a single (logical) line (logical means it
+ // can actually be several lines, e.g., an if-block). Return false if
+ // nothing has been parsed (i.e., we are still on the same token).
+ //
+ // Note that after this function returns, the token is the first token of
+ // the next line (or eos).
+ //
+ bool
+ parse_clause (token&, token_type&, bool one = false);
+
+ void
+ parse_variable_block (token&, token_type&, const target_type*, string);
+
+ // Ad hoc target names inside < ... >.
+ //
+ struct adhoc_names_loc
+ {
+ names ns;
+ location loc;
+ };
+
+ using adhoc_names = small_vector<adhoc_names_loc, 1>;
+
+ void
+ enter_adhoc_members (adhoc_names_loc&&, bool);
+
+ small_vector<reference_wrapper<target>, 1>
+ enter_targets (names&&, const location&, adhoc_names&&, size_t);
+
+ bool
+ parse_dependency (token&, token_type&,
+ names&&, const location&,
+ adhoc_names&&,
+ names&&, const location&,
+ bool = false);
+
+ void
+ parse_assert (token&, token_type&);
+
+ void
+ parse_print (token&, token_type&);
+
+ void
+ parse_diag (token&, token_type&);
+
+ void
+ parse_dump (token&, token_type&);
+
+ void
+ parse_source (token&, token_type&);
+
+ void
+ parse_include (token&, token_type&);
+
+ void
+ parse_run (token&, token_type&);
+
+ void
+ parse_import (token&, token_type&);
+
+ void
+ parse_export (token&, token_type&);
+
+ void
+ parse_using (token&, token_type&);
+
+ void
+ parse_define (token&, token_type&);
+
+ void
+ parse_if_else (token&, token_type&);
+
+ void
+ parse_for (token&, token_type&);
+
+ void
+ parse_variable (token&, token_type&, const variable&, token_type);
+
+ void
+ parse_type_pattern_variable (token&, token_type&,
+ const target_type&, string,
+ const variable&, token_type, const location&);
+
+ const variable&
+ parse_variable_name (names&&, const location&);
+
+ // Note: calls attributes_push() that the caller must pop.
+ //
+ value
+ parse_variable_value (token&, token_type&);
+
+ void
+ apply_variable_attributes (const variable&);
+
+ void
+ apply_value_attributes (const variable*, // Optional.
+ value& lhs,
+ value&& rhs,
+ token_type assign_kind);
+
+ // Return the value pack (values can be NULL/typed). Note that for an
+ // empty eval context ('()' potentially with whitespaces in between) the
+ // result is an empty pack, not a pack of one empty.
+ //
+ values
+ parse_eval (token&, token_type&, pattern_mode);
+
+ values
+ parse_eval_comma (token&, token_type&, pattern_mode, bool = false);
+
+ value
+ parse_eval_ternary (token&, token_type&, pattern_mode, bool = false);
+
+ value
+ parse_eval_or (token&, token_type&, pattern_mode, bool = false);
+
+ value
+ parse_eval_and (token&, token_type&, pattern_mode, bool = false);
+
+ value
+ parse_eval_comp (token&, token_type&, pattern_mode, bool = false);
+
+ value
+ parse_eval_value (token&, token_type&, pattern_mode, bool = false);
+
+ // Attributes stack. We can have nested attributes, for example:
+ //
+ // x = [bool] ([uint64] $x == [uint64] $y)
+ //
+ // In this example we only apply the value attributes after evaluating
+ // the context, which has its own attributes.
+ //
+ struct attributes
+ {
+ bool has; // Has attributes flag.
+ location loc; // Start of attributes location.
+ vector<pair<string, string>> ats; // Attributes.
+
+ explicit operator bool () const {return has;}
+ };
+
+ // Push a new entry into the attributes_ stack. If the next token is '['
+ // parse the attribute sequence until ']' storing the result in the new
+ // stack entry and setting the 'has' flag (unless the attribute list is
+ // empty). Then get the next token and, if standalone is false, verify
+ // it is not newline/eos (i.e., there is something after it). Return the
+ // indication of whether there are any attributes and their location.
+ //
+ // Note that during pre-parsing nothing is pushed into the stack and
+ // the returned attributes object indicates there are no attributes.
+ //
+ pair<bool, location>
+ attributes_push (token&, token_type&, bool standalone = false);
+
+ attributes
+ attributes_pop ()
+ {
+ assert (!pre_parse_);
+ attributes r (move (attributes_.top ()));
+ attributes_.pop ();
+ return r;
+ }
+
+ attributes&
+ attributes_top () {return attributes_.top ();}
+
+ // Source a stream optionnaly entering it as a buildfile and performing
+ // the default target processing.
+ //
+ void
+ source (istream&,
+ const path&,
+ const location&,
+ bool enter,
+ bool default_target);
+
+ // If chunk is true, then parse the smallest but complete, name-wise,
+ // chunk of input. Note that in this case you may still end up with
+ // multiple names, for example, {foo bar} or $foo. In the pre-parse mode
+ // always return empty list of names.
+ //
+ // The what argument is used in diagnostics (e.g., "expected <what>
+ // instead of ...".
+ //
+ // The separators argument specifies the special characters to recognize
+ // inside the name. These can be the directory separators and the '%'
+ // project separator. Note that even if it is NULL, the result may still
+ // contain non-simple names due to variable expansions.
+ //
+
+ static const string name_separators;
+
+ names
+ parse_names (token& t, token_type& tt,
+ pattern_mode pmode,
+ bool chunk = false,
+ const char* what = "name",
+ const string* separators = &name_separators)
+ {
+ names ns;
+ parse_names (t, tt,
+ ns,
+ pmode,
+ chunk,
+ what,
+ separators,
+ 0,
+ nullopt, nullptr, nullptr);
+ return ns;
+ }
+
+ // Return true if this token starts a name. Or, to put it another way,
+ // calling parse_names() on this token won't fail with the "expected name
+ // instead of <this-token>" error. Only consider '(' if the second
+ // argument is true.
+ //
+ bool
+ start_names (token_type&, bool lparen = true);
+
+ // As above but return the result as a value, which can be typed and NULL.
+ //
+ value
+ parse_value (token& t, token_type& tt,
+ pattern_mode pmode,
+ const char* what = "name",
+ const string* separators = &name_separators,
+ bool chunk = false)
+ {
+ names ns;
+ auto r (parse_names (t, tt,
+ ns,
+ pmode,
+ chunk,
+ what,
+ separators,
+ 0,
+ nullopt, nullptr, nullptr));
+
+ value v (r.type); // Potentially typed NULL value.
+
+ // This should not fail since we are typing the result of reversal from
+ // the typed value.
+ //
+ if (r.not_null)
+ v.assign (move (ns), nullptr);
+
+ return v;
+ }
+
+ // Append names and return the indication if the parsed value is not NULL
+ // and whether it is typed (and whether it is a pattern if pattern_mode is
+ // detect).
+ //
+ // You may have noticed that what we return here is essentially a value
+ // and doing it this way (i.e., reversing it to untyped names and
+ // returning its type so that it can potentially be "typed back") is kind
+ // of backwards. The reason we are doing it this way is because in many
+ // places we expect things untyped and if we were to always return a
+ // (potentially typed) value, then we would have to reverse it in all
+ // those places. Still it may make sense to look into redesigning the
+ // whole thing one day.
+ //
+ // Currently the only way for the result to be NULL or have a type is if
+ // it is the result of a sole, unquoted variable expansion, function call,
+ // or context evaluation.
+ //
+ struct parse_names_result
+ {
+ bool not_null;
+ const value_type* type;
+ optional<const target_type*> pattern;
+ };
+
+ parse_names_result
+ parse_names (token&, token_type&,
+ names&,
+ pattern_mode,
+ bool chunk = false,
+ const char* what = "name",
+ const string* separators = &name_separators,
+ size_t pairn = 0,
+ const optional<project_name>& prj = nullopt,
+ const dir_path* dir = nullptr,
+ const string* type = nullptr,
+ bool cross = true,
+ bool curly = false);
+
+ size_t
+ parse_names_trailer (token&, token_type&,
+ names&,
+ pattern_mode,
+ const char* what,
+ const string* separators,
+ size_t pairn,
+ const optional<project_name>& prj,
+ const dir_path* dir,
+ const string* type,
+ bool cross);
+
+ size_t
+ expand_name_pattern (const location&,
+ names&&,
+ names&,
+ const char* what,
+ size_t pairn,
+ const dir_path* dir,
+ const string* type,
+ const target_type*);
+
+ size_t
+ splice_names (const location&,
+ const names_view&,
+ names&&,
+ names&,
+ const char* what,
+ size_t pairn,
+ const optional<project_name>& prj,
+ const dir_path* dir,
+ const string* type);
+
+ // Skip until newline or eos.
+ //
+ void
+ skip_line (token&, token_type&);
+
+ // Skip until block-closing } or eos, taking into account nested blocks.
+ //
+ void
+ skip_block (token&, token_type&);
+
+ // Return true if the name token can be considered a directive keyword.
+ //
+ bool
+ keyword (token&);
+
+ // Buildspec.
+ //
+ buildspec
+ parse_buildspec_clause (token&, token_type&, size_t);
+
+ // Customization hooks.
+ //
+ protected:
+ // If qual is not empty, then its pair member should indicate the kind
+ // of qualification: ':' -- target, '/' -- scope.
+ //
+ virtual lookup
+ lookup_variable (name&& qual, string&& name, const location&);
+
+ // Utilities.
+ //
+ protected:
+ class enter_scope;
+ class enter_target;
+ class enter_prerequisite;
+
+ // Switch to a new current scope. Note that this function might also have
+ // to switch to a new root scope if the new current scope is in another
+ // project. So both must be saved and restored.
+ //
+ void
+ switch_scope (const dir_path&);
+
+ void
+ process_default_target (token&);
+
+ // Enter buildfile as a target.
+ //
+ void
+ enter_buildfile (const path&);
+
+ // Lexer.
+ //
+ protected:
+ location
+ get_location (const token& t) const
+ {
+ return build2::get_location (t, *path_);
+ }
+
+ token_type
+ next (token&, token_type&);
+
+ // If the current token is newline, then get the next token. Otherwise,
+ // fail unless the current token is eos (i.e., optional newline at the end
+ // of stream). If the after argument is not \0, use it in diagnostics as
+ // the token after which the newline was expectd.
+ //
+ token_type
+ next_after_newline (token&, token_type&, char after = '\0');
+
+ // Be careful with peeking and switching the lexer mode. See keyword()
+ // for more information.
+ //
+ token_type
+ peek ();
+
+ token_type
+ peek (lexer_mode m, char ps = '\0')
+ {
+ // The idea is that if we already have something peeked, then it should
+ // be in the same mode. We also don't re-set the mode since it may have
+ // expired after the first token.
+ //
+ if (peeked_)
+ {
+ assert (peek_.mode == m);
+ return peek_.token.type;
+ }
+
+ mode (m, ps);
+ return peek ();
+ }
+
+ const token&
+ peeked () const
+ {
+ assert (peeked_);
+ return peek_.token;
+ }
+
+ void
+ mode (lexer_mode m, char ps = '\0')
+ {
+ if (replay_ != replay::play)
+ lexer_->mode (m, ps);
+ else
+ // As a sanity check, make sure the mode matches the next token. Note
+ // that we don't check the pair separator since it can be overriden by
+ // the lexer's mode() implementation.
+ //
+ assert (replay_i_ != replay_data_.size () &&
+ replay_data_[replay_i_].mode == m);
+ }
+
+ lexer_mode
+ mode () const
+ {
+ if (replay_ != replay::play)
+ return lexer_->mode ();
+ else
+ {
+ assert (replay_i_ != replay_data_.size ());
+ return replay_data_[replay_i_].mode;
+ }
+ }
+
+ void
+ expire_mode ()
+ {
+ if (replay_ != replay::play)
+ lexer_->expire_mode ();
+ }
+
+ // Token saving and replaying. Note that it can only be used in certain
+ // contexts. Specifically, the code that parses a replay must not interact
+ // with the lexer directly (e.g., the keyword() test). Replays also cannot
+ // nest. For now we don't enforce any of this.
+ //
+ // Note also that the peeked token is not part of the replay, until it
+ // is "got".
+ //
+ void
+ replay_save ()
+ {
+ assert (replay_ == replay::stop);
+ replay_ = replay::save;
+ }
+
+ void
+ replay_play ()
+ {
+ assert ((replay_ == replay::save && !replay_data_.empty ()) ||
+ (replay_ == replay::play && replay_i_ == replay_data_.size ()));
+
+ if (replay_ == replay::save)
+ replay_path_ = path_; // Save old path.
+
+ replay_i_ = 0;
+ replay_ = replay::play;
+ }
+
+ void
+ replay_stop ()
+ {
+ if (replay_ == replay::play)
+ path_ = replay_path_; // Restore old path.
+
+ replay_data_.clear ();
+ replay_ = replay::stop;
+ }
+
+ struct replay_guard
+ {
+ replay_guard (parser& p, bool start = true)
+ : p_ (start ? &p : nullptr)
+ {
+ if (p_ != nullptr)
+ p_->replay_save ();
+ }
+
+ void
+ play ()
+ {
+ if (p_ != nullptr)
+ p_->replay_play ();
+ }
+
+ ~replay_guard ()
+ {
+ if (p_ != nullptr)
+ p_->replay_stop ();
+ }
+
+ private:
+ parser* p_;
+ };
+
+ // Stop saving and get the data.
+ //
+ replay_tokens
+ replay_data ()
+ {
+ assert (replay_ == replay::save);
+
+ replay_tokens r (move (replay_data_));
+ replay_data_.clear ();
+ replay_ = replay::stop;
+ return r;
+ }
+
+ // Set the data and start playing.
+ //
+ void
+ replay_data (replay_tokens&& d)
+ {
+ assert (replay_ == replay::stop);
+
+ replay_path_ = path_; // Save old path.
+
+ replay_data_ = move (d);
+ replay_i_ = 0;
+ replay_ = replay::play;
+ }
+
+ // Implementation details, don't call directly.
+ //
+ replay_token
+ lexer_next ()
+ {
+ lexer_mode m (lexer_->mode ()); // Get it first since it may expire.
+ return replay_token {lexer_->next (), path_, m};
+ }
+
+ const replay_token&
+ replay_next ()
+ {
+ assert (replay_i_ != replay_data_.size ());
+ const replay_token& rt (replay_data_[replay_i_++]);
+
+ // Update the path. Note that theoretically it is possible that peeking
+ // at the next token will "change" the path of the current token. The
+ // workaround would be to call get_location() before peeking.
+ //
+ path_ = rt.file;
+
+ return rt;
+ }
+
+ // Diagnostics.
+ //
+ protected:
+ const fail_mark fail;
+
+ protected:
+ bool pre_parse_ = false;
+ bool boot_;
+
+ const path* path_; // Current path.
+ lexer* lexer_;
+
+ prerequisite* prerequisite_ = nullptr; // Current prerequisite, if any.
+ target* target_ = nullptr; // Current target, if any.
+ scope* scope_ = nullptr; // Current base scope (out_base).
+ scope* root_ = nullptr; // Current root scope (out_root).
+
+ const dir_path* pbase_ = nullptr; // Current pattern base directory.
+
+ std::stack<attributes> attributes_;
+
+ target* default_target_;
+ names export_value_;
+
+ replay_token peek_;
+ bool peeked_ = false;
+
+ enum class replay {stop, save, play} replay_ = replay::stop;
+ replay_tokens replay_data_;
+ size_t replay_i_; // Position of the next token during replay.
+ const path* replay_path_; // Path before replay began (to be restored).
+ };
+}
+
+#endif // LIBBUILD2_PARSER_HXX