Diffstat (limited to 'libbuild2')
94 files changed, 38288 insertions, 0 deletions
diff --git a/libbuild2/.gitignore b/libbuild2/.gitignore new file mode 100644 index 0000000..292d2f8 --- /dev/null +++ b/libbuild2/.gitignore @@ -0,0 +1,5 @@ +# Unit test executables and Testscript output directories +# (can be symlinks). +# +*.test +test-*.test diff --git a/libbuild2/action.hxx b/libbuild2/action.hxx new file mode 100644 index 0000000..9fa2a16 --- /dev/null +++ b/libbuild2/action.hxx @@ -0,0 +1,202 @@ +// file : libbuild2/action.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_ACTION_HXX +#define LIBBUILD2_ACTION_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // While we are using uint8_t for the meta/operation ids, we assume + // that each is limited to 4 bits (max 128 entries) so that we can + // store the combined action id in uint8_t as well. This makes our + // life easier when it comes to defining switch labels for action + // ids (no need to mess with endian-ness). + // + // Note that 0 is not a valid meta/operation/action id. + // + using meta_operation_id = uint8_t; + using operation_id = uint8_t; + using action_id = uint8_t; + + // Meta-operations and operations are not the end of the story. We also have + // operation nesting (currently only one level deep) which is used to + // implement pre/post operations (currently, but may be useful for other + // things). Here is the idea: the test operation needs to make sure that the + // targets that it needs to test are up-to-date. So it runs update as its + // pre-operation. It is almost like an ordinary update except that it has + // test as its outer operation (the meta-operations are always the same). + // This way a rule can recognize that this is "update for test" and do + // something differently. For example, if an executable is not a test, then + // there is no use updating it. At the same time, most rules will ignore the + // fact that this is a nested update and for them it is "update as usual". + // + // This inner/outer operation support is implemented by maintaining two + // independent "target states" (see target::state; initially we tried to do + // it via rule/recipe override but that didn't end up well, to put it + // mildly). While the outer operation normally "directs" the inner, inner + // rules can still be matched/executed directly, without outer's involvement + // (e.g., because of other inner rules). A typical implementation of an + // outer rule either returns noop or delegates to the inner rule. In + // particular, it should not replace or override the inner's logic. + // + // While most of the relevant target state is duplicated, certain things are + // shared among the inner/outer rules, such as the target data pad and the + // group state. In particular, it is assumed the group state is always + // determined by the inner rule (see resolve_members()). + // + // Normally, an outer rule will be responsible for any additional, outer + // operation-specific work. Sometimes, however, the inner rule needs to + // customize its behavior. In this case the outer and inner rules must + // communicate this explicitly (normally via the target's data pad) and + // there is a number of restrictions to this approach. See + // cc::{link,install}_rule for details. + // + struct action + { + action (): inner_id (0), outer_id (0) {} // Invalid action. + + // If this is not a nested operation, then outer should be 0. 
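// For illustration (everything below follows from the 4-bit packing
// described above and the id constants defined later in this header),
// an "update for test" action decomposes as:
//
//   action a (perform_id, update_id, test_id);
//
//   a.meta_operation ()  == perform_id
//   a.operation ()       == update_id
//   a.outer_operation () == test_id
//   a.inner_action ()    == action (perform_id, update_id)
//   action_id (a)        == perform_update_id  // (perform_id << 4) | update_id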
+ // + action (meta_operation_id m, operation_id inner, operation_id outer = 0) + : inner_id ((m << 4) | inner), + outer_id (outer == 0 ? 0 : (m << 4) | outer) {} + + meta_operation_id + meta_operation () const {return inner_id >> 4;} + + operation_id + operation () const {return inner_id & 0xF;} + + operation_id + outer_operation () const {return outer_id & 0xF;} + + bool inner () const {return outer_id == 0;} + bool outer () const {return outer_id != 0;} + + action + inner_action () const + { + return action (meta_operation (), operation ()); + } + + // Implicit conversion operator to action_id for the switch() statement, + // etc. Most places only care about the inner operation. + // + operator action_id () const {return inner_id;} + + action_id inner_id; + action_id outer_id; + }; + + inline bool + operator== (action x, action y) + { + return x.inner_id == y.inner_id && x.outer_id == y.outer_id; + } + + inline bool + operator!= (action x, action y) {return !(x == y);} + + bool operator> (action, action) = delete; + bool operator< (action, action) = delete; + bool operator>= (action, action) = delete; + bool operator<= (action, action) = delete; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, action); // operation.cxx + + // Inner/outer operation state container. + // + template <typename T> + struct action_state + { + T data[2]; // [0] -- inner, [1] -- outer. + + T& operator[] (action a) {return data[a.inner () ? 0 : 1];} + const T& operator[] (action a) const {return data[a.inner () ? 0 : 1];} + }; + + // Id constants for build-in and pre-defined meta/operations. + // + const meta_operation_id noop_id = 1; // nomop? + const meta_operation_id perform_id = 2; + const meta_operation_id configure_id = 3; + const meta_operation_id disfigure_id = 4; + const meta_operation_id create_id = 5; + const meta_operation_id dist_id = 6; + const meta_operation_id info_id = 7; + + // The default operation is a special marker that can be used to indicate + // that no operation was explicitly specified by the user. If adding + // something here remember to update the man page. + // + const operation_id default_id = 1; // Shall be first. + const operation_id update_id = 2; // Shall be second. + const operation_id clean_id = 3; + + const operation_id test_id = 4; + const operation_id update_for_test_id = 5; // update(for test) alias. + + const operation_id install_id = 6; + const operation_id uninstall_id = 7; + const operation_id update_for_install_id = 8; // update(for install) alias. + + const action_id perform_update_id = (perform_id << 4) | update_id; + const action_id perform_clean_id = (perform_id << 4) | clean_id; + const action_id perform_test_id = (perform_id << 4) | test_id; + const action_id perform_install_id = (perform_id << 4) | install_id; + const action_id perform_uninstall_id = (perform_id << 4) | uninstall_id; + + const action_id configure_update_id = (configure_id << 4) | update_id; + + // Recipe execution mode. + // + // When a target is a prerequisite of another target, its recipe can be + // executed before the dependent's recipe (the normal case) or after. + // We will call these "front" and "back" execution modes, respectively + // (think "the prerequisite is 'front-running' the dependent"). + // + // There could also be several dependent targets and the prerequisite's + // recipe can be execute as part of the first dependent (the normal + // case) or last (or for all/some of them; see the recipe execution + // protocol in <target>). 
We will call these "first" and "last" + // execution modes, respectively. + // + // Now you may be having a hard time imagining where a mode other than + // the normal one (first/front) could be useful. An the answer is, + // compensating or inverse operations such as clean, uninstall, etc. + // If we use the last/back mode for, say, clean, then we will remove + // targets in the order inverse to the way they were updated. While + // this sounds like an elegant idea, are there any practical benefits + // of doing it this way? As it turns out there is (at least) one: when + // we are removing a directory (see fsdir{}), we want to do it after + // all the targets that depend on it (such as files, sub-directories) + // were removed. If we do it before, then the directory won't be empty + // yet. + // + // It appears that this execution mode is dictated by the essence of + // the operation. Constructive operations (those that "do") seem to + // naturally use the first/front mode. That is, we need to "do" the + // prerequisite first before we can "do" the dependent. While the + // destructive ones (those that "undo") seem to need last/back. That + // is, we need to "undo" all the dependents before we can "undo" the + // prerequisite (say, we need to remove all the files before we can + // remove their directory). + // + // If you noticed the parallel with the way C++ construction and + // destruction works for base/derived object then you earned a gold + // star! + // + // Note that the front/back mode is realized in the dependen's recipe + // (which is another indication that it is a property of the operation). + // + enum class execution_mode {first, last}; +} + +#endif // LIBBUILD2_ACTION_HXX diff --git a/libbuild2/algorithm.cxx b/libbuild2/algorithm.cxx new file mode 100644 index 0000000..963714b --- /dev/null +++ b/libbuild2/algorithm.cxx @@ -0,0 +1,2205 @@ +// file : libbuild2/algorithm.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/algorithm.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/rule.hxx> +#include <libbuild2/file.hxx> // import() +#include <libbuild2/search.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> +#include <libbuild2/prerequisite.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + const target& + search (const target& t, const prerequisite_key& pk) + { + assert (phase == run_phase::match); + + // If this is a project-qualified prerequisite, then this is import's + // business. + // + if (pk.proj) + return import (pk); + + if (const target* pt = pk.tk.type->search (t, pk)) + return *pt; + + return create_new_target (pk); + } + + const target* + search_existing (const prerequisite_key& pk) + { + assert (phase == run_phase::match || phase == run_phase::execute); + + return pk.proj ? import_existing (pk) : search_existing_target (pk); + } + + const target& + search (const target& t, name n, const scope& s) + { + assert (phase == run_phase::match); + + auto rp (s.find_target_type (n, location ())); + const target_type* tt (rp.first); + optional<string>& ext (rp.second); + + if (tt == nullptr) + fail << "unknown target type " << n.type << " in name " << n; + + if (!n.dir.empty ()) + n.dir.normalize (false, true); // Current dir collapses to an empty one. + + // @@ OUT: for now we assume the prerequisite's out is undetermined. 
+ // Would need to pass a pair of names. + // + return search (t, + *tt, + n.dir, + dir_path (), + n.value, + ext ? &*ext : nullptr, + &s, + n.proj); + } + + const target* + search_existing (const name& cn, const scope& s, const dir_path& out) + { + assert (phase == run_phase::match || phase == run_phase::execute); + + name n (cn); + auto rp (s.find_target_type (n, location ())); + const target_type* tt (rp.first); + optional<string>& ext (rp.second); + + // For now we treat an unknown target type as an unknown target. Seems + // logical. + // + if (tt == nullptr) + return nullptr; + + if (!n.dir.empty ()) + n.dir.normalize (false, true); // Current dir collapses to an empty one. + + bool q (cn.qualified ()); + + // @@ OUT: for now we assume the prerequisite's out is undetermined. + // Would need to pass a pair of names. + // + prerequisite_key pk { + n.proj, {tt, &n.dir, q ? &empty_dir_path : &out, &n.value, ext}, &s}; + + return q ? import_existing (pk) : search_existing_target (pk); + } + + // target_lock + // + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + const target_lock* target_lock_stack = nullptr; + + const target_lock* target_lock:: + stack () noexcept + { + return target_lock_stack; + } + + const target_lock* target_lock:: + stack (const target_lock* s) noexcept + { + const target_lock* r (target_lock_stack); + target_lock_stack = s; + return r; + } + + // If the work_queue is absent, then we don't wait. + // + target_lock + lock_impl (action a, const target& ct, optional<scheduler::work_queue> wq) + { + assert (phase == run_phase::match); + + // Most likely the target's state is (count_touched - 1), that is, 0 or + // previously executed, so let's start with that. + // + size_t b (target::count_base ()); + size_t e (b + target::offset_touched - 1); + + size_t appl (b + target::offset_applied); + size_t busy (b + target::offset_busy); + + atomic_count& task_count (ct[a].task_count); + + while (!task_count.compare_exchange_strong ( + e, + busy, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. + { + // Wait for the count to drop below busy if someone is already working + // on this target. + // + if (e >= busy) + { + // Check for dependency cycles. The cycle members should be evident + // from the "while ..." info lines that will follow. + // + if (dependency_cycle (a, ct)) + fail << "dependency cycle detected involving target " << ct; + + if (!wq) + return target_lock {a, nullptr, e - b}; + + // We also unlock the phase for the duration of the wait. Why? + // Consider this scenario: we are trying to match a dir{} target whose + // buildfile still needs to be loaded. Let's say someone else started + // the match before us. So we wait for their completion and they wait + // to switch the phase to load. Which would result in a deadlock + // unless we release the phase. + // + phase_unlock ul; + e = sched.wait (busy - 1, task_count, *wq); + } + + // We don't lock already applied or executed targets. + // + if (e >= appl) + return target_lock {a, nullptr, e - b}; + } + + // We now have the lock. Analyze the old value and decide what to do. + // + target& t (const_cast<target&> (ct)); + target::opstate& s (t[a]); + + size_t offset; + if (e <= b) + { + // First lock for this operation. 
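// (Per-action state, as encoded in task_count, progresses roughly as
// touched -> tried/matched -> applied -> executed, with the count raised
// to busy while some thread is actively working on the target; the offset
// checks above rely on this ordering.)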
+ // + s.rule = nullptr; + s.dependents.store (0, memory_order_release); + + offset = target::offset_touched; + } + else + { + offset = e - b; + assert (offset == target::offset_touched || + offset == target::offset_tried || + offset == target::offset_matched); + } + + return target_lock {a, &t, offset}; + } + + void + unlock_impl (action a, target& t, size_t offset) + { + assert (phase == run_phase::match); + + atomic_count& task_count (t[a].task_count); + + // Set the task count and wake up any threads that might be waiting for + // this target. + // + task_count.store (offset + target::count_base (), memory_order_release); + sched.resume (task_count); + } + + target& + add_adhoc_member (target& t, + const target_type& tt, + const dir_path& dir, + const dir_path& out, + string n) + { + tracer trace ("add_adhoc_member"); + + const_ptr<target>* mp (&t.member); + for (; *mp != nullptr && !(*mp)->is_a (tt); mp = &(*mp)->member) ; + + target& m (*mp != nullptr // Might already be there. + ? **mp + : targets.insert (tt, + dir, + out, + move (n), + nullopt /* ext */, + true /* implied */, + trace).first); + if (*mp == nullptr) + { + *mp = &m; + m.group = &t; + } + + return m; + }; + + // Return the matching rule or NULL if no match and try_match is true. + // + const rule_match* + match_impl (action a, target& t, const rule* skip, bool try_match) + { + // If this is an outer operation (Y-for-X), then we look for rules + // registered for the outer id (X). Note that we still pass the original + // action to the rule's match() function so that it can distinguish + // between a pre/post operation (Y-for-X) and the actual operation (X). + // + meta_operation_id mo (a.meta_operation ()); + operation_id o (a.inner () ? a.operation () : a.outer_operation ()); + + const scope& bs (t.base_scope ()); + + for (auto tt (&t.type ()); tt != nullptr; tt = tt->base) + { + // Search scopes outwards, stopping at the project root. + // + for (const scope* s (&bs); + s != nullptr; + s = s->root () ? global_scope : s->parent_scope ()) + { + const operation_rule_map* om (s->rules[mo]); + + if (om == nullptr) + continue; // No entry for this meta-operation id. + + // First try the map for the actual operation. If that doesn't yeld + // anything, try the wildcard map. + // + for (operation_id oi (o), oip (o); oip != 0; oip = oi, oi = 0) + { + const target_type_rule_map* ttm ((*om)[oi]); + + if (ttm == nullptr) + continue; // No entry for this operation id. + + if (ttm->empty ()) + continue; // Empty map for this operation id. + + auto i (ttm->find (tt)); + + if (i == ttm->end () || i->second.empty ()) + continue; // No rules registered for this target type. + + const auto& rules (i->second); // Hint map. + + // @@ TODO + // + // Different rules can be used for different operations (update vs + // test is a good example). So, at some point, we will probably have + // to support a list of hints or even an operation-hint map (e.g., + // 'hint=cxx test=foo' if cxx supports the test operation but we + // want the foo rule instead). This is also the place where the + // '{build clean}=cxx' construct (which we currently do not support) + // can come handy. + // + // Also, ignore the hint (that is most likely ment for a different + // operation) if this is a unique match. + // + string hint; + auto rs (rules.size () == 1 + ? 
make_pair (rules.begin (), rules.end ()) + : rules.find_sub (hint)); + + for (auto i (rs.first); i != rs.second; ++i) + { + const auto& r (*i); + const string& n (r.first); + const rule& ru (r.second); + + if (&ru == skip) + continue; + + { + auto df = make_diag_frame ( + [a, &t, &n](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching rule " << n << " to " + << diag_do (a, t); + }); + + if (!ru.match (a, t, hint)) + continue; + } + + // Do the ambiguity test. + // + bool ambig (false); + + diag_record dr; + for (++i; i != rs.second; ++i) + { + const string& n1 (i->first); + const rule& ru1 (i->second); + + { + auto df = make_diag_frame ( + [a, &t, &n1](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching rule " << n1 << " to " + << diag_do (a, t); + }); + + // @@ TODO: this makes target state in match() undetermined + // so need to fortify rules that modify anything in match + // to clear things. + // + // @@ Can't we temporarily swap things out in target? + // + if (!ru1.match (a, t, hint)) + continue; + } + + if (!ambig) + { + dr << fail << "multiple rules matching " << diag_doing (a, t) + << info << "rule " << n << " matches"; + ambig = true; + } + + dr << info << "rule " << n1 << " also matches"; + } + + if (!ambig) + return &r; + else + dr << info << "use rule hint to disambiguate this match"; + } + } + } + } + + if (!try_match) + { + diag_record dr; + dr << fail << "no rule to " << diag_do (a, t); + + if (verb < 4) + dr << info << "re-run with --verbose=4 for more information"; + } + + return nullptr; + } + + recipe + apply_impl (action a, + target& t, + const pair<const string, reference_wrapper<const rule>>& r) + { + auto df = make_diag_frame ( + [a, &t, &r](const diag_record& dr) + { + if (verb != 0) + dr << info << "while applying rule " << r.first << " to " + << diag_do (a, t); + }); + + return r.second.get ().apply (a, t); + } + + // If step is true then perform only one step of the match/apply sequence. + // + // If try_match is true, then indicate whether there is a rule match with + // the first half of the result. + // + static pair<bool, target_state> + match_impl (target_lock& l, + bool step = false, + bool try_match = false) + { + assert (l.target != nullptr); + + action a (l.action); + target& t (*l.target); + target::opstate& s (t[a]); + + // Intercept and handle matching an ad hoc group member. + // + if (t.adhoc_member ()) + { + assert (!step); + + const target& g (*t.group); + + // It feels natural to "convert" this call to the one for the group, + // including the try_match part. Semantically, we want to achieve the + // following: + // + // [try_]match (a, g); + // match_recipe (l, group_recipe); + // + auto df = make_diag_frame ( + [a, &t](const diag_record& dr) + { + if (verb != 0) + dr << info << "while matching group rule to " << diag_do (a, t); + }); + + pair<bool, target_state> r (match (a, g, 0, nullptr, try_match)); + + if (r.first) + { + if (r.second != target_state::failed) + { + match_inc_dependens (a, g); + match_recipe (l, group_recipe); + } + } + else + l.offset = target::offset_tried; + + return r; // Group state. + } + + try + { + // Continue from where the target has been left off. + // + switch (l.offset) + { + case target::offset_tried: + { + if (try_match) + return make_pair (false, target_state::unknown); + + // To issue diagnostics ... + } + // Fall through. + case target::offset_touched: + { + // Match. 
+ // + + // Clear the rule-specific variables, resolved targets list, and the + // data pad before calling match(). The rule is free to modify these + // in its match() (provided that it matches) in order to, for + // example, convey some information to apply(). + // + s.vars.clear (); + t.prerequisite_targets[a].clear (); + if (a.inner ()) t.clear_data (); + + const rule_match* r (match_impl (a, t, nullptr, try_match)); + + assert (l.offset != target::offset_tried); // Should have failed. + + if (r == nullptr) // Not found (try_match == true). + { + l.offset = target::offset_tried; + return make_pair (false, target_state::unknown); + } + + s.rule = r; + l.offset = target::offset_matched; + + if (step) + // Note: s.state is still undetermined. + return make_pair (true, target_state::unknown); + + // Otherwise ... + } + // Fall through. + case target::offset_matched: + { + // Apply. + // + set_recipe (l, apply_impl (a, t, *s.rule)); + l.offset = target::offset_applied; + break; + } + default: + assert (false); + } + } + catch (const failed&) + { + // As a sanity measure clear the target data since it can be incomplete + // or invalid (mark()/unmark() should give you some ideas). + // + s.vars.clear (); + t.prerequisite_targets[a].clear (); + if (a.inner ()) t.clear_data (); + + s.state = target_state::failed; + l.offset = target::offset_applied; + } + + return make_pair (true, s.state); + } + + // If try_match is true, then indicate whether there is a rule match with + // the first half of the result. + // + pair<bool, target_state> + match (action a, + const target& ct, + size_t start_count, + atomic_count* task_count, + bool try_match) + { + // If we are blocking then work our own queue one task at a time. The + // logic here is that we may have already queued other tasks before this + // one and there is nothing bad (except a potentially deep stack trace) + // about working through them while we wait. On the other hand, we want + // to continue as soon as the lock is available in order not to nest + // things unnecessarily. + // + // That's what we used to do but that proved to be too deadlock-prone. For + // example, we may end up popping the last task which needs a lock that we + // are already holding. A fuzzy feeling is that we need to look for tasks + // (compare their task_counts?) that we can safely work on (though we will + // need to watch out for indirections). So perhaps it's just better to keep + // it simple and create a few extra threads. + // + target_lock l ( + lock_impl (a, + ct, + task_count == nullptr + ? optional<scheduler::work_queue> (scheduler::work_none) + : nullopt)); + + if (l.target != nullptr) + { + assert (l.offset < target::offset_applied); // Shouldn't lock otherwise. + + if (try_match && l.offset == target::offset_tried) + return make_pair (false, target_state::unknown); + + if (task_count == nullptr) + return match_impl (l, false /* step */, try_match); + + // Pass "disassembled" lock since the scheduler queue doesn't support + // task destruction. + // + target_lock::data ld (l.release ()); + + // Also pass our diagnostics and lock stacks (this is safe since we + // expect the caller to wait for completion before unwinding its stack). + // + if (sched.async (start_count, + *task_count, + [a, try_match] (const diag_frame* ds, + const target_lock* ls, + target& t, size_t offset) + { + // Switch to caller's diag and lock stacks. 
+ // + diag_frame::stack_guard dsg (ds); + target_lock::stack_guard lsg (ls); + + try + { + phase_lock pl (run_phase::match); // Can throw. + { + target_lock l {a, &t, offset}; // Reassemble. + match_impl (l, false /* step */, try_match); + // Unlock within the match phase. + } + } + catch (const failed&) {} // Phase lock failure. + }, + diag_frame::stack (), + target_lock::stack (), + ref (*ld.target), + ld.offset)) + return make_pair (true, target_state::postponed); // Queued. + + // Matched synchronously, fall through. + } + else + { + // Already applied, executed, or busy. + // + if (l.offset >= target::offset_busy) + return make_pair (true, target_state::busy); + + // Fall through. + } + + return ct.try_matched_state (a, false); + } + + group_view + resolve_members_impl (action a, const target& g, target_lock l) + { + // Note that we will be unlocked if the target is already applied. + // + group_view r; + + // Continue from where the target has been left off. + // + switch (l.offset) + { + case target::offset_touched: + case target::offset_tried: + { + // Match (locked). + // + if (match_impl (l, true).second == target_state::failed) + throw failed (); + + if ((r = g.group_members (a)).members != nullptr) + break; + + // To apply ... + } + // Fall through. + case target::offset_matched: + { + // @@ Doing match without execute messes up our target_count. Does + // not seem like it will be easy to fix (we don't know whether + // someone else will execute this target). + // + // @@ What if we always do match & execute together? After all, + // if a group can be resolved in apply(), then it can be + // resolved in match()! + // + + // Apply (locked). + // + if (match_impl (l, true).second == target_state::failed) + throw failed (); + + if ((r = g.group_members (a)).members != nullptr) + break; + + // Unlock and to execute ... + // + l.unlock (); + } + // Fall through. + case target::offset_applied: + { + // Execute (unlocked). + // + // Note that we use execute_direct() rather than execute() here to + // sidestep the dependents count logic. In this context, this is by + // definition the first attempt to execute this rule (otherwise we + // would have already known the members list) and we really do need + // to execute it now. + // + { + phase_switch ps (run_phase::execute); + execute_direct (a, g); + } + + r = g.group_members (a); + break; + } + } + + return r; + } + + void + resolve_group_impl (action, const target&, target_lock l) + { + match_impl (l, true /* step */, true /* try_match */); + } + + template <typename R, typename S> + static void + match_prerequisite_range (action a, target& t, + R&& r, + const S& ms, + const scope* s) + { + auto& pts (t.prerequisite_targets[a]); + + // Start asynchronous matching of prerequisites. Wait with unlocked phase + // to allow phase switching. + // + wait_guard wg (target::count_busy (), t[a].task_count, true); + + size_t i (pts.size ()); // Index of the first to be added. + for (auto&& p: forward<R> (r)) + { + // Ignore excluded. + // + include_type pi (include (a, t, p)); + + if (!pi) + continue; + + prerequisite_target pt (ms + ? ms (a, t, p, pi) + : prerequisite_target (&search (t, p), pi)); + + if (pt.target == nullptr || (s != nullptr && !pt.target->in (*s))) + continue; + + match_async (a, *pt.target, target::count_busy (), t[a].task_count); + pts.push_back (move (pt)); + } + + wg.wait (); + + // Finish matching all the targets that we have started. 
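// (A sketch of how a rule typically drives this, with hypothetical names;
// the convenience overloads used here are declared in algorithm.hxx:
//
//   recipe some_rule::apply (action a, target& t) const
//   {
//     match_prerequisites (a, t);
//     return [] (action a, const target& t)
//     {
//       return execute_prerequisites (a, t);
//     };
//   }
//
// That is, prerequisites are matched in apply() and then executed by the
// returned recipe, much as default_action() below does for the execute
// half.)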
+ // + for (size_t n (pts.size ()); i != n; ++i) + { + const target& pt (*pts[i]); + match (a, pt); + } + } + + void + match_prerequisites (action a, target& t, + const match_search& ms, + const scope* s) + { + match_prerequisite_range (a, t, group_prerequisites (t), ms, s); + } + + void + match_prerequisite_members (action a, target& t, + const match_search_member& msm, + const scope* s) + { + match_prerequisite_range (a, t, group_prerequisite_members (a, t), msm, s); + } + + template <typename T> + void + match_members (action a, target& t, T const* ts, size_t n) + { + // Pretty much identical to match_prerequisite_range() except we don't + // search. + // + wait_guard wg (target::count_busy (), t[a].task_count, true); + + for (size_t i (0); i != n; ++i) + { + const target* m (ts[i]); + + if (m == nullptr || marked (m)) + continue; + + match_async (a, *m, target::count_busy (), t[a].task_count); + } + + wg.wait (); + + // Finish matching all the targets that we have started. + // + for (size_t i (0); i != n; ++i) + { + const target* m (ts[i]); + + if (m == nullptr || marked (m)) + continue; + + match (a, *m); + } + } + + // Instantiate only for what we need. + // + template LIBBUILD2_SYMEXPORT void + match_members<const target*> (action, target&, + const target* const*, size_t); + + template LIBBUILD2_SYMEXPORT void + match_members<prerequisite_target> (action, target&, + prerequisite_target const*, size_t); + + const fsdir* + inject_fsdir (action a, target& t, bool parent) + { + tracer trace ("inject_fsdir"); + + // If t is a directory (name is empty), say foo/bar/, then t is bar and + // its parent directory is foo/. + // + const dir_path& d (parent && t.name.empty () ? t.dir.directory () : t.dir); + + const scope& bs (scopes.find (d)); + const scope* rs (bs.root_scope ()); + + // If root scope is NULL, then this can mean that we are out of any + // project or if the directory is in src_root. In both cases we don't + // inject anything unless explicitly requested. + // + // Note that we also used to bail out if this is the root of the + // project. But that proved not to be such a great idea in case of + // subprojects (e.g., tests/). + // + const fsdir* r (nullptr); + if (rs != nullptr && !d.sub (rs->src_path ())) + { + l6 ([&]{trace << d << " for " << t;}); + + // Target is in the out tree, so out directory is empty. + // + r = &search<fsdir> (t, d, dir_path (), string (), nullptr, nullptr); + } + else + { + // See if one was mentioned explicitly. + // + for (const prerequisite& p: group_prerequisites (t)) + { + if (p.is_a<fsdir> ()) + { + const target& pt (search (t, p)); + + if (pt.dir == d) + { + r = &pt.as<fsdir> (); + break; + } + } + } + } + + if (r != nullptr) + { + match (a, *r); + t.prerequisite_targets[a].emplace_back (r); + } + + return r; + } + + // Execute the specified recipe (if any) and the scope operation callbacks + // (if any/applicable) then merge and return the resulting target state. + // + static target_state + execute_recipe (action a, target& t, const recipe& r) + { + target_state ts (target_state::unknown); + + try + { + auto df = make_diag_frame ( + [a, &t](const diag_record& dr) + { + if (verb != 0) + dr << info << "while " << diag_doing (a, t); + }); + + // If this is a dir{} target, see if we have any operation callbacks + // in the corresponding scope. 
+ // + const dir* op_t (t.is_a<dir> ()); + const scope* op_s (nullptr); + + using op_iterator = scope::operation_callback_map::const_iterator; + pair<op_iterator, op_iterator> op_p; + + if (op_t != nullptr) + { + op_s = &scopes.find (t.dir); + + if (op_s->out_path () == t.dir && !op_s->operation_callbacks.empty ()) + { + op_p = op_s->operation_callbacks.equal_range (a); + + if (op_p.first == op_p.second) + op_s = nullptr; // Ignore. + } + else + op_s = nullptr; // Ignore. + } + + // Pre operations. + // + // Note that here we assume the dir{} target cannot be part of a group + // and as a result we (a) don't try to avoid calling post callbacks in + // case of a group failure and (b) merge the pre and post states with + // the group state. + // + if (op_s != nullptr) + { + for (auto i (op_p.first); i != op_p.second; ++i) + if (const auto& f = i->second.pre) + ts |= f (a, *op_s, *op_t); + } + + // Recipe. + // + ts |= r != nullptr ? r (a, t) : target_state::unchanged; + + // Post operations. + // + if (op_s != nullptr) + { + for (auto i (op_p.first); i != op_p.second; ++i) + if (const auto& f = i->second.post) + ts |= f (a, *op_s, *op_t); + } + + // See the recipe documentation for details on what's going on here. + // Note that if the result is group, then the group's state can be + // failed. + // + switch (t[a].state = ts) + { + case target_state::changed: + case target_state::unchanged: + break; + case target_state::postponed: + ts = t[a].state = target_state::unchanged; + break; + case target_state::group: + ts = (*t.group)[a].state; + break; + default: + assert (false); + } + } + catch (const failed&) + { + ts = t[a].state = target_state::failed; + } + + return ts; + } + + void + update_backlink (const file& f, const path& l, bool changed, backlink_mode m) + { + using mode = backlink_mode; + + const path& p (f.path ()); + dir_path d (l.directory ()); + + // At low verbosity levels we print the command if the target changed or + // the link does not exist (we also treat errors as "not exist" and let + // the link update code below handle it). + // + // Note that in the changed case we print it even if the link is not + // actually updated to signal to the user that the updated out target is + // now available in src. + // + if (verb <= 2) + { + if (changed || !butl::entry_exists (l, + false /* follow_symlinks */, + true /* ignore_errors */)) + { + const char* c (nullptr); + switch (m) + { + case mode::link: + case mode::symbolic: c = verb >= 2 ? "ln -s" : "ln"; break; + case mode::hard: c = "ln"; break; + case mode::copy: + case mode::overwrite: c = l.to_directory () ? "cp -r" : "cp"; break; + } + + // Note: 'ln foo/ bar/' means a different thing. + // + if (verb >= 2) + text << c << ' ' << p.string () << ' ' << l.string (); + else + text << c << ' ' << f << " -> " << d; + } + } + + // What if there is no such subdirectory in src (some like to stash their + // executables in bin/ or some such). The easiest is probably just to + // create it even though we won't be cleaning it up. + // + if (!exists (d)) + mkdir_p (d, 2 /* verbosity */); + + update_backlink (p, l, m); + } + + void + update_backlink (const path& p, const path& l, bool changed, backlink_mode m) + { + // As above but with a slightly different diagnostics. 
+ + using mode = backlink_mode; + + dir_path d (l.directory ()); + + if (verb <= 2) + { + if (changed || !butl::entry_exists (l, + false /* follow_symlinks */, + true /* ignore_errors */)) + { + const char* c (nullptr); + switch (m) + { + case mode::link: + case mode::symbolic: c = verb >= 2 ? "ln -s" : "ln"; break; + case mode::hard: c = "ln"; break; + case mode::copy: + case mode::overwrite: c = l.to_directory () ? "cp -r" : "cp"; break; + } + + if (verb >= 2) + text << c << ' ' << p.string () << ' ' << l.string (); + else + text << c << ' ' << p.string () << " -> " << d; + } + } + + if (!exists (d)) + mkdir_p (d, 2 /* verbosity */); + + update_backlink (p, l, m); + } + + static inline void + try_rmbacklink (const path& l, + backlink_mode m, + bool ie /* ignore_errors */= false) + { + // See also clean_backlink() below. + + using mode = backlink_mode; + + if (l.to_directory ()) + { + switch (m) + { + case mode::link: + case mode::symbolic: + case mode::hard: try_rmsymlink (l, true /* directory */, ie); break; + case mode::copy: try_rmdir_r (path_cast<dir_path> (l), ie); break; + case mode::overwrite: break; + } + } + else + { + // try_rmfile() should work for symbolic and hard file links. + // + switch (m) + { + case mode::link: + case mode::symbolic: + case mode::hard: + case mode::copy: try_rmfile (l, ie); break; + case mode::overwrite: break; + } + } + } + + void + update_backlink (const path& p, const path& l, backlink_mode om) + { + using mode = backlink_mode; + + bool d (l.to_directory ()); + mode m (om); // Keep original mode. + + auto print = [&p, &l, &m, d] () + { + if (verb >= 3) + { + const char* c (nullptr); + switch (m) + { + case mode::link: + case mode::symbolic: c = "ln -sf"; break; + case mode::hard: c = "ln -f"; break; + case mode::copy: + case mode::overwrite: c = d ? "cp -r" : "cp"; break; + } + + text << c << ' ' << p.string () << ' ' << l.string (); + } + }; + + try + { + // Normally will be there. + // + if (!dry_run) + try_rmbacklink (l, m); + + // Skip (ad hoc) targets that don't exist. + // + if (!(d ? dir_exists (p) : file_exists (p))) + return; + + for (; !dry_run; ) // Retry/fallback loop. + try + { + switch (m) + { + case mode::link: + case mode::symbolic: mksymlink (p, l, d); break; + case mode::hard: mkhardlink (p, l, d); break; + case mode::copy: + case mode::overwrite: + { + if (d) + { + // Currently, for a directory, we do a "copy-link": we make the + // target directory and then link each entry (for now this is + // only used to "link" a Windows DLL assembly with only files + // inside). + // + dir_path fr (path_cast<dir_path> (p)); + dir_path to (path_cast<dir_path> (l)); + + try_mkdir (to); + + for (const auto& de: dir_iterator (fr, + false /* ignore_dangling */)) + { + path f (fr / de.path ()); + path t (to / de.path ()); + + update_backlink (f, t, mode::link); + } + } + else + cpfile (p, l, cpflags::overwrite_content); + + break; + } + } + + break; // Success. + } + catch (const system_error& e) + { + // If symlinks not supported, try a hardlink. + // + if (m == mode::link) + { + // Note that we are not guaranteed that the system_error exception + // is of the generic category. + // + int c (e.code ().value ()); + if (e.code ().category () == generic_category () && + (c == ENOSYS || // Not implemented. + c == EPERM)) // Not supported by the filesystem(s). 
+ { + m = mode::hard; + continue; + } + } + + throw; + } + } + catch (const system_error& e) + { + const char* w (nullptr); + switch (m) + { + case mode::link: + case mode::symbolic: w = "symbolic link"; break; + case mode::hard: w = "hard link"; break; + case mode::copy: + case mode::overwrite: w = "copy"; break; + } + + print (); + fail << "unable to make " << w << ' ' << l << ": " << e; + } + + print (); + } + + void + clean_backlink (const path& l, uint16_t v /*verbosity*/, backlink_mode m) + { + // Like try_rmbacklink() but with diagnostics and error handling. + + using mode = backlink_mode; + + if (l.to_directory ()) + { + switch (m) + { + case mode::link: + case mode::symbolic: + case mode::hard: rmsymlink (l, true /* directory */, v); break; + case mode::copy: rmdir_r (path_cast<dir_path> (l), true, v); break; + case mode::overwrite: break; + } + } + else + { + // remfile() should work for symbolic and hard file links. + // + switch (m) + { + case mode::link: + case mode::symbolic: + case mode::hard: + case mode::copy: rmfile (l, v); break; + case mode::overwrite: break; + } + } + } + + // If target/link path are syntactically to a directory, then the backlink + // is assumed to be to a directory, otherwise -- to a file. + // + struct backlink: auto_rm<path> + { + using path_type = build2::path; + + reference_wrapper<const path_type> target; + backlink_mode mode; + + backlink (const path_type& t, path_type&& l, backlink_mode m) + : auto_rm<path_type> (move (l)), target (t), mode (m) + { + assert (t.to_directory () == path.to_directory ()); + } + + ~backlink () + { + if (active) + { + try_rmbacklink (path, mode, true /* ignore_errors */); + active = false; + } + } + + backlink (backlink&&) = default; + backlink& operator= (backlink&&) = default; + }; + + // Normally (i.e., on sane platforms that don't have things like PDBs, etc) + // there will be just one backlink so optimize for that. + // + using backlinks = small_vector<backlink, 1>; + + static optional<backlink_mode> + backlink_test (const target& t, const lookup& l) + { + using mode = backlink_mode; + + optional<mode> r; + const string& v (cast<string> (l)); + + if (v == "true") r = mode::link; + else if (v == "symbolic") r = mode::symbolic; + else if (v == "hard") r = mode::hard; + else if (v == "copy") r = mode::copy; + else if (v == "overwrite") r = mode::overwrite; + else if (v != "false") + fail << "invalid backlink variable value '" << v << "' " + << "specified for target " << t; + + return r; + } + + static optional<backlink_mode> + backlink_test (action a, target& t) + { + // Note: the order of these checks is from the least to most expensive. + + // Only for plain update/clean. + // + if (a.outer () || (a != perform_update_id && a != perform_clean_id)) + return nullopt; + + // Only file-based targets in the out tree can be backlinked. + // + if (!t.out.empty () || !t.is_a<file> ()) + return nullopt; + + // Neither an out-of-project nor in-src configuration can be forwarded. + // + const scope& bs (t.base_scope ()); + const scope* rs (bs.root_scope ()); + if (rs == nullptr || bs.src_path () == bs.out_path ()) + return nullopt; + + // Only for forwarded configurations. + // + if (!cast_false<bool> (rs->vars[var_forwarded])) + return nullopt; + + lookup l (t.state[a][var_backlink]); + + // If not found, check for some defaults in the global scope (this does + // not happen automatically since target type/pattern-specific lookup + // stops at the project boundary). 
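// (For reference, the accepted values are the ones parsed by
// backlink_test() above: true, symbolic, hard, copy, overwrite, or false.
// In a buildfile this would hypothetically look something like
// 'exe{hello}: backlink = true' on a per-target basis.)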
+ // + if (!l.defined ()) + l = global_scope->find (*var_backlink, t.key ()); + + return l ? backlink_test (t, l) : nullopt; + } + + static backlinks + backlink_collect (action a, target& t, backlink_mode m) + { + using mode = backlink_mode; + + const scope& s (t.base_scope ()); + + backlinks bls; + auto add = [&bls, &s] (const path& p, mode m) + { + bls.emplace_back (p, s.src_path () / p.leaf (s.out_path ()), m); + }; + + // First the target itself. + // + add (t.as<file> ().path (), m); + + // Then ad hoc group file/fsdir members, if any. + // + for (const target* mt (t.member); mt != nullptr; mt = mt->member) + { + const path* p (nullptr); + + if (const file* f = mt->is_a<file> ()) + { + p = &f->path (); + + if (p->empty ()) // The "trust me, it's somewhere" case. + p = nullptr; + } + else if (const fsdir* d = mt->is_a<fsdir> ()) + p = &d->dir; + + if (p != nullptr) + { + // Check for a custom backlink mode for this member. If none, then + // inherit the one from the group (so if the user asked to copy .exe, + // we will also copy .pdb). + // + // Note that we want to avoid group or tt/patter-spec lookup. And + // since this is an ad hoc member (which means it was either declared + // in the buildfile or added by the rule), we assume that the value, + // if any, will be set as a rule-specific variable (since setting it + // as a target-specific wouldn't be MT-safe). @@ Don't think this + // applies to declared ad hoc members. + // + lookup l (mt->state[a].vars[var_backlink]); + + optional<mode> bm (l ? backlink_test (*mt, l) : m); + + if (bm) + add (*p, *bm); + } + } + + return bls; + } + + static inline backlinks + backlink_update_pre (action a, target& t, backlink_mode m) + { + return backlink_collect (a, t, m); + } + + static void + backlink_update_post (target& t, target_state ts, backlinks& bls) + { + if (ts == target_state::failed) + return; // Let auto rm clean things up. + + // Make backlinks. + // + for (auto b (bls.begin ()), i (b); i != bls.end (); ++i) + { + const backlink& bl (*i); + + if (i == b) + update_backlink (t.as<file> (), + bl.path, + ts == target_state::changed, + bl.mode); + else + update_backlink (bl.target, bl.path, bl.mode); + } + + // Cancel removal. + // + for (backlink& bl: bls) + bl.cancel (); + } + + static void + backlink_clean_pre (action a, target& t, backlink_mode m) + { + backlinks bls (backlink_collect (a, t, m)); + + for (auto b (bls.begin ()), i (b); i != bls.end (); ++i) + { + // Printing anything at level 1 will probably just add more noise. + // + backlink& bl (*i); + bl.cancel (); + clean_backlink (bl.path, i == b ? 2 : 3 /* verbosity */, bl.mode); + } + } + + static target_state + execute_impl (action a, target& t) + { + target::opstate& s (t[a]); + + assert (s.task_count.load (memory_order_consume) == target::count_busy () + && s.state == target_state::unknown); + + target_state ts; + try + { + // Handle target backlinking to forwarded configurations. + // + // Note that this function will never be called if the recipe is noop + // which is ok since such targets are probably not interesting for + // backlinking. 
+ // + backlinks bls; + optional<backlink_mode> blm (backlink_test (a, t)); + + if (blm) + { + if (a == perform_update_id) + bls = backlink_update_pre (a, t, *blm); + else + backlink_clean_pre (a, t, *blm); + } + + ts = execute_recipe (a, t, s.recipe); + + if (blm) + { + if (a == perform_update_id) + backlink_update_post (t, ts, bls); + } + } + catch (const failed&) + { + // If we could not backlink the target, then the best way to signal the + // failure seems to be to mark the target as failed. + // + ts = s.state = target_state::failed; + } + + // Decrement the target count (see set_recipe() for details). + // + if (a.inner ()) + { + recipe_function** f (s.recipe.target<recipe_function*> ()); + if (f == nullptr || *f != &group_action) + target_count.fetch_sub (1, memory_order_relaxed); + } + + // Decrement the task count (to count_executed) and wake up any threads + // that might be waiting for this target. + // + size_t tc (s.task_count.fetch_sub ( + target::offset_busy - target::offset_executed, + memory_order_release)); + assert (tc == target::count_busy ()); + sched.resume (s.task_count); + + return ts; + } + + target_state + execute (action a, + const target& ct, + size_t start_count, + atomic_count* task_count) + { + target& t (const_cast<target&> (ct)); // MT-aware. + target::opstate& s (t[a]); + + // Update dependency counts and make sure they are not skew. + // + size_t gd (dependency_count.fetch_sub (1, memory_order_relaxed)); + size_t td (s.dependents.fetch_sub (1, memory_order_release)); + assert (td != 0 && gd != 0); + td--; + + // Handle the "last" execution mode. + // + // This gets interesting when we consider interaction with groups. It seem + // to make sense to treat group members as dependents of the group, so, + // for example, if we try to clean the group via three of its members, + // only the last attempt will actually execute the clean. This means that + // when we match a group member, inside we should also match the group in + // order to increment the dependents count. This seems to be a natural + // requirement: if we are delegating to the group, we need to find a + // recipe for it, just like we would for a prerequisite. + // + // Note that we are also going to treat the group state as postponed. + // This is not a mistake: until we execute the recipe, we want to keep + // returning postponed. And once the recipe is executed, it will reset the + // state to group (see group_action()). To put it another way, the + // execution of this member is postponed, not of the group. + // + // Note also that the target execution is postponed with regards to this + // thread. For other threads the state will still be unknown (until they + // try to execute it). + // + if (current_mode == execution_mode::last && td != 0) + return target_state::postponed; + + // Try to atomically change applied to busy. + // + size_t tc (target::count_applied ()); + + size_t exec (target::count_executed ()); + size_t busy (target::count_busy ()); + + if (s.task_count.compare_exchange_strong ( + tc, + busy, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. + { + // Handle the noop recipe. + // + if (s.state == target_state::unchanged) + { + // There could still be scope operations. 
+ // + if (t.is_a<dir> ()) + execute_recipe (a, t, nullptr /* recipe */); + + s.task_count.store (exec, memory_order_release); + sched.resume (s.task_count); + } + else + { + if (task_count == nullptr) + return execute_impl (a, t); + + // Pass our diagnostics stack (this is safe since we expect the + // caller to wait for completion before unwinding its diag stack). + // + if (sched.async (start_count, + *task_count, + [a] (const diag_frame* ds, target& t) + { + diag_frame::stack_guard dsg (ds); + execute_impl (a, t); + }, + diag_frame::stack (), + ref (t))) + return target_state::unknown; // Queued. + + // Executed synchronously, fall through. + } + } + else + { + // Either busy or already executed. + // + if (tc >= busy) return target_state::busy; + else assert (tc == exec); + } + + return t.executed_state (a, false); + } + + target_state + execute_direct (action a, const target& ct) + { + target& t (const_cast<target&> (ct)); // MT-aware. + target::opstate& s (t[a]); + + // Similar logic to match() above except we execute synchronously. + // + size_t tc (target::count_applied ()); + + size_t exec (target::count_executed ()); + size_t busy (target::count_busy ()); + + if (s.task_count.compare_exchange_strong ( + tc, + busy, + memory_order_acq_rel, // Synchronize on success. + memory_order_acquire)) // Synchronize on failure. + { + if (s.state == target_state::unknown) + execute_impl (a, t); + else + { + assert (s.state == target_state::unchanged || + s.state == target_state::failed); + + if (s.state == target_state::unchanged) + { + if (t.is_a<dir> ()) + execute_recipe (a, t, nullptr /* recipe */); + } + + s.task_count.store (exec, memory_order_release); + sched.resume (s.task_count); + } + } + else + { + // If the target is busy, wait for it. + // + if (tc >= busy) sched.wait (exec, s.task_count, scheduler::work_none); + else assert (tc == exec); + } + + return t.executed_state (a); + } + + static inline void + blank_adhoc_member (const target*&) + { + } + + static inline void + blank_adhoc_member (prerequisite_target& pt) + { + if (pt.adhoc) + pt.target = nullptr; + } + + template <typename T> + target_state + straight_execute_members (action a, atomic_count& tc, + T ts[], size_t n, size_t p) + { + target_state r (target_state::unchanged); + + // Start asynchronous execution of prerequisites. + // + wait_guard wg (target::count_busy (), tc); + + n += p; + for (size_t i (p); i != n; ++i) + { + const target*& mt (ts[i]); + + if (mt == nullptr) // Skipped. + continue; + + target_state s (execute_async (a, *mt, target::count_busy (), tc)); + + if (s == target_state::postponed) + { + r |= s; + mt = nullptr; + } + } + + wg.wait (); + + // Now all the targets in prerequisite_targets must be either still busy + // or executed and synchronized (and we have blanked out all the postponed + // ones). + // + for (size_t i (p); i != n; ++i) + { + if (ts[i] == nullptr) + continue; + + const target& mt (*ts[i]); + + // If the target is still busy, wait for its completion. + // + const auto& tc (mt[a].task_count); + if (tc.load (memory_order_acquire) >= target::count_busy ()) + sched.wait (target::count_executed (), tc, scheduler::work_none); + + r |= mt.executed_state (a); + + blank_adhoc_member (ts[i]); + } + + return r; + } + + template <typename T> + target_state + reverse_execute_members (action a, atomic_count& tc, + T ts[], size_t n, size_t p) + { + // Pretty much as straight_execute_members() but in reverse order. 
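// (Executing members in reverse is how the "last"/"back" execution mode
// described in action.hxx plays out in practice: for example,
// perform_clean_extra() below removes the target file first and then
// cleans its prerequisites via reverse_execute_prerequisites().)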
+ // + target_state r (target_state::unchanged); + + wait_guard wg (target::count_busy (), tc); + + n = p - n; + for (size_t i (p); i != n; ) + { + const target*& mt (ts[--i]); + + if (mt == nullptr) + continue; + + target_state s (execute_async (a, *mt, target::count_busy (), tc)); + + if (s == target_state::postponed) + { + r |= s; + mt = nullptr; + } + } + + wg.wait (); + + for (size_t i (p); i != n; ) + { + if (ts[--i] == nullptr) + continue; + + const target& mt (*ts[i]); + + const auto& tc (mt[a].task_count); + if (tc.load (memory_order_acquire) >= target::count_busy ()) + sched.wait (target::count_executed (), tc, scheduler::work_none); + + r |= mt.executed_state (a); + + blank_adhoc_member (ts[i]); + } + + return r; + } + + // Instantiate only for what we need. + // + template LIBBUILD2_SYMEXPORT target_state + straight_execute_members<const target*> ( + action, atomic_count&, const target*[], size_t, size_t); + + template LIBBUILD2_SYMEXPORT target_state + reverse_execute_members<const target*> ( + action, atomic_count&, const target*[], size_t, size_t); + + template LIBBUILD2_SYMEXPORT target_state + straight_execute_members<prerequisite_target> ( + action, atomic_count&, prerequisite_target[], size_t, size_t); + + template LIBBUILD2_SYMEXPORT target_state + reverse_execute_members<prerequisite_target> ( + action, atomic_count&, prerequisite_target[], size_t, size_t); + + pair<optional<target_state>, const target*> + execute_prerequisites (const target_type* tt, + action a, const target& t, + const timestamp& mt, const execute_filter& ef, + size_t n) + { + assert (current_mode == execution_mode::first); + + auto& pts (t.prerequisite_targets[a]); + + if (n == 0) + n = pts.size (); + + // Pretty much as straight_execute_members() but hairier. + // + target_state rs (target_state::unchanged); + + wait_guard wg (target::count_busy (), t[a].task_count); + + for (size_t i (0); i != n; ++i) + { + const target*& pt (pts[i]); + + if (pt == nullptr) // Skipped. + continue; + + target_state s ( + execute_async ( + a, *pt, target::count_busy (), t[a].task_count)); + + if (s == target_state::postponed) + { + rs |= s; + pt = nullptr; + } + } + + wg.wait (); + + bool e (mt == timestamp_nonexistent); + const target* rt (tt != nullptr ? nullptr : &t); + + for (size_t i (0); i != n; ++i) + { + prerequisite_target& p (pts[i]); + + if (p == nullptr) + continue; + + const target& pt (*p.target); + + const auto& tc (pt[a].task_count); + if (tc.load (memory_order_acquire) >= target::count_busy ()) + sched.wait (target::count_executed (), tc, scheduler::work_none); + + target_state s (pt.executed_state (a)); + rs |= s; + + // Should we compare the timestamp to this target's? + // + if (!e && (p.adhoc || !ef || ef (pt, i))) + { + // If this is an mtime-based target, then compare timestamps. + // + if (const mtime_target* mpt = pt.is_a<mtime_target> ()) + { + timestamp mp (mpt->mtime ()); + + // The same logic as in mtime_target::newer() (but avoids a call to + // state()). + // + if (mt < mp || (mt == mp && s == target_state::changed)) + e = true; + } + else + { + // Otherwise we assume the prerequisite is newer if it was changed. + // + if (s == target_state::changed) + e = true; + } + } + + if (p.adhoc) + p.target = nullptr; // Blank out. + else + { + if (rt == nullptr && pt.is_a (*tt)) + rt = &pt; + } + } + + assert (rt != nullptr); + + return pair<optional<target_state>, const target*> ( + e ? optional<target_state> () : rs, + tt != nullptr ? 
rt : nullptr); + } + + target_state + noop_action (action a, const target& t) + { + text << "noop action triggered for " << diag_doing (a, t); + assert (false); // We shouldn't be called (see set_recipe()). + return target_state::unchanged; + } + + target_state + group_action (action a, const target& t) + { + // If the group is busy, we wait, similar to prerequisites. + // + const target& g (*t.group); + + target_state gs (execute (a, g)); + + if (gs == target_state::busy) + sched.wait (target::count_executed (), + g[a].task_count, + scheduler::work_none); + + // Return target_state::group to signal to execute() that this target's + // state comes from the group (which, BTW, can be failed). + // + // There is just one small problem: if the returned group state is + // postponed, then this means the group hasn't been executed yet. And if + // we return target_state::group, then this means any state queries (see + // executed_state()) will be directed to the target which might still not + // be executed or, worse, is being executed as we query. + // + // So in this case we return target_state::postponed (which will result in + // the member being treated as unchanged). This is how it is done for + // prerequisites and seeing that we've been acting as if the group is our + // prerequisite, there is no reason to deviate (see the recipe return + // value documentation for details). + // + return gs != target_state::postponed ? target_state::group : gs; + } + + target_state + default_action (action a, const target& t) + { + return execute_prerequisites (a, t); + } + + target_state + perform_clean_extra (action a, const file& ft, + const clean_extras& extras, + const clean_adhoc_extras& adhoc_extras) + { + // Clean the extras first and don't print the commands at verbosity level + // below 3. Note the first extra file/directory that actually got removed + // for diagnostics below. + // + // Note that dry-run is taken care of by the filesystem functions. + // + target_state er (target_state::unchanged); + bool ed (false); + path ep; + + auto clean_extra = [&er, &ed, &ep] (const file& f, + const path* fp, + const clean_extras& es) + { + for (const char* e: es) + { + size_t n; + if (e == nullptr || (n = strlen (e)) == 0) + continue; + + path p; + bool d; + + if (path::traits_type::absolute (e)) + { + p = path (e); + d = p.to_directory (); + } + else + { + if ((d = (e[n - 1] == '/'))) + --n; + + if (fp == nullptr) + { + fp = &f.path (); + assert (!fp->empty ()); // Must be assigned. + } + + p = *fp; + for (; *e == '-'; ++e) + p = p.base (); + + p.append (e, n); + } + + target_state r (target_state::unchanged); + + if (d) + { + dir_path dp (path_cast<dir_path> (p)); + + switch (build2::rmdir_r (dp, true, 3)) + { + case rmdir_status::success: + { + r = target_state::changed; + break; + } + case rmdir_status::not_empty: + { + if (verb >= 3) + text << dp << " is current working directory, not removing"; + break; + } + case rmdir_status::not_exist: + break; + } + } + else + { + if (rmfile (p, 3)) + r = target_state::changed; + } + + if (r == target_state::changed && ep.empty ()) + { + ed = d; + ep = move (p); + } + + er |= r; + } + }; + + const path& fp (ft.path ()); + + if (!fp.empty () && !extras.empty ()) + clean_extra (ft, nullptr, extras); + + target_state tr (target_state::unchanged); + + // Check if we were asked not to actually remove the files. The extras are + // tricky: some of them, like depdb should definitely be removed. But + // there could also be those that shouldn't. 
Currently we only use this + // for auto-generated source code where the only extra file, if any, is + // depdb so for now we treat them as "to remove" but in the future we may + // need to have two lists. + // + bool clean (cast_true<bool> (ft[var_clean])); + + // Now clean the ad hoc group file members, if any. + // + for (const target* m (ft.member); m != nullptr; m = m->member) + { + const file* mf (m->is_a<file> ()); + const path* mp (mf != nullptr ? &mf->path () : nullptr); + + if (mf == nullptr || mp->empty ()) + continue; + + if (!adhoc_extras.empty ()) + { + auto i (find_if (adhoc_extras.begin (), + adhoc_extras.end (), + [mf] (const clean_adhoc_extra& e) + { + return mf->is_a (e.type); + })); + + if (i != adhoc_extras.end ()) + clean_extra (*mf, mp, i->extras); + } + + if (!clean) + continue; + + // Make this "primary target" for diagnostics/result purposes if the + // primary target is unreal. + // + if (fp.empty ()) + { + if (rmfile (*mp, *mf)) + tr = target_state::changed; + } + else + { + target_state r (rmfile (*mp, 3) + ? target_state::changed + : target_state::unchanged); + + if (r == target_state::changed && ep.empty ()) + ep = *mp; + + er |= r; + } + } + + // Now clean the primary target and its prerequisited in the reverse order + // of update: first remove the file, then clean the prerequisites. + // + if (clean && !fp.empty () && rmfile (fp, ft)) + tr = target_state::changed; + + // Update timestamp in case there are operations after us that could use + // the information. + // + ft.mtime (timestamp_nonexistent); + + // Clean prerequisites. + // + tr |= reverse_execute_prerequisites (a, ft); + + // Factor the result of removing the extra files into the target state. + // While strictly speaking removing them doesn't change the target state, + // if we don't do this, then we may end up removing the file but still + // saying that everything is clean (e.g., if someone removes the target + // file but leaves the extra laying around). That would be confusing. + // + // What would also be confusing is if we didn't print any commands in + // this case. + // + if (tr != target_state::changed && er == target_state::changed) + { + if (verb > (current_diag_noise ? 0 : 1) && verb < 3) + { + if (ed) + text << "rm -r " << path_cast<dir_path> (ep); + else + text << "rm " << ep; + } + } + + tr |= er; + return tr; + } + + target_state + perform_clean (action a, const target& t) + { + const file& f (t.as<file> ()); + assert (!f.path ().empty ()); + return perform_clean_extra (a, f, {}); + } + + target_state + perform_clean_depdb (action a, const target& t) + { + const file& f (t.as<file> ()); + assert (!f.path ().empty ()); + return perform_clean_extra (a, f, {".d"}); + } + + target_state + perform_clean_group (action a, const target& xg) + { + const mtime_target& g (xg.as<mtime_target> ()); + + // Similar logic to perform_clean_extra() above. + // + target_state r (target_state::unchanged); + + if (cast_true<bool> (g[var_clean])) + { + for (group_view gv (g.group_members (a)); gv.count != 0; --gv.count) + { + if (const target* m = gv.members[gv.count - 1]) + { + if (rmfile (m->as<file> ().path (), *m)) + r |= target_state::changed; + } + } + } + + g.mtime (timestamp_nonexistent); + + r |= reverse_execute_prerequisites (a, g); + return r; + } + + target_state + perform_clean_group_depdb (action a, const target& g) + { + // The same twisted target state merging logic as in perform_clean_extra(). 
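    // (For example, if the first group member is a hypothetical foo.cxx,
    // then this additionally removes foo.cxx.d; the member files themselves
    // are removed by perform_clean_group() below.)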
+ // + target_state er (target_state::unchanged); + path ep; + + group_view gv (g.group_members (a)); + if (gv.count != 0) + { + ep = gv.members[0]->as<file> ().path () + ".d"; + + if (rmfile (ep, 3)) + er = target_state::changed; + } + + target_state tr (perform_clean_group (a, g)); + + if (tr != target_state::changed && er == target_state::changed) + { + if (verb > (current_diag_noise ? 0 : 1) && verb < 3) + text << "rm " << ep; + } + + tr |= er; + return tr; + } +} diff --git a/libbuild2/algorithm.hxx b/libbuild2/algorithm.hxx new file mode 100644 index 0000000..50e8423 --- /dev/null +++ b/libbuild2/algorithm.hxx @@ -0,0 +1,778 @@ +// file : libbuild2/algorithm.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_ALGORITHM_HXX +#define LIBBUILD2_ALGORITHM_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> +#include <libbuild2/target.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class prerequisite; + class prerequisite_key; + + // The default prerequisite search implementation. It first calls the + // prerequisite-type-specific search function. If that doesn't yeld + // anything, it creates a new target. + // + const target& + search (const target&, const prerequisite&); + + // As above but only search for an already existing target. + // + const target* + search_existing (const prerequisite&); + + // As above but cache a target searched in a custom way. + // + const target& + search_custom (const prerequisite&, const target&); + + // As above but specify the prerequisite to search as a key. + // + LIBBUILD2_SYMEXPORT const target& + search (const target&, const prerequisite_key&); + + LIBBUILD2_SYMEXPORT const target* + search_existing (const prerequisite_key&); + + // Uniform search interface for prerequisite/prerequisite_member. + // + inline const target& + search (const target& t, const prerequisite_member& p) {return p.search (t);} + + // As above but override the target type. Useful for searching for + // target group members where we need to search for a different + // target type. + // + const target& + search (const target&, const target_type&, const prerequisite_key&); + + // As above but specify the prerequisite to search as individual key + // components. Scope can be NULL if the directory is absolute. + // + const target& + search (const target&, + const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext = nullptr, // NULL means unspecified. + const scope* = nullptr, // NULL means dir is absolute. + const optional<project_name>& proj = nullopt); + + const target* + search_existing (const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext = nullptr, + const scope* = nullptr, + const optional<project_name>& proj = nullopt); + + // As above but specify the target type as template argument. + // + template <typename T> + const T& + search (const target&, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext = nullptr, + const scope* = nullptr); + + // Search for a target identified by the name. The semantics is "as if" we + // first created a prerequisite based on this name in exactly the same way + // as the parser would and then searched based on this prerequisite. 
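  // For example (a sketch; exe{hello} and the surrounding setup are
  // hypothetical), the following is roughly equivalent to the target having
  // had an exe{hello} prerequisite spelled out in a buildfile:
  //
  //   const target& x (search (t, name ("exe", "hello"), t.base_scope ()));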
+ // + LIBBUILD2_SYMEXPORT const target& + search (const target&, name, const scope&); + + // Unlike the above version, this one can be called during the execute + // phase. Return NULL for unknown target types. + // + LIBBUILD2_SYMEXPORT const target* + search_existing (const name&, + const scope&, + const dir_path& out = dir_path ()); + + // Target match lock: a non-const target reference and the target::offset_* + // state that has already been "achieved". Note that target::task_count + // itself is set to busy for the duration or the lock. While at it we also + // maintain a stack of active locks in the current dependency chain (used to + // detect dependency cycles). + // + struct LIBBUILD2_SYMEXPORT target_lock + { + using action_type = build2::action; + using target_type = build2::target; + + action_type action; + target_type* target = nullptr; + size_t offset = 0; + + explicit operator bool () const {return target != nullptr;} + + void + unlock (); + + // Movable-only type with move-assignment only to NULL lock. + // + target_lock () = default; + target_lock (target_lock&&); + target_lock& operator= (target_lock&&); + + target_lock (const target_lock&) = delete; + target_lock& operator= (const target_lock&) = delete; + + // Implementation details. + // + ~target_lock (); + target_lock (action_type, target_type*, size_t); + + struct data + { + action_type action; + target_type* target; + size_t offset; + }; + + data + release (); + + // Tip of the stack. + // + static const target_lock* + stack () noexcept; + + // Set the new and return the previous tip of the stack. + // + static const target_lock* + stack (const target_lock*) noexcept; + + const target_lock* prev; + + void + unstack (); + + struct stack_guard + { + explicit stack_guard (const target_lock* s): s_ (stack (s)) {} + ~stack_guard () {stack (s_);} + const target_lock* s_; + }; + }; + + // If this target is already locked in this dependency chain, then return + // the corresponding lock. Return NULL otherwise (so can be used a boolean + // predicate). + // + const target_lock* + dependency_cycle (action, const target&); + + // If the target is already applied (for this action) or executed, then no + // lock is acquired. Otherwise, the target must not yet be matched for this + // action. + // + // @@ MT fuzzy: what if it is already in the desired state, why assert? + // Currently we only use it with match_recipe() and if it is matched + // but not applied, then it's not clear why we are overriding that + // match. + // + target_lock + lock (action, const target&); + + // Add an ad hoc member to the end of the chain assuming that an already + // existing member of this target type is the same. Return the newly added + // or already existing target. The member directories (dir and out) are + // expected to be absolute and normalized. + // + // Note that here and in find_adhoc_member() below (as well as in + // perform_clean_extra()) we use target type (as opposed to, say, type and + // name) as the member's identity. This fits our current needs where every + // (rule-managed) ad hoc member has a unique target type and we have no need + // for multiple members of the same type. This also allows us to support + // things like changing the ad hoc member name by declaring it in a + // buildfile. + // + LIBBUILD2_SYMEXPORT target& + add_adhoc_member (target&, + const target_type&, + const dir_path& dir, + const dir_path& out, + string name); + + // If the extension is specified then it is added to the member's target + // name. 
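  // For example (sketch), a rule updating exe{foo} could register a linker
  // map file it also produces as an ad hoc file{} member:
  //
  //   file& mf (add_adhoc_member<file> (t, "map")); // Member named foo.map.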
+ // + target& + add_adhoc_member (target&, const target_type&, const char* ext = nullptr); + + template <typename T> + inline T& + add_adhoc_member (target& g, const target_type& tt, const char* e = nullptr) + { + return static_cast<T&> (add_adhoc_member (g, tt, e)); + } + + template <typename T> + inline T& + add_adhoc_member (target& g, const char* e = nullptr) + { + return add_adhoc_member<T> (g, T::static_type, e); + } + + // Find an ad hoc member of the specified target type returning NULL if not + // found. + // + target* + find_adhoc_member (target&, const target_type&); + + const target* + find_adhoc_member (const target&, const target_type&); + + template <typename T> + inline T* + find_adhoc_member (target& g, const target_type& tt) + { + return static_cast<T*> (find_adhoc_member (g, tt)); + } + + template <typename T> + inline const T* + find_adhoc_member (const target& g, const target_type& tt) + { + return static_cast<const T*> (find_adhoc_member (g, tt)); + } + + template <typename T> + inline const T* + find_adhoc_member (const target& g) + { + return find_adhoc_member<T> (g, T::static_type); + } + + template <typename T> + inline T* + find_adhoc_member (target& g) + { + return find_adhoc_member<T> (g, T::static_type); + } + + // Match and apply a rule to the action/target with ambiguity detection. + // Increment the target's dependents count, which means that you should call + // this function with the intent to also call execute(). Return the target + // state translating target_state::failed to the failed exception unless + // instructed otherwise. + // + // The try_match() version doesn't issue diagnostics if there is no rule + // match (but fails as match() for all other errors, like rule ambiguity, + // inability to apply, etc). The first half of the result indicated whether + // there was a rule match. + // + // The unmatch argument allows optimizations that avoid calling execute(). + // If it is unmatch::unchanged then only unmatch the target if it is known + // to be unchanged after match. If it is unmatch::safe, then unmatch the + // target if it is safe (this includes unchanged or if we know that someone + // else will execute this target). Return true if unmatch succeeded. Always + // throw if failed. + // + enum class unmatch {none, unchanged, safe}; + + target_state + match (action, const target&, bool fail = true); + + pair<bool, target_state> + try_match (action, const target&, bool fail = true); + + bool + match (action, const target&, unmatch); + + // Start asynchronous match. Return target_state::postponed if the + // asynchrounous operation has been started and target_state::busy if the + // target has already been busy. Regardless of the result, match() must be + // called in order to complete the operation (except target_state::failed). + // + // If fail is false, then return target_state::failed if the target match + // failed. Otherwise, throw the failed exception if keep_going is false and + // return target_state::failed otherwise. + // + target_state + match_async (action, const target&, + size_t start_count, atomic_count& task_count, + bool fail = true); + + // Match by specifying the recipe directly and without incrementing the + // dependency counts. The target must be locked. + // + void + match_recipe (target_lock&, recipe); + + // Match a "delegate rule" from withing another rules' apply() function + // avoiding recursive matches (thus the third argument). Unless try_match is + // true, fail if no rule is found. 
Otherwise return empty recipe. Note that + // unlike match(), this function does not increment the dependents count and + // the two rules must coordinate who is using the target's data pad and/or + // prerequisite_targets. See also the companion execute_delegate(). + // + recipe + match_delegate (action, target&, const rule&, bool try_match = false); + + // Match a rule for the inner operation from withing the outer rule's + // apply() function. See also the companion execute_inner(). + // + target_state + match_inner (action, const target&); + + bool + match_inner (action, const target&, unmatch); + + // The standard prerequisite search and match implementations. They call + // search() (unless a custom is provided) and then match() (unless custom + // returned NULL) for each prerequisite in a loop omitting out of project + // prerequisites for the clean operation. If this target is a member of a + // group, then first do this to the group's prerequisites. + // + using match_search = function< + prerequisite_target (action, + const target&, + const prerequisite&, + include_type)>; + + void + match_prerequisites (action, target&, const match_search& = nullptr); + + // As above but go into group members. + // + // Note that if we cleaning, this function doesn't go into group members, as + // an optimization (the group should clean everything up). + // + using match_search_member = function< + prerequisite_target (action, + const target&, + const prerequisite_member&, + include_type)>; + + void + match_prerequisite_members (action, target&, + const match_search_member& = nullptr); + + // As above but omit prerequisites that are not in the specified scope. + // + void + match_prerequisites (action, target&, const scope&); + + void + match_prerequisite_members (action, target&, const scope&); + + // Match (already searched) members of a group or similar prerequisite-like + // dependencies. Similar in semantics to match_prerequisites(). Any marked + // target pointers are skipped. + // + // T can only be const target* or prerequisite_target. + // + template <typename T> + void + match_members (action, target&, T const*, size_t); + + template <size_t N> + inline void + match_members (action a, target& t, const target* (&ts)[N]) + { + match_members (a, t, ts, N); + } + + inline void + match_members (action a, + target& t, + prerequisite_targets& ts, + size_t start = 0) + { + match_members (a, t, ts.data () + start, ts.size () - start); + } + + // Unless already known, match, and, if necessary, execute the group in + // order to resolve its members list. Note that even after that the member's + // list might still not be available (e.g., if some wildcard/ fallback rule + // matched). + // + // If the action is for an outer operation, then it is changed to inner + // which means the members are always resolved by the inner (e.g., update) + // rule. This feels right since this is the rule that will normally do the + // work (e.g., update) and therefore knows what it will produce (and if we + // don't do this, then the group resolution will be racy since we will use + // two different task_count instances for synchronization). + // + group_view + resolve_members (action, const target&); + + // Unless already known, match the target in order to resolve its group. + // + // Unlike the member case, a rule can only decide whether a target is a + // member of the group in its match() since otherwise it (presumably) should + // not match (and some other rule may). 
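  // For example (sketch; obj{}/obje{} come from the bin module), a rule that
  // matched an individual obje{} member could recover its obj{} group:
  //
  //   if (const target* g = resolve_group (a, t))
  //     ... // Now also available as t.group.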
+ // + // If the action is for an outer operation, then it is changed to inner, the + // same as for members. + // + const target* + resolve_group (action, const target&); + + // Inject dependency on the target's directory fsdir{}, unless it is in the + // src tree or is outside of any project (say, for example, an installation + // directory). If the parent argument is true, then inject the parent + // directory of a target that is itself a directory (name is empty). Return + // the injected target or NULL. Normally this function is called from the + // rule's apply() function. + // + // As an extension, this function will also search for an existing fsdir{} + // prerequisite for the directory and if one exists, return that (even if + // the target is in src tree). This can be used, for example, to place + // output into an otherwise non-existent directory. + // + LIBBUILD2_SYMEXPORT const fsdir* + inject_fsdir (action, target&, bool parent = true); + + // Execute the action on target, assuming a rule has been matched and the + // recipe for this action has been set. This is the synchrounous executor + // implementation (but may still return target_state::busy if the target + // is already being executed). Decrements the dependents count. + // + // Note: does not translate target_state::failed to the failed exception. + // + target_state + execute (action, const target&); + + // As above but wait for completion if the target is busy and translate + // target_state::failed to the failed exception. + // + target_state + execute_wait (action, const target&); + + // As above but start asynchronous execution. Return target_state::unknown + // if the asynchrounous execution has been started and target_state::busy if + // the target has already been busy. + // + // If fail is false, then return target_state::failed if the target match + // failed. Otherwise, throw the failed exception if keep_going is false and + // return target_state::failed otherwise. + // + target_state + execute_async (action, const target&, + size_t start_count, atomic_count& task_count, + bool fail = true); + + // Execute the recipe obtained with match_delegate(). Note that the target's + // state is neither checked nor updated by this function. In other words, + // the appropriate usage is to call this function from another recipe and to + // factor the obtained state into the one returned. + // + target_state + execute_delegate (const recipe&, action, const target&); + + // Execute the inner operation matched with match_inner(). Note that the + // returned target state is for the inner operation. The appropriate usage + // is to call this function from the outer operation's recipe and to factor + // the obtained state into the one returned (similar to how we do it for + // prerequisites). + // + // Note: waits for the completion if the target is busy and translates + // target_state::failed to the failed exception. + // + target_state + execute_inner (action, const target&); + + // A special version of the above that should be used for "direct" and "now" + // execution, that is, side-stepping the normal target-prerequisite + // relationship (so no dependents count is decremented) and execution order + // (so this function never returns the postponed target state). + // + // Note: waits for the completion if the target is busy and translates + // target_state::failed to the failed exception. 
+ // + LIBBUILD2_SYMEXPORT target_state + execute_direct (action, const target&); + + // The default prerequisite execute implementation. Call execute_async() on + // each non-ignored (non-NULL) prerequisite target in a loop and then wait + // for their completion. Return target_state::changed if any of them were + // changed and target_state::unchanged otherwise. If a prerequisite's + // execution is postponed (and thus its state cannot be queried MT-safely) + // of if the prerequisite is marked as ad hoc, then set its pointer in + // prerequisite_targets to NULL. If count is not 0, then only the first + // count prerequisites are executed beginning from start. + // + // Note that because after the call the ad hoc prerequisites are no longer + // easily accessible, this function shouldn't be used in rules that make a + // timestamp-based out-of-date'ness determination (which must take into + // account such prerequisites). Instead, consider the below versions that + // incorporate the timestamp check and do the right thing. + // + target_state + straight_execute_prerequisites (action, const target&, + size_t count = 0, size_t start = 0); + + // As above but iterates over the prerequisites in reverse. + // + target_state + reverse_execute_prerequisites (action, const target&, size_t count = 0); + + // Call straight or reverse depending on the current mode. + // + target_state + execute_prerequisites (action, const target&, size_t count = 0); + + // As above but execute prerequisites for the inner action (that have + // been matched with match_inner()). + // + target_state + straight_execute_prerequisites_inner (action, const target&, + size_t count = 0, size_t start = 0); + + target_state + reverse_execute_prerequisites_inner (action, const target&, size_t count = 0); + + target_state + execute_prerequisites_inner (action, const target&, size_t count = 0); + + // A version of the above that also determines whether the action needs to + // be executed on the target based on the passed timestamp and filter. If + // count is not 0, then only the first count prerequisites are executed. + // + // The filter is passed each prerequisite target and is expected to signal + // which ones should be used for timestamp comparison. If the filter is + // NULL, then all the prerequisites are used. Note that ad hoc prerequisites + // are always used. + // + // Note that the return value is an optional target state. If the target + // needs updating, then the value is absent. Otherwise it is the state that + // should be returned. This is used to handle the situation where some + // prerequisites were updated but no update of the target is necessary. In + // this case we still signal that the target was (conceptually, but not + // physically) changed. This is important both to propagate the fact that + // some work has been done and to also allow our dependents to detect this + // case if they are up to something tricky (like recursively linking liba{} + // prerequisites). + // + // Note that because we use mtime, this function should normally only be + // used in the perform_update action (which is straight). 
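  // For example (a sketch; foo{} is a hypothetical source target type and
  // the rule's output is a file{}-based target):
  //
  //   static target_state
  //   perform_update (action a, const target& xt)
  //   {
  //     const file& t (xt.as<file> ());
  //
  //     auto p (execute_prerequisites<foo> (a, t, t.load_mtime ()));
  //
  //     if (p.first)
  //       return *p.first; // Nothing to do, propagate prerequisite state.
  //
  //     const foo& s (p.second); // First foo{} prerequisite.
  //
  //     // ... (re)generate t from s ...
  //
  //     t.mtime (system_clock::now ());
  //     return target_state::changed;
  //   }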
+ // + using execute_filter = function<bool (const target&, size_t pos)>; + + optional<target_state> + execute_prerequisites (action, const target&, + const timestamp&, + const execute_filter& = nullptr, + size_t count = 0); + + // Another version of the above that does two extra things for the caller: + // it determines whether the action needs to be executed on the target based + // on the passed timestamp and finds a prerequisite of the specified type + // (e.g., a source file). If there are multiple prerequisites of this type, + // then the first is returned (this can become important if additional + // prerequisites of the same type get injected). + // + template <typename T> + pair<optional<target_state>, const T&> + execute_prerequisites (action, const target&, + const timestamp&, + const execute_filter& = nullptr, + size_t count = 0); + + pair<optional<target_state>, const target&> + execute_prerequisites (const target_type&, + action, const target&, + const timestamp&, + const execute_filter& = nullptr, + size_t count = 0); + + template <typename T> + pair<optional<target_state>, const T&> + execute_prerequisites (const target_type&, + action, const target&, + const timestamp&, + const execute_filter& = nullptr, + size_t count = 0); + + // Execute members of a group or similar prerequisite-like dependencies. + // Similar in semantics to execute_prerequisites(). + // + // T can only be const target* or prerequisite_target. If it is the latter, + // the ad hoc blank out semantics described in execute_prerequsites() is in + // effect. + // + template <typename T> + target_state + straight_execute_members (action, atomic_count&, T[], size_t, size_t); + + template <typename T> + target_state + reverse_execute_members (action, atomic_count&, T[], size_t, size_t); + + template <typename T> + inline target_state + straight_execute_members (action a, const target& t, + T ts[], size_t c, size_t s) + { + return straight_execute_members (a, t[a].task_count, ts, c, s); + } + + template <typename T> + inline target_state + reverse_execute_members (action a, const target& t, + T ts[], size_t c, size_t s) + { + return reverse_execute_members (a, t[a].task_count, ts, c, s); + } + + // Call straight or reverse depending on the current mode. + // + target_state + execute_members (action, const target&, const target*[], size_t); + + template <size_t N> + inline target_state + straight_execute_members (action a, const target& t, const target* (&ts)[N]) + { + return straight_execute_members (a, t, ts, N, 0); + } + + template <size_t N> + inline target_state + reverse_execute_members (action a, const target& t, const target* (&ts)[N]) + { + return reverse_execute_members (a, t, ts, N, N); + } + + template <size_t N> + inline target_state + execute_members (action a, const target& t, const target* (&ts)[N]) + { + return execute_members (a, t, ts, N); + } + + // Return noop_recipe instead of using this function directly. + // + LIBBUILD2_SYMEXPORT target_state + noop_action (action, const target&); + + // Default action implementation which forwards to the prerequisites. + // Use default_recipe instead of using this function directly. + // + LIBBUILD2_SYMEXPORT target_state + default_action (action, const target&); + + // Standard perform(clean) action implementation for the file target + // (or derived). + // + LIBBUILD2_SYMEXPORT target_state + perform_clean (action, const target&); + + // As above, but also removes the auxiliary dependency database (.d file). 
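  // For example (sketch), a rule that maintains such a .d file would
  // normally return this function as its perform(clean) recipe from apply():
  //
  //   switch (a)
  //   {
  //   case perform_update_id: return &perform_update;
  //   case perform_clean_id:  return &perform_clean_depdb;
  //   default:                return noop_recipe;
  //   }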
+ // + LIBBUILD2_SYMEXPORT target_state + perform_clean_depdb (action, const target&); + + // As above but clean the target group. The group should be an mtime_target + // and members should be files. + // + LIBBUILD2_SYMEXPORT target_state + perform_clean_group (action, const target&); + + // As above but clean both the target group and depdb. The depdb file path + // is derived from the first member file path. + // + LIBBUILD2_SYMEXPORT target_state + perform_clean_group_depdb (action, const target&); + + // Helper for custom perform(clean) implementations that cleans extra files + // and directories (recursively) specified as a list of either absolute + // paths or "path derivation directives". The directive string can be NULL, + // or empty in which case it is ignored. If the last character in a + // directive is '/', then the resulting path is treated as a directory + // rather than a file. The directive can start with zero or more '-' + // characters which indicate the number of extensions that should be + // stripped before the new extension (if any) is added (so if you want to + // strip the extension, specify just "-"). For example: + // + // perform_clean_extra (a, t, {".d", ".dlls/", "-.dll"}); + // + // The extra files/directories are removed first in the specified order + // followed by the ad hoc group member, then target itself, and, finally, + // the prerequisites in the reverse order. + // + // You can also clean extra files derived from ad hoc group members that are + // "indexed" using using their target types (see add/find_adhoc_member() for + // details). + // + // Note that if the target path is empty then it is assumed "unreal" and is + // not cleaned (but its prerequisites/members still are). + // + using clean_extras = small_vector<const char*, 8>; + + struct clean_adhoc_extra + { + const target_type& type; + clean_extras extras; + }; + + using clean_adhoc_extras = small_vector<clean_adhoc_extra, 2>; + + LIBBUILD2_SYMEXPORT target_state + perform_clean_extra (action, const file&, + const clean_extras&, + const clean_adhoc_extras& = {}); + + inline target_state + perform_clean_extra (action a, const file& f, + initializer_list<const char*> e) + { + return perform_clean_extra (a, f, clean_extras (e)); + } + + // Update/clean a backlink issuing appropriate diagnostics at appropriate + // levels depending on the overload and the changed argument. + // + enum class backlink_mode + { + link, // Make a symbolic link if possible, hard otherwise. + symbolic, // Make a symbolic link. + hard, // Make a hard link. + copy, // Make a copy. + overwrite // Copy over but don't remove on clean (committed gen code). 
+ }; + + LIBBUILD2_SYMEXPORT void + update_backlink (const file& target, + const path& link, + bool changed, + backlink_mode = backlink_mode::link); + + LIBBUILD2_SYMEXPORT void + update_backlink (const path& target, + const path& link, + bool changed, + backlink_mode = backlink_mode::link); + + LIBBUILD2_SYMEXPORT void + update_backlink (const path& target, + const path& link, + backlink_mode = backlink_mode::link); + + LIBBUILD2_SYMEXPORT void + clean_backlink (const path& link, + uint16_t verbosity, + backlink_mode = backlink_mode::link); +} + +#include <libbuild2/algorithm.ixx> + +#endif // LIBBUILD2_ALGORITHM_HXX diff --git a/libbuild2/algorithm.ixx b/libbuild2/algorithm.ixx new file mode 100644 index 0000000..7d68611 --- /dev/null +++ b/libbuild2/algorithm.ixx @@ -0,0 +1,764 @@ +// file : libbuild2/algorithm.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/rule.hxx> +#include <libbuild2/context.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + inline const target& + search (const target& t, const prerequisite& p) + { + assert (phase == run_phase::match); + + const target* r (p.target.load (memory_order_consume)); + + if (r == nullptr) + r = &search_custom (p, search (t, p.key ())); + + return *r; + } + + inline const target* + search_existing (const prerequisite& p) + { + assert (phase == run_phase::match || phase == run_phase::execute); + + const target* r (p.target.load (memory_order_consume)); + + if (r == nullptr) + { + r = search_existing (p.key ()); + + if (r != nullptr) + search_custom (p, *r); + } + + return r; + } + + inline const target& + search_custom (const prerequisite& p, const target& t) + { + assert (phase == run_phase::match || phase == run_phase::execute); + + const target* e (nullptr); + if (!p.target.compare_exchange_strong ( + e, &t, + memory_order_release, + memory_order_consume)) + assert (e == &t); + + return t; + } + + inline const target& + search (const target& t, const target_type& tt, const prerequisite_key& k) + { + return search ( + t, + prerequisite_key { + k.proj, {&tt, k.tk.dir, k.tk.out, k.tk.name, k.tk.ext}, k.scope}); + } + + inline const target& + search (const target& t, + const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext, + const scope* scope, + const optional<project_name>& proj) + { + return search ( + t, + prerequisite_key { + proj, + { + &type, + &dir, &out, &name, + ext != nullptr ? optional<string> (*ext) : nullopt + }, + scope}); + } + + inline const target* + search_existing (const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext, + const scope* scope, + const optional<project_name>& proj) + { + return search_existing ( + prerequisite_key { + proj, + { + &type, + &dir, &out, &name, + ext != nullptr ? 
optional<string> (*ext) : nullopt + }, + scope}); + } + + template <typename T> + inline const T& + search (const target& t, + const dir_path& dir, + const dir_path& out, + const string& name, + const string* ext, + const scope* scope) + { + return search ( + t, T::static_type, dir, out, name, ext, scope).template as<T> (); + } + + LIBBUILD2_SYMEXPORT target_lock + lock_impl (action, const target&, optional<scheduler::work_queue>); + + LIBBUILD2_SYMEXPORT void + unlock_impl (action, target&, size_t); + + inline target_lock:: + target_lock (action_type a, target_type* t, size_t o) + : action (a), target (t), offset (o) + { + if (target != nullptr) + prev = stack (this); + } + + inline void target_lock:: + unstack () + { + if (target != nullptr && prev != this) + { + const target_lock* cur (stack (prev)); + assert (cur == this); + prev = this; + } + } + + inline void target_lock:: + unlock () + { + if (target != nullptr) + { + unlock_impl (action, *target, offset); + + if (prev != this) + { + const target_lock* cur (stack (prev)); + assert (cur == this); + } + + target = nullptr; + } + } + + inline auto target_lock:: + release () -> data + { + data r {action, target, offset}; + + if (target != nullptr) + { + if (prev != this) + { + const target_lock* cur (stack (prev)); + assert (cur == this); + } + + target = nullptr; + } + + return r; + } + + inline target_lock:: + ~target_lock () + { + unlock (); + } + + inline target_lock:: + target_lock (target_lock&& x) + : action (x.action), target (x.target), offset (x.offset) + { + if (target != nullptr) + { + if (x.prev != &x) + { + const target_lock* cur (stack (this)); + assert (cur == &x); + prev = x.prev; + } + else + prev = this; + + x.target = nullptr; + } + } + + inline target_lock& target_lock:: + operator= (target_lock&& x) + { + if (this != &x) + { + assert (target == nullptr); + + action = x.action; + target = x.target; + offset = x.offset; + + if (target != nullptr) + { + if (x.prev != &x) + { + const target_lock* cur (stack (this)); + assert (cur == &x); + prev = x.prev; + } + else + prev = this; + + x.target = nullptr; + } + } + + return *this; + } + + inline const target_lock* + dependency_cycle (action a, const target& t) + { + const target_lock* l (target_lock::stack ()); + + for (; l != nullptr; l = l->prev) + { + if (l->action == a && l->target == &t) + break; + } + + return l; + } + + inline target_lock + lock (action a, const target& t) + { + // We don't allow locking a target that has already been matched. 
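    // Typical use is together with match_recipe(). For example (sketch), to
    // make an ad hoc group member m take its state from its group:
    //
    //   if (target_lock l = lock (a, m))
    //     match_recipe (l, group_recipe);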
+ // + target_lock r (lock_impl (a, t, scheduler::work_none)); + assert (!r || + r.offset == target::offset_touched || + r.offset == target::offset_tried); + return r; + } + + inline target& + add_adhoc_member (target& t, const target_type& tt, const char* e) + { + string n (t.name); + + if (e != nullptr) + { + n += '.'; + n += e; + } + + return add_adhoc_member (t, tt, t.dir, t.out, move (n)); + } + + inline target* + find_adhoc_member (target& g, const target_type& tt) + { + target* m (g.member); + for (; m != nullptr && !m->is_a (tt); m = m->member) ; + return m; + } + + inline const target* + find_adhoc_member (const target& g, const target_type& tt) + { + const target* m (g.member); + for (; m != nullptr && !m->is_a (tt); m = m->member) ; + return m; + } + + LIBBUILD2_SYMEXPORT const rule_match* + match_impl (action, target&, const rule* skip, bool try_match = false); + + LIBBUILD2_SYMEXPORT recipe + apply_impl (action, target&, const rule_match&); + + LIBBUILD2_SYMEXPORT pair<bool, target_state> + match (action, const target&, size_t, atomic_count*, bool try_match = false); + + inline void + match_inc_dependens (action a, const target& t) + { + dependency_count.fetch_add (1, memory_order_relaxed); + t[a].dependents.fetch_add (1, memory_order_release); + } + + inline target_state + match (action a, const target& t, bool fail) + { + assert (phase == run_phase::match); + + target_state r (match (a, t, 0, nullptr).second); + + if (r != target_state::failed) + match_inc_dependens (a, t); + else if (fail) + throw failed (); + + return r; + } + + inline pair<bool, target_state> + try_match (action a, const target& t, bool fail) + { + assert (phase == run_phase::match); + + pair<bool, target_state> r ( + match (a, t, 0, nullptr, true /* try_match */)); + + if (r.first) + { + if (r.second != target_state::failed) + match_inc_dependens (a, t); + else if (fail) + throw failed (); + } + + return r; + } + + inline bool + match (action a, const target& t, unmatch um) + { + assert (phase == run_phase::match); + + target_state s (match (a, t, 0, nullptr).second); + + if (s == target_state::failed) + throw failed (); + + switch (um) + { + case unmatch::none: break; + case unmatch::unchanged: + { + if (s == target_state::unchanged) + return true; + + break; + } + case unmatch::safe: + { + // Safe if unchanged or someone else is also a dependent (note that + // we never decrement this count during match so that someone else + // cannot change their mind). + // + if (s == target_state::unchanged || + t[a].dependents.load (memory_order_consume) != 0) + return true; + + break; + } + } + + match_inc_dependens (a, t); + return false; + } + + inline target_state + match_async (action a, const target& t, + size_t sc, atomic_count& tc, + bool fail) + { + assert (phase == run_phase::match); + target_state r (match (a, t, sc, &tc).second); + + if (fail && !keep_going && r == target_state::failed) + throw failed (); + + return r; + } + + inline void + set_recipe (target_lock& l, recipe&& r) + { + target::opstate& s ((*l.target)[l.action]); + + s.recipe = move (r); + + // If this is a noop recipe, then mark the target unchanged to allow for + // some optimizations. + // + recipe_function** f (s.recipe.target<recipe_function*> ()); + + if (f != nullptr && *f == &noop_action) + s.state = target_state::unchanged; + else + { + s.state = target_state::unknown; + + // This gets tricky when we start considering direct execution, etc. So + // here seems like the best place to do it. 
+ // + // We also ignore the group recipe since group action means real recipe + // is in the group and so this feels right conceptually. + // + // We also avoid incrementing this count twice for the same target if we + // have both the inner and outer operations. In our model the outer + // operation is either noop or it must delegate to the inner. While it's + // possible the inner is noop while the outer is not, it is not very + // likely. The alternative (trying to "merge" the count keeping track of + // whether inner and/or outer is noop) gets hairy rather quickly. + // + if (l.action.inner ()) + { + if (f == nullptr || *f != &group_action) + target_count.fetch_add (1, memory_order_relaxed); + } + } + } + + inline void + match_recipe (target_lock& l, recipe r) + { + assert (phase == run_phase::match && l.target != nullptr); + + (*l.target)[l.action].rule = nullptr; // No rule. + set_recipe (l, move (r)); + l.offset = target::offset_applied; + } + + inline recipe + match_delegate (action a, target& t, const rule& dr, bool try_match) + { + assert (phase == run_phase::match); + + // Note: we don't touch any of the t[a] state since that was/will be set + // for the delegating rule. + // + const rule_match* r (match_impl (a, t, &dr, try_match)); + return r != nullptr ? apply_impl (a, t, *r) : empty_recipe; + } + + inline target_state + match_inner (action a, const target& t) + { + // In a sense this is like any other dependency. + // + assert (a.outer ()); + return match (a.inner_action (), t); + } + + inline bool + match_inner (action a, const target& t, unmatch um) + { + assert (a.outer ()); + return match (a.inner_action (), t, um); + } + + LIBBUILD2_SYMEXPORT group_view + resolve_members_impl (action, const target&, target_lock); + + inline group_view + resolve_members (action a, const target& g) + { + group_view r; + + if (a.outer ()) + a = a.inner_action (); + + // We can be called during execute though everything should have been + // already resolved. + // + switch (phase) + { + case run_phase::match: + { + // Grab a target lock to make sure the group state is synchronized. + // + target_lock l (lock_impl (a, g, scheduler::work_none)); + r = g.group_members (a); + + // If the group members are alrealy known or there is nothing else + // we can do, then unlock and return. + // + if (r.members == nullptr && l.offset != target::offset_executed) + r = resolve_members_impl (a, g, move (l)); + + break; + } + case run_phase::execute: r = g.group_members (a); break; + case run_phase::load: assert (false); + } + + return r; + } + + LIBBUILD2_SYMEXPORT void + resolve_group_impl (action, const target&, target_lock); + + inline const target* + resolve_group (action a, const target& t) + { + if (a.outer ()) + a = a.inner_action (); + + switch (phase) + { + case run_phase::match: + { + // Grab a target lock to make sure the group state is synchronized. + // + target_lock l (lock_impl (a, t, scheduler::work_none)); + + // If the group is alrealy known or there is nothing else we can do, + // then unlock and return. 
+ // + if (t.group == nullptr && l.offset < target::offset_tried) + resolve_group_impl (a, t, move (l)); + + break; + } + case run_phase::execute: break; + case run_phase::load: assert (false); + } + + return t.group; + } + + LIBBUILD2_SYMEXPORT void + match_prerequisites (action, target&, const match_search&, const scope*); + + LIBBUILD2_SYMEXPORT void + match_prerequisite_members (action, target&, + const match_search_member&, + const scope*); + + inline void + match_prerequisites (action a, target& t, const match_search& ms) + { + match_prerequisites ( + a, + t, + ms, + (a.operation () != clean_id ? nullptr : &t.root_scope ())); + } + + inline void + match_prerequisite_members (action a, target& t, + const match_search_member& msm) + { + if (a.operation () != clean_id) + match_prerequisite_members (a, t, msm, nullptr); + else + { + // Note that here we don't iterate over members even for see-through + // groups since the group target should clean eveything up. A bit of an + // optimization. + // + match_search ms ( + msm + ? [&msm] (action a, + const target& t, + const prerequisite& p, + include_type i) + { + return msm (a, t, prerequisite_member {p, nullptr}, i); + } + : match_search ()); + + match_prerequisites (a, t, ms, &t.root_scope ()); + } + } + + inline void + match_prerequisites (action a, target& t, const scope& s) + { + match_prerequisites (a, t, nullptr, &s); + } + + inline void + match_prerequisite_members (action a, target& t, const scope& s) + { + match_prerequisite_members (a, t, nullptr, &s); + } + + LIBBUILD2_SYMEXPORT target_state + execute (action, const target&, size_t, atomic_count*); + + inline target_state + execute (action a, const target& t) + { + return execute (a, t, 0, nullptr); + } + + inline target_state + execute_wait (action a, const target& t) + { + if (execute (a, t) == target_state::busy) + sched.wait (target::count_executed (), + t[a].task_count, + scheduler::work_none); + + return t.executed_state (a); + } + + inline target_state + execute_async (action a, const target& t, + size_t sc, atomic_count& tc, + bool fail) + { + target_state r (execute (a, t, sc, &tc)); + + if (fail && !keep_going && r == target_state::failed) + throw failed (); + + return r; + } + + inline target_state + execute_delegate (const recipe& r, action a, const target& t) + { + return r (a, t); + } + + inline target_state + execute_inner (action a, const target& t) + { + assert (a.outer ()); + return execute_wait (a.inner_action (), t); + } + + inline target_state + straight_execute_prerequisites (action a, const target& t, + size_t c, size_t s) + { + auto& p (t.prerequisite_targets[a]); + return straight_execute_members (a, t, + p.data (), + c == 0 ? p.size () - s: c, + s); + } + + inline target_state + reverse_execute_prerequisites (action a, const target& t, size_t c) + { + auto& p (t.prerequisite_targets[a]); + return reverse_execute_members (a, t, + p.data (), + c == 0 ? p.size () : c, + p.size ()); + } + + inline target_state + execute_prerequisites (action a, const target& t, size_t c) + { + return current_mode == execution_mode::first + ? straight_execute_prerequisites (a, t, c) + : reverse_execute_prerequisites (a, t, c); + } + + inline target_state + straight_execute_prerequisites_inner (action a, const target& t, + size_t c, size_t s) + { + assert (a.outer ()); + auto& p (t.prerequisite_targets[a]); + return straight_execute_members (a.inner_action (), + t[a].task_count, + p.data (), + c == 0 ? 
p.size () - s : c, + s); + } + + inline target_state + reverse_execute_prerequisites_inner (action a, const target& t, size_t c) + { + assert (a.outer ()); + auto& p (t.prerequisite_targets[a]); + return reverse_execute_members (a.inner_action (), + t[a].task_count, + p.data (), + c == 0 ? p.size () : c, + p.size ()); + } + + inline target_state + execute_prerequisites_inner (action a, const target& t, size_t c) + { + return current_mode == execution_mode::first + ? straight_execute_prerequisites_inner (a, t, c) + : reverse_execute_prerequisites_inner (a, t, c); + } + + // If the first argument is NULL, then the result is treated as a boolean + // value. + // + LIBBUILD2_SYMEXPORT pair<optional<target_state>, const target*> + execute_prerequisites (const target_type*, + action, const target&, + const timestamp&, const execute_filter&, + size_t); + + inline optional<target_state> + execute_prerequisites (action a, const target& t, + const timestamp& mt, const execute_filter& ef, + size_t n) + { + return execute_prerequisites (nullptr, a, t, mt, ef, n).first; + } + + template <typename T> + inline pair<optional<target_state>, const T&> + execute_prerequisites (action a, const target& t, + const timestamp& mt, const execute_filter& ef, + size_t n) + { + auto p (execute_prerequisites (T::static_type, a, t, mt, ef, n)); + return pair<optional<target_state>, const T&> ( + p.first, static_cast<const T&> (p.second)); + } + + inline pair<optional<target_state>, const target&> + execute_prerequisites (const target_type& tt, + action a, const target& t, + const timestamp& mt, const execute_filter& ef, + size_t n) + { + auto p (execute_prerequisites (&tt, a, t, mt, ef, n)); + return pair<optional<target_state>, const target&> (p.first, *p.second); + } + + template <typename T> + inline pair<optional<target_state>, const T&> + execute_prerequisites (const target_type& tt, + action a, const target& t, + const timestamp& mt, const execute_filter& ef, + size_t n) + { + auto p (execute_prerequisites (tt, a, t, mt, ef, n)); + return pair<optional<target_state>, const T&> ( + p.first, static_cast<const T&> (p.second)); + } + + inline target_state + execute_members (action a, const target& t, const target* ts[], size_t n) + { + return current_mode == execution_mode::first + ? straight_execute_members (a, t, ts, n, 0) + : reverse_execute_members (a, t, ts, n, n); + } +} diff --git a/libbuild2/buildfile b/libbuild2/buildfile new file mode 100644 index 0000000..99f616c --- /dev/null +++ b/libbuild2/buildfile @@ -0,0 +1,85 @@ +# file : libbuild2/buildfile +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +import int_libs = libbutl%lib{butl} + +./: lib{build2}: libul{build2}: {hxx ixx txx cxx}{** -config \ + -version \ + -**.test...} \ + {hxx}{config version} \ + $int_libs + +# Include the generated config and version headers into the distribution (so +# that we don't pick up installed ones) and don't remove them when cleaning in +# src (so that clean results in a state identical to distributed). +# +hxx{config}: in{config} +hxx{version}: in{version} $src_root/manifest + +hxx{config version}: +{ + dist = true + clean = ($src_root != $out_root) +} + +# Unit tests. +# +exe{*.test}: +{ + test = true + install = false +} + +for t: cxx{**.test...} +{ + d = $directory($t) + n = $name($t)... + b = $path.base($name($t)) + + ./: $d/exe{$n}: $t $d/{hxx ixx txx}{+$n} $d/testscript{+$n +$b+*.test...} + $d/exe{$n}: libul{build2}: bin.whole = false +} + +# Build options. 
+# +obja{*}: cxx.poptions += -DLIBBUILD2_STATIC_BUILD +objs{*}: cxx.poptions += -DLIBBUILD2_SHARED_BUILD + +# Pass our compiler target to be used as libbuild2 host. +# +obj{context}: cxx.poptions += -DBUILD2_HOST_TRIPLET=\"$cxx.target\" +obja{context}: cxx.poptions += -DLIBBUILD2_STATIC_BUILD +objs{context}: cxx.poptions += -DLIBBUILD2_SHARED_BUILD + +if ($cxx.target.class != "windows") + cxx.libs += -lpthread + +# Export options. +# +lib{build2}: +{ + cxx.export.poptions = "-I$out_root" "-I$src_root" + cxx.export.libs = $int_libs +} + +liba{build2}: cxx.export.poptions += -DLIBBUILD2_STATIC +libs{build2}: cxx.export.poptions += -DLIBBUILD2_SHARED + +# For pre-releases use the complete version to make sure they cannot be used +# in place of another pre-release or the final version. See the version module +# for details on the version.* variable values. +# +if $version.pre_release + lib{build2}: bin.lib.version = @"-$version.project_id" +else + lib{build2}: bin.lib.version = @"-$version.major.$version.minor" + +# Install into the libbuild2/ subdirectory of, say, /usr/include/ +# recreating subdirectories. +# +{hxx ixx txx}{*}: +{ + install = include/libbuild2/ + install.subdirs = true +} diff --git a/libbuild2/config.hxx b/libbuild2/config.hxx new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/libbuild2/config.hxx diff --git a/libbuild2/config.hxx.in b/libbuild2/config.hxx.in new file mode 100644 index 0000000..62110da --- /dev/null +++ b/libbuild2/config.hxx.in @@ -0,0 +1,37 @@ +// file : libbuild2/config.hxx.in -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +// This file is included by <libbuild2/types.hxx> so normally you don't need +// to include it directly. Note that this file is included unprocessed (i.e., +// as an .in) during bootstrap. +// +// Also, note that some BUILD_* configuration macros are passed directly from +// the buildfile with the -D options. + +#ifndef LIBBUILD2_CONFIG_HXX +#define LIBBUILD2_CONFIG_HXX + +// Currently the value is adjusted manually during release but in the future +// the idea is to use version metadata (e.g., 1.2.3-a.1+0.stage). This way it +// will all be managed in a central place (manifest), we can teach the version +// module to extract it, and we can also set it for the other packages in the +// toolchain. Bootstrap will be a problem though. (Maybe set it to nullptr and +// say that it shall not be queried?) +// +#define LIBBUILD2_STAGE true + +// Modification time sanity checks are by default only enabled for the staged +// version but this can be overridden at runtime with --[no-]mtime-check. +// +#if LIBBUILD2_STAGE +# define LIBBUILD2_MTIME_CHECK true +#else +# define LIBBUILD2_MTIME_CHECK false +#endif + +#ifdef BUILD2_BOOTSTRAP +#else +#endif + +#endif // LIBBUILD2_CONFIG_HXX diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx new file mode 100644 index 0000000..d56abb3 --- /dev/null +++ b/libbuild2/context.cxx @@ -0,0 +1,1026 @@ +// file : libbuild2/context.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/context.hxx> + +#include <sstream> +#include <exception> // uncaught_exception[s]() + +#include <libbuild2/rule.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbutl/ft/exception.hxx> // uncaught_exceptions + +// For command line variable parsing. 
+// +#include <libbuild2/token.hxx> +#include <libbuild2/lexer.hxx> +#include <libbuild2/parser.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + scheduler sched; + + run_phase phase; + phase_mutex phase_mutex::instance; + + size_t load_generation; + + bool phase_mutex:: + lock (run_phase p) + { + bool r; + + { + mlock l (m_); + bool u (lc_ == 0 && mc_ == 0 && ec_ == 0); // Unlocked. + + // Increment the counter. + // + condition_variable* v (nullptr); + switch (p) + { + case run_phase::load: lc_++; v = &lv_; break; + case run_phase::match: mc_++; v = &mv_; break; + case run_phase::execute: ec_++; v = &ev_; break; + } + + // If unlocked, switch directly to the new phase. Otherwise wait for the + // phase switch. Note that in the unlocked case we don't need to notify + // since there is nobody waiting (all counters are zero). + // + if (u) + { + phase = p; + r = !fail_; + } + else if (phase != p) + { + sched.deactivate (); + for (; phase != p; v->wait (l)) ; + r = !fail_; + l.unlock (); // Important: activate() can block. + sched.activate (); + } + else + r = !fail_; + } + + // In case of load, acquire the exclusive access mutex. + // + if (p == run_phase::load) + { + lm_.lock (); + r = !fail_; // Re-query. + } + + return r; + } + + void phase_mutex:: + unlock (run_phase p) + { + // In case of load, release the exclusive access mutex. + // + if (p == run_phase::load) + lm_.unlock (); + + { + mlock l (m_); + + // Decrement the counter and see if this phase has become unlocked. + // + bool u (false); + switch (p) + { + case run_phase::load: u = (--lc_ == 0); break; + case run_phase::match: u = (--mc_ == 0); break; + case run_phase::execute: u = (--ec_ == 0); break; + } + + // If the phase is unlocked, pick a new phase and notify the waiters. + // Note that we notify all load waiters so that they can all serialize + // behind the second-level mutex. + // + if (u) + { + condition_variable* v; + + if (lc_ != 0) {phase = run_phase::load; v = &lv_;} + else if (mc_ != 0) {phase = run_phase::match; v = &mv_;} + else if (ec_ != 0) {phase = run_phase::execute; v = &ev_;} + else {phase = run_phase::load; v = nullptr;} + + if (v != nullptr) + { + l.unlock (); + v->notify_all (); + } + } + } + } + + bool phase_mutex:: + relock (run_phase o, run_phase n) + { + // Pretty much a fused unlock/lock implementation except that we always + // switch into the new phase. + // + assert (o != n); + + bool r; + + if (o == run_phase::load) + lm_.unlock (); + + { + mlock l (m_); + bool u (false); + + switch (o) + { + case run_phase::load: u = (--lc_ == 0); break; + case run_phase::match: u = (--mc_ == 0); break; + case run_phase::execute: u = (--ec_ == 0); break; + } + + // Set if will be waiting or notifying others. + // + condition_variable* v (nullptr); + switch (n) + { + case run_phase::load: v = lc_++ != 0 || !u ? &lv_ : nullptr; break; + case run_phase::match: v = mc_++ != 0 || !u ? &mv_ : nullptr; break; + case run_phase::execute: v = ec_++ != 0 || !u ? &ev_ : nullptr; break; + } + + if (u) + { + phase = n; + r = !fail_; + + // Notify others that could be waiting for this phase. + // + if (v != nullptr) + { + l.unlock (); + v->notify_all (); + } + } + else // phase != n + { + sched.deactivate (); + for (; phase != n; v->wait (l)) ; + r = !fail_; + l.unlock (); // Important: activate() can block. + sched.activate (); + } + } + + if (n == run_phase::load) + { + lm_.lock (); + r = !fail_; // Re-query. 
+ } + + return r; + } + + // C++17 deprecated uncaught_exception() so use uncaught_exceptions() if + // available. + // + static inline bool + uncaught_exception () + { +#ifdef __cpp_lib_uncaught_exceptions + return std::uncaught_exceptions () != 0; +#else + return std::uncaught_exception (); +#endif + } + + // phase_lock + // + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + phase_lock* phase_lock_instance; + + phase_lock:: + phase_lock (run_phase p) + : p (p) + { + if (phase_lock* l = phase_lock_instance) + assert (l->p == p); + else + { + if (!phase_mutex::instance.lock (p)) + { + phase_mutex::instance.unlock (p); + throw failed (); + } + + phase_lock_instance = this; + + //text << this_thread::get_id () << " phase acquire " << p; + } + } + + phase_lock:: + ~phase_lock () + { + if (phase_lock_instance == this) + { + phase_lock_instance = nullptr; + phase_mutex::instance.unlock (p); + + //text << this_thread::get_id () << " phase release " << p; + } + } + + // phase_unlock + // + phase_unlock:: + phase_unlock (bool u) + : l (u ? phase_lock_instance : nullptr) + { + if (u) + { + phase_lock_instance = nullptr; + phase_mutex::instance.unlock (l->p); + + //text << this_thread::get_id () << " phase unlock " << l->p; + } + } + + phase_unlock:: + ~phase_unlock () noexcept (false) + { + if (l != nullptr) + { + bool r (phase_mutex::instance.lock (l->p)); + phase_lock_instance = l; + + // Fail unless we are already failing. Note that we keep the phase + // locked since there will be phase_lock down the stack to unlock it. + // + if (!r && !uncaught_exception ()) + throw failed (); + + //text << this_thread::get_id () << " phase lock " << l->p; + } + } + + // phase_switch + // + phase_switch:: + phase_switch (run_phase n) + : o (phase), n (n) + { + if (!phase_mutex::instance.relock (o, n)) + { + phase_mutex::instance.relock (n, o); + throw failed (); + } + + phase_lock_instance->p = n; + + if (n == run_phase::load) // Note: load lock is exclusive. + load_generation++; + + //text << this_thread::get_id () << " phase switch " << o << " " << n; + } + + phase_switch:: + ~phase_switch () noexcept (false) + { + // If we are coming off a failed load phase, mark the phase_mutex as + // failed to terminate all other threads since the build state may no + // longer be valid. + // + if (n == run_phase::load && uncaught_exception ()) + { + mlock l (phase_mutex::instance.m_); + phase_mutex::instance.fail_ = true; + } + + bool r (phase_mutex::instance.relock (n, o)); + phase_lock_instance->p = o; + + // Similar logic to ~phase_unlock(). 
+ // + if (!r && !uncaught_exception ()) + throw failed (); + + //text << this_thread::get_id () << " phase restore " << n << " " << o; + } + + const variable* var_src_root; + const variable* var_out_root; + const variable* var_src_base; + const variable* var_out_base; + const variable* var_forwarded; + + const variable* var_project; + const variable* var_amalgamation; + const variable* var_subprojects; + const variable* var_version; + + const variable* var_project_url; + const variable* var_project_summary; + + const variable* var_import_target; + + const variable* var_clean; + const variable* var_backlink; + const variable* var_include; + + const char var_extension[10] = "extension"; + + const variable* var_build_meta_operation; + + string current_mname; + string current_oname; + + const meta_operation_info* current_mif; + const operation_info* current_inner_oif; + const operation_info* current_outer_oif; + size_t current_on; + execution_mode current_mode; + bool current_diag_noise; + + atomic_count dependency_count; + atomic_count target_count; + atomic_count skip_count; + + bool keep_going = false; + bool dry_run = false; + + void (*config_save_variable) (scope&, const variable&, uint64_t); + + const string& (*config_preprocess_create) (const variable_overrides&, + values&, + vector_view<opspec>&, + bool, + const location&); + + variable_overrides + reset (const strings& cmd_vars) + { + tracer trace ("reset"); + + // @@ Need to unload modules when we dynamically load them. + // + + l6 ([&]{trace << "resetting build state";}); + + auto& vp (variable_pool::instance); + auto& sm (scope_map::instance); + + variable_overrides vos; + + targets.clear (); + sm.clear (); + vp.clear (); + + // Reset meta/operation tables. Note that the order should match the id + // constants in <libbuild2/operation.hxx>. + // + meta_operation_table.clear (); + meta_operation_table.insert ("noop"); + meta_operation_table.insert ("perform"); + meta_operation_table.insert ("configure"); + meta_operation_table.insert ("disfigure"); + + if (config_preprocess_create != nullptr) + meta_operation_table.insert ( + meta_operation_data ("create", config_preprocess_create)); + + meta_operation_table.insert ("dist"); + meta_operation_table.insert ("info"); + + operation_table.clear (); + operation_table.insert ("default"); + operation_table.insert ("update"); + operation_table.insert ("clean"); + operation_table.insert ("test"); + operation_table.insert ("update-for-test"); + operation_table.insert ("install"); + operation_table.insert ("uninstall"); + operation_table.insert ("update-for-install"); + + // Create global scope. Note that the empty path is a prefix for any other + // path. See the comment in <libbutl/prefix-map.mxx> for details. + // + auto make_global_scope = [] () -> scope& + { + auto i (scope_map::instance.insert (dir_path ())); + scope& r (i->second); + r.out_path_ = &i->first; + global_scope = scope::global_ = &r; + return r; + }; + + scope& gs (make_global_scope ()); + + // Setup the global scope before parsing any variable overrides since they + // may reference these things. + // + + gs.assign<dir_path> ("build.work") = work; + gs.assign<dir_path> ("build.home") = home; + + // Build system driver process path. + // + gs.assign<process_path> ("build.path") = + process_path (nullptr, // Will be filled by value assignment. + path (argv0.recall_string ()), + path (argv0.effect)); + + // Build system verbosity level. 
+ // + gs.assign<uint64_t> ("build.verbosity") = verb; + + // Build system version (similar to what we do in the version module + // except here we don't include package epoch/revision). + // + { + const standard_version& v (build_version); + + auto set = [&gs] (const char* var, auto val) + { + using T = decltype (val); + gs.assign (variable_pool::instance.insert<T> (var)) = move (val); + }; + + set ("build.version", v.string_project ()); + + set ("build.version.number", v.version); + set ("build.version.id", v.string_project_id ()); + + set ("build.version.major", uint64_t (v.major ())); + set ("build.version.minor", uint64_t (v.minor ())); + set ("build.version.patch", uint64_t (v.patch ())); + + optional<uint16_t> a (v.alpha ()); + optional<uint16_t> b (v.beta ()); + + set ("build.version.alpha", a.has_value ()); + set ("build.version.beta", b.has_value ()); + set ("build.version.pre_release", v.pre_release ().has_value ()); + set ("build.version.pre_release_string", v.string_pre_release ()); + set ("build.version.pre_release_number", uint64_t (a ? *a : b ? *b : 0)); + + set ("build.version.snapshot", v.snapshot ()); // bool + set ("build.version.snapshot_sn", v.snapshot_sn); // uint64 + set ("build.version.snapshot_id", v.snapshot_id); // string + set ("build.version.snapshot_string", v.string_snapshot ()); + + // Allow detection (for example, in tests) whether this is a staged + // toolchain. + // + // Note that it is either staged or public, without queued, since we do + // not re-package things during the queued-to-public transition. + // + set ("build.version.stage", LIBBUILD2_STAGE); + } + + // Enter the host information. Rather than jumping through hoops like + // config.guess, for now we are just going to use the compiler target we + // were built with. While it is not as precise (for example, a binary + // built for i686 might be running on x86_64), it is good enough of an + // approximation/fallback since most of the time we are interested in just + // the target class (e.g., linux, windows, macosx). + // + { + // Did the user ask us to use config.guess? + // + string orig (config_guess + ? run<string> (3, + *config_guess, + [](string& l, bool) {return move (l);}) + : BUILD2_HOST_TRIPLET); + + l5 ([&]{trace << "original host: '" << orig << "'";}); + + try + { + target_triplet t (orig); + + l5 ([&]{trace << "canonical host: '" << t.string () << "'; " + << "class: " << t.class_;}); + + // Also enter as build.host.{cpu,vendor,system,version,class} for + // convenience of access. + // + gs.assign<string> ("build.host.cpu") = t.cpu; + gs.assign<string> ("build.host.vendor") = t.vendor; + gs.assign<string> ("build.host.system") = t.system; + gs.assign<string> ("build.host.version") = t.version; + gs.assign<string> ("build.host.class") = t.class_; + + gs.assign<target_triplet> ("build.host") = move (t); + } + catch (const invalid_argument& e) + { + fail << "unable to parse build host '" << orig << "': " << e << + info << "consider using the --config-guess option"; + } + } + + // Register builtin target types. + // + { + target_type_map& t (gs.target_types); + + t.insert<file> (); + t.insert<alias> (); + t.insert<dir> (); + t.insert<fsdir> (); + t.insert<exe> (); + t.insert<doc> (); + t.insert<man> (); + t.insert<man1> (); + + { + auto& tt (t.insert<manifest> ()); + t.insert_file ("manifest", tt); + } + + { + auto& tt (t.insert<buildfile> ()); + t.insert_file ("buildfile", tt); + } + } + + // Parse and enter the command line variables. 
We do it before entering + // any other variables so that all the variables that are overriden are + // marked as such first. Then, as we enter variables, we can verify that + // the override is alowed. + // + for (size_t i (0); i != cmd_vars.size (); ++i) + { + const string& s (cmd_vars[i]); + + istringstream is (s); + is.exceptions (istringstream::failbit | istringstream::badbit); + + // Similar to buildspec we do "effective escaping" and only for ['"\$(] + // (basically what's necessary inside a double-quoted literal plus the + // single quote). + // + lexer l (is, path ("<cmdline>"), 1 /* line */, "\'\"\\$("); + + // At the buildfile level the scope-specific variable should be + // separated from the directory with a whitespace, for example: + // + // ./ foo=$bar + // + // However, requiring this for command line variables would be too + // inconvinient so we support both. + // + // We also have the optional visibility modifier as a first character of + // the variable name: + // + // ! - global + // % - project + // / - scope + // + // The last one clashes a bit with the directory prefix: + // + // ./ /foo=bar + // .//foo=bar + // + // But that's probably ok (the need for a scope-qualified override with + // scope visibility should be pretty rare). Note also that to set the + // value on the global scope we use !. + // + // And so the first token should be a word which can be either a + // variable name (potentially with the directory qualification) or just + // the directory, in which case it should be followed by another word + // (unqualified variable name). + // + token t (l.next ()); + + optional<dir_path> dir; + if (t.type == token_type::word) + { + string& v (t.value); + size_t p (path::traits_type::rfind_separator (v)); + + if (p != string::npos && p != 0) // If first then visibility. + { + if (p == v.size () - 1) + { + // Separate directory. + // + dir = dir_path (move (v)); + t = l.next (); + + // Target-specific overrides are not yet supported (and probably + // never will be; the beast is already complex enough). + // + if (t.type == token_type::colon) + fail << "'" << s << "' is a target-specific override" << + info << "use double '--' to treat this argument as buildspec"; + } + else + { + // Combined directory. + // + // If double separator (visibility marker), then keep the first in + // name. + // + if (p != 0 && path::traits_type::is_separator (v[p - 1])) + --p; + + dir = dir_path (t.value, 0, p + 1); // Include the separator. + t.value.erase (0, p + 1); // Erase the separator. + } + + if (dir->relative ()) + { + // Handle the special relative to base scope case (.../). + // + auto i (dir->begin ()); + + if (*i == "...") + dir = dir_path (++i, dir->end ()); // Note: can become empty. + else + dir->complete (); // Relative to CWD. + } + + if (dir->absolute ()) + dir->normalize (); + } + } + + token_type tt (l.next ().type); + + // The token should be the variable name followed by =, +=, or =+. + // + if (t.type != token_type::word || t.value.empty () || + (tt != token_type::assign && + tt != token_type::prepend && + tt != token_type::append)) + { + fail << "expected variable assignment instead of '" << s << "'" << + info << "use double '--' to treat this argument as buildspec"; + } + + // Take care of the visibility. Note that here we rely on the fact that + // none of these characters are lexer's name separators. + // + char c (t.value[0]); + + if (path::traits_type::is_separator (c)) + c = '/'; // Normalize. + + string n (t.value, c == '!' || c == '%' || c == '/' ? 
1 : 0); + + if (c == '!' && dir) + fail << "scope-qualified global override of variable " << n; + + variable& var (const_cast<variable&> ( + vp.insert (n, true /* overridable */))); + + const variable* o; + { + variable_visibility v (c == '/' ? variable_visibility::scope : + c == '%' ? variable_visibility::project : + variable_visibility::normal); + + const char* k (tt == token_type::assign ? "__override" : + tt == token_type::append ? "__suffix" : "__prefix"); + + unique_ptr<variable> p ( + new variable { + n + '.' + to_string (i + 1) + '.' + k, + nullptr /* aliases */, + nullptr /* type */, + nullptr /* overrides */, + v}); + + // Back link. + // + p->aliases = p.get (); + if (var.overrides != nullptr) + swap (p->aliases, + const_cast<variable*> (var.overrides.get ())->aliases); + + // Forward link. + // + p->overrides = move (var.overrides); + var.overrides = move (p); + + o = var.overrides.get (); + } + + // Currently we expand project overrides in the global scope to keep + // things simple. Pass original variable for diagnostics. Use current + // working directory as pattern base. + // + parser p; + pair<value, token> r (p.parse_variable_value (l, gs, &work, var)); + + if (r.second.type != token_type::eos) + fail << "unexpected " << r.second << " in variable assignment " + << "'" << s << "'"; + + // Make sure the value is not typed. + // + if (r.first.type != nullptr) + fail << "typed override of variable " << n; + + // Global and absolute scope overrides we can enter directly. Project + // and relative scope ones will be entered by the caller for each + // amalgamation/project. + // + if (c == '!' || (dir && dir->absolute ())) + { + scope& s (c == '!' ? gs : sm.insert (*dir)->second); + + auto p (s.vars.insert (*o)); + assert (p.second); // Variable name is unique. + + value& v (p.first); + v = move (r.first); + } + else + vos.push_back ( + variable_override {var, *o, move (dir), move (r.first)}); + } + + // Enter builtin variables and patterns. + // + + // All config. variables are by default overridable. + // + vp.insert_pattern ("config.**", nullopt, true, nullopt, true, false); + + // file.cxx:import() (note that order is important; see insert_pattern()). + // + vp.insert_pattern<abs_dir_path> ( + "config.import.*", true, variable_visibility::normal, true); + vp.insert_pattern<path> ( + "config.import.**", true, variable_visibility::normal, true); + + // module.cxx:load_module(). + // + { + auto v_p (variable_visibility::project); + + vp.insert_pattern<bool> ("**.booted", false, v_p); + vp.insert_pattern<bool> ("**.loaded", false, v_p); + vp.insert_pattern<bool> ("**.configured", false, v_p); + } + + { + auto v_p (variable_visibility::project); + auto v_t (variable_visibility::target); + auto v_q (variable_visibility::prereq); + + var_src_root = &vp.insert<dir_path> ("src_root"); + var_out_root = &vp.insert<dir_path> ("out_root"); + var_src_base = &vp.insert<dir_path> ("src_base"); + var_out_base = &vp.insert<dir_path> ("out_base"); + + var_forwarded = &vp.insert<bool> ("forwarded", v_p); + + // Note that subprojects is not typed since the value requires + // pre-processing (see file.cxx). 
+ // + var_project = &vp.insert<project_name> ("project", v_p); + var_amalgamation = &vp.insert<dir_path> ("amalgamation", v_p); + var_subprojects = &vp.insert ("subprojects", v_p); + var_version = &vp.insert<string> ("version", v_p); + + var_project_url = &vp.insert<string> ("project.url", v_p); + var_project_summary = &vp.insert<string> ("project.summary", v_p); + + var_import_target = &vp.insert<name> ("import.target"); + + var_clean = &vp.insert<bool> ("clean", v_t); + var_backlink = &vp.insert<string> ("backlink", v_t); + var_include = &vp.insert<string> ("include", v_q); + + vp.insert<string> (var_extension, v_t); + + // Backlink executables and (generated) documentation by default. + // + gs.target_vars[exe::static_type]["*"].assign (var_backlink) = "true"; + gs.target_vars[doc::static_type]["*"].assign (var_backlink) = "true"; + + var_build_meta_operation = &vp.insert<string> ("build.meta_operation"); + } + + // Register builtin rules. + // + { + rule_map& r (gs.rules); // Note: global scope! + + //@@ outer + r.insert<alias> (perform_id, 0, "alias", alias_rule::instance); + + r.insert<fsdir> (perform_update_id, "fsdir", fsdir_rule::instance); + r.insert<fsdir> (perform_clean_id, "fsdir", fsdir_rule::instance); + + r.insert<mtime_target> (perform_update_id, "file", file_rule::instance); + r.insert<mtime_target> (perform_clean_id, "file", file_rule::instance); + } + + return vos; + } + + dir_path + src_out (const dir_path& out, const scope& r) + { + assert (r.root ()); + return src_out (out, r.out_path (), r.src_path ()); + } + + dir_path + out_src (const dir_path& src, const scope& r) + { + assert (r.root ()); + return out_src (src, r.out_path (), r.src_path ()); + } + + dir_path + src_out (const dir_path& o, + const dir_path& out_root, const dir_path& src_root) + { + assert (o.sub (out_root)); + return src_root / o.leaf (out_root); + } + + dir_path + out_src (const dir_path& s, + const dir_path& out_root, const dir_path& src_root) + { + assert (s.sub (src_root)); + return out_root / s.leaf (src_root); + } + + // diag_do(), etc. 
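// As a worked example (illustrative; exe{hello} is an arbitrary target), these
// functions compose roughly the following phrases from the meta-operation,
// inner operation, and optional outer operation names:
//
//   perform(update):     diag_do    -> "update exe{hello}"
//                        diag_doing -> "updating exe{hello}"
//                        diag_did   -> "updated exe{hello}"
//
//   configure(update):   diag_doing -> "configuring updating exe{hello}"
//
//   update as the inner operation of test (update-for-test):
//                        diag_doing -> "updating exe{hello} (for test)"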
+ // + string + diag_do (const action&) + { + const meta_operation_info& m (*current_mif); + const operation_info& io (*current_inner_oif); + const operation_info* oo (current_outer_oif); + + string r; + + // perform(update(x)) -> "update x" + // configure(update(x)) -> "configure updating x" + // + if (m.name_do.empty ()) + r = io.name_do; + else + { + r = m.name_do; + + if (io.name_doing[0] != '\0') + { + r += ' '; + r += io.name_doing; + } + } + + if (oo != nullptr) + { + r += " (for "; + r += oo->name; + r += ')'; + } + + return r; + } + + void + diag_do (ostream& os, const action& a, const target& t) + { + os << diag_do (a) << ' ' << t; + } + + string + diag_doing (const action&) + { + const meta_operation_info& m (*current_mif); + const operation_info& io (*current_inner_oif); + const operation_info* oo (current_outer_oif); + + string r; + + // perform(update(x)) -> "updating x" + // configure(update(x)) -> "configuring updating x" + // + if (!m.name_doing.empty ()) + r = m.name_doing; + + if (io.name_doing[0] != '\0') + { + if (!r.empty ()) r += ' '; + r += io.name_doing; + } + + if (oo != nullptr) + { + r += " (for "; + r += oo->name; + r += ')'; + } + + return r; + } + + void + diag_doing (ostream& os, const action& a, const target& t) + { + os << diag_doing (a) << ' ' << t; + } + + string + diag_did (const action&) + { + const meta_operation_info& m (*current_mif); + const operation_info& io (*current_inner_oif); + const operation_info* oo (current_outer_oif); + + string r; + + // perform(update(x)) -> "updated x" + // configure(update(x)) -> "configured updating x" + // + if (!m.name_did.empty ()) + { + r = m.name_did; + + if (io.name_doing[0] != '\0') + { + r += ' '; + r += io.name_doing; + } + } + else + r += io.name_did; + + if (oo != nullptr) + { + r += " (for "; + r += oo->name; + r += ')'; + } + + return r; + } + + void + diag_did (ostream& os, const action& a, const target& t) + { + os << diag_did (a) << ' ' << t; + } + + void + diag_done (ostream& os, const action&, const target& t) + { + const meta_operation_info& m (*current_mif); + const operation_info& io (*current_inner_oif); + const operation_info* oo (current_outer_oif); + + // perform(update(x)) -> "x is up to date" + // configure(update(x)) -> "updating x is configured" + // + if (m.name_done.empty ()) + { + os << t; + + if (io.name_done[0] != '\0') + os << ' ' << io.name_done; + + if (oo != nullptr) + os << " (for " << oo->name << ')'; + } + else + { + if (io.name_doing[0] != '\0') + os << io.name_doing << ' '; + + if (oo != nullptr) + os << "(for " << oo->name << ") "; + + os << t << ' ' << m.name_done; + } + } +} diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx new file mode 100644 index 0000000..66874e7 --- /dev/null +++ b/libbuild2/context.hxx @@ -0,0 +1,572 @@ +// file : libbuild2/context.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CONTEXT_HXX +#define LIBBUILD2_CONTEXT_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/operation.hxx> +#include <libbuild2/scheduler.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Main (and only) scheduler. Started up and shut down in main(). 
+ //
+ LIBBUILD2_SYMEXPORT extern scheduler sched;
+
+ // In order to perform each operation the build system goes through the
+ // following phases:
+ //
+ // load - load the buildfiles
+ // match - search prerequisites and match rules
+ // execute - execute the matched rule
+ //
+ // The build system starts with a "serial load" phase and then continues
+ // with parallel match and execute. Match, however, can be interrupted
+ // both with load and execute.
+ //
+ // Match can be interrupted with "exclusive load" in order to load
+ // additional buildfiles. Similarly, it can be interrupted with (parallel)
+ // execute in order to build targets required to complete the match (for
+ // example, generated source code or source code generators themselves).
+ //
+ // Such interruptions are performed by a phase change that is protected by
+ // phase_mutex (which is also used to synchronize the state changes between
+ // phases).
+ //
+ // Serial load can perform arbitrary changes to the build state. Exclusive
+ // load, however, can only perform "island appends". That is, it can create
+ // new "nodes" (variables, scopes, etc) but not (semantically) change
+ // already existing nodes or invalidate any references to such (the idea
+ // here is that one should be able to load additional buildfiles as long as
+ // they don't interfere with the existing build state). The "islands" are
+ // identified by the load_generation number (0 for the initial/serial
+ // load). It is incremented in case of a phase switch and can be stored in
+ // various "nodes" to verify modifications are only done "within the
+ // islands".
+ //
+ LIBBUILD2_SYMEXPORT extern run_phase phase;
+ LIBBUILD2_SYMEXPORT extern size_t load_generation;
+
+ // A "tri-mutex" that keeps all the threads in one of the three phases. When
+ // a thread wants to switch a phase, it has to wait for all the other
+ // threads to do the same (or release their phase locks). The load phase is
+ // exclusive.
+ //
+ // The interleaving of match and execute is interesting: during match we read
+ // the "external state" (e.g., filesystem entries, modification times, etc)
+ // and capture it in the "internal state" (our dependency graph). During
+ // execute we are modifying the external state with controlled modifications
+ // of the internal state to reflect the changes (e.g., update mtimes). If
+ // you think about it, it's pretty clear that we cannot safely perform both
+ // of these actions simultaneously. A good example would be running a code
+ // generator and header dependency extraction simultaneously: the extraction
+ // process may pick up headers as they are being generated. As a result, we
+ // either have everyone treat the external state as read-only or write-only.
+ //
+ // There is also one more complication: if we are returning from a load
+ // phase that has failed, then the build state could be seriously messed up
+ // (things like scopes not being set up completely, etc). And once we release
+ // the lock, other threads that are waiting will start relying on this
+ // messed up state. So a load phase can mark the phase_mutex as failed in
+ // which case all currently blocked and future lock()/relock() calls return
+ // false. Note that in this case we still switch to the desired phase. See
+ // the phase_{lock,switch,unlock} implementations for details.
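// As an illustrative sketch (not part of this interface), a rule whose match
// requires an up-to-date generated source file could interrupt the match
// phase with execute along these lines (update_during_match() here is a
// hypothetical stand-in for whatever rule-specific logic executes the
// already-matched target):
//
//   // We are holding a match phase lock here.
//   //
//   {
//     phase_switch ps (run_phase::execute);
//     update_during_match (t); // Hypothetical.
//   }
//   // The destruction of ps switched us back to the match phase.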
+ // + class LIBBUILD2_SYMEXPORT phase_mutex + { + public: + // Acquire a phase lock potentially blocking (unless already in the + // desired phase) until switching to the desired phase is possible. + // + bool + lock (run_phase); + + // Release the phase lock potentially allowing (unless there are other + // locks on this phase) switching to a different phase. + // + void + unlock (run_phase); + + // Switch from one phase to another. Semantically, just unlock() followed + // by lock() but more efficient. + // + bool + relock (run_phase unlock, run_phase lock); + + private: + friend struct phase_lock; + friend struct phase_unlock; + friend struct phase_switch; + + phase_mutex () + : fail_ (false), lc_ (0), mc_ (0), ec_ (0) + { + phase = run_phase::load; + } + + static phase_mutex instance; + + private: + // We have a counter for each phase which represents the number of threads + // in or waiting for this phase. + // + // We use condition variables to wait for a phase switch. The load phase + // is exclusive so we have a separate mutex to serialize it (think of it + // as a second level locking). + // + // When the mutex is unlocked (all three counters become zero, the phase + // is always changed to load (this is also the initial state). + // + mutex m_; + + bool fail_; + + size_t lc_; + size_t mc_; + size_t ec_; + + condition_variable lv_; + condition_variable mv_; + condition_variable ev_; + + mutex lm_; + }; + + // Grab a new phase lock releasing it on destruction. The lock can be + // "owning" or "referencing" (recursive). + // + // On the referencing semantics: If there is already an instance of + // phase_lock in this thread, then the new instance simply references it. + // + // The reason for this semantics is to support the following scheduling + // pattern (in actual code we use wait_guard to RAII it): + // + // atomic_count task_count (0); + // + // { + // phase_lock l (run_phase::match); // (1) + // + // for (...) + // { + // sched.async (task_count, + // [] (...) + // { + // phase_lock pl (run_phase::match); // (2) + // ... + // }, + // ...); + // } + // } + // + // sched.wait (task_count); // (3) + // + // Here is what's going on here: + // + // 1. We first get a phase lock "for ourselves" since after the first + // iteration of the loop, things may become asynchronous (including + // attempts to switch the phase and modify the structure we are iteration + // upon). + // + // 2. The task can be queued or it can be executed synchronously inside + // async() (refer to the scheduler class for details on this semantics). + // + // If this is an async()-synchronous execution, then the task will create + // a referencing phase_lock. If, however, this is a queued execution + // (including wait()-synchronous), then the task will create a top-level + // phase_lock. + // + // Note that we only acquire the lock once the task starts executing + // (there is no reason to hold the lock while the task is sitting in the + // queue). This optimization assumes that whatever else we pass to the + // task (for example, a reference to a target) is stable (in other words, + // such a reference cannot become invalid). + // + // 3. Before calling wait(), we release our phase lock to allow switching + // the phase. 
+ // + struct LIBBUILD2_SYMEXPORT phase_lock + { + explicit phase_lock (run_phase); + ~phase_lock (); + + phase_lock (phase_lock&&) = delete; + phase_lock (const phase_lock&) = delete; + + phase_lock& operator= (phase_lock&&) = delete; + phase_lock& operator= (const phase_lock&) = delete; + + run_phase p; + }; + + // Assuming we have a lock on the current phase, temporarily release it + // and reacquire on destruction. + // + struct LIBBUILD2_SYMEXPORT phase_unlock + { + phase_unlock (bool unlock = true); + ~phase_unlock () noexcept (false); + + phase_lock* l; + }; + + // Assuming we have a lock on the current phase, temporarily switch to a + // new phase and switch back on destruction. + // + struct LIBBUILD2_SYMEXPORT phase_switch + { + explicit phase_switch (run_phase); + ~phase_switch () noexcept (false); + + run_phase o, n; + }; + + // Wait for a task count optionally and temporarily unlocking the phase. + // + struct wait_guard + { + ~wait_guard () noexcept (false); + + wait_guard (); // Empty. + + explicit + wait_guard (atomic_count& task_count, + bool phase = false); + + wait_guard (size_t start_count, + atomic_count& task_count, + bool phase = false); + + void + wait (); + + // Note: move-assignable to empty only. + // + wait_guard (wait_guard&&); + wait_guard& operator= (wait_guard&&); + + wait_guard (const wait_guard&) = delete; + wait_guard& operator= (const wait_guard&) = delete; + + size_t start_count; + atomic_count* task_count; + bool phase; + }; + + // Cached variables. + // + // Note: consider printing in info meta-operation if adding anything here. + // + LIBBUILD2_SYMEXPORT extern const variable* var_src_root; + LIBBUILD2_SYMEXPORT extern const variable* var_out_root; + LIBBUILD2_SYMEXPORT extern const variable* var_src_base; + LIBBUILD2_SYMEXPORT extern const variable* var_out_base; + LIBBUILD2_SYMEXPORT extern const variable* var_forwarded; + + LIBBUILD2_SYMEXPORT extern const variable* var_project; + LIBBUILD2_SYMEXPORT extern const variable* var_amalgamation; + LIBBUILD2_SYMEXPORT extern const variable* var_subprojects; + LIBBUILD2_SYMEXPORT extern const variable* var_version; + + // project.url + // + LIBBUILD2_SYMEXPORT extern const variable* var_project_url; + + // project.summary + // + LIBBUILD2_SYMEXPORT extern const variable* var_project_summary; + + // import.target + // + LIBBUILD2_SYMEXPORT extern const variable* var_import_target; + + // [bool] target visibility + // + LIBBUILD2_SYMEXPORT extern const variable* var_clean; + + // Forwarded configuration backlink mode. Valid values are: + // + // false - no link. + // true - make a link using appropriate mechanism. + // symbolic - make a symbolic link. + // hard - make a hard link. + // copy - make a copy. + // overwrite - copy over but don't remove on clean (committed gen code). + // + // Note that it can be set by a matching rule as a rule-specific variable. + // + // [string] target visibility + // + LIBBUILD2_SYMEXPORT extern const variable* var_backlink; + + // Prerequisite inclusion/exclusion. Valid values are: + // + // false - exclude. + // true - include. + // adhoc - include but treat as an ad hoc input. + // + // If a rule uses prerequisites as inputs (as opposed to just matching them + // with the "pass-through" semantics), then the adhoc value signals that a + // prerequisite is an ad hoc input. 
A rule should match and execute such a + // prerequisite (whether its target type is recognized as suitable input or + // not) and assume that the rest will be handled by the user (e.g., it will + // be passed via a command line argument or some such). Note that this + // mechanism can be used to both treat unknown prerequisite types as inputs + // (for example, linker scripts) as well as prevent treatment of known + // prerequisite types as such while still matching and executing them (for + // example, plugin libraries). + // + // A rule with the "pass-through" semantics should treat the adhoc value + // the same as true. + // + // To query this value in rule implementations use the include() helpers + // from prerequisites.hxx. + // + // [string] prereq visibility + // + LIBBUILD2_SYMEXPORT extern const variable* var_include; + + LIBBUILD2_SYMEXPORT extern const char var_extension[10]; // "extension" + + // The build.* namespace. + // + // .meta_operation + // + LIBBUILD2_SYMEXPORT extern const variable* var_build_meta_operation; + + // Current action (meta/operation). + // + // The names unlike info are available during boot but may not yet be + // lifted. The name is always for an outer operation (or meta operation + // that hasn't been recognized as such yet). + // + LIBBUILD2_SYMEXPORT extern string current_mname; + LIBBUILD2_SYMEXPORT extern string current_oname; + + LIBBUILD2_SYMEXPORT extern const meta_operation_info* current_mif; + LIBBUILD2_SYMEXPORT extern const operation_info* current_inner_oif; + LIBBUILD2_SYMEXPORT extern const operation_info* current_outer_oif; + + // Current operation number (1-based) in the meta-operation batch. + // + LIBBUILD2_SYMEXPORT extern size_t current_on; + + LIBBUILD2_SYMEXPORT extern execution_mode current_mode; + + // Some diagnostics (for example output directory creation/removal by the + // fsdir rule) is just noise at verbosity level 1 unless it is the only + // thing that is printed. So we can only suppress it in certain situations + // (e.g., dist) where we know we have already printed something. + // + LIBBUILD2_SYMEXPORT extern bool current_diag_noise; + + // Total number of dependency relationships and targets with non-noop + // recipe in the current action. + // + // Together with target::dependents the dependency count is incremented + // during the rule search & match phase and is decremented during execution + // with the expectation of it reaching 0. Used as a sanity check. + // + // The target count is incremented after a non-noop recipe is matched and + // decremented after such recipe has been executed. If such a recipe has + // skipped executing the operation, then it should increment the skip count. + // These two counters are used for progress monitoring and diagnostics. + // + LIBBUILD2_SYMEXPORT extern atomic_count dependency_count; + LIBBUILD2_SYMEXPORT extern atomic_count target_count; + LIBBUILD2_SYMEXPORT extern atomic_count skip_count; + + inline void + set_current_mif (const meta_operation_info& mif) + { + if (current_mname != mif.name) + { + current_mname = mif.name; + global_scope->rw ().assign (var_build_meta_operation) = mif.name; + } + + current_mif = &mif; + current_on = 0; // Reset. + } + + inline void + set_current_oif (const operation_info& inner_oif, + const operation_info* outer_oif = nullptr, + bool diag_noise = true) + { + current_oname = (outer_oif == nullptr ? 
inner_oif : *outer_oif).name; + current_inner_oif = &inner_oif; + current_outer_oif = outer_oif; + current_on++; + current_mode = inner_oif.mode; + current_diag_noise = diag_noise; + + // Reset counters (serial execution). + // + dependency_count.store (0, memory_order_relaxed); + target_count.store (0, memory_order_relaxed); + skip_count.store (0, memory_order_relaxed); + } + + // Keep going flag. + // + // Note that setting it to false is not of much help unless we are running + // serially. In parallel we queue most of the things up before we see any + // failures. + // + LIBBUILD2_SYMEXPORT extern bool keep_going; + + // Dry run flag (see --dry-run|-n). + // + // This flag is set only for the final execute phase (as opposed to those + // that interrupt match) by the perform meta operation's execute() callback. + // + // Note that for this mode to function properly we have to use fake mtimes. + // Specifically, a rule that pretends to update a target must set its mtime + // to system_clock::now() and everyone else must use this cached value. In + // other words, there should be no mtime re-query from the filesystem. The + // same is required for "logical clean" (i.e., dry-run 'clean update' in + // order to see all the command lines). + // + // At first, it may seem like we should also "dry-run" changes to depdb. But + // that would be both problematic (some rules update it in apply() during + // the match phase) and wasteful (why discard information). Also, depdb may + // serve as an input to some commands (for example, to provide C++ module + // mapping) which means that without updating it the commands we print might + // not be runnable (think of the compilation database). + // + // One thing we need to be careful about if we are updating depdb is to not + // render the target up-to-date. But in this case the depdb file will be + // older than the target which in our model is treated as an interrupted + // update (see depdb for details). + // + // Note also that sometimes it makes sense to do a bit more than absolutely + // necessary or to discard information in order to keep the rule logic sane. + // And some rules may choose to ignore this flag altogether. In this case, + // however, the rule should be careful not to rely on functions (notably + // from filesystem) that respect this flag in order not to end up with a + // job half done. + // + LIBBUILD2_SYMEXPORT extern bool dry_run; + + // Config module entry points. + // + LIBBUILD2_SYMEXPORT extern void (*config_save_variable) ( + scope&, const variable&, uint64_t flags); + + LIBBUILD2_SYMEXPORT extern const string& (*config_preprocess_create) ( + const variable_overrides&, + values&, + vector_view<opspec>&, + bool lifted, + const location&); + + // Reset the build state. In particular, this removes all the targets, + // scopes, and variables. + // + LIBBUILD2_SYMEXPORT variable_overrides + reset (const strings& cmd_vars); + + // Return the project name or empty string if unnamed. + // + inline const project_name& + project (const scope& root) + { + auto l (root[var_project]); + return l ? cast<project_name> (l) : empty_project_name; + } + + // Return the src/out directory corresponding to the given out/src. The + // passed directory should be a sub-directory of out/src_root. 
+ // + LIBBUILD2_SYMEXPORT dir_path + src_out (const dir_path& out, const scope& root); + + LIBBUILD2_SYMEXPORT dir_path + src_out (const dir_path& out, + const dir_path& out_root, const dir_path& src_root); + + LIBBUILD2_SYMEXPORT dir_path + out_src (const dir_path& src, const scope& root); + + LIBBUILD2_SYMEXPORT dir_path + out_src (const dir_path& src, + const dir_path& out_root, const dir_path& src_root); + + // Action phrases, e.g., "configure update exe{foo}", "updating exe{foo}", + // and "updating exe{foo} is configured". Use like this: + // + // info << "while " << diag_doing (a, t); + // + class target; + + struct diag_phrase + { + const action& a; + const target& t; + void (*f) (ostream&, const action&, const target&); + }; + + inline ostream& + operator<< (ostream& os, const diag_phrase& p) + { + p.f (os, p.a, p.t); + return os; + } + + LIBBUILD2_SYMEXPORT string + diag_do (const action&); + + LIBBUILD2_SYMEXPORT void + diag_do (ostream&, const action&, const target&); + + inline diag_phrase + diag_do (const action& a, const target& t) + { + return diag_phrase {a, t, &diag_do}; + } + + LIBBUILD2_SYMEXPORT string + diag_doing (const action&); + + LIBBUILD2_SYMEXPORT void + diag_doing (ostream&, const action&, const target&); + + inline diag_phrase + diag_doing (const action& a, const target& t) + { + return diag_phrase {a, t, &diag_doing}; + } + + LIBBUILD2_SYMEXPORT string + diag_did (const action&); + + LIBBUILD2_SYMEXPORT void + diag_did (ostream&, const action&, const target&); + + inline diag_phrase + diag_did (const action& a, const target& t) + { + return diag_phrase {a, t, &diag_did}; + } + + LIBBUILD2_SYMEXPORT void + diag_done (ostream&, const action&, const target&); + + inline diag_phrase + diag_done (const action& a, const target& t) + { + return diag_phrase {a, t, &diag_done}; + } +} + +#include <libbuild2/context.ixx> + +#endif // LIBBUILD2_CONTEXT_HXX diff --git a/libbuild2/context.ixx b/libbuild2/context.ixx new file mode 100644 index 0000000..f947bd7 --- /dev/null +++ b/libbuild2/context.ixx @@ -0,0 +1,60 @@ +// file : libbuild2/context.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + // wait_guard + // + inline wait_guard:: + wait_guard () + : start_count (0), task_count (nullptr), phase (false) + { + } + + inline wait_guard:: + wait_guard (atomic_count& tc, bool p) + : wait_guard (0, tc, p) + { + } + + inline wait_guard:: + wait_guard (size_t sc, atomic_count& tc, bool p) + : start_count (sc), task_count (&tc), phase (p) + { + } + + inline wait_guard:: + ~wait_guard () noexcept (false) + { + if (task_count != nullptr) + wait (); + } + + inline wait_guard:: + wait_guard (wait_guard&& x) + : start_count (x.start_count), task_count (x.task_count), phase (x.phase) + { + x.task_count = nullptr; + } + + inline wait_guard& wait_guard:: + operator= (wait_guard&& x) + { + if (&x != this) + { + assert (task_count == nullptr); + start_count = x.start_count; task_count = x.task_count; phase = x.phase; + x.task_count = nullptr; + } + return *this; + } + + inline void wait_guard:: + wait () + { + phase_unlock u (phase); + sched.wait (start_count, *task_count); + task_count = nullptr; + } +} diff --git a/libbuild2/depdb.cxx b/libbuild2/depdb.cxx new file mode 100644 index 0000000..32e5916 --- /dev/null +++ b/libbuild2/depdb.cxx @@ -0,0 +1,399 @@ +// file : libbuild2/depdb.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see 
accompanying LICENSE file + +#include <libbuild2/depdb.hxx> + +#ifdef _WIN32 +# include <libbutl/win32-utility.hxx> +#endif + +#include <libbuild2/filesystem.hxx> // mtime() +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + depdb_base:: + depdb_base (const path& p, timestamp mt) + { + fdopen_mode om (fdopen_mode::out | fdopen_mode::binary); + ifdstream::iostate em (ifdstream::badbit); + + if (mt == timestamp_nonexistent) + { + state_ = state::write; + om |= fdopen_mode::create | fdopen_mode::exclusive; + em |= ifdstream::failbit; + } + else + { + state_ = state::read; + om |= fdopen_mode::in; + } + + auto_fd fd; + try + { + fd = fdopen (p, om); + } + catch (const io_error&) + { + bool c (state_ == state::write); + + diag_record dr (fail); + dr << "unable to " << (c ? "create" : "open") << ' ' << p; + + if (c) + dr << info << "did you forget to add fsdir{} prerequisite for " + << "output directory?"; + + dr << endf; + } + + // Open the corresponding stream. Note that if we throw after that, the + // corresponding member will not be destroyed. This is the reason for the + // depdb/base split. + // + if (state_ == state::read) + { + new (&is_) ifdstream (move (fd), em); + buf_ = static_cast<fdbuf*> (is_.rdbuf ()); + } + else + { + new (&os_) ofdstream (move (fd), em); + buf_ = static_cast<fdbuf*> (os_.rdbuf ()); + } + } + + depdb:: + depdb (path_type&& p, timestamp mt) + : depdb_base (p, mt), + path (move (p)), + mtime (mt != timestamp_nonexistent ? mt : timestamp_unknown), + touch (false) + { + // Read/write the database format version. + // + if (state_ == state::read) + { + string* l (read ()); + if (l == nullptr || *l != "1") + write ('1'); + } + else + write ('1'); + } + + depdb:: + depdb (path_type p) + : depdb (move (p), build2::mtime (p)) + { + } + + void depdb:: + change (bool trunc) + { + assert (state_ != state::write); + + // Transfer the file descriptor from ifdstream to ofdstream. Note that the + // steps in this dance must be carefully ordered to make sure we don't + // call any destructors twice in the face of exceptions. + // + auto_fd fd (is_.release ()); + + // Consider this scenario: we are overwriting an old line (so it ends with + // a newline and the "end marker") but the operation failed half way + // through. Now we have the prefix from the new line, the suffix from the + // old, and everything looks valid. So what we need is to somehow + // invalidate the old content so that it can never combine with (partial) + // new content to form a valid line. One way to do that would be to + // truncate the file. + // + if (trunc) + try + { + fdtruncate (fd.get (), pos_); + } + catch (const io_error& e) + { + fail << "unable to truncate " << path << ": " << e; + } + + // Note: the file descriptor position can be beyond the pos_ value due to + // the ifdstream buffering. That's why we need to seek to switch from + // reading to writing. + // + try + { + fdseek (fd.get (), pos_, fdseek_mode::set); + } + catch (const io_error& e) + { + fail << "unable to rewind " << path << ": " << e; + } + + // @@ Strictly speaking, ofdstream can throw which will leave us in a + // non-destructible state. Unlikely but possible. 
+ // + is_.~ifdstream (); + new (&os_) ofdstream (move (fd), + ofdstream::badbit | ofdstream::failbit, + pos_); + buf_ = static_cast<fdbuf*> (os_.rdbuf ()); + + state_ = state::write; + mtime = timestamp_unknown; + } + + string* depdb:: + read_ () + { + // Save the start position of this line so that we can overwrite it. + // + pos_ = buf_->tellg (); + + try + { + // Note that we intentionally check for eof after updating the write + // position. + // + if (state_ == state::read_eof) + return nullptr; + + getline (is_, line_); // Calls line_.erase(). + + // The line should always end with a newline. If it doesn't, then this + // line (and the rest of the database) is assumed corrupted. Also peek + // at the character after the newline. We should either have the next + // line or '\0', which is our "end marker", that is, it indicates the + // database was properly closed. + // + ifdstream::int_type c; + if (is_.fail () || // Nothing got extracted. + is_.eof () || // Eof reached before delimiter. + (c = is_.peek ()) == ifdstream::traits_type::eof ()) + { + // Preemptively switch to writing. While we could have delayed this + // until the user called write(), if the user calls read() again (for + // whatever misguided reason) we will mess up the overwrite position. + // + change (); + return nullptr; + } + + // Handle the "end marker". Note that the caller can still switch to the + // write mode on this line. And, after calling read() again, write to + // the next line (i.e., start from the "end marker"). + // + if (c == '\0') + state_ = state::read_eof; + } + catch (const io_error& e) + { + fail << "unable to read from " << path << ": " << e; + } + + return &line_; + } + + bool depdb:: + skip () + { + if (state_ == state::read_eof) + return true; + + assert (state_ == state::read); + + // The rest is pretty similar in logic to read_() above. + // + pos_ = buf_->tellg (); + + try + { + // Keep reading lines checking for the end marker after each newline. + // + ifdstream::int_type c; + do + { + if ((c = is_.get ()) == '\n') + { + if ((c = is_.get ()) == '\0') + { + state_ = state::read_eof; + return true; + } + } + } while (c != ifdstream::traits_type::eof ()); + } + catch (const io_error& e) + { + fail << "unable to read from " << path << ": " << e; + } + + // Invalid database so change over to writing. + // + change (); + return false; + } + + void depdb:: + write (const char* s, size_t n, bool nl) + { + // Switch to writing if we are still reading. + // + if (state_ != state::write) + change (); + + try + { + os_.write (s, static_cast<streamsize> (n)); + + if (nl) + os_.put ('\n'); + } + catch (const io_error& e) + { + fail << "unable to write to " << path << ": " << e; + } + } + + void depdb:: + write (char c, bool nl) + { + // Switch to writing if we are still reading. + // + if (state_ != state::write) + change (); + + try + { + os_.put (c); + + if (nl) + os_.put ('\n'); + } + catch (const io_error& e) + { + fail << "unable to write to " << path << ": " << e; + } + } + + void depdb:: + close () + { + // If we are at eof, then it means all lines are good, there is the "end + // marker" at the end, and we don't need to do anything, except, maybe + // touch the file. Otherwise, if we are still in the read mode, truncate + // the rest, and then add the "end marker" (we cannot have anything in the + // write mode since we truncate in change()). 
+ // + if (state_ == state::read_eof) + { + if (!touch) + try + { + is_.close (); + return; + } + catch (const io_error& e) + { + fail << "unable to close " << path << ": " << e; + } + + // While there are utime(2)/utimensat(2) (and probably something similar + // for Windows), for now we just overwrite the "end marker". Hopefully + // no implementation will be smart enough to recognize this is a no-op + // and skip updating mtime (which would probably be incorrect, spec- + // wise). And this could even be faster since we already have the file + // descriptor. Or it might be slower since so far we've only been + // reading. + // + pos_ = buf_->tellg (); // The last line is accepted. + change (false /* truncate */); // Write end marker below. + } + else if (state_ != state::write) + { + pos_ = buf_->tellg (); // The last line is accepted. + change (true /* truncate */); + } + + if (mtime_check ()) + start_ = system_clock::now (); + + try + { + os_.put ('\0'); // The "end marker". + os_.close (); + } + catch (const io_error& e) + { + fail << "unable to flush " << path << ": " << e; + } + + // On some platforms (currently confirmed on FreeBSD running as VMs) one + // can sometimes end up with a modification time that is a bit after the + // call to close(). And in some tight cases this can mess with our + // "protocol" that a valid depdb should be no older than the target it is + // for. + // + // Note that this does not seem to be related to clock adjustments but + // rather feels like the modification time is set when the changes + // actually hit some lower-level layer (e.g., OS or filesystem + // driver). One workaround that appears to work is to query the + // mtime. This seems to force that layer to commit to a timestamp. + // +#if defined(__FreeBSD__) + mtime = build2::mtime (path); // Save for debugging/check below. +#endif + } + + void depdb:: + check_mtime_ (const path_type& t, timestamp e) + { + // We could call the static version but then we would have lost additional + // information for some platforms. + // + timestamp t_mt (build2::mtime (t)); + timestamp d_mt (build2::mtime (path)); + + if (d_mt > t_mt) + { + if (e == timestamp_unknown) + e = system_clock::now (); + + fail << "backwards modification times detected:\n" + << " " << start_ << " sequence start\n" +#if defined(__FreeBSD__) + << " " << mtime << " close mtime\n" +#endif + << " " << d_mt << " " << path.string () << '\n' + << " " << t_mt << " " << t.string () << '\n' + << " " << e << " sequence end"; + } + } + + void depdb:: + check_mtime_ (timestamp s, + const path_type& d, + const path_type& t, + timestamp e) + { + using build2::mtime; + + timestamp t_mt (mtime (t)); + timestamp d_mt (mtime (d)); + + if (d_mt > t_mt) + { + fail << "backwards modification times detected:\n" + << " " << s << " sequence start\n" + << " " << d_mt << " " << d.string () << '\n' + << " " << t_mt << " " << t.string () << '\n' + << " " << e << " sequence end"; + } + } +} diff --git a/libbuild2/depdb.hxx b/libbuild2/depdb.hxx new file mode 100644 index 0000000..8a1cd1f --- /dev/null +++ b/libbuild2/depdb.hxx @@ -0,0 +1,288 @@ +// file : libbuild2/depdb.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_DEPDB_HXX +#define LIBBUILD2_DEPDB_HXX + +#include <cstring> // strlen() + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Auxiliary dependency database (those .d files). 
Prints the diagnostics + // and fails on system and IO errors. + // + // This is a strange beast: a line-oriented, streaming database that can, at + // some point, be switched from reading to (over)writing. The idea is to + // store auxiliary/ad-hoc dependency information in the "invalidation" + // order. That is, if an earlier line is out of date, then all the + // subsequent ones are out of date as well. + // + // As an example, consider a dependency database for foo.o which is built + // from foo.cxx by the cxx.compile rule. The first line could be the rule + // name itself (perhaps with the version). If a different rule is now + // building foo.o, then any dep info that was saved by cxx.compile is + // probably useless. Next we can have the command line options that were + // used to build foo.o. Then could come the source file name followed by the + // extracted header dependencies. If the compile options or the source file + // name have changed, then the header dependencies are likely to have + // changed as well. + // + // As an example, here is what our foo.o.d could look like (the first line + // is the database format version and the last '\0' character is the end + // marker): + // + // 1 + // cxx.compile 1 + // g++-4.8 -I/tmp/foo -O3 + // /tmp/foo/foo.cxx + // /tmp/foo/foo.hxx + // /usr/include/string.h + // /usr/include/stdlib.h + // /tmp/foo/bar.hxx + // ^@ + // + // A race is possible between updating the database and the target. For + // example, we may detect a line mismatch that renders the target out-of- + // date (say, compile options in the above example). We update the database + // but before getting a chance to update the target, we get interrupted. On + // a subsequent re-run, because the database has been updated, we will miss + // the "target requires update" condition. + // + // If we assume that an update of the database also means an update of the + // target, then this "interrupted update" situation can be easily detected + // by comparing the database and target modification timestamps. This is + // also used to handle the dry-run mode where we essentially do the + // interruption ourselves. + // + struct LIBBUILD2_SYMEXPORT depdb_base + { + explicit + depdb_base (const path&, timestamp); + + ~depdb_base (); + + enum class state {read, read_eof, write} state_; + + union + { + ifdstream is_; // read, read_eof + ofdstream os_; // write + }; + + butl::fdbuf* buf_; // Current buffer (for tellg()/tellp()). + }; + + class LIBBUILD2_SYMEXPORT depdb: private depdb_base + { + public: + using path_type = build2::path; + + // The modification time of the database only makes sense while reading + // (in the write mode it will be set to timestamp_unknown). + // + // If touch is set to true, update the database modification time in + // close() even if otherwise no modifications are necessary (i.e., the + // database is in the read mode and is at eof). + // + path_type path; + timestamp mtime; + bool touch; + + // Open the database for reading. Note that if the file does not exist, + // has wrong format version, or is corrupt, then the database will be + // immediately switched to writing. + // + // The failure commonly happens when the user tries to stash the target in + // a non-existent subdirectory but forgets to add the corresponding fsdir{} + // prerequisite. That's why the issued diagnostics may provide the + // corresponding hint. + // + explicit + depdb (path_type); + + // Close the database. 
If this function is not called, then the database
+ // may be left in the old/corrupt state. Note that in the read mode this
+ // function will "chop off" lines that haven't been read.
+ //
+ // Make sure to also call check_mtime() after updating the target to
+ // perform the target/database modification times sanity checks.
+ //
+ void
+ close ();
+
+ // Flush any unwritten data to disk. This is primarily useful when reusing
+ // a (partially written) database as an input to external programs (e.g.,
+ // as a module map).
+ //
+ void
+ flush ();
+
+ // Perform target/database modification times sanity check.
+ //
+ // Note that it would also be good to compare the target timestamp against
+ // the newest prerequisite. However, obtaining this information would cost
+ // extra (see execute_prerequisites()). So maybe later, if we get a case
+ // where this is a problem (in a sense, the database is a buffer between
+ // prerequisites and the target).
+ //
+ void
+ check_mtime (const path_type& target, timestamp end = timestamp_unknown);
+
+ static void
+ check_mtime (timestamp start,
+ const path_type& db,
+ const path_type& target,
+ timestamp end);
+
+ // Return true if mtime checks are enabled.
+ //
+ static bool
+ mtime_check ();
+
+ // Read the next line. If the result is not NULL, then it is a pointer to
+ // the next line in the database (which you are free to move from). If you
+ // then call write(), this line will be overwritten.
+ //
+ // If the result is NULL, then it means no next line is available. This
+ // can be due to several reasons:
+ //
+ // - eof reached (you can detect this by calling more() before read())
+ // - database is already in the write mode
+ // - the next line (and the rest of the database) is corrupt
+ //
+ string*
+ read () {return state_ == state::write ? nullptr : read_ ();}
+
+ // Return true if the database is in the read mode and there is at least
+ // one more line available. Note that there is no guarantee that the line
+ // is not corrupt. In other words, read() can still return NULL, it just
+ // won't be because of eof.
+ //
+ bool
+ more () const {return state_ == state::read;}
+
+ bool
+ reading () const {return state_ != state::write;}
+
+ bool
+ writing () const {return state_ == state::write;}
+
+ // Skip to the end of the database and return true if it is valid.
+ // Otherwise, return false, in which case the database must be
+ // overwritten. Note that this function expects the database to be in the
+ // read state.
+ //
+ bool
+ skip ();
+
+ // Write the next line. If nl is false then don't write the newline yet.
+ // Note that this switches the database into the write mode and no further
+ // reading will be possible.
+ //
+ void
+ write (const string& l, bool nl = true) {write (l.c_str (), l.size (), nl);}
+
+ void
+ write (const path_type& p, bool nl = true) {write (p.string (), nl);}
+
+ void
+ write (const char* s, bool nl = true) {write (s, std::strlen (s), nl);}
+
+ void
+ write (const char*, size_t, bool nl = true);
+
+ void
+ write (char, bool nl = true);
+
+ // Mark the previously read line as to be overwritten.
+ //
+ void
+ write () {if (state_ != state::write) change ();}
+
+ // Read the next line and compare it to the expected value. If it matches,
+ // return NULL. Otherwise, overwrite it and return the old value (which
+ // could also be NULL).
This strange-sounding result semantics is used to + // detect the "there is a value but it does not match" case for tracing: + // + // if (string* o = d.expect (...)) + // l4 ([&]{trace << "X mismatch forcing update of " << t;}); + // + string* + expect (const string& v) + { + string* l (read ()); + if (l == nullptr || *l != v) + { + write (v); + return l; + } + + return nullptr; + } + + string* + expect (const path_type& v) + { + string* l (read ()); + if (l == nullptr || + path_type::traits_type::compare (*l, v.string ()) != 0) + { + write (v); + return l; + } + + return nullptr; + } + + string* + expect (const char* v) + { + string* l (read ()); + if (l == nullptr || *l != v) + { + write (v); + return l; + } + + return nullptr; + } + + // Could be supported if required. + // + depdb (depdb&&) = delete; + depdb (const depdb&) = delete; + + depdb& operator= (depdb&&) = delete; + depdb& operator= (const depdb&) = delete; + + private: + depdb (path_type&&, timestamp); + + void + change (bool truncate = true); + + string* + read_ (); + + void + check_mtime_ (const path_type&, timestamp); + + static void + check_mtime_ (timestamp, const path_type&, const path_type&, timestamp); + + private: + uint64_t pos_; // Start of the last returned line. + string line_; // Current line. + timestamp start_; // Sequence start (mtime check). + }; +} + +#include <libbuild2/depdb.ixx> + +#endif // LIBBUILD2_DEPDB_HXX diff --git a/libbuild2/depdb.ixx b/libbuild2/depdb.ixx new file mode 100644 index 0000000..9f73fcb --- /dev/null +++ b/libbuild2/depdb.ixx @@ -0,0 +1,45 @@ +// file : libbuild2/depdb.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + inline depdb_base:: + ~depdb_base () + { + if (state_ != state::write) + is_.~ifdstream (); + else + os_.~ofdstream (); + } + + inline void depdb:: + flush () + { + if (state_ == state::write) + os_.flush (); + } + + inline bool depdb:: + mtime_check () + { + return mtime_check_option ? *mtime_check_option : LIBBUILD2_MTIME_CHECK; + } + + inline void depdb:: + check_mtime (const path_type& t, timestamp e) + { + if (state_ == state::write && mtime_check ()) + check_mtime_ (t, e); + } + + inline void depdb:: + check_mtime (timestamp s, + const path_type& d, + const path_type& t, + timestamp e) + { + if (mtime_check ()) + check_mtime_ (s, d, t, e); + } +} diff --git a/libbuild2/diagnostics.cxx b/libbuild2/diagnostics.cxx new file mode 100644 index 0000000..eab3b78 --- /dev/null +++ b/libbuild2/diagnostics.cxx @@ -0,0 +1,138 @@ +// file : libbuild2/diagnostics.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/diagnostics.hxx> + +#include <cstring> // strchr() + +#include <libbutl/process-io.mxx> + +using namespace std; + +namespace build2 +{ + // Diagnostics state (verbosity level, progress, etc). Keep disabled until + // set from options. + // + uint16_t verb = 0; + + optional<bool> diag_progress_option; + + bool diag_no_line = false; + bool diag_no_column = false; + + bool stderr_term = false; + + void + init_diag (uint16_t v, optional<bool> p, bool nl, bool nc, bool st) + { + verb = v; + diag_progress_option = p; + diag_no_line = nl; + diag_no_column = nc; + stderr_term = st; + } + + // Stream verbosity. 
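// For example (an illustrative sketch), a caller that formats diagnostics
// into its own ostringstream and wants the current program-based mapping
// rather than the maximum default could do:
//
//   ostringstream os;
//   stream_verb (os, stream_verb_map ());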
+ // + const int stream_verb_index = ostream::xalloc (); + + void + print_process (const char* const* args, size_t n) + { + diag_record r (text); + print_process (r, args, n); + } + + void + print_process (diag_record& r, const char* const* args, size_t n) + { + r << butl::process_args {args, n}; + } + + // Diagnostics stack. + // + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + const diag_frame* diag_frame_stack = nullptr; + + const diag_frame* diag_frame:: + stack () noexcept + { + return diag_frame_stack; + } + + const diag_frame* diag_frame:: + stack (const diag_frame* f) noexcept + { + const diag_frame* r (diag_frame_stack); + diag_frame_stack = f; + return r; + } + + // Diagnostic facility, project specifics. + // + + void simple_prologue_base:: + operator() (const diag_record& r) const + { + stream_verb (r.os, sverb_); + + if (type_ != nullptr) + r << type_ << ": "; + + if (mod_ != nullptr) + r << mod_ << "::"; + + if (name_ != nullptr) + r << name_ << ": "; + } + + void location_prologue_base:: + operator() (const diag_record& r) const + { + stream_verb (r.os, sverb_); + + if (!loc_.empty ()) + { + r << *loc_.file << ':'; + + if (!diag_no_line) + { + if (loc_.line != 0) + { + r << loc_.line << ':'; + + if (!diag_no_column) + { + if (loc_.column != 0) + r << loc_.column << ':'; + } + } + } + + r << ' '; + } + + if (type_ != nullptr) + r << type_ << ": "; + + if (mod_ != nullptr) + r << mod_ << "::"; + + if (name_ != nullptr) + r << name_ << ": "; + } + + const basic_mark error ("error"); + const basic_mark warn ("warning"); + const basic_mark info ("info"); + const basic_mark text (nullptr, nullptr, nullptr); // No type/data/frame. + const fail_mark fail ("error"); + const fail_end endf; +} diff --git a/libbuild2/diagnostics.hxx b/libbuild2/diagnostics.hxx new file mode 100644 index 0000000..9ad18ff --- /dev/null +++ b/libbuild2/diagnostics.hxx @@ -0,0 +1,436 @@ +// file : libbuild2/diagnostics.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_DIAGNOSTICS_HXX +#define LIBBUILD2_DIAGNOSTICS_HXX + +#include <libbutl/diagnostics.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + using butl::diag_record; + + // Throw this exception to terminate the build. The handler should + // assume that the diagnostics has already been issued. + // + class failed: public std::exception {}; + + // Print process commmand line. If the number of elements is specified + // (or the second version is used), then it will print the piped multi- + // process command line, if present. In this case, the expected format + // is as follows: + // + // name1 arg arg ... nullptr + // name2 arg arg ... nullptr + // ... + // nameN arg arg ... nullptr nullptr + // + LIBBUILD2_SYMEXPORT void + print_process (diag_record&, const char* const* args, size_t n = 0); + + LIBBUILD2_SYMEXPORT void + print_process (const char* const* args, size_t n = 0); + + inline void + print_process (diag_record& dr, const cstrings& args, size_t n = 0) + { + print_process (dr, args.data (), n != 0 ? n : args.size ()); + } + + inline void + print_process (const cstrings& args, size_t n = 0) + { + print_process (args.data (), n != 0 ? n : args.size ()); + } + + // Program verbosity level (-v/--verbose). 
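To make the piped command line layout described in the print_process() comment above concrete, here is a sketch with made-up program names; it assumes n counts every element, including the terminating null pointers.

  const char* args[] = {
    "foo", "--quiet", nullptr,   // First process: argv plus terminator.
    "bar", "-o", "out", nullptr, // Second process: argv plus terminator.
    nullptr};                    // Extra null terminates the whole list.

  print_process (args, sizeof (args) / sizeof (args[0]));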
+ // + // 0 - disabled + // 1 - high-level information messages + // 2 - essential underlying commands that are being executed + // 3 - all underlying commands that are being executed + // 4 - information helpful to the user (e.g., why a rule did not match) + // 5 - information helpful to the developer + // 6 - even more detailed information + // + // While uint8 is more than enough, use uint16 for the ease of printing. + // + + // Forward-declarated in utility.hxx. + // + // extern uint16_t verb; + // const uint16_t verb_never = 7; + + template <typename F> inline void l1 (const F& f) {if (verb >= 1) f ();} + template <typename F> inline void l2 (const F& f) {if (verb >= 2) f ();} + template <typename F> inline void l3 (const F& f) {if (verb >= 3) f ();} + template <typename F> inline void l4 (const F& f) {if (verb >= 4) f ();} + template <typename F> inline void l5 (const F& f) {if (verb >= 5) f ();} + template <typename F> inline void l6 (const F& f) {if (verb >= 6) f ();} + + // Stream verbosity level. Determined by the diagnostic type (e.g., trace + // always has maximum verbosity) as well as the program verbosity. It is + // used to decide whether to print relative/absolute paths and default + // target extensions. + // + // Currently we have the following program to stream verbosity mapping: + // + // fail/error/warn/info <2:{0,0} 2:{0,1} >2:{1,2} + // trace *:{1,2} + // + // A stream that hasn't been (yet) assigned any verbosity explicitly (e.g., + // ostringstream) defaults to maximum. + // + struct stream_verbosity + { + union + { + struct + { + // 0 - print relative. + // 1 - print absolute. + // + uint16_t path: 1; + + // 0 - don't print. + // 1 - print if specified. + // 2 - print as 'foo.?' if unspecified and 'foo.' if specified as + // "no extension" (empty). + // + uint16_t extension: 2; + }; + uint16_t value_; + }; + + constexpr + stream_verbosity (uint16_t p, uint16_t e): path (p), extension (e) {} + + explicit + stream_verbosity (uint16_t v = 0): value_ (v) {} + }; + + constexpr stream_verbosity stream_verb_max = {1, 2}; + + // Default program to stream verbosity mapping, as outlined above. + // + inline stream_verbosity + stream_verb_map () + { + return + verb < 2 ? stream_verbosity (0, 0) : + verb > 2 ? stream_verbosity (1, 2) : + /* */ stream_verbosity (0, 1); + } + + LIBBUILD2_SYMEXPORT extern const int stream_verb_index; + + inline stream_verbosity + stream_verb (ostream& os) + { + long v (os.iword (stream_verb_index)); + return v == 0 + ? stream_verb_max + : stream_verbosity (static_cast<uint16_t> (v - 1)); + } + + inline void + stream_verb (ostream& os, stream_verbosity v) + { + os.iword (stream_verb_index) = static_cast<long> (v.value_) + 1; + } + + // Progress reporting. + // + using butl::diag_progress; + using butl::diag_progress_lock; + + // Return true if progress is to be shown. The max_verb argument is the + // maximum verbosity level that this type of progress should be shown by + // default. + // + inline bool + show_progress (uint16_t max_verb) + { + return diag_progress_option + ? *diag_progress_option + : stderr_term && verb >= 1 && verb <= max_verb; + } + + // Diagnostic facility, base infrastructure. + // + using butl::diag_stream_lock; + using butl::diag_stream; + using butl::diag_epilogue; + + // Diagnostics stack. Each frame is "applied" to the fail/error/warn/info + // diag record. + // + // Unfortunately most of our use-cases don't fit into the 2-pointer small + // object optimization of std::function. 
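The verbosity helpers above and the diagnostics marks declared further below combine as in the following sketch. The function, its ok argument, and the message text are hypothetical; the fail/info chaining mirrors the uses in file.cxx later in this diff.

  static void
  process_file (const path& f, bool ok)
  {
    tracer trace ("process_file");

    // The lambda, and thus the message formatting, only runs if the
    // program verbosity is at least 5.
    //
    l5 ([&]{trace << "processing " << f;});

    if (verb >= 2)
      text << "process " << f; // "Essential command" style output.

    if (!ok)
      fail << "unable to process " << f <<
        info << "perhaps it has not been generated yet"; // Throws failed.
  }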
So we have to complicate things + // a bit here. + // + struct LIBBUILD2_SYMEXPORT diag_frame + { + explicit + diag_frame (void (*f) (const diag_frame&, const diag_record&)) + : func_ (f) + { + if (func_ != nullptr) + prev_ = stack (this); + } + + diag_frame (diag_frame&& x) + : func_ (x.func_) + { + if (func_ != nullptr) + { + prev_ = x.prev_; + stack (this); + + x.func_ = nullptr; + } + } + + diag_frame& operator= (diag_frame&&) = delete; + + diag_frame (const diag_frame&) = delete; + diag_frame& operator= (const diag_frame&) = delete; + + ~diag_frame () + { + if (func_ != nullptr ) + stack (prev_); + } + + static void + apply (const diag_record& r) + { + for (const diag_frame* f (stack ()); f != nullptr; f = f->prev_) + f->func_ (*f, r); + } + + // Tip of the stack. + // + static const diag_frame* + stack () noexcept; + + // Set the new and return the previous tip of the stack. + // + static const diag_frame* + stack (const diag_frame*) noexcept; + + struct stack_guard + { + explicit stack_guard (const diag_frame* s): s_ (stack (s)) {} + ~stack_guard () {stack (s_);} + const diag_frame* s_; + }; + + private: + void (*func_) (const diag_frame&, const diag_record&); + const diag_frame* prev_; + }; + + template <typename F> + struct diag_frame_impl: diag_frame + { + explicit + diag_frame_impl (F f): diag_frame (&thunk), func_ (move (f)) {} + + private: + static void + thunk (const diag_frame& f, const diag_record& r) + { + static_cast<const diag_frame_impl&> (f).func_ (r); + } + + const F func_; + }; + + template <typename F> + inline diag_frame_impl<F> + make_diag_frame (F f) + { + return diag_frame_impl<F> (move (f)); + } + + // Diagnostic facility, project specifics. + // + struct LIBBUILD2_SYMEXPORT simple_prologue_base + { + explicit + simple_prologue_base (const char* type, + const char* mod, + const char* name, + stream_verbosity sverb) + : type_ (type), mod_ (mod), name_ (name), sverb_ (sverb) {} + + void + operator() (const diag_record& r) const; + + private: + const char* type_; + const char* mod_; + const char* name_; + const stream_verbosity sverb_; + }; + + struct LIBBUILD2_SYMEXPORT location_prologue_base + { + location_prologue_base (const char* type, + const char* mod, + const char* name, + const location& l, + stream_verbosity sverb) + : type_ (type), mod_ (mod), name_ (name), + loc_ (l), + sverb_ (sverb) {} + + location_prologue_base (const char* type, + const char* mod, + const char* name, + path&& f, + stream_verbosity sverb) + : type_ (type), mod_ (mod), name_ (name), + file_ (move (f)), loc_ (&file_), + sverb_ (sverb) {} + + void + operator() (const diag_record& r) const; + + private: + const char* type_; + const char* mod_; + const char* name_; + const path file_; + const location loc_; + const stream_verbosity sverb_; + }; + + struct basic_mark_base + { + using simple_prologue = butl::diag_prologue<simple_prologue_base>; + using location_prologue = butl::diag_prologue<location_prologue_base>; + + explicit + basic_mark_base (const char* type, + const void* data = nullptr, + diag_epilogue* epilogue = &diag_frame::apply, + stream_verbosity (*sverb) () = &stream_verb_map, + const char* mod = nullptr, + const char* name = nullptr) + : sverb_ (sverb), + type_ (type), mod_ (mod), name_ (name), data_ (data), + epilogue_ (epilogue) {} + + simple_prologue + operator() () const + { + return simple_prologue (epilogue_, type_, mod_, name_, sverb_ ()); + } + + location_prologue + operator() (const location& l) const + { + return location_prologue (epilogue_, type_, mod_, name_, 
l, sverb_ ()); + } + + // fail (relative (src)) << ... + // + location_prologue + operator() (path&& f) const + { + return location_prologue ( + epilogue_, type_, mod_, name_, move (f), sverb_ ()); + } + + template <typename L> + location_prologue + operator() (const L& l) const + { + return location_prologue ( + epilogue_, type_, mod_, name_, get_location (l, data_), sverb_ ()); + } + + protected: + stream_verbosity (*sverb_) (); + const char* type_; + const char* mod_; + const char* name_; + const void* data_; + diag_epilogue* const epilogue_; + }; + using basic_mark = butl::diag_mark<basic_mark_base>; + + LIBBUILD2_SYMEXPORT extern const basic_mark error; + LIBBUILD2_SYMEXPORT extern const basic_mark warn; + LIBBUILD2_SYMEXPORT extern const basic_mark info; + LIBBUILD2_SYMEXPORT extern const basic_mark text; + + // trace + // + struct trace_mark_base: basic_mark_base + { + explicit + trace_mark_base (const char* name, const void* data = nullptr) + : trace_mark_base (nullptr, name, data) {} + + trace_mark_base (const char* mod, + const char* name, + const void* data = nullptr) + : basic_mark_base ("trace", + data, + nullptr, // No diag stack. + []() {return stream_verb_max;}, + mod, + name) {} + }; + using trace_mark = butl::diag_mark<trace_mark_base>; + using tracer = trace_mark; + + // fail + // + struct fail_mark_base: basic_mark_base + { + explicit + fail_mark_base (const char* type, + const void* data = nullptr) + : basic_mark_base (type, + data, + [](const diag_record& r) + { + diag_frame::apply (r); + r.flush (); + throw failed (); + }, + &stream_verb_map, + nullptr, + nullptr) {} + }; + using fail_mark = butl::diag_mark<fail_mark_base>; + + struct fail_end_base + { + [[noreturn]] void + operator() (const diag_record& r) const + { + // If we just throw then the record's destructor will see an active + // exception and will not flush the record. + // + r.flush (); + throw failed (); + } + }; + using fail_end = butl::diag_noreturn_end<fail_end_base>; + + LIBBUILD2_SYMEXPORT extern const fail_mark fail; + LIBBUILD2_SYMEXPORT extern const fail_end endf; +} + +#endif // LIBBUILD2_DIAGNOSTICS_HXX diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx new file mode 100644 index 0000000..a866fe3 --- /dev/null +++ b/libbuild2/dump.cxx @@ -0,0 +1,491 @@ +// file : libbuild2/dump.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/dump.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + // If type is false, don't print the value's type (e.g., because it is the + // same as variable's). + // + static void + dump_value (ostream& os, const value& v, bool type) + { + // First print attributes if any. + // + bool a (!v || (type && v.type != nullptr)); + + if (a) + os << '['; + + const char* s (""); + + if (type && v.type != nullptr) + { + os << s << v.type->name; + s = " "; + } + + if (!v) + { + os << s << "null"; + s = " "; + } + + if (a) + os << ']'; + + // Now the value if there is one. + // + if (v) + { + names storage; + os << (a ? 
" " : "") << reverse (v, storage); + } + } + + enum class variable_kind {scope, tt_pat, target, rule, prerequisite}; + + static void + dump_variable (ostream& os, + const variable_map& vm, + const variable_map::const_iterator& vi, + const scope& s, + variable_kind k) + { + // Target type/pattern-specific prepends/appends are kept untyped and not + // overriden. + // + if (k == variable_kind::tt_pat && vi.extra () != 0) + { + // @@ Might be useful to dump the cache. + // + const auto& p (vi.untyped ()); + const variable& var (p.first); + const value& v (p.second); + assert (v.type == nullptr); + + os << var << (v.extra == 1 ? " =+ " : " += "); + dump_value (os, v, false); + } + else + { + const auto& p (*vi); + const variable& var (p.first); + const value& v (p.second); + + if (var.type != nullptr) + os << '[' << var.type->name << "] "; + + os << var << " = "; + + // If this variable is overriden, print both the override and the + // original values. + // + // @@ The override semantics for prerequisite-specific variables + // is still fuzzy/unimplemented, so ignore it for now. + // + if (k != variable_kind::prerequisite) + { + if (var.overrides != nullptr && !var.override ()) + { + lookup org (v, var, vm); + + // The original is always from this scope/target, so depth is 1. + // + lookup l ( + s.find_override ( + var, + make_pair (org, 1), + k == variable_kind::target || k == variable_kind::rule, + k == variable_kind::rule).first); + + assert (l.defined ()); // We at least have the original. + + if (org != l) + { + dump_value (os, *l, l->type != var.type); + os << " # original: "; + } + } + } + + dump_value (os, v, v.type != var.type); + } + } + + static void + dump_variables (ostream& os, + string& ind, + const variable_map& vars, + const scope& s, + variable_kind k) + { + for (auto i (vars.begin ()), e (vars.end ()); i != e; ++i) + { + os << endl + << ind; + + dump_variable (os, vars, i, s, k); + } + } + + // Dump target type/pattern-specific variables. + // + static void + dump_variables (ostream& os, + string& ind, + const variable_type_map& vtm, + const scope& s) + { + for (const auto& vt: vtm) + { + const target_type& t (vt.first); + const variable_pattern_map& vpm (vt.second); + + for (const auto& vp: vpm) + { + const string p (vp.first); + const variable_map& vars (vp.second); + + os << endl + << ind; + + if (t != target::static_type) + os << t.name << '{'; + + os << p; + + if (t != target::static_type) + os << '}'; + + os << ':'; + + if (vars.size () == 1) + { + os << ' '; + dump_variable (os, vars, vars.begin (), s, variable_kind::tt_pat); + } + else + { + os << endl + << ind << '{'; + ind += " "; + dump_variables (os, ind, vars, s, variable_kind::tt_pat); + ind.resize (ind.size () - 2); + os << endl + << ind << '}'; + } + } + } + } + + static void + dump_target (optional<action> a, + ostream& os, + string& ind, + const target& t, + const scope& s, + bool rel) + { + // If requested, print the target and its prerequisites relative to the + // scope. To achieve this we are going to temporarily lower the stream + // path verbosity to level 0. + // + stream_verbosity osv, nsv; + if (rel) + { + osv = nsv = stream_verb (os); + nsv.path = 0; + stream_verb (os, nsv); + } + + if (t.group != nullptr) + os << ind << t << " -> " << *t.group << endl; + + os << ind << t << ':'; + + // First print target/rule-specific variables, if any. 
+ // + { + bool tv (!t.vars.empty ()); + bool rv (a && !t.state[*a].vars.empty ()); + + if (tv || rv) + { + if (rel) + stream_verb (os, osv); // We want variable values in full. + + os << endl + << ind << '{'; + ind += " "; + + if (tv) + dump_variables (os, ind, t.vars, s, variable_kind::target); + + if (rv) + { + // To distinguish target and rule-specific variables, we put the + // latter into a nested block. + // + // @@ Maybe if we also print the rule name, then we could make + // the block associated with that? + + if (tv) + os << endl; + + os << endl + << ind << '{'; + ind += " "; + dump_variables (os, ind, t.state[*a].vars, s, variable_kind::rule); + ind.resize (ind.size () - 2); + os << endl + << ind << '}'; + } + + ind.resize (ind.size () - 2); + os << endl + << ind << '}'; + + if (rel) + stream_verb (os, nsv); + + os << endl + << ind << t << ':'; + } + } + + bool used (false); // Target header has been used to display prerequisites. + + // If the target has been matched to a rule, first print resolved + // prerequisite targets. + // + // Note: running serial and task_count is 0 before any operation has + // started. + // + action inner; // @@ Only for the inner part of the action currently. + + if (size_t c = t[inner].task_count.load (memory_order_relaxed)) + { + if (c == target::count_applied () || c == target::count_executed ()) + { + bool f (false); + for (const target* pt: t.prerequisite_targets[inner]) + { + if (pt == nullptr) // Skipped. + continue; + + os << ' ' << *pt; + f = true; + } + + // Only omit '|' if we have no prerequisites nor targets. + // + if (f || !t.prerequisites ().empty ()) + { + os << " |"; + used = true; + } + } + } + + // Print prerequisites. Those that have prerequisite-specific variables + // have to be printed as a separate dependency. + // + const prerequisites& ps (t.prerequisites ()); + for (auto i (ps.begin ()), e (ps.end ()); i != e; ) + { + const prerequisite& p (*i++); + bool ps (!p.vars.empty ()); // Has prerequisite-specific vars. + + if (ps && used) // If it has been used, get a new header. + os << endl + << ind << t << ':'; + + // Print it as a target if one has been cached. + // + if (const target* t = p.target.load (memory_order_relaxed)) // Serial. + os << ' ' << *t; + else + os << ' ' << p; + + if (ps) + { + if (rel) + stream_verb (os, osv); // We want variable values in full. + + os << ':' << endl + << ind << '{'; + ind += " "; + dump_variables (os, ind, p.vars, s, variable_kind::prerequisite); + ind.resize (ind.size () - 2); + os << endl + << ind << '}'; + + if (rel) + stream_verb (os, nsv); + + if (i != e) // If we have another, get a new header. + os << endl + << ind << t << ':'; + } + + used = !ps; + } + + if (rel) + stream_verb (os, osv); + } + + static void + dump_scope (optional<action> a, + ostream& os, + string& ind, + scope_map::const_iterator& i, + bool rel) + { + const scope& p (i->second); + const dir_path& d (i->first); + ++i; + + // We don't want the extra notations (e.g., ~/) provided by diag_relative() + // since we want the path to be relative to the outer scope. Print the root + // scope path (represented by an empty one) as a platform-dependent path + // separator. + // + if (d.empty ()) + os << ind << dir_path::traits_type::directory_separator; + else + { + const dir_path& rd (rel ? relative (d) : d); + os << ind << (rd.empty () ? 
dir_path (".") : rd); + } + + os << endl + << ind << '{'; + + const dir_path* orb (relative_base); + relative_base = &d; + + ind += " "; + + bool vb (false), sb (false), tb (false); // Variable/scope/target block. + + // Target type/pattern-sepcific variables. + // + if (!p.target_vars.empty ()) + { + dump_variables (os, ind, p.target_vars, p); + vb = true; + } + + // Scope variables. + // + if (!p.vars.empty ()) + { + if (vb) + os << endl; + + dump_variables (os, ind, p.vars, p, variable_kind::scope); + vb = true; + } + + // Nested scopes of which we are an immediate parent. + // + for (auto e (scopes.end ()); i != e && i->second.parent_scope () == &p;) + { + if (vb) + { + os << endl; + vb = false; + } + + if (sb) + os << endl; // Extra newline between scope blocks. + + os << endl; + dump_scope (a, os, ind, i, true /* relative */); + sb = true; + } + + // Targets. + // + // Since targets can occupy multiple lines, we separate them with a + // blank line. + // + for (const auto& pt: targets) + { + const target& t (*pt); + + if (&p != &t.base_scope ()) + continue; + + if (vb || sb || tb) + { + os << endl; + vb = sb = false; + } + + os << endl; + dump_target (a, os, ind, t, p, true /* relative */); + tb = true; + } + + ind.resize (ind.size () - 2); + relative_base = orb; + + os << endl + << ind << '}'; + } + + void + dump (optional<action> a) + { + auto i (scopes.cbegin ()); + assert (&i->second == global_scope); + + // We don't lock diag_stream here as dump() is supposed to be called from + // the main thread prior/after to any other threads being spawned. + // + string ind; + ostream& os (*diag_stream); + dump_scope (a, os, ind, i, false /* relative */); + os << endl; + } + + void + dump (const scope& s, const char* cind) + { + const scope_map_base& m (scopes); // Iterator interface. + auto i (m.find (s.out_path ())); + assert (i != m.end () && &i->second == &s); + + string ind (cind); + ostream& os (*diag_stream); + dump_scope (nullopt /* action */, os, ind, i, false /* relative */); + os << endl; + } + + void + dump (const target& t, const char* cind) + { + string ind (cind); + ostream& os (*diag_stream); + dump_target (nullopt /* action */, + os, + ind, + t, + t.base_scope (), + false /* relative */); + os << endl; + } +} diff --git a/libbuild2/dump.hxx b/libbuild2/dump.hxx new file mode 100644 index 0000000..fd1886b --- /dev/null +++ b/libbuild2/dump.hxx @@ -0,0 +1,34 @@ +// file : libbuild2/dump.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_DUMP_HXX +#define LIBBUILD2_DUMP_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class target; + + // Dump the build state to diag_stream. If action is specified, then assume + // rules have been matched for this action and dump action-specific + // information (like rule-specific variables). 
+ // + LIBBUILD2_SYMEXPORT void + dump (optional<action> = nullopt); + + LIBBUILD2_SYMEXPORT void + dump (const scope&, const char* ind = ""); + + LIBBUILD2_SYMEXPORT void + dump (const target&, const char* ind = ""); +} + +#endif // LIBBUILD2_DUMP_HXX diff --git a/libbuild2/export.hxx b/libbuild2/export.hxx new file mode 100644 index 0000000..514c845 --- /dev/null +++ b/libbuild2/export.hxx @@ -0,0 +1,58 @@ +#pragma once + +// Normally we don't export class templates (but do complete specializations), +// inline functions, and classes with only inline member functions. Exporting +// classes that inherit from non-exported/imported bases (e.g., std::string) +// will end up badly. The only known workarounds are to not inherit or to not +// export. Also, MinGW GCC doesn't like seeing non-exported functions being +// used before their inline definition. The workaround is to reorder code. In +// the end it's all trial and error. +// +// Exportation of explicit template instantiations is even hairier: MinGW GCC +// requires __declspec(dllexport) on the extern template declaration while VC +// wants it on the definition. Use LIBBUILD2_{DEC,DEF}EXPORT for that. +// + +#if defined(LIBBUILD2_STATIC) // Using static. +# define LIBBUILD2_SYMEXPORT +# define LIBBUILD2_DECEXPORT +#elif defined(LIBBUILD2_STATIC_BUILD) // Building static. +# define LIBBUILD2_SYMEXPORT +# define LIBBUILD2_DECEXPORT +# define LIBBUILD2_DEFEXPORT +#elif defined(LIBBUILD2_SHARED) // Using shared. +# ifdef _WIN32 +# define LIBBUILD2_SYMEXPORT __declspec(dllimport) +# define LIBBUILD2_DECEXPORT __declspec(dllimport) +# else +# define LIBBUILD2_SYMEXPORT +# define LIBBUILD2_DECEXPORT +# endif +#elif defined(LIBBUILD2_SHARED_BUILD) // Building shared. +# ifdef _WIN32 +# define LIBBUILD2_SYMEXPORT __declspec(dllexport) +# if defined(_MSC_VER) +# define LIBBUILD2_DECEXPORT +# define LIBBUILD2_DEFEXPORT __declspec(dllexport) +# else +# define LIBBUILD2_DECEXPORT __declspec(dllexport) +# define LIBBUILD2_DEFEXPORT +# endif +# else +# define LIBBUILD2_SYMEXPORT +# define LIBBUILD2_DECEXPORT +# define LIBBUILD2_DEFEXPORT +# endif +#else +// If none of the above macros are defined, then we assume we are being used +// by some third-party build system that cannot/doesn't signal the library +// type. Note that this fallback works for both static and shared but in case +// of shared will be sub-optimal compared to having dllimport. Also note that +// bootstrap ends up here as well. +// +// Using static or shared. +// +# define LIBBUILD2_SYMEXPORT +# define LIBBUILD2_DECEXPORT +# define LIBBUILD2_DEFEXPORT +#endif diff --git a/libbuild2/file.cxx b/libbuild2/file.cxx new file mode 100644 index 0000000..5966168 --- /dev/null +++ b/libbuild2/file.cxx @@ -0,0 +1,1660 @@ +// file : libbuild2/file.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/file.hxx> + +#include <iostream> // cin + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/filesystem.hxx> // exists() +#include <libbuild2/prerequisite.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/token.hxx> +#include <libbuild2/lexer.hxx> +#include <libbuild2/parser.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // Standard and alternative build file/directory naming schemes. 
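The export macros above are normally driven from the compiler command line rather than from code; a short sketch of the intended configurations and their effect (the dump() declaration is simply reused from above as the example).

  // Building libbuild2 as a shared library:  -DLIBBUILD2_SHARED_BUILD
  // Using the shared library:                -DLIBBUILD2_SHARED
  // Building/using the static library:       -DLIBBUILD2_STATIC_BUILD or
  //                                          -DLIBBUILD2_STATIC
  //
  LIBBUILD2_SYMEXPORT void
  dump (optional<action> = nullopt);
  //
  // On Windows the declaration above picks up __declspec(dllexport) when
  // building the shared library and __declspec(dllimport) when using it;
  // in all other configurations the macro expands to nothing.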
+ // + const dir_path std_build_dir ("build"); + const dir_path std_root_dir (dir_path (std_build_dir) /= "root"); + const dir_path std_bootstrap_dir (dir_path (std_build_dir) /= "bootstrap"); + + const path std_root_file (std_build_dir / "root.build"); + const path std_bootstrap_file (std_build_dir / "bootstrap.build"); + const path std_src_root_file (std_bootstrap_dir / "src-root.build"); + const path std_out_root_file (std_bootstrap_dir / "out-root.build"); + const path std_export_file (std_build_dir / "export.build"); + + const string std_build_ext ("build"); + const path std_buildfile_file ("buildfile"); + const path std_buildignore_file (".buildignore"); + + // + + const dir_path alt_build_dir ("build2"); + const dir_path alt_root_dir (dir_path (alt_build_dir) /= "root"); + const dir_path alt_bootstrap_dir (dir_path (alt_build_dir) /= "bootstrap"); + + const path alt_root_file (alt_build_dir / "root.build2"); + const path alt_bootstrap_file (alt_build_dir / "bootstrap.build2"); + const path alt_src_root_file (alt_bootstrap_dir / "src-root.build2"); + const path alt_out_root_file (alt_bootstrap_dir / "out-root.build2"); + const path alt_export_file (alt_build_dir / "export.build2"); + + const string alt_build_ext ("build2"); + const path alt_buildfile_file ("build2file"); + const path alt_buildignore_file (".build2ignore"); + + ostream& + operator<< (ostream& os, const subprojects& sps) + { + for (auto b (sps.begin ()), i (b); os && i != sps.end (); ++i) + { + // See find_subprojects() for details. + // + const project_name& n ( + path::traits_type::is_separator (i->first.string ().back ()) + ? empty_project_name + : i->first); + + os << (i != b ? " " : "") << n << '@' << i->second; + } + + return os; + } + + // Check if the standard/alternative file/directory exists, returning empty + // path if it does not. + // + template <typename T> + static T + exists (const dir_path& d, const T& s, const T& a, optional<bool>& altn) + { + T p; + bool e; + + if (altn) + { + p = d / (*altn ? a : s); + e = exists (p); + } + else + { + // Check the alternative name first since it is more specific. + // + p = d / a; + + if ((e = exists (p))) + altn = true; + else + { + p = d / s; + + if ((e = exists (p))) + altn = false; + } + } + + return e ? p : T (); + } + + bool + is_src_root (const dir_path& d, optional<bool>& altn) + { + // We can't have root without bootstrap.build. + // + return !exists (d, std_bootstrap_file, alt_bootstrap_file, altn).empty (); + } + + bool + is_out_root (const dir_path& d, optional<bool>& altn) + { + return !exists (d, std_src_root_file, alt_src_root_file, altn).empty (); + } + + dir_path + find_src_root (const dir_path& b, optional<bool>& altn) + { + for (dir_path d (b); !d.root () && d != home; d = d.directory ()) + { + if (is_src_root (d, altn)) + return d; + } + + return dir_path (); + } + + pair<dir_path, bool> + find_out_root (const dir_path& b, optional<bool>& altn) + { + for (dir_path d (b); !d.root () && d != home; d = d.directory ()) + { + bool s; + if ((s = is_src_root (d, altn)) || is_out_root (d, altn)) + return make_pair (move (d), s); + } + + return make_pair (dir_path (), false); + } + + dir_path old_src_root; + dir_path new_src_root; + + // Remap the src_root variable value if it is inside old_src_root. 
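A sketch of how the altn-aware exists() helper above is used when the naming scheme is not yet known (the probed directory is hypothetical).

  optional<bool> altn; // nullopt: standard vs alternative not yet known.

  // Checks build2/bootstrap.build2 first (more specific), then
  // build/bootstrap.build, recording the matching scheme in altn.
  //
  path f (exists (dir_path ("/tmp/hello"),
                  std_bootstrap_file,
                  alt_bootstrap_file,
                  altn));

  if (!f.empty ())
  {
    // f is the bootstrap file that exists and *altn says which naming
    // scheme (false: standard, true: alternative) it belongs to.
  }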
+ // + static inline void + remap_src_root (value& v) + { + if (!old_src_root.empty ()) + { + dir_path& d (cast<dir_path> (v)); + + if (d.sub (old_src_root)) + d = new_src_root / d.leaf (old_src_root); + } + } + + static void + source (scope& root, scope& base, const path& bf, bool boot) + { + tracer trace ("source"); + + try + { + bool sin (bf.string () == "-"); + + ifdstream ifs; + + if (!sin) + ifs.open (bf); + else + cin.exceptions (ifdstream::failbit | ifdstream::badbit); + + istream& is (sin ? cin : ifs); + + l5 ([&]{trace << "sourcing " << bf;}); + + parser p (boot); + p.parse_buildfile (is, bf, root, base); + } + catch (const io_error& e) + { + fail << "unable to read buildfile " << bf << ": " << e; + } + } + + void + source (scope& root, scope& base, const path& bf) + { + source (root, base, bf, false); + } + + bool + source_once (scope& root, scope& base, const path& bf, scope& once) + { + tracer trace ("source_once"); + + if (!once.buildfiles.insert (bf).second) + { + l5 ([&]{trace << "skipping already sourced " << bf;}); + return false; + } + + source (root, base, bf); + return true; + } + + // Source (once) pre-*.build (pre is true) or post-*.build (otherwise) hooks + // from the specified directory (build/{bootstrap,root}/ of out_root) which + // must exist. + // + static void + source_hooks (scope& root, const dir_path& d, bool pre) + { + // While we could have used the wildcard pattern matching functionality, + // our needs are pretty basic and performance is quite important, so let's + // handle this ourselves. + // + try + { + for (const dir_entry& de: dir_iterator (d, false /* ignore_dangling */)) + { + // If this is a link, then type() will try to stat() it. And if the + // link is dangling or points to something inaccessible, it will fail. + // So let's first check that the name matches and only then check the + // type. + // + const path& n (de.path ()); + + if (n.string ().compare (0, + pre ? 4 : 5, + pre ? "pre-" : "post-") != 0 || + n.extension () != root.root_extra->build_ext) + continue; + + path f (d / n); + + try + { + if (de.type () != entry_type::regular) + continue; + } + catch (const system_error& e) + { + fail << "unable to read buildfile " << f << ": " << e; + } + + source_once (root, root, f); + } + } + catch (const system_error& e) + { + fail << "unable to iterate over " << d << ": " << e; + } + } + + scope_map::iterator + create_root (scope& l, const dir_path& out_root, const dir_path& src_root) + { + auto i (scopes.rw (l).insert (out_root, true /* root */)); + scope& rs (i->second); + + // Set out_path. Note that src_path is set in setup_root() below. + // + if (rs.out_path_ != &i->first) + { + assert (rs.out_path_ == nullptr); + rs.out_path_ = &i->first; + } + + // If this is already a root scope, verify that things are consistent. + // + { + value& v (rs.assign (var_out_root)); + + if (!v) + v = out_root; + else + { + const dir_path& p (cast<dir_path> (v)); + + if (p != out_root) + fail << "new out_root " << out_root << " does not match " + << "existing " << p; + } + } + + if (!src_root.empty ()) + { + value& v (rs.assign (var_src_root)); + + if (!v) + v = src_root; + else + { + const dir_path& p (cast<dir_path> (v)); + + if (p != src_root) + fail << "new src_root " << src_root << " does not match " + << "existing " << p; + } + } + + return i; + } + + void + setup_root (scope& s, bool forwarded) + { + // The caller must have made sure src_root is set on this scope. 
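In terms of concrete file names, the source_hooks() matching above boils down to sourcing files like these from out_root (the foo part is hypothetical; only the pre-/post- prefix and the naming-scheme extension matter).

  // build/bootstrap/pre-foo.build    sourced by bootstrap_pre()
  // build/bootstrap/post-foo.build   sourced by bootstrap_post()
  // build/root/pre-foo.build         sourced by load_root() before root.build
  // build/root/post-foo.build        sourced by load_root() after root.build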
+ // + value& v (s.assign (var_src_root)); + assert (v); + const dir_path& d (cast<dir_path> (v)); + + if (s.src_path_ == nullptr) + s.src_path_ = &d; + else + assert (s.src_path_ == &d); + + s.assign (var_forwarded) = forwarded; + } + + scope& + setup_base (scope_map::iterator i, + const dir_path& out_base, + const dir_path& src_base) + { + scope& s (i->second); + + // Set src/out_base variables. + // + value& ov (s.assign (var_out_base)); + + if (!ov) + ov = out_base; + else + assert (cast<dir_path> (ov) == out_base); + + value& sv (s.assign (var_src_base)); + + if (!sv) + sv = src_base; + else + assert (cast<dir_path> (sv) == src_base); + + // Set src/out_path. The key (i->first) is out_base. + // + if (s.out_path_ == nullptr) + s.out_path_ = &i->first; + else + assert (*s.out_path_ == out_base); + + if (s.src_path_ == nullptr) + s.src_path_ = &cast<dir_path> (sv); + else + assert (*s.src_path_ == src_base); + + return s; + } + + pair<scope&, scope*> + switch_scope (scope& root, const dir_path& p) + { + // First, enter the scope into the map and see if it is in any project. If + // it is not, then there is nothing else to do. + // + auto i (scopes.rw (root).insert (p)); + scope& base (i->second); + scope* rs (base.root_scope ()); + + if (rs != nullptr) + { + // Path p can be src_base or out_base. Figure out which one it is. + // + dir_path out_base (p.sub (rs->out_path ()) ? p : src_out (p, *rs)); + + // Create and bootstrap root scope(s) of subproject(s) that this scope + // may belong to. If any were created, load them. Note that we need to + // do this before figuring out src_base since we may switch the root + // project (and src_root with it). + // + { + scope* nrs (&create_bootstrap_inner (*rs, out_base)); + + if (rs != nrs) + rs = nrs; + } + + // Switch to the new root scope. + // + if (rs != &root) + load_root (*rs); // Load new root(s) recursively. + + // Now we can figure out src_base and finish setting the scope. + // + dir_path src_base (src_out (out_base, *rs)); + setup_base (i, move (out_base), move (src_base)); + } + + return pair<scope&, scope*> (base, rs); + } + + dir_path + bootstrap_fwd (const dir_path& src_root, optional<bool>& altn) + { + path f (exists (src_root, std_out_root_file, alt_out_root_file, altn)); + + if (f.empty ()) + return src_root; + + // We cannot just source the buildfile since there is no scope to do + // this on yet. + // + auto p (extract_variable (f, *var_out_root)); + + if (!p.second) + fail << "variable out_root expected as first line in " << f; + + try + { + return convert<dir_path> (move (p.first)); + } + catch (const invalid_argument& e) + { + fail << "invalid out_root value in " << f << ": " << e << endf; + } + } + + static void + setup_root_extra (scope& root, optional<bool>& altn) + { + assert (altn && root.root_extra == nullptr); + bool a (*altn); + + root.root_extra = unique_ptr<scope::root_data> ( + new scope::root_data { + a, + a ? alt_build_ext : std_build_ext, + a ? alt_build_dir : std_build_dir, + a ? alt_buildfile_file : std_buildfile_file, + a ? alt_buildignore_file : std_buildignore_file, + a ? alt_root_dir : std_root_dir, + a ? alt_bootstrap_dir : std_bootstrap_dir, + a ? alt_bootstrap_file : std_bootstrap_file, + a ? alt_root_file : std_root_file, + a ? alt_export_file : std_export_file, + a ? alt_src_root_file : std_src_root_file, + a ? 
alt_out_root_file : std_out_root_file, + {}, /* meta_operations */ + {}, /* operations */ + {}, /* modules */ + {} /* override_cache */}); + + // Enter built-in meta-operation and operation names. Loading of + // modules (via the src bootstrap; see below) can result in + // additional meta/operations being added. + // + root.insert_meta_operation (noop_id, mo_noop); + root.insert_meta_operation (perform_id, mo_perform); + root.insert_meta_operation (info_id, mo_info); + + root.insert_operation (default_id, op_default); + root.insert_operation (update_id, op_update); + root.insert_operation (clean_id, op_clean); + } + + void + bootstrap_out (scope& root, optional<bool>& altn) + { + const dir_path& out_root (root.out_path ()); + + path f (exists (out_root, std_src_root_file, alt_src_root_file, altn)); + + if (f.empty ()) + return; + + if (root.root_extra == nullptr) + setup_root_extra (root, altn); + + //@@ TODO: if bootstrap files can source other bootstrap files (for + // example, as a way to express dependecies), then we need a way to + // prevent multiple sourcing. We handle it here but we still need + // something like source_once (once [scope] source) in buildfiles. + // + source_once (root, root, f); + } + + pair<value, bool> + extract_variable (const path& bf, const variable& var) + { + try + { + ifdstream ifs (bf); + + lexer lex (ifs, bf); + token t (lex.next ()); + token_type tt; + + if (t.type != token_type::word || t.value != var.name || + ((tt = lex.next ().type) != token_type::assign && + tt != token_type::prepend && + tt != token_type::append)) + { + return make_pair (value (), false); + } + + parser p; + temp_scope tmp (global_scope->rw ()); + p.parse_variable (lex, tmp, var, tt); + + value* v (tmp.vars.find_to_modify (var).first); + assert (v != nullptr); + + // Steal the value, the scope is going away. + // + return make_pair (move (*v), true); + } + catch (const io_error& e) + { + fail << "unable to read buildfile " << bf << ": " << e << endf; + } + } + + // Extract the project name from bootstrap.build. + // + static project_name + find_project_name (const dir_path& out_root, + const dir_path& fallback_src_root, + optional<bool> out_src, // True if out_root is src_root. + optional<bool>& altn) + { + tracer trace ("find_project_name"); + + // First check if the root scope for this project has already been setup + // in which case we will have src_root and maybe even the name. + // + const dir_path* src_root (nullptr); + const scope& s (scopes.find (out_root)); + + if (s.root_scope () == &s && s.out_path () == out_root) + { + if (s.root_extra != nullptr) + { + if (!altn) + altn = s.root_extra->altn; + else + assert (*altn == s.root_extra->altn); + } + + if (lookup l = s.vars[var_project]) + return cast<project_name> (l); + + src_root = s.src_path_; + } + + // Load the project name. If this subdirectory is the subproject's + // src_root, then we can get directly to that. Otherwise, we first have to + // discover its src_root. + // + value src_root_v; // Need it to live until the end. + + if (src_root == nullptr) + { + if (out_src ? *out_src : is_src_root (out_root, altn)) + src_root = &out_root; + else + { + path f (exists (out_root, std_src_root_file, alt_src_root_file, altn)); + + if (f.empty ()) + { + // Note: the same diagnostics as in main(). 
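The extract_variable() helper above is what lets the bootstrap code peek into files before any scope is set up; a sketch matching its uses in bootstrap_fwd() and find_project_name() (the out_root value and the path shown in the comment are hypothetical).

  // build/bootstrap/src-root.build is expected to start with a line like:
  //
  //   src_root = /home/user/hello/
  //
  path f (out_root / std_src_root_file);

  auto p (extract_variable (f, *var_src_root));

  if (!p.second)
    fail << "variable src_root expected as first line in " << f;

  // May throw invalid_argument on a bad value (see bootstrap_fwd()).
  //
  dir_path src_root (convert<dir_path> (move (p.first)));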
+ // + if (fallback_src_root.empty ()) + fail << "no bootstrapped src_root for " << out_root << + info << "consider reconfiguring this out_root"; + + src_root = &fallback_src_root; + } + else + { + auto p (extract_variable (f, *var_src_root)); + + if (!p.second) + fail << "variable src_root expected as first line in " << f; + + src_root_v = move (p.first); + remap_src_root (src_root_v); // Remap if inside old_src_root. + src_root = &cast<dir_path> (src_root_v); + + l5 ([&]{trace << "extracted src_root " << *src_root + << " for " << out_root;}); + } + } + } + + project_name name; + { + path f (exists (*src_root, std_bootstrap_file, alt_bootstrap_file, altn)); + + if (f.empty ()) + fail << "no build/bootstrap.build in " << *src_root; + + auto p (extract_variable (f, *var_project)); + + if (!p.second) + fail << "variable " << var_project->name << " expected " + << "as a first line in " << f; + + name = cast<project_name> (move (p.first)); + } + + l5 ([&]{trace << "extracted project name '" << name << "' for " + << *src_root;}); + return name; + } + + // Scan the specified directory for any subprojects. If a subdirectory + // is a subproject, then enter it into the map, handling the duplicates. + // + static void + find_subprojects (subprojects& sps, + const dir_path& d, + const dir_path& root, + bool out) + { + tracer trace ("find_subprojects"); + + try + { + for (const dir_entry& de: dir_iterator (d, true /* ignore_dangling */)) + { + if (de.type () != entry_type::directory) + continue; + + dir_path sd (d / path_cast<dir_path> (de.path ())); + + bool src (false); + optional<bool> altn; + + if (!((out && is_out_root (sd, altn)) || + (src = is_src_root (sd, altn)))) + { + // We used to scan for subproject recursively but this is probably + // too loose (think of some tests laying around). In the future we + // should probably allow specifying something like extra/* or + // extra/** in subprojects. + // + //find_subprojects (sps, sd, root, out); + // + continue; + } + + // Calculate relative subdirectory for this subproject. + // + dir_path dir (sd.leaf (root)); + l5 ([&]{trace << "subproject " << sd << " as " << dir;}); + + // Load its name. Note that here we don't use fallback src_root + // since this function is used to scan both out_root and src_root. + // + project_name name (find_project_name (sd, dir_path (), src, altn)); + + // If the name is empty, then is is an unnamed project. While the + // 'project' variable stays empty, here we come up with a surrogate + // name for a key. The idea is that such a key should never conflict + // with a real project name. We ensure this by using the project's + // sub-directory and appending a trailing directory separator to it. + // + if (name.empty ()) + name = project_name (dir.posix_string () + '/', + project_name::raw_string); + + // @@ Can't use move() because we may need the values in diagnostics + // below. Looks like C++17 try_emplace() is what we need. + // + auto rp (sps.emplace (name, dir)); + + // Handle duplicates. 
+ // + if (!rp.second) + { + const dir_path& dir1 (rp.first->second); + + if (dir != dir1) + fail << "inconsistent subproject directories for " << name << + info << "first alternative: " << dir1 << + info << "second alternative: " << dir; + + l6 ([&]{trace << "skipping duplicate";}); + } + } + } + catch (const system_error& e) + { + fail << "unable to iterate over " << d << ": " << e; + } + } + + bool + bootstrap_src (scope& root, optional<bool>& altn) + { + tracer trace ("bootstrap_src"); + + bool r (false); + + const dir_path& out_root (root.out_path ()); + const dir_path& src_root (root.src_path ()); + + { + path f (exists (src_root, std_bootstrap_file, alt_bootstrap_file, altn)); + + if (root.root_extra == nullptr) + { + // If nothing so far has indicated the naming, assume standard. + // + if (!altn) + altn = false; + + setup_root_extra (root, altn); + } + + if (!f.empty ()) + { + // We assume that bootstrap out cannot load this file explicitly. It + // feels wrong to allow this since that makes the whole bootstrap + // process hard to reason about. But we may try to bootstrap the same + // root scope multiple time. + // + if (root.buildfiles.insert (f).second) + source (root, root, f, true); + else + l5 ([&]{trace << "skipping already sourced " << f;}); + + r = true; + } + } + + // See if we are a part of an amalgamation. There are two key players: the + // outer root scope which may already be present (i.e., we were loaded as + // part of an amalgamation) and the amalgamation variable that may or may + // not be set by the user (in bootstrap.build) or by an earlier call to + // this function for the same scope. When set by the user, the empty + // special value means that the project shall not be amalgamated (and + // which we convert to NULL below). When calculated, the NULL value + // indicates that we are not amalgamated. + // + // Note: the amalgamation variable value is always a relative directory. + // + { + auto rp (root.vars.insert (*var_amalgamation)); // Set NULL by default. + value& v (rp.first); + + if (v && v.empty ()) // Convert empty to NULL. + v = nullptr; + + if (scope* aroot = root.parent_scope ()->root_scope ()) + { + const dir_path& ad (aroot->out_path ()); + dir_path rd (ad.relative (out_root)); + + // If we already have the amalgamation variable set, verify + // that aroot matches its value. + // + if (!rp.second) + { + if (!v) + { + fail << out_root << " cannot be amalgamated" << + info << "amalgamated by " << ad; + } + else + { + const dir_path& vd (cast<dir_path> (v)); + + if (vd != rd) + { + fail << "inconsistent amalgamation of " << out_root << + info << "specified: " << vd << + info << "actual: " << rd << " by " << ad; + } + } + } + else + { + // Otherwise, use the outer root as our amalgamation. + // + l5 ([&]{trace << out_root << " amalgamated as " << rd;}); + v = move (rd); + } + } + else if (rp.second) + { + // If there is no outer root and the amalgamation variable + // hasn't been set, then we need to check if any of the + // outer directories is a project's out_root. If so, then + // that's our amalgamation. + // + optional<bool> altn; + const dir_path& ad (find_out_root (out_root.directory (), altn).first); + + if (!ad.empty ()) + { + dir_path rd (ad.relative (out_root)); + l5 ([&]{trace << out_root << " amalgamated as " << rd;}); + v = move (rd); + } + } + } + + // See if we have any subprojects. In a sense, this is the other + // side/direction of the amalgamation logic above. 
Here, the subprojects + // variable may or may not be set by the user (in bootstrap.build) or by + // an earlier call to this function for the same scope. When set by the + // user, the empty special value means that there are no subproject and + // none should be searched for (and which we convert to NULL below). + // Otherwise, it is a list of [project@]directory pairs. The directory + // must be relative to our out_root. If the project name is not specified, + // then we have to figure it out. When subprojects are calculated, the + // NULL value indicates that we found no subprojects. + // + { + auto rp (root.vars.insert (*var_subprojects)); // Set NULL by default. + value& v (rp.first); + + if (rp.second) + { + // No subprojects set so we need to figure out if there are any. + // + // First we are going to scan our out_root and find all the + // pre-configured subprojects. Then, if out_root != src_root, + // we are going to do the same for src_root. Here, however, + // we need to watch out for duplicates. + // + subprojects sps; + + if (exists (out_root)) + { + l5 ([&]{trace << "looking for subprojects in " << out_root;}); + find_subprojects (sps, out_root, out_root, true); + } + + if (out_root != src_root) + { + l5 ([&]{trace << "looking for subprojects in " << src_root;}); + find_subprojects (sps, src_root, src_root, false); + } + + if (!sps.empty ()) // Keep it NULL if no subprojects. + v = move (sps); + } + else if (v) + { + // Convert empty to NULL. + // + if (v.empty ()) + v = nullptr; + else + { + // Scan the (untyped) value and convert it to the "canonical" form, + // that is, a list of name@dir pairs. + // + subprojects sps; + names& ns (cast<names> (v)); + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + // Project name. + // + project_name n; + if (i->pair) + { + if (i->pair != '@') + fail << "unexpected pair style in variable subprojects"; + + try + { + n = convert<project_name> (move (*i)); + + if (n.empty ()) + fail << "empty project name in variable subprojects"; + } + catch (const invalid_argument&) + { + fail << "expected project name instead of '" << *i << "' in " + << "variable subprojects"; + } + + ++i; // Got to have the second half of the pair. + } + + // Directory. + // + dir_path d; + try + { + d = convert<dir_path> (move (*i)); + + if (d.empty ()) + fail << "empty directory in variable subprojects"; + } + catch (const invalid_argument&) + { + fail << "expected directory instead of '" << *i << "' in " + << "variable subprojects"; + } + + // Figure out the project name if the user didn't specify one. + // + if (n.empty ()) + { + optional<bool> altn; + + // Pass fallback src_root since this is a subproject that was + // specified by the user so it is most likely in our src. + // + n = find_project_name (out_root / d, + src_root / d, + nullopt /* out_src */, + altn); + + // See find_subprojects() for details on unnamed projects. + // + if (n.empty ()) + n = project_name (d.posix_string () + '/', + project_name::raw_string); + } + + sps.emplace (move (n), move (d)); + } + + // Change the value to the typed map. + // + v = move (sps); + } + } + } + + return r; + } + + void + bootstrap_pre (scope& root, optional<bool>& altn) + { + const dir_path& out_root (root.out_path ()); + + // This test is a bit loose in a sense that there can be a stray + // build/bootstrap/ directory that will make us mis-treat a project as + // following the standard naming scheme (the other way, while also + // possible, is a lot less likely). 
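To illustrate the subprojects normalization described above: a hypothetical bootstrap.build line such as subprojects = libhello extras/libprint ends up re-typed as the canonical name@directory map, with missing names discovered via find_project_name().

  subprojects sps;

  // Canonical form: libhello@libhello libprint@extras/libprint (the second
  // name is read from that subproject's own bootstrap.build).
  //
  sps.emplace (project_name ("libhello"), dir_path ("libhello"));
  sps.emplace (project_name ("libprint"), dir_path ("extras/libprint"));

  v = move (sps); // Change the variable value to the typed map.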
If this does becomes a problem, we can + // always tighten the test by also looking for a hook file with the + // correct extension. + // + dir_path d (exists (out_root, std_bootstrap_dir, alt_bootstrap_dir, altn)); + + if (!d.empty ()) + { + if (root.root_extra == nullptr) + setup_root_extra (root, altn); + + source_hooks (root, d, true /* pre */); + } + } + + void + bootstrap_post (scope& root) + { + const dir_path& out_root (root.out_path ()); + + dir_path d (out_root / root.root_extra->bootstrap_dir); + + if (exists (d)) + source_hooks (root, d, false /* pre */); + } + + bool + bootstrapped (scope& root) + { + // Use the subprojects variable set by bootstrap_src() as an indicator. + // It should either be NULL or typed (so we assume that the user will + // never set it to NULL). + // + auto l (root.vars[var_subprojects]); + return l.defined () && (l->null || l->type != nullptr); + } + + // Return true if the inner/outer project (identified by out/src_root) of + // the 'origin' project (identified by orig) should be forwarded. + // + static inline bool + forwarded (const scope& orig, + const dir_path& out_root, + const dir_path& src_root, + optional<bool>& altn) + { + // The conditions are: + // + // 1. Origin is itself forwarded. + // + // 2. Inner/outer src_root != out_root. + // + // 3. Inner/outer out-root.build exists in src_root and refers out_root. + // + return (out_root != src_root && + cast_false<bool> (orig.vars[var_forwarded]) && + bootstrap_fwd (src_root, altn) == out_root); + } + + void + create_bootstrap_outer (scope& root) + { + auto l (root.vars[var_amalgamation]); + + if (!l) + return; + + const dir_path& d (cast<dir_path> (l)); + dir_path out_root (root.out_path () / d); + out_root.normalize (); // No need to actualize (d is a bunch of ..) + + // src_root is a bit more complicated. Here we have three cases: + // + // 1. Amalgamation's src_root is "parallel" to the sub-project's. + // 2. Amalgamation's src_root is the same as its out_root. + // 3. Some other pre-configured (via src-root.build) src_root. + // + // So we need to try all these cases in some sensible order. #3 should + // probably be tried first since that src_root was explicitly configured + // by the user. After that, #2 followed by #1 seems reasonable. + // + scope& rs (create_root (root, out_root, dir_path ())->second); + + bool bstrapped (bootstrapped (rs)); + + optional<bool> altn; + if (!bstrapped) + { + bootstrap_out (rs, altn); // #3 happens here (or it can be #1). + + value& v (rs.assign (var_src_root)); + + if (!v) + { + if (is_src_root (out_root, altn)) // #2 + v = out_root; + else // #1 + { + dir_path src_root (root.src_path () / d); + src_root.normalize (); // No need to actualize (as above). + v = move (src_root); + } + } + else + remap_src_root (v); // Remap if inside old_src_root. + + setup_root (rs, forwarded (root, out_root, v.as<dir_path> (), altn)); + bootstrap_pre (rs, altn); + bootstrap_src (rs, altn); + // bootstrap_post() delayed until after create_bootstrap_outer(). + } + else + { + altn = rs.root_extra->altn; + + if (forwarded (root, rs.out_path (), rs.src_path (), altn)) + rs.assign (var_forwarded) = true; // Only upgrade (see main()). + } + + create_bootstrap_outer (rs); + + if (!bstrapped) + bootstrap_post (rs); + + // Check if we are strongly amalgamated by this outer root scope. + // + if (root.src_path ().sub (rs.src_path ())) + root.strong_ = rs.strong_scope (); // Itself or some outer scope. 
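A sketch of a directory layout that satisfies the three forwarded() conditions listed above (all paths are hypothetical).

  // src_root:  /home/user/hello/             (source directory)
  // out_root:  /home/user/hello-gcc/hello/   (forwarded configuration)
  //
  // /home/user/hello/build/bootstrap/out-root.build starts with:
  //
  //   out_root = /home/user/hello-gcc/hello/
  //
  // and the origin project itself is forwarded, so
  // forwarded (orig, out_root, src_root, altn) returns true.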
+ } + + scope& + create_bootstrap_inner (scope& root, const dir_path& out_base) + { + scope* r (&root); + + if (auto l = root.vars[var_subprojects]) + { + for (const auto& p: cast<subprojects> (l)) + { + dir_path out_root (root.out_path () / p.second); + + if (!out_base.empty () && !out_base.sub (out_root)) + continue; + + // The same logic to src_root as in create_bootstrap_outer(). + // + scope& rs (create_root (root, out_root, dir_path ())->second); + + optional<bool> altn; + if (!bootstrapped (rs)) + { + bootstrap_out (rs, altn); + + value& v (rs.assign (var_src_root)); + + if (!v) + { + v = is_src_root (out_root, altn) + ? out_root + : (root.src_path () / p.second); + } + else + remap_src_root (v); // Remap if inside old_src_root. + + setup_root (rs, forwarded (root, out_root, v.as<dir_path> (), altn)); + bootstrap_pre (rs, altn); + bootstrap_src (rs, altn); + bootstrap_post (rs); + } + else + { + altn = rs.root_extra->altn; + if (forwarded (root, rs.out_path (), rs.src_path (), altn)) + rs.assign (var_forwarded) = true; // Only upgrade (see main()). + } + + // Check if we strongly amalgamated this inner root scope. + // + if (rs.src_path ().sub (root.src_path ())) + rs.strong_ = root.strong_scope (); // Itself or some outer scope. + + // See if there are more inner roots. + // + r = &create_bootstrap_inner (rs, out_base); + + if (!out_base.empty ()) + break; // We have found our subproject. + } + } + + return *r; + } + + void + load_root (scope& root) + { + tracer trace ("load_root"); + + const dir_path& out_root (root.out_path ()); + const dir_path& src_root (root.src_path ()); + + // As an optimization, check if we have already loaded root.build. If + // that's the case, then we have already been called for this project. + // + path f (src_root / root.root_extra->root_file); + + if (root.buildfiles.find (f) != root.buildfiles.end ()) + return; + + // First load outer roots, if any. + // + if (scope* rs = root.parent_scope ()->root_scope ()) + load_root (*rs); + + // Finish off loading bootstrapped modules. + // + for (auto& p: root.root_extra->modules) + { + module_state& s (p.second); + + if (s.boot && s.first) + load_module (root, root, p.first, s.loc); + } + + for (auto& p: root.root_extra->modules) + { + module_state& s (p.second); + + if (s.boot && !s.first) + load_module (root, root, p.first, s.loc); + } + + // Load hooks and root.build. + // + // We can load the pre hooks before finishing off loading the bootstrapped + // modules (which, in case of config would load config.build) or after and + // one can come up with a plausible use-case for either approach. Note, + // however, that one can probably achieve adequate pre-modules behavior + // with a post-bootstrap hook. + // + dir_path hd (out_root / root.root_extra->root_dir); + bool he (exists (hd)); + + if (he) source_hooks (root, hd, true /* pre */); + if (exists (f)) source_once (root, root, f); + if (he) source_hooks (root, hd, false /* pre */); + } + + scope& + load_project (scope& lock, + const dir_path& out_root, + const dir_path& src_root, + bool forwarded, + bool load) + { + assert (!forwarded || out_root != src_root); + + auto i (create_root (lock, out_root, src_root)); + scope& rs (i->second); + + if (!bootstrapped (rs)) + { + optional<bool> altn; + bootstrap_out (rs, altn); + setup_root (rs, forwarded); + bootstrap_pre (rs, altn); + bootstrap_src (rs, altn); + bootstrap_post (rs); + } + else + { + if (forwarded) + rs.assign (var_forwarded) = true; // Only upgrade (see main()). 
+ } + + if (load) + { + load_root (rs); + setup_base (i, out_root, src_root); // Setup as base. + } + + return rs; + } + + names + import (scope& ibase, name target, const location& loc) + { + tracer trace ("import"); + + l5 ([&]{trace << target << " from " << ibase;}); + + // If there is no project specified for this target, then our run will be + // short and sweet: we simply return it as empty-project-qualified and + // let someone else (e.g., a rule) take a stab at it. + // + if (target.unqualified ()) + { + target.proj = project_name (); + return names {move (target)}; + } + + // Otherwise, get the project name and convert the target to unqualified. + // + project_name proj (move (*target.proj)); + target.proj = nullopt; + + scope& iroot (*ibase.root_scope ()); + + // Figure out this project's out_root. + // + dir_path out_root; + + // First try the config.import.* mechanism. The idea is that if the user + // explicitly told us the project's location, then we should prefer that + // over anything that we may discover. In particular, we will prefer it + // over any bundled subprojects. + // + auto& vp (var_pool.rw (iroot)); + + for (;;) // Break-out loop. + { + string n ("config.import." + proj.variable ()); + + // config.import.<proj> + // + { + // Note: pattern-typed in context.cxx:reset() as an overridable + // variable of type abs_dir_path (path auto-completion). + // + const variable& var (vp.insert (n)); + + if (auto l = iroot[var]) + { + out_root = cast<dir_path> (l); // Normalized and actualized. + + // Mark as part of config. + // + if (config_save_variable != nullptr) + config_save_variable (iroot, var, 0 /* flags */); + + // Empty config.import.* value means don't look in subprojects or + // amalgamations and go straight to the rule-specific import (e.g., + // to use system-installed). + // + if (out_root.empty ()) + { + target.proj = move (proj); + l5 ([&]{trace << "skipping " << target;}); + return names {move (target)}; + } + + break; + } + } + + // config.import.<proj>.<name>.<type> + // config.import.<proj>.<name> + // + // For example: config.import.build2.b.exe=/opt/build2/bin/b + // + if (!target.value.empty ()) + { + auto lookup = [&iroot, &vp, &loc] (string name) -> path + { + // Note: pattern-typed in context.cxx:reset() as an overridable + // variable of type path. + // + const variable& var (vp.insert (move (name))); + + path r; + if (auto l = iroot[var]) + { + r = cast<path> (l); + + if (r.empty ()) + fail (loc) << "empty path in " << var.name; + + if (config_save_variable != nullptr) + config_save_variable (iroot, var, 0 /* flags */); + } + + return r; + }; + + // First try .<name>.<type>, then just .<name>. + // + path p; + if (target.typed ()) + p = lookup (n + '.' + target.value + '.' + target.type); + + if (p.empty ()) + p = lookup (n + '.' + target.value); + + if (!p.empty ()) + { + // If the path is relative, then keep it project-qualified assuming + // import phase 2 knows what to do with it. Think: + // + // config.import.build2.b=b-boot + // + if (p.relative ()) + target.proj = move (proj); + + target.dir = p.directory (); + target.value = p.leaf ().string (); + + return names {move (target)}; + } + } + + // Otherwise search subprojects, starting with our root and then trying + // outer roots for as long as we are inside an amalgamation. + // + for (scope* r (&iroot);; r = r->parent_scope ()->root_scope ()) + { + l5 ([&]{trace << "looking in " << *r;}); + + // First check the amalgamation itself. 
+ // + if (r != &iroot && cast<project_name> (r->vars[var_project]) == proj) + { + out_root = r->out_path (); + break; + } + + if (auto l = r->vars[var_subprojects]) + { + const auto& m (cast<subprojects> (l)); + auto i (m.find (proj)); + + if (i != m.end ()) + { + const dir_path& d ((*i).second); + out_root = r->out_path () / d; + break; + } + } + + if (!r->vars[var_amalgamation]) + break; + } + + break; + } + + // If we couldn't find the project, convert it back into qualified target + // and return to let someone else (e.g., a rule) take a stab at it. + // + if (out_root.empty ()) + { + target.proj = move (proj); + l5 ([&]{trace << "postponing " << target;}); + return names {move (target)}; + } + + // Bootstrap the imported root scope. This is pretty similar to what we do + // in main() except that here we don't try to guess src_root. + // + // The user can also specify the out_root of the amalgamation that contains + // our project. For now we only consider top-level sub-projects. + // + scope* root; + dir_path src_root; + + // See if this is a forwarded configuration. For top-level project we want + // to use the same logic as in main() while for inner subprojects -- as in + // create_bootstrap_inner(). + // + bool fwd (false); + optional<bool> altn; + if (is_src_root (out_root, altn)) + { + src_root = move (out_root); + out_root = bootstrap_fwd (src_root, altn); + fwd = (src_root != out_root); + } + + for (const scope* proot (nullptr); ; proot = root) + { + bool top (proot == nullptr); + + root = &create_root (iroot, out_root, src_root)->second; + + bool bstrapped (bootstrapped (*root)); + + if (!bstrapped) + { + bootstrap_out (*root, altn); + + // Check that the bootstrap process set src_root. + // + auto l (root->vars[*var_src_root]); + if (l) + { + // Note that unlike main() here we fail hard. The idea is that if + // the project we are importing is misconfigured, then it should be + // fixed first. + // + const dir_path& p (cast<dir_path> (l)); + + if (!src_root.empty () && p != src_root) + fail (loc) << "configured src_root " << p << " does not match " + << "discovered " << src_root; + } + else + fail (loc) << "unable to determine src_root for imported " << proj << + info << "consider configuring " << out_root; + + setup_root (*root, + (top + ? fwd + : forwarded (*proot, out_root, l->as<dir_path> (), altn))); + + bootstrap_pre (*root, altn); + bootstrap_src (*root, altn); + if (!top) + bootstrap_post (*root); + } + else + { + altn = root->root_extra->altn; + + if (src_root.empty ()) + src_root = root->src_path (); + + if (top ? fwd : forwarded (*proot, out_root, src_root, altn)) + root->assign (var_forwarded) = true; // Only upgrade (see main()). + } + + if (top) + { + create_bootstrap_outer (*root); + + if (!bstrapped) + bootstrap_post (*root); + } + + // Now we know this project's name as well as all its subprojects. + // + if (cast<project_name> (root->vars[var_project]) == proj) + break; + + if (auto l = root->vars[var_subprojects]) + { + const auto& m (cast<subprojects> (l)); + auto i (m.find (proj)); + + if (i != m.end ()) + { + const dir_path& d ((*i).second); + altn = nullopt; + out_root = root->out_path () / d; + src_root = is_src_root (out_root, altn) ? out_root : dir_path (); + continue; + } + } + + fail (loc) << out_root << " is not out_root for " << proj; + } + + // Load the imported root scope. + // + load_root (*root); + + // Create a temporary scope so that the export stub does not mess + // up any of our variables. 
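+ //
+ // (For reference, such an export stub -- build/export.build in the standard
+ // naming scheme -- typically looks along these lines for a hypothetical
+ // libhello project:
+ //
+ // $out_root/
+ // {
+ //   include libhello/
+ // }
+ //
+ // export $out_root/libhello/$import.target
+ //
+ // That is, it switches to the imported project's root scope, makes sure the
+ // relevant buildfile is loaded, and exports the requested target.)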
+ // + temp_scope ts (ibase); + + // "Pass" the imported project's roots to the stub. + // + ts.assign (var_out_root) = move (out_root); + ts.assign (var_src_root) = move (src_root); + + // Also pass the target being imported in the import.target variable. + // + { + value& v (ts.assign (var_import_target)); + + if (!target.empty ()) // Otherwise leave NULL. + v = target; // Can't move (need for diagnostics below). + } + + // Load the export stub. Note that it is loaded in the context + // of the importing project, not the imported one. The export + // stub will normally switch to the imported root scope at some + // point. + // + path es (root->src_path () / root->root_extra->export_file); + + try + { + ifdstream ifs (es); + + l5 ([&]{trace << "importing " << es;}); + + // @@ Should we verify these are all unqualified names? Or maybe + // there is a use-case for the export stub to return a qualified + // name? + // + parser p; + names v (p.parse_export_stub (ifs, es, iroot, ts)); + + // If there were no export directive executed in an export stub, assume + // the target is not exported. + // + if (v.empty () && !target.empty ()) + fail (loc) << "target " << target << " is not exported by project " + << proj; + + return v; + } + catch (const io_error& e) + { + fail (loc) << "unable to read buildfile " << es << ": " << e; + } + + return names (); // Never reached. + } + + const target* + import (const prerequisite_key& pk, bool existing) + { + tracer trace ("import"); + + assert (pk.proj); + const project_name& proj (*pk.proj); + + // Target type-specific search. + // + const target_key& tk (pk.tk); + const target_type& tt (*tk.type); + + // Try to find the executable in PATH (or CWD if relative). + // + if (tt.is_a<exe> ()) + { + path n (*tk.dir); + n /= *tk.name; + if (tk.ext) + { + n += '.'; + n += *tk.ext; + } + + // Only search in PATH (or CWD). + // + process_path pp (process::try_path_search (n, true, dir_path (), true)); + + if (!pp.empty ()) + { + path& p (pp.effect); + assert (!p.empty ()); // We searched for a simple name. + + const exe* t ( + !existing + ? &targets.insert<exe> (tt, + p.directory (), + dir_path (), // No out (out of project). + p.leaf ().base ().string (), + p.extension (), // Always specified. + trace) + : targets.find<exe> (tt, + p.directory (), + dir_path (), + p.leaf ().base ().string (), + p.extension (), + trace)); + + if (t != nullptr) + { + if (!existing) + t->path (move (p)); + else + assert (t->path () == p); + + return t; + } + } + } + + if (existing) + return nullptr; + + // @@ We no longer have location. This is especially bad for the + // empty case, i.e., where do I need to specify the project + // name)? Looks like the only way to do this is to keep location + // in name and then in prerequisite. Perhaps one day... + // + diag_record dr; + dr << fail << "unable to import target " << pk; + + if (proj.empty ()) + dr << info << "consider adding its installation location" << + info << "or explicitly specify its project name"; + else + dr << info << "use config.import." 
<< proj.variable () + << " command line variable to specify its project out_root"; + + dr << endf; + } +} diff --git a/libbuild2/file.hxx b/libbuild2/file.hxx new file mode 100644 index 0000000..e2e8aaa --- /dev/null +++ b/libbuild2/file.hxx @@ -0,0 +1,243 @@ +// file : libbuild2/file.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_FILE_HXX +#define LIBBUILD2_FILE_HXX + +#include <map> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/variable.hxx> // list_value + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class target; + class location; + class prerequisite_key; + + using subprojects = std::map<project_name, dir_path>; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const subprojects&); // Print as name@dir sequence. + + LIBBUILD2_SYMEXPORT extern const dir_path std_build_dir; // build/ + + // build/root.build + // + LIBBUILD2_SYMEXPORT extern const path std_root_file; + + // build/bootstrap.build + // + LIBBUILD2_SYMEXPORT extern const path std_bootstrap_file; + + LIBBUILD2_SYMEXPORT extern const path std_buildfile_file; // buildfile + LIBBUILD2_SYMEXPORT extern const path alt_buildfile_file; // build2file + + // If the altn argument value is present, then it indicates whether we are + // using the standard or the alternative build file/directory naming. + // + // The overall plan is to run various "file exists" tests using the standard + // and the alternative names. The first test that succeeds determines the + // naming scheme (by setting altn) and from then on all the remaining tests + // only look for things in this scheme. + // + LIBBUILD2_SYMEXPORT bool + is_src_root (const dir_path&, optional<bool>& altn); + + LIBBUILD2_SYMEXPORT bool + is_out_root (const dir_path&, optional<bool>& altn); + + // Given an src_base directory, look for a project's src_root based on the + // presence of known special files. Return empty path if not found. Note + // that if the input is normalized/actualized, then the output will be as + // well. + // + LIBBUILD2_SYMEXPORT dir_path + find_src_root (const dir_path&, optional<bool>& altn); + + // The same as above but for project's out. Note that we also check whether + // a directory happens to be src_root, in case this is an in-tree build with + // the result returned as the second half of the pair. Note also that if the + // input is normalized/actualized, then the output will be as well. + // + LIBBUILD2_SYMEXPORT pair<dir_path, bool> + find_out_root (const dir_path&, optional<bool>& altn); + + // The old/new src_root paths. See main() (where they are set) for details. + // + LIBBUILD2_SYMEXPORT extern dir_path old_src_root; + LIBBUILD2_SYMEXPORT extern dir_path new_src_root; + + // If buildfile is '-', then read from STDIN. + // + LIBBUILD2_SYMEXPORT void + source (scope& root, scope& base, const path&); + + // As above but first check if this buildfile has already been sourced for + // the base scope. Return false if the file has already been sourced. + // + bool + source_once (scope& root, scope& base, const path&); + + // As above but checks against the specified scope rather than base. + // + LIBBUILD2_SYMEXPORT bool + source_once (scope& root, scope& base, const path&, scope& once); + + // Create project's root scope. Only set the src_root variable if the passed + // src_root value is not empty. The scope argument is only used as proof of + // lock. 
+ // + LIBBUILD2_SYMEXPORT scope_map::iterator + create_root (scope&, const dir_path& out_root, const dir_path& src_root); + + // Setup root scope. Note that it assumes the src_root variable has already + // been set. + // + LIBBUILD2_SYMEXPORT void + setup_root (scope&, bool forwarded); + + // Setup the base scope (set *_base variables, etc). + // + LIBBUILD2_SYMEXPORT scope& + setup_base (scope_map::iterator, + const dir_path& out_base, + const dir_path& src_base); + + // Return a scope for the specified directory (first). Note that switching + // to this scope might also involve switch to a new root scope (second) if + // the new scope is in another project. If the new scope is not in any + // project, then NULL is returned in second. + // + LIBBUILD2_SYMEXPORT pair<scope&, scope*> + switch_scope (scope& root, const dir_path&); + + // Bootstrap and optionally load an ad hoc (sub)project (i.e., the kind that + // is not discovered and loaded automatically by bootstrap/load functions + // above). + // + // Note that we expect the outer project (if any) to be bootstrapped and + // loaded and currently we do not add the newly loaded subproject to the + // outer project's subprojects map. + // + // The scope argument is only used as proof of lock. + // + LIBBUILD2_SYMEXPORT scope& + load_project (scope&, + const dir_path& out_root, + const dir_path& src_root, + bool forwarded, + bool load = true); + + // Bootstrap the project's forward. Return the forwarded-to out_root or + // src_root if there is no forward. See is_{src,out}_root() for the altn + // argument semantics. + // + LIBBUILD2_SYMEXPORT dir_path + bootstrap_fwd (const dir_path& src_root, optional<bool>& altn); + + // Bootstrap the project's root scope, the out part. + // + LIBBUILD2_SYMEXPORT void + bootstrap_out (scope& root, optional<bool>& altn); + + // Bootstrap the project's root scope, the src part. Return true if we + // loaded anything (which confirms the src_root is not bogus). + // + LIBBUILD2_SYMEXPORT bool + bootstrap_src (scope& root, optional<bool>& altn); + + // Return true if this scope has already been bootstrapped, that is, the + // following calls have already been made: + // + // bootstrap_out() + // setup_root() + // bootstrap_src() + // + LIBBUILD2_SYMEXPORT bool + bootstrapped (scope& root); + + // Execute pre/post-bootstrap hooks. Similar to bootstrap_out/sr(), should + // only be called once per project bootstrap. + // + LIBBUILD2_SYMEXPORT void + bootstrap_pre (scope& root, optional<bool>& altn); + + LIBBUILD2_SYMEXPORT void + bootstrap_post (scope& root); + + // Create and bootstrap outer root scopes, if any. Loading is done by + // load_root(). + // + LIBBUILD2_SYMEXPORT void + create_bootstrap_outer (scope& root); + + // Create and bootstrap inner root scopes, if any, recursively. + // + // If out_base is not empty, then only bootstrap scope between root and base + // returning the innermost created root scope or root if none were created. + // + // Note that loading is done by load_root(). + // + LIBBUILD2_SYMEXPORT scope& + create_bootstrap_inner (scope& root, const dir_path& out_base = dir_path ()); + + // Load project's root.build (and root pre/post hooks) unless already + // loaded. Also make sure all outer root scopes are loaded prior to loading + // this root scope. + // + LIBBUILD2_SYMEXPORT void + load_root (scope& root); + + // Extract the specified variable value from a buildfile. 
It is expected to
+ // be the first non-comment line and not to rely on any variable expansion
+ // other than those from the global scope or any variable overrides. Return
+ // an indication of whether the variable was found.
+ //
+ LIBBUILD2_SYMEXPORT pair<value, bool>
+ extract_variable (const path&, const variable&);
+
+ // Import has two phases: the first is triggered by the import
+ // directive in the buildfile. It will try to find and load the
+ // project. Failing that, it will return the project-qualified
+ // name of the target which will be used to create a project-
+ // qualified prerequisite. This gives the rule that will be
+ // searching this prerequisite a chance to do some target-type
+ // specific search. For example, a C++ link rule can search
+ // for lib{} prerequisites in the C++ compiler default library
+ // search paths (so that we end up with functionality identical
+ // to -lfoo). If, however, the rule didn't do any of that (or
+ // failed to find anything usable), it calls the standard
+ // prerequisite search() function which sees this is a project-
+ // qualified prerequisite and goes straight to the second phase
+ // of import. Here, currently, we simply fail but in the future
+ // this will be the place where we can call custom "last resort"
+ // import hooks. For example, we can hook a package manager that
+ // will say, "Hey, I see you are trying to import foo and I see
+ // there is a package foo available in repository bar. Wanna
+ // download and use it?"
+ //
+ LIBBUILD2_SYMEXPORT names
+ import (scope& base, name, const location&);
+
+ const target&
+ import (const prerequisite_key&);
+
+ // As above but only imports as an already existing target. Unlike the above
+ // version, this one can be called during the execute phase.
+ //
+ // Note: similar to search_existing().
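+ //
+ // (To tie the two phases described above to the buildfile level: phase 1 is
+ // what runs for a directive along the lines of
+ //
+ // import libs = libhello%lib{hello}
+ //
+ // with libhello being a hypothetical project; if the project cannot be
+ // located, lib{hello} stays libhello%-qualified and phase 2, via the
+ // prerequisite_key-based overloads, kicks in during search.)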
+ // + const target* + import_existing (const prerequisite_key&); +} + +#include <libbuild2/file.ixx> + +#endif // LIBBUILD2_FILE_HXX diff --git a/libbuild2/file.ixx b/libbuild2/file.ixx new file mode 100644 index 0000000..f8a79be --- /dev/null +++ b/libbuild2/file.ixx @@ -0,0 +1,31 @@ +// file : libbuild2/file.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/export.hxx> + +namespace build2 +{ + inline bool + source_once (scope& root, scope& base, const path& bf) + { + return source_once (root, base, bf, base); + } + + LIBBUILD2_SYMEXPORT const target* + import (const prerequisite_key&, bool existing); + + inline const target& + import (const prerequisite_key& pk) + { + assert (phase == run_phase::match); + return *import (pk, false); + } + + inline const target* + import_existing (const prerequisite_key& pk) + { + assert (phase == run_phase::match || phase == run_phase::execute); + return import (pk, true); + } +} diff --git a/libbuild2/filesystem.cxx b/libbuild2/filesystem.cxx new file mode 100644 index 0000000..83408fa --- /dev/null +++ b/libbuild2/filesystem.cxx @@ -0,0 +1,274 @@ +// file : libbuild2/filesystem.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/filesystem.hxx> + +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + void + touch (const path& p, bool create, uint16_t v) + { + if (verb >= v) + text << "touch " << p; + + if (dry_run) + return; + + try + { + touch_file (p, create); + } + catch (const system_error& e) + { + fail << "unable to touch file " << p << ": " << e << endf; + } + } + + timestamp + mtime (const char* p) + { + try + { + return file_mtime (p); + } + catch (const system_error& e) + { + fail << "unable to obtain file " << p << " modification time: " << e + << endf; + } + } + + fs_status<mkdir_status> + mkdir (const dir_path& d, uint16_t v) + { + // We don't want to print the command if the directory already exists. + // This makes the below code a bit ugly. + // + mkdir_status ms; + + try + { + ms = try_mkdir (d); + } + catch (const system_error& e) + { + if (verb >= v) + text << "mkdir " << d; + + fail << "unable to create directory " << d << ": " << e << endf; + } + + if (ms == mkdir_status::success) + { + if (verb >= v) + text << "mkdir " << d; + } + + return ms; + } + + fs_status<mkdir_status> + mkdir_p (const dir_path& d, uint16_t v) + { + // We don't want to print the command if the directory already exists. + // This makes the below code a bit ugly. + // + mkdir_status ms; + + try + { + ms = try_mkdir_p (d); + } + catch (const system_error& e) + { + if (verb >= v) + text << "mkdir -p " << d; + + fail << "unable to create directory " << d << ": " << e << endf; + } + + if (ms == mkdir_status::success) + { + if (verb >= v) + text << "mkdir -p " << d; + } + + return ms; + } + + fs_status<rmfile_status> + rmsymlink (const path& p, bool d, uint16_t v) + { + auto print = [&p, v] () + { + if (verb >= v) + text << "rm " << p.string (); + }; + + rmfile_status rs; + + try + { + rs = dry_run + ? (butl::entry_exists (p) + ? 
rmfile_status::success + : rmfile_status::not_exist) + : try_rmsymlink (p, d); + } + catch (const system_error& e) + { + print (); + fail << "unable to remove symlink " << p.string () << ": " << e << endf; + } + + if (rs == rmfile_status::success) + print (); + + return rs; + } + + fs_status<butl::rmdir_status> + rmdir_r (const dir_path& d, bool dir, uint16_t v) + { + using namespace butl; + + if (work.sub (d)) // Don't try to remove working directory. + return rmdir_status::not_empty; + + if (!build2::entry_exists (d)) + return rmdir_status::not_exist; + + if (verb >= v) + text << "rmdir -r " << d; + + if (!dry_run) + { + try + { + butl::rmdir_r (d, dir); + } + catch (const system_error& e) + { + fail << "unable to remove directory " << d << ": " << e; + } + } + + return rmdir_status::success; + } + + bool + exists (const path& f, bool fs, bool ie) + { + try + { + return file_exists (f, fs, ie); + } + catch (const system_error& e) + { + fail << "unable to stat path " << f << ": " << e << endf; + } + } + + bool + exists (const dir_path& d, bool ie) + { + try + { + return dir_exists (d, ie); + } + catch (const system_error& e) + { + fail << "unable to stat path " << d << ": " << e << endf; + } + } + + bool + entry_exists (const path& p, bool fs, bool ie) + { + try + { + return butl::entry_exists (p, fs, ie); + } + catch (const system_error& e) + { + fail << "unable to stat path " << p << ": " << e << endf; + } + } + + bool + empty (const dir_path& d) + { + try + { + return dir_empty (d); + } + catch (const system_error& e) + { + fail << "unable to scan directory " << d << ": " << e << endf; + } + } + + fs_status<mkdir_status> + mkdir_buildignore (const dir_path& d, const path& n, uint16_t verbosity) + { + fs_status<mkdir_status> r (mkdir (d, verbosity)); + + // Create the .buildignore file if the directory was created (and so is + // empty) or the file doesn't exist. + // + path p (d / n); + if (r || !exists (p)) + touch (p, true /* create */, verbosity); + + return r; + } + + bool + empty_buildignore (const dir_path& d, const path& n) + { + try + { + for (const dir_entry& de: dir_iterator (d, false /* ignore_dangling */)) + { + // The .buildignore filesystem entry should be of the regular file + // type. + // + if (de.path () != n || de.ltype () != entry_type::regular) + return false; + } + } + catch (const system_error& e) + { + fail << "unable to scan directory " << d << ": " << e; + } + + return true; + } + + fs_status<rmdir_status> + rmdir_buildignore (const dir_path& d, const path& n, uint16_t verbosity) + { + // We should remove the .buildignore file only if the subsequent rmdir() + // will succeed. In other words if the directory stays after the function + // call then the .buildignore file must stay also, if present. Thus, we + // first check that the directory is otherwise empty and doesn't contain + // the working directory. + // + path p (d / n); + if (exists (p) && empty_buildignore (d, n) && !work.sub (d)) + rmfile (p, verbosity); + + // Note that in case of a system error the directory is likely to stay with + // the .buildignore file already removed. Trying to restore it feels like + // an overkill here. 
+ // + return rmdir (d, verbosity); + } +} diff --git a/libbuild2/filesystem.hxx b/libbuild2/filesystem.hxx new file mode 100644 index 0000000..6dca528 --- /dev/null +++ b/libbuild2/filesystem.hxx @@ -0,0 +1,182 @@ +// file : libbuild2/filesystem.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_FILESYSTEM_HXX +#define LIBBUILD2_FILESYSTEM_HXX + +#include <libbutl/filesystem.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +// Higher-level filesystem utilities built on top of <libbutl/filesystem.mxx>. +// +// Compared to the libbutl's versions, these handle errors and issue +// diagnostics. Some of them also print the corresponding command line +// equivalent at the specified verbosity level. Note that most of such +// functions also respect the dry_run flag. +// +namespace build2 +{ + using butl::auto_rmfile; + using butl::auto_rmdir; + + // The dual interface wrapper for the {mk,rm}{file,dir}() functions + // below that allows you to use it as a true/false return or a more + // detailed enum from <libbutl/filesystem.mxx> + // + template <typename T> + struct fs_status + { + T v; + fs_status (T v): v (v) {}; + operator T () const {return v;} + explicit operator bool () const {return v == T::success;} + }; + + // Set the file access and modification times (unless dry-run) to the + // current time printing the standard diagnostics starting from the + // specified verbosity level. If the file does not exist and create is true, + // create it and fail otherwise. + // + LIBBUILD2_SYMEXPORT void + touch (const path&, bool create, uint16_t verbosity = 1); + + // Return the modification time for an existing regular file and + // timestamp_nonexistent otherwise. Print the diagnostics and fail on system + // error. + // + LIBBUILD2_SYMEXPORT timestamp + mtime (const char*); + + inline timestamp + mtime (const path& p) + { + return mtime (p.string ().c_str ()); + } + + // Create the directory and print the standard diagnostics starting from the + // specified verbosity level. + // + // Note that these functions ignore the dry_run flag (we might need to save + // something in such a directory, such as depdb, ignoring dry_run). Overall, + // it feels like we should establish the structure even for dry-run. + // + // Note that the implementation may not be suitable if the performance is + // important and it is expected that the directory will exist in most cases. + // See the fsdir{} rule for details. + // + using mkdir_status = butl::mkdir_status; + + LIBBUILD2_SYMEXPORT fs_status<mkdir_status> + mkdir (const dir_path&, uint16_t verbosity = 1); + + LIBBUILD2_SYMEXPORT fs_status<mkdir_status> + mkdir_p (const dir_path&, uint16_t verbosity = 1); + + // Remove the file (unless dry-run) and print the standard diagnostics + // starting from the specified verbosity level. The second argument is only + // used in diagnostics, to print the target name. Passing the path for + // target will result in the relative path being printed. + // + using rmfile_status = butl::rmfile_status; + + template <typename T> + fs_status<rmfile_status> + rmfile (const path&, const T& target, uint16_t verbosity = 1); + + inline fs_status<rmfile_status> + rmfile (const path& f, int verbosity = 1) // Literal overload (int). 
+ { + return rmfile (f, f, static_cast<uint16_t> (verbosity)); + } + + inline fs_status<rmfile_status> + rmfile (const path& f, uint16_t verbosity) // Overload (verb_never). + { + return rmfile (f, f, verbosity); + } + + // Similar to rmfile() but for symlinks. + // + LIBBUILD2_SYMEXPORT fs_status<rmfile_status> + rmsymlink (const path&, bool dir, uint16_t verbosity); + + // Similar to rmfile() but for directories (note: not -r). + // + using rmdir_status = butl::rmdir_status; + + template <typename T> + fs_status<rmdir_status> + rmdir (const dir_path&, const T& target, uint16_t verbosity = 1); + + inline fs_status<rmdir_status> + rmdir (const dir_path& d, int verbosity = 1) // Literal overload (int). + { + return rmdir (d, d, static_cast<uint16_t> (verbosity)); + } + + inline fs_status<rmdir_status> + rmdir (const dir_path& d, uint16_t verbosity) // Overload (verb_never). + { + return rmdir (d, d, verbosity); + } + + // Remove the directory recursively (unless dry-run) and print the standard + // diagnostics starting from the specified verbosity level. Note that this + // function returns not_empty if we try to remove a working directory. If + // the dir argument is false, then the directory itself is not removed. + // + // @@ Collides (via ADL) with butl::rmdir_r(), which sucks. + // + LIBBUILD2_SYMEXPORT fs_status<rmdir_status> + rmdir_r (const dir_path&, bool dir = true, uint16_t verbosity = 1); + + // Check for a file, directory or filesystem entry existence. Print the + // diagnostics and fail on system error, unless ignore_error is true. + // + LIBBUILD2_SYMEXPORT bool + exists (const path&, bool follow_symlinks = true, bool ignore_error = false); + + LIBBUILD2_SYMEXPORT bool + exists (const dir_path&, bool ignore_error = false); + + LIBBUILD2_SYMEXPORT bool + entry_exists (const path&, + bool follow_symlinks = false, + bool ignore_error = false); + + // Check for a directory emptiness. Print the diagnostics and fail on system + // error. + // + LIBBUILD2_SYMEXPORT bool + empty (const dir_path&); + + // Directories containing .buildignore (or .build2ignore in the alternative + // naming scheme) file are automatically ignored by recursive name patterns. + // For now the file is just a marker and its contents don't matter. Note + // that these functions ignore dry-run. + + // Create a directory containing an empty .buildignore file. + // + LIBBUILD2_SYMEXPORT fs_status<mkdir_status> + mkdir_buildignore (const dir_path&, const path&, uint16_t verbosity = 1); + + // Return true if the directory is empty or only contains the .buildignore + // file. Fail if the directory doesn't exist. + // + LIBBUILD2_SYMEXPORT bool + empty_buildignore (const dir_path&, const path&); + + // Remove a directory if it is empty or only contains the .buildignore file. 
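+ //
+ // For example (a sketch, with d and n as in the functions above), the
+ // fs_status dual interface allows both forms:
+ //
+ // if (rmdir_buildignore (d, n))  // True if the directory was removed.
+ //   ...
+ //
+ // if (rmdir_buildignore (d, n) == rmdir_status::not_empty)
+ //   ...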
+ // + LIBBUILD2_SYMEXPORT fs_status<rmdir_status> + rmdir_buildignore (const dir_path&, const path&, uint16_t verbosity = 1); +} + +#include <libbuild2/filesystem.txx> + +#endif // LIBBUILD2_FILESYSTEM_HXX diff --git a/libbuild2/filesystem.txx b/libbuild2/filesystem.txx new file mode 100644 index 0000000..6166082 --- /dev/null +++ b/libbuild2/filesystem.txx @@ -0,0 +1,111 @@ +// file : libbuild2/filesystem.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <type_traits> // is_base_of + +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +namespace build2 +{ + template <typename T> + fs_status<butl::rmfile_status> + rmfile (const path& f, const T& t, uint16_t v) + { + using namespace butl; + + // We don't want to print the command if we couldn't remove the file + // because it does not exist (just like we don't print the update command + // if the file is up to date). This makes the below code a bit ugly. + // + auto print = [&f, &t, v] () + { + if (verb >= v) + { + if (verb >= 2) + text << "rm " << f; + else if (verb) + text << "rm " << t; + } + }; + + rmfile_status rs; + + try + { + rs = dry_run + ? file_exists (f) ? rmfile_status::success : rmfile_status::not_exist + : try_rmfile (f); + } + catch (const system_error& e) + { + print (); + fail << "unable to remove file " << f << ": " << e << endf; + } + + if (rs == rmfile_status::success) + print (); + + return rs; + } + + template <typename T> + fs_status<butl::rmdir_status> + rmdir (const dir_path& d, const T& t, uint16_t v) + { + using namespace butl; + + // We don't want to print the command if we couldn't remove the directory + // because it does not exist (just like we don't print mkdir if it already + // exists) or if it is not empty. This makes the below code a bit ugly. + // + auto print = [&d, &t, v] () + { + if (verb >= v) + { + if (verb >= 2) + text << "rmdir " << d; + else if (verb) + text << (std::is_base_of<dir_path, T>::value ? "rmdir " : "rm ") << t; + } + }; + + bool w (false); // Don't try to remove working directory. + rmdir_status rs; + try + { + rs = dry_run + ? dir_exists (d) ? rmdir_status::success : rmdir_status::not_exist + : !(w = work.sub (d)) ? try_rmdir (d) : rmdir_status::not_empty; + } + catch (const system_error& e) + { + print (); + fail << "unable to remove directory " << d << ": " << e << endf; + } + + switch (rs) + { + case rmdir_status::success: + { + print (); + break; + } + case rmdir_status::not_empty: + { + if (verb >= v && verb >= 2) + { + text << d << " is " + << (w ? 
"current working directory" : "not empty") + << ", not removing"; + } + break; + } + case rmdir_status::not_exist: + break; + } + + return rs; + } +} diff --git a/libbuild2/function+call.test.testscript b/libbuild2/function+call.test.testscript new file mode 100644 index 0000000..755572e --- /dev/null +++ b/libbuild2/function+call.test.testscript @@ -0,0 +1,161 @@ +# file : libbuild2/function+call.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: qual-implicit +: +$* <'print $dummy.dummy0()' >'abc' + +: qual-explicit +: +$* <'print $dummy.qual()' >'abc' + +: qual-fail +: +$* <'print $qual()' 2>>EOE != 0 +buildfile:1:8: error: unmatched call to qual() + info: candidate: dummy.qual() +EOE + +: derived-base +: Test derived-to-base overload resolution +: +$* <'print $dummy.abs([dir_path] .)' >'false'; +$* <'print $dummy.abs([abs_dir_path] .)' >'true' + +: variadic +: +$* <'print $variadic([bool] true, foo, bar)' >'3' + +: fail +: +$* <'$fail()' 2>>EOE != 0 +error: failed +buildfile:1:2: info: while calling fail() +EOE + +: fail-invalid-arg +: +$* <'$fail_arg(abc)' 2>>EOE != 0 +error: invalid argument: invalid uint64 value: 'abc' +buildfile:1:2: info: while calling fail_arg(<untyped>) +EOE + +: no-match-name +: +$* <'$bogus()' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to bogus() +EOE + +: no-match-count +: +$* <'$dummy0(abc)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to dummy0(<untyped>) + info: candidate: dummy0(), qualified name dummy.dummy0 +EOE + +: no-match-type +: +$* <'$dummy1([uint64] 123)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to dummy1(uint64) + info: candidate: dummy1(string), qualified name dummy.dummy1 +EOE + +: ambig +: +$* <'$ambig(abc)' 2>>~/EOE/ != 0 +buildfile:1:2: error: ambiguous call to ambig(<untyped>) +/(( + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig +/)|( + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig +/)) +EOE + +: unmatched +: +$* <'$ambig(abc, def)' 2>>~/EOE/ != 0 +buildfile:1:2: error: unmatched call to ambig(<untyped>, <untyped>) +/(( + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig +/)|( + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig +/)) +EOE + +: reverse +: +$* <'print $reverse([string] abc)' >'abc' + +: optional-absent +: +$* <'print $optional()' >'true' + +: optional-present +: +$* <'print $optional(abc)' >'false' + +: null-true +: +$* <'print $nullable([null])' >'true' + +: null-false +: +$* <'print $nullable(nonull)' >'false' + +: null-fail +: +$* <'$dummy1([string null])' 2>>EOE != 0 +error: invalid argument: null value +buildfile:1:2: info: while calling dummy1(string) +EOE + +: print-call-1-untyped +: +$* <'$bogus(abc)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to bogus(<untyped>) +EOE + +: print-call-1-typed +: +$* <'$bogus([uint64] 123)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to bogus(uint64) +EOE + +: print-call-2 +: +$* <'$bogus(abc, [uint64] 123)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to bogus(<untyped>, uint64) +EOE + +: print-fovl +: +$* <'$ambig([bool] true)' 2>>~/EOE/ != 0 +buildfile:1:2: error: ambiguous call 
to ambig(bool) +/(( + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig +/)|( + info: candidate: ambig(<untyped> [, string]), qualified name dummy.ambig + info: candidate: ambig(<untyped> [, uint64]), qualified name dummy.ambig +/)) +EOE + +: print-fovl-variadic +: +$* <'$variadic(abc)' 2>>EOE != 0 +buildfile:1:2: error: unmatched call to variadic(<untyped>) + info: candidate: variadic(bool [, ...]) +EOE + +: member-function +: +$* <'print $dummy.length([path] abc)' >'3' + +: data-member +: +$* <'print $dummy.type([name] cxx{foo})' >'cxx' diff --git a/libbuild2/function+syntax.test.testscript b/libbuild2/function+syntax.test.testscript new file mode 100644 index 0000000..f8240f3 --- /dev/null +++ b/libbuild2/function+syntax.test.testscript @@ -0,0 +1,29 @@ +# file : libbuild2/function+syntax.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +$* <'$dump()' >:'' : none +$* <'$dump( )' >:'' : none-in-spaces +$* <'$dump("")' >'{}' : one-empty +$* <'$dump(a)' >'a' : one-single +$* <'$dump(a b c)' >'a b c' : one-list +$* <'$dump(d/t{x y z})' >'d/t{x} d/t{y} d/t{z}' : one-names + +$* <'print a$dummy1([string] b)c' >'abc' : concat +$* <'print $dummy2([uint64] 123, [uint64] 321)' >'444' : multi-arg + +: quoting +: Verify we can inhibit function call with quoting +: +$* <<EOI >>EOO +foo = FOO +bar = BAR + +print $foo"($bar)" +print "$foo"($bar) +print "$foo""($bar)" +EOI +FOOBAR +FOOBAR +FOOBAR +EOO diff --git a/libbuild2/function.cxx b/libbuild2/function.cxx new file mode 100644 index 0000000..2d4dce9 --- /dev/null +++ b/libbuild2/function.cxx @@ -0,0 +1,400 @@ +// file : libbuild2/function.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> + +#include <cstring> // strchr() + +using namespace std; + +namespace build2 +{ + ostream& + operator<< (ostream& os, const function_overload& f) + { + os << f.name << '('; + + bool v (f.arg_max == function_overload::arg_variadic); + size_t n (v ? max (f.arg_min, f.arg_types.size ()): f.arg_max); + + // Handle variadic tail as the last pseudo-argument. + // + for (size_t i (0); i != n + (v ? 1 : 0); ++i) + { + if (i == f.arg_min) + os << (i != 0 ? " [" : "["); + + os << (i != 0 ? ", " : ""); + + if (i == n) // Variadic tail (last). + os << "..."; + else + { + // If count is greater than f.arg_typed, then we assume the rest are + // valid but untyped. + // + const optional<const value_type*> t ( + i < f.arg_types.size () ? f.arg_types[i] : nullopt); + + os << (t ? (*t != nullptr ? (*t)->name : "<untyped>") : "<anytype>"); + } + } + + if (n + (v ? 1 : 0) > f.arg_min) + os << ']'; + + os << ')'; + + if (f.alt_name != nullptr) + { + auto k (strchr (f.alt_name, '.') == nullptr + ? "unqualified" + : "qualified"); + + os << ", " << k << " name " << f.alt_name; + } + + return os; + } + + bool function_map:: + defined (const string& name) const + { + assert (!name.empty ()); + + // If this is a qualified function name then check if it is already + // defined. + // + if (name.back () != '.') + return map_.find (name) != map_.end (); + + // If any function of the specified family is already defined, then one of + // them should be the first element that is greater than the dot-terminated + // family name. 
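+ //
+ // For example (hypothetical map contents), with the keys
+ //
+ // {"regex.match", "string.icasecmp", "string.trim"}
+ //
+ // defined ("string.") does upper_bound ("string."), which lands on
+ // "string.icasecmp", and the prefix comparison below succeeds.
+ //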
Here we rely on the fact that the dot character is less + // than any character of unqualified function and family names. + // + size_t n (name.size ()); + assert (n > 1); + + auto i (map_.upper_bound (name)); + return i != map_.end () && i->first.compare (0, n, name) == 0; + } + + auto function_map:: + insert (string name, function_overload f) -> iterator + { + // Sanity checks. + // + assert (f.arg_min <= f.arg_max && + f.arg_types.size () <= f.arg_max && + f.impl != nullptr); + + auto i (map_.emplace (move (name), move (f))); + + i->second.name = i->first.c_str (); + return i; + } + + pair<value, bool> function_map:: + call (const scope* base, + const string& name, + vector_view<value> args, + const location& loc, + bool fa) const + { + auto print_call = [&name, &args] (ostream& os) + { + os << name << '('; + + for (size_t i (0); i != args.size (); ++i) + { + const value_type* t (args[i].type); + os << (i != 0 ? ", " : "") << (t != nullptr ? t->name : "<untyped>"); + } + + os << ')'; + }; + + // Overload resolution. + // + // Ours is pretty simple: we sort all the overloads into three ranks: + // + // 0 -- all the arguments match exactly (perfect match) + // 1 -- one or more arguments match via the derived-to-base conversion + // 2 -- one or more arguments match via the reversal to untyped + // + // More than one match of the same rank is ambiguous. + // + auto ip (map_.equal_range (name)); + + size_t rank (~0); + small_vector<const function_overload*, 2> ovls; + { + size_t count (args.size ()); + + for (auto it (ip.first); it != ip.second; ++it) + { + const function_overload& f (it->second); + + // Argument count match. + // + if (count < f.arg_min || count > f.arg_max) + continue; + + // Argument types match. + // + size_t r (0); + { + size_t i (0), n (min (count, f.arg_types.size ())); + for (; i != n; ++i) + { + if (!f.arg_types[i]) // Anytyped. + continue; + + const value_type* at (args[i].type); + const value_type* ft (*f.arg_types[i]); + + if (at == ft) // Types match perfectly. + continue; + + if (at != nullptr && ft != nullptr) + { + while ((at = at->base_type) != nullptr && at != ft) ; + + if (at != nullptr) // Types match via derived-to-base. + { + if (r < 1) + r = 1; + continue; + } + } + + if (ft == nullptr) // Types match via reversal to untyped. + { + if (r < 2) + r = 2; + continue; + } + + break; // No match. + } + + if (i != n) + continue; // No match. + } + + // Better or just as good a match? + // + if (r <= rank) + { + if (r < rank) // Better. + { + rank = r; + ovls.clear (); + } + + ovls.push_back (&f); + } + + // Continue looking to detect ambiguities. + } + } + + switch (ovls.size ()) + { + case 1: + { + // Print the call location in case the function fails. + // + auto g ( + make_exception_guard ( + [fa, &loc, &print_call] () + { + if (fa && verb != 0) + { + diag_record dr (info (loc)); + dr << "while calling "; print_call (dr.os); + } + })); + + auto f (ovls.back ()); + + // If one or more arguments match via the reversal to untyped (rank 2), + // then we need to go over the overload's arguments one more time an + // untypify() those that we need to reverse. 
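+ //
+ // (To recap the ranking with a concrete example: an abs_dir_path argument
+ // passed to an overload declared with a dir_path parameter matches at
+ // rank 1 via derived-to-base -- the kind of call the dummy.abs() test
+ // exercises -- while a typed argument passed to an overload with an
+ // untyped parameter only matches at rank 2 and is untypified below.)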
+ // + if (rank == 2) + { + size_t n (args.size ()); + assert (n <= f->arg_types.size ()); + + for (size_t i (0); i != n; ++i) + { + if (f->arg_types[i] && + *f->arg_types[i] == nullptr && + args[i].type != nullptr) + untypify (args[i]); + } + } + + try + { + return make_pair (f->impl (base, move (args), *f), true); + } + catch (const invalid_argument& e) + { + diag_record dr (fail); + dr << "invalid argument"; + + if (*e.what () != '\0') + dr << ": " << e; + + dr << endf; + } + } + case 0: + { + if (!fa) + return make_pair (value (nullptr), false); + + // No match. + // + diag_record dr; + + dr << fail (loc) << "unmatched call to "; print_call (dr.os); + + for (auto i (ip.first); i != ip.second; ++i) + dr << info << "candidate: " << i->second; + + // If this is an unqualified name, then also print qualified + // functions that end with this name. But skip functions that we + // have already printed in the previous loop. + // + if (name.find ('.') == string::npos) + { + size_t n (name.size ()); + + for (auto i (functions.begin ()); i != functions.end (); ++i) + { + const string& q (i->first); + const function_overload& f (i->second); + + if ((f.alt_name == nullptr || f.alt_name != name) && + q.size () > n) + { + size_t p (q.size () - n); + if (q[p - 1] == '.' && q.compare (p, n, name) == 0) + dr << info << "candidate: " << i->second; + } + } + } + + dr << endf; + } + default: + { + // Ambigous match. + // + diag_record dr; + dr << fail (loc) << "ambiguous call to "; print_call (dr.os); + + for (auto f: ovls) + dr << info << "candidate: " << *f; + + dr << endf; + } + } + } + + value function_family:: + default_thunk (const scope* base, + vector_view<value> args, + const function_overload& f) + { + // Call the cast thunk. + // + struct cast_data // Prefix of function_cast::data. + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + }; + + auto d (reinterpret_cast<const cast_data*> (&f.data)); + return d->thunk (base, move (args), d); + } + +#if !defined(_WIN32) + constexpr const optional<const value_type*>* function_args<>::types; +#else + const optional<const value_type*>* const function_args<>::types = nullptr; +#endif + + void function_family::entry:: + insert (string n, function_overload f) const + { + // Figure out qualification. + // + string qn; + size_t p (n.find ('.')); + + if (p == string::npos) + { + if (!qual.empty ()) + { + qn = qual; + qn += '.'; + qn += n; + } + } + else if (p == 0) + { + assert (!qual.empty ()); + n.insert (0, qual); + } + + auto i (qn.empty () ? functions.end () : functions.insert (move (qn), f)); + auto j (functions.insert (move (n), move (f))); + + // If we have both, then set alternative names. + // + if (i != functions.end ()) + { + i->second.alt_name = j->first.c_str (); + j->second.alt_name = i->first.c_str (); + } + } + + // Static-initialize the function map and populate with builtin functions. 
+ // + function_map functions; + + void builtin_functions (); // functions-builtin.cxx + void filesystem_functions (); // functions-filesystem.cxx + void name_functions (); // functions-name.cxx + void path_functions (); // functions-path.cxx + void process_functions (); // functions-process.cxx + void process_path_functions (); // functions-process-path.cxx + void regex_functions (); // functions-regex.cxx + void string_functions (); // functions-string.cxx + void target_triplet_functions (); // functions-target-triplet.cxx + void project_name_functions (); // functions-target-triplet.cxx + + struct functions_init + { + functions_init () + { + builtin_functions (); + filesystem_functions (); + name_functions (); + path_functions (); + process_functions (); + process_path_functions (); + regex_functions (); + string_functions (); + target_triplet_functions (); + project_name_functions (); + } + }; + + static const functions_init init_; +} diff --git a/libbuild2/function.hxx b/libbuild2/function.hxx new file mode 100644 index 0000000..6b2bfe1 --- /dev/null +++ b/libbuild2/function.hxx @@ -0,0 +1,905 @@ +// file : libbuild2/function.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_FUNCTION_HXX +#define LIBBUILD2_FUNCTION_HXX + +#include <map> +#include <utility> // index_sequence +#include <type_traits> // aligned_storage + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Functions can be overloaded based on types of their arguments but + // arguments can be untyped and a function can elect to accept an argument + // of any type. + // + // Functions can be qualified (e.g, string.length(), path.directory()) and + // unqualified (e.g., length(), directory()). Only functions overloaded on + // static types can be unqualified plus they should also define a qualified + // alias. + // + // Low-level function implementation would be called with a list of values + // as arguments. There is also higher-level, more convenient support for + // defining functions as pointers to functions (including capture-less + // lambdas), pointers to member functions (e.g., string::size()), or + // pointers to data members (e.g., name::type). In this case the build2 + // function types are automatically matched to C++ function types according + // to these rules: + // + // T - statically-typed (value_traits<T> must be defined) + // names - untyped + // value - any type + // T* - NULL-able argument (here T can be names) + // value* - NULL-able any type (never NULL itself, use value::null) + // optional<T> - optional argument (here T can be T*, names, value) + // + // Optional arguments must be last. In case of a failure the function is + // expected to issue diagnostics and throw failed. Note that the arguments + // are conceptually "moved" and can be reused by the implementation. + // + // A function can also optionally receive the current scope by having the + // first argument of the const scope* type. It may be NULL if the function + // is called out of any scope (e.g., command line). + // + // Note also that we don't pass the location to the function instead + // printing the info message pointing to the call site. + // + // A function can return value or anything that can be converted to value. 
+ // In particular, if a function returns optional<T>, then the result will be + // either NULL or value of type T. + // + // Normally functions come in families that share a common qualification + // (e.g., string. or path.). The function_family class is a "registrar" + // that simplifies handling of function families. For example: + // + // function_family f ("string"); + // + // // Register length() and string.length(). + // // + // f["length"] = &string::size; + // + // // Register string.max_size(). + // // + // f[".max_size"] = []() {return string ().max_size ();}; + // + // For more examples/ideas, study the existing function families (reside + // in the functions-*.cxx files). + // + // Note that normally there will be a function overload that has all the + // parameters untyped with an implementation that falls back to one of the + // overloads that have all the parameters typed, possibly inferring the type + // from the argument value "syntax" (e.g., presence of a trailing slash for + // a directory path). + // + struct function_overload; + + using function_impl = value (const scope*, + vector_view<value>, + const function_overload&); + + struct LIBBUILD2_SYMEXPORT function_overload + { + const char* name; // Set to point to key by insert() below. + const char* alt_name; // Alternative name, NULL if none. This is the + // qualified name for unqualified or vice verse. + + // Arguments. + // + // A function can have a number of optional arguments. Arguments can also + // be typed. A non-existent entry in arg_types means a value of any type. + // A NULL entry means an untyped value. + // + // If arg_max equals to arg_variadic, then the function takes an unlimited + // number of arguments. In this case the semantics of arg_min and + // arg_types is unchanged. + // + static const size_t arg_variadic = size_t (~0); + + using types = vector_view<const optional<const value_type*>>; + + const size_t arg_min; + const size_t arg_max; + const types arg_types; + + // Function implementation. + // + function_impl* const impl; + + // Auxiliary data storage. Note that it is assumed to be POD (no + // destructors, bitwise copy, etc). + // + std::aligned_storage<sizeof (void*) * 3>::type data; + static const size_t data_size = sizeof (decltype (data)); + + function_overload (const char* an, + size_t mi, size_t ma, types ts, + function_impl* im) + : alt_name (an), + arg_min (mi), arg_max (ma), arg_types (move (ts)), + impl (im) {} + + template <typename D> + function_overload (const char* an, + size_t mi, size_t ma, types ts, + function_impl* im, + D d) + : function_overload (an, mi, ma, move (ts), im) + { + // std::is_pod appears to be broken in VC16 and also in GCC up to + // 5 (pointers to members). + // +#if !((defined(_MSC_VER) && _MSC_VER < 2000) || \ + (defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 5)) + static_assert (std::is_pod<D>::value, "type is not POD"); +#endif + static_assert (sizeof (D) <= data_size, "insufficient space"); + new (&data) D (move (d)); + } + }; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const function_overload&); // Print signature. 
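+
+ // To make the matching rules above concrete, here is a hypothetical
+ // registration sketch (not one of the real families; see the
+ // functions-*.cxx files for those):
+ //
+ // function_family f ("demo");
+ //
+ // // demo.twice() and twice(): one statically-typed uint64 argument.
+ // //
+ // f["twice"] = [] (uint64_t x) {return x * 2;};
+ //
+ // // demo.count(): an untyped name list; also receives the current scope,
+ // // which may be NULL if the call is from the command line.
+ // //
+ // f[".count"] = [] (const scope*, names ns) {return uint64_t (ns.size ());};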
+
+ class LIBBUILD2_SYMEXPORT function_map
+ {
+ public:
+ using map_type = std::multimap<string, function_overload>;
+ using iterator = map_type::iterator;
+ using const_iterator = map_type::const_iterator;
+
+ iterator
+ insert (string name, function_overload);
+
+ void
+ erase (iterator i) {map_.erase (i);}
+
+ value
+ call (const scope* base,
+ const string& name,
+ vector_view<value> args,
+ const location& l) const
+ {
+ return call (base, name, args, l, true).first;
+ }
+
+ // As above but do not fail if no match was found (but still do if the
+ // match is ambiguous). Instead return an indication of whether the call
+ // was made. Used to issue custom diagnostics when calling internal
+ // functions.
+ //
+ pair<value, bool>
+ try_call (const scope* base,
+ const string& name,
+ vector_view<value> args,
+ const location& l) const
+ {
+ return call (base, name, args, l, false);
+ }
+
+ iterator
+ begin () {return map_.begin ();}
+
+ iterator
+ end () {return map_.end ();}
+
+ const_iterator
+ begin () const {return map_.begin ();}
+
+ const_iterator
+ end () const {return map_.end ();}
+
+ // Return true if the function with this name is already defined. If the
+ // name ends with '.', then instead check if any function with this prefix
+ // (which we call a family) is already defined.
+ //
+ bool
+ defined (const string&) const;
+
+ private:
+ pair<value, bool>
+ call (const scope*,
+ const string&,
+ vector_view<value>,
+ const location&,
+ bool fail) const;
+
+ map_type map_;
+ };
+
+ LIBBUILD2_SYMEXPORT extern function_map functions;
+
+ class LIBBUILD2_SYMEXPORT function_family
+ {
+ public:
+ // The call() function above catches invalid_argument and issues
+ // diagnostics by assuming it is related to function arguments and
+ // contains a useful description.
+ //
+ // In order to catch additional exceptions, you can implement a custom
+ // thunk which would normally call this default implementation.
+ //
+ static value
+ default_thunk (const scope*, vector_view<value>, const function_overload&);
+
+ // A function family uses a common qualification (though you can pass an
+ // empty string to suppress it). For an unqualified name (does not
+ // contain a dot) the qualified version is added automatically. A name
+ // containing a leading dot is a shortcut notation for a qualified-only
+ // name.
+ //
+ explicit
+ function_family (string qual, function_impl* thunk = &default_thunk)
+ : qual_ (qual), thunk_ (thunk) {}
+
+ struct entry;
+
+ entry
+ operator[] (string name) const;
+
+ static bool
+ defined (string qual)
+ {
+ qual += '.';
+ return functions.defined (qual);
+ }
+
+ private:
+ const string qual_;
+ function_impl* thunk_;
+ };
+
+ // Implementation details. If you can understand and explain all of this,
+ // then you are hired ;-)!
+ //
+
+ template <typename T>
+ struct function_arg
+ {
+ static const bool null = false;
+ static const bool opt = false;
+
+ static constexpr optional<const value_type*>
+ type () {return &value_traits<T>::value_type;}
+
+ static T&&
+ cast (value* v)
+ {
+ if (v->null)
+ throw invalid_argument ("null value");
+
+ // Use fast but unchecked cast since the caller matched the types.
+ //
+ return move (v->as<T> ());
+ }
+ };
+
+ template <>
+ struct LIBBUILD2_SYMEXPORT function_arg<names> // Untyped.
+  {
+    static const bool null = false;
+    static const bool opt = false;
+
+    static constexpr optional<const value_type*>
+    type () {return nullptr;}
+
+    static names&&
+    cast (value* v)
+    {
+      if (v->null)
+        throw invalid_argument ("null value");
+
+      return move (v->as<names> ());
+    }
+  };
+
+  template <>
+  struct LIBBUILD2_SYMEXPORT function_arg<value> // Anytyped.
+  {
+    static const bool null = false;
+    static const bool opt = false;
+
+    static constexpr optional<const value_type*>
+    type () {return nullopt;}
+
+    static value&&
+    cast (value* v)
+    {
+      if (v->null)
+        throw invalid_argument ("null value");
+
+      return move (*v);
+    }
+  };
+
+  template <typename T>
+  struct function_arg<T*>: function_arg<T>
+  {
+    static const bool null = true;
+
+    static T*
+    cast (value* v)
+    {
+      if (v->null)
+        return nullptr;
+
+      // This looks bizarre but makes sense. The cast() that we are calling
+      // returns an r-value reference to (what's inside) v. And it has to
+      // return an r-value reference so that the value can be moved into
+      // by-value arguments.
+      //
+      T&& r (function_arg<T>::cast (v));
+      return &r;
+    }
+  };
+
+  template <>
+  struct LIBBUILD2_SYMEXPORT function_arg<value*>: function_arg<value>
+  {
+    static const bool null = true;
+
+    static value*
+    cast (value* v) {return v;} // NULL indicator in value::null.
+  };
+
+  template <typename T>
+  struct function_arg<optional<T>>: function_arg<T>
+  {
+    static const bool opt = true;
+
+    static optional<T>
+    cast (value* v)
+    {
+      return v != nullptr ? optional<T> (function_arg<T>::cast (v)) : nullopt;
+    }
+  };
+
+  // Number of optional arguments. Note that we currently don't check that
+  // they are all at the end.
+  //
+  template <typename A0, typename... A>
+  struct function_args_opt
+  {
+    static const size_t count = (function_arg<A0>::opt ? 1 : 0) +
+      function_args_opt<A...>::count;
+  };
+
+  template <typename A0>
+  struct function_args_opt<A0>
+  {
+    static const size_t count = (function_arg<A0>::opt ? 1 : 0);
+  };
+
+  // Argument counts/types.
+  //
+  template <typename... A>
+  struct function_args
+  {
+    static const size_t max = sizeof...(A);
+    static const size_t min = max - function_args_opt<A...>::count;
+
+    // VC15 doesn't realize that a pointer to a static object (in our case it
+    // is &value_traits<T>::value_type) is constexpr.
+    //
+    // Note that during the library split we discovered that the constexpr
+    // variant causes compilation/linkage issues for both MinGW GCC and
+    // VC. Thus we now only use it for POSIX systems.
+    //
+    // #if !defined(_MSC_VER) || _MSC_VER > 1910
+    //
+#if !defined(_WIN32)
+    static constexpr const optional<const value_type*> types[max] = {
+      function_arg<A>::type ()...};
+#else
+    static const optional<const value_type*> types[max];
+#endif
+  };
+
+  template <typename... A>
+#if !defined(_WIN32)
+  constexpr const optional<const value_type*>
+  function_args<A...>::types[function_args<A...>::max];
+#else
+  const optional<const value_type*>
+  function_args<A...>::types[function_args<A...>::max] = {
+    function_arg<A>::type ()...};
+#endif
+
+  // Specialization for no arguments.
+  //
+  template <>
+  struct LIBBUILD2_SYMEXPORT function_args<>
+  {
+    static const size_t max = 0;
+    static const size_t min = 0;
+
+#if !defined(_WIN32)
+    static constexpr const optional<const value_type*>* types = nullptr;
+#else
+    static const optional<const value_type*>* const types;
+#endif
+  };
+
+  // Cast data/thunk.
+  //
+  template <typename R, typename... 
A> + struct function_cast + { + // A pointer to a standard layout struct is a pointer to its first data + // member, which in our case is the cast thunk. + // + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R (*const impl) (A...); + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + return thunk (move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + } + + template <size_t... i> + static value + thunk (vector_view<value> args, + R (*impl) (A...), + std::index_sequence<i...>) + { + return value ( + impl ( + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...)); + } + }; + + // Specialization for functions that expect the current scope as a first + // argument. + // + template <typename R, typename... A> + struct function_cast<R, const scope*, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R (*const impl) (const scope*, A...); + }; + + static value + thunk (const scope* base, vector_view<value> args, const void* d) + { + return thunk (base, move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + } + + template <size_t... i> + static value + thunk (const scope* base, vector_view<value> args, + R (*impl) (const scope*, A...), + std::index_sequence<i...>) + { + return value ( + impl (base, + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...)); + } + }; + + // Specialization for void return type. In this case we return NULL value. + // + template <typename... A> + struct function_cast<void, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + void (*const impl) (A...); + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + thunk (move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + return value (nullptr); + } + + template <size_t... i> + static void + thunk (vector_view<value> args, + void (*impl) (A...), + std::index_sequence<i...>) + { + impl (function_arg<A>::cast (i < args.size () ? &args[i] : nullptr)...); + } + }; + + template <typename... A> + struct function_cast<void, const scope*, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + void (*const impl) (const scope*, A...); + }; + + static value + thunk (const scope* base, vector_view<value> args, const void* d) + { + thunk (base, move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + return value (nullptr); + } + + template <size_t... i> + static void + thunk (const scope* base, vector_view<value> args, + void (*impl) (const scope*, A...), + std::index_sequence<i...>) + { + impl (base, + function_arg<A>::cast (i < args.size () ? &args[i] : nullptr)...); + } + }; + + // Customization for coerced lambdas (see below). + // +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 6 + template <typename L, typename R, typename... A> + struct function_cast_lamb + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R (L::*const impl) (A...) const; + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + return thunk (move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + } + + template <size_t... i> + static value + thunk (vector_view<value> args, + R (L::*impl) (A...) 
const, + std::index_sequence<i...>) + { + const L* l (nullptr); // Undefined behavior. + + return value ( + (l->*impl) ( + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...)); + } + }; + + template <typename L, typename R, typename... A> + struct function_cast_lamb<L, R, const scope*, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R (L::*const impl) (const scope*, A...) const; + }; + + static value + thunk (const scope* base, vector_view<value> args, const void* d) + { + return thunk (base, move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + } + + template <size_t... i> + static value + thunk (const scope* base, vector_view<value> args, + R (L::*impl) (const scope*, A...) const, + std::index_sequence<i...>) + { + const L* l (nullptr); // Undefined behavior. + + return value ( + (l->*impl) (base, + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...)); + } + }; + + template <typename L, typename... A> + struct function_cast_lamb<L, void, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + void (L::*const impl) (A...) const; + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + thunk (move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + return value (nullptr); + } + + template <size_t... i> + static void + thunk (vector_view<value> args, + void (L::*impl) (A...) const, + std::index_sequence<i...>) + { + const L* l (nullptr); + (l->*impl) ( + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...); + } + }; + + template <typename L, typename... A> + struct function_cast_lamb<L, void, const scope*, A...> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + void (L::*const impl) (const scope*, A...) const; + }; + + static value + thunk (const scope* base, vector_view<value> args, const void* d) + { + thunk (base, move (args), + static_cast<const data*> (d)->impl, + std::index_sequence_for<A...> ()); + return value (nullptr); + } + + template <size_t... i> + static void + thunk (const scope* base, vector_view<value> args, + void (L::*impl) (const scope*, A...) const, + std::index_sequence<i...>) + { + const L* l (nullptr); + (l->*impl) (base, + function_arg<A>::cast ( + i < args.size () ? &args[i] : nullptr)...); + } + }; +#endif + + // Customization for member functions. + // + template <typename R, typename T> + struct function_cast_memf + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R (T::*const impl) () const; + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + auto mf (static_cast<const data*> (d)->impl); + return value ((function_arg<T>::cast (&args[0]).*mf) ()); + } + }; + + template <typename T> + struct function_cast_memf<void, T> + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + void (T::*const impl) () const; + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + auto mf (static_cast<const data*> (d)->impl); + (function_arg<T>::cast (args[0]).*mf) (); + return value (nullptr); + } + }; + + // Customization for data members. 
+ // + template <typename R, typename T> + struct function_cast_memd + { + struct data + { + value (*const thunk) (const scope*, vector_view<value>, const void*); + R T::*const impl; + }; + + static value + thunk (const scope*, vector_view<value> args, const void* d) + { + auto dm (static_cast<const data*> (d)->impl); + return value (move (function_arg<T>::cast (&args[0]).*dm)); + } + }; + + struct LIBBUILD2_SYMEXPORT function_family::entry + { + string name; + const string& qual; + function_impl* thunk; + + template <typename R, typename... A> + void + operator= (R (*impl) (A...)) && + { + using args = function_args<A...>; + using cast = function_cast<R, A...>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, impl})); + } + + template <typename R, typename... A> + void + operator= (R (*impl) (const scope*, A...)) && + { + using args = function_args<A...>; + using cast = function_cast<R, const scope*, A...>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, impl})); + } + + // Support for assigning a (capture-less) lambda. + // + // GCC up until version 6 has a bug (#62052) that is triggered by calling + // a lambda that takes a by-value argument via its "decayed" function + // pointer. To work around this we are not going to decay it and instead + // will call its operator() on NULL pointer; yes, undefined behavior, but + // better than a guaranteed crash. + // +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 6 + template <typename L> + void + operator= (const L&) && + { + move (*this).coerce_lambda (&L::operator()); + } + + template <typename L, typename R, typename... A> + void + coerce_lambda (R (L::*op) (A...) const) && + { + using args = function_args<A...>; + using cast = function_cast_lamb<L, R, A...>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, op})); + } + + template <typename L, typename R, typename... A> + void + coerce_lambda (R (L::*op) (const scope*, A...) const) && + { + using args = function_args<A...>; + using cast = function_cast_lamb<L, R, const scope*, A...>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, op})); + } +#else + template <typename L> + void + operator= (const L& l) && + { + move (*this).operator= (decay_lambda (&L::operator(), l)); + } + + template <typename L, typename R, typename... A> + static auto + decay_lambda (R (L::*) (A...) const, const L& l) -> R (*) (A...) + { + return static_cast<R (*) (A...)> (l); + } +#endif + + // Support for assigning a pointer to member function (e.g. an accessor). + // + // For now we don't support passing additional (to this) arguments though + // we could probably do that. The issues would be the argument passing + // semantics (e.g., what if it's const&) and the optional/default argument + // handling. 
+ // + template <typename R, typename T> + void + operator= (R (T::*mf) () const) && + { + using args = function_args<T>; + using cast = function_cast_memf<R, T>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, mf})); + } + + // Support for assigning a pointer to data member. + // + template <typename R, typename T> + void + operator= (R T::*dm) && + { + using args = function_args<T>; + using cast = function_cast_memd<R, T>; + + insert (move (name), + function_overload ( + nullptr, + args::min, + args::max, + function_overload::types (args::types, args::max), + thunk, + typename cast::data {&cast::thunk, dm})); + } + + private: + void + insert (string, function_overload) const; + }; + + inline auto function_family:: + operator[] (string name) const -> entry + { + return entry {move (name), qual_, thunk_}; + } +} + +#endif // LIBBUILD2_FUNCTION_HXX diff --git a/libbuild2/function.test.cxx b/libbuild2/function.test.cxx new file mode 100644 index 0000000..5e442a3 --- /dev/null +++ b/libbuild2/function.test.cxx @@ -0,0 +1,134 @@ +// file : libbuild2/function.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/parser.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + static const optional<const value_type*> arg_bool[1] = + { + &value_traits<bool>::value_type + }; + + static dir_path + scoped (const scope*, dir_path d) + { + return d; + } + + static void + scoped_void (const scope*, dir_path) + { + } + + int + main (int, char* argv[]) + { + // Fake build system driver, default verbosity. + // + init_diag (1); + init (argv[0]); + reset (strings ()); // No command line variables. + + function_family f ("dummy"); + + f["fail"] = []() {fail << "failed" << endf;}; + f["fail_arg"] = [](names a) {return convert<uint64_t> (move (a[0]));}; + + f["nullable"] = [](names* a) {return a == nullptr;}; + f["optional"] = [](optional<names> a) {return !a;}; + + f["dummy0"] = []() {return "abc";}; + f["dummy1"] = [](string s) {return s;}; + f["dummy2"] = [](uint64_t x, uint64_t y) {return x + y;}; + + f["ambig"] = [](names a, optional<string>) {return a;}; + f["ambig"] = [](names a, optional<uint64_t>) {return a;}; + + f["reverse"] = [](names a) {return a;}; + + f["scoped"] = [](const scope*, names a) {return a;}; + f["scoped_void"] = [](const scope*, names) {}; + f["scoped"] = &scoped; + f["scoped_void"] = &scoped_void; + + f[".qual"] = []() {return "abc";}; + + f[".length"] = &path::size; // Member function. + f[".type"] = &name::type; // Data member. + + f[".abs"] = [](dir_path d) {return d.absolute ();}; + + // Variadic function with first required argument of type bool. Returns + // number of arguments passed. + // + functions.insert ( + "variadic", + function_overload ( + nullptr, + 1, + function_overload::arg_variadic, + function_overload::types (arg_bool, 1), + [] (const scope*, vector_view<value> args, const function_overload&) + { + return value (static_cast<uint64_t> (args.size ())); + })); + + // Dump arguments. 
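+    //
+    // For instance, a (hypothetical) call along the lines of
+    // $dump([null], abc) from the parsed buildfile would print '[null]'
+    // and 'abc', one per line.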
+ // + functions.insert ( + "dump", + function_overload ( + nullptr, + 0, + function_overload::arg_variadic, + function_overload::types (), + [] (const scope*, vector_view<value> args, const function_overload&) + { + for (value& a: args) + { + if (a.null) + cout << "[null]"; + else if (!a.empty ()) + { + names storage; + cout << reverse (a, storage); + } + cout << endl; + } + return value (nullptr); + })); + + try + { + scope& s (*scope::global_); + + parser p; + p.parse_buildfile (cin, path ("buildfile"), s, s); + } + catch (const failed&) + { + return 1; + } + + return 0; + } +} + +int +main (int argc, char* argv[]) +{ + return build2::main (argc, argv); +} diff --git a/libbuild2/functions-builtin.cxx b/libbuild2/functions-builtin.cxx new file mode 100644 index 0000000..44ae534 --- /dev/null +++ b/libbuild2/functions-builtin.cxx @@ -0,0 +1,56 @@ +// file : libbuild2/functions-builtin.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +namespace build2 +{ + // Return NULL value if an environment variable is not set, untyped value + // otherwise. + // + static inline value + getenvvar (const string& name) + { + optional<string> v (getenv (name)); + + if (!v) + return value (); + + names r; + r.emplace_back (to_name (move (*v))); + return value (move (r)); + } + + void + builtin_functions () + { + function_family f ("builtin"); + + f["type"] = [](value* v) {return v->type != nullptr ? v->type->name : "";}; + + f["null"] = [](value* v) {return v->null;}; + f["empty"] = [](value* v) {return v->null || v->empty ();}; + + f["identity"] = [](value* v) {return move (*v);}; + + // string + // + f["string"] = [](bool b) {return b ? "true" : "false";}; + f["string"] = [](uint64_t i) {return to_string (i);}; + f["string"] = [](name n) {return to_string (n);}; + + // getenv + // + f["getenv"] = [](string name) + { + return getenvvar (name); + }; + + f["getenv"] = [](names name) + { + return getenvvar (convert<string> (move (name))); + }; + } +} diff --git a/libbuild2/functions-filesystem.cxx b/libbuild2/functions-filesystem.cxx new file mode 100644 index 0000000..d98c75d --- /dev/null +++ b/libbuild2/functions-filesystem.cxx @@ -0,0 +1,220 @@ +// file : libbuild2/functions-filesystem.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbutl/filesystem.mxx> + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + // Return paths of filesystem entries that match the pattern. See + // path_search() overloads (below) for details. + // + static names + path_search (const path& pattern, const optional<dir_path>& start) + { + names r; + auto add = [&r] (path&& p, const std::string&, bool interm) -> bool + { + // Canonicalizing paths seems to be the right thing to do. Otherwise, we + // can end up with different separators in the same path on Windows. + // + if (!interm) + r.emplace_back ( + value_traits<path>::reverse (move (p.canonicalize ()))); + + return true; + }; + + // Print paths "as is" in the diagnostics. + // + try + { + if (pattern.absolute ()) + path_search (pattern, add); + else + { + // An absolute start directory must be specified for the relative + // pattern. 
+ // + if (!start || start->relative ()) + { + diag_record dr (fail); + + if (!start) + dr << "start directory is not specified"; + else + dr << "start directory '" << start->representation () + << "' is relative"; + + dr << info << "pattern '" << pattern.representation () + << "' is relative"; + } + + path_search (pattern, add, *start); + } + } + catch (const system_error& e) + { + diag_record d (fail); + d << "unable to scan"; + + // If the pattern is absolute, then the start directory is not used, and + // so printing it would be misleading. + // + if (start && pattern.relative ()) + d << " '" << start->representation () << "'"; + + d << ": " << e + << info << "pattern: '" << pattern.representation () << "'"; + } + + return r; + } + + using butl::path_match; + + // Return true if a path for a filesystem entry matches the pattern. See + // path_match() overloads (below) for details. + // + static bool + path_match (const path& pattern, + const path& entry, + const optional<dir_path>& start) + { + // If pattern and entry are both either absolute or relative and + // non-empty, and the first pattern component is not a self-matching + // wildcard, then ignore the start directory. + // + bool rel (pattern.relative () == entry.relative () && + !pattern.empty () && !entry.empty ()); + + bool self (!pattern.empty () && + (*pattern.begin ()).find ("***") != string::npos); + + if (rel && !self) + return path_match (pattern, entry); + + // The start directory must be specified and be absolute. + // + if (!start || start->relative ()) + { + diag_record dr (fail); + + // Print paths "as is". + // + if (!start) + dr << "start directory is not specified"; + else + dr << "start directory path '" << start->representation () + << "' is relative"; + + dr << info << "pattern: '" << pattern.representation () << "'" + << info << "entry: '" << entry.representation () << "'"; + } + + return path_match (pattern, entry, *start); + } + + void + filesystem_functions () + { + function_family f ("filesystem"); + + // path_search + // + // Return filesystem paths that match the pattern. If the pattern is an + // absolute path, then the start directory is ignored (if present). + // Otherwise, the start directory must be specified and be absolute. + // + f["path_search"] = [](path pattern, optional<dir_path> start) + { + return path_search (pattern, start); + }; + + f["path_search"] = [](path pattern, names start) + { + return path_search (pattern, convert<dir_path> (move (start))); + }; + + f["path_search"] = [](names pattern, optional<dir_path> start) + { + return path_search (convert<path> (move (pattern)), start); + }; + + f["path_search"] = [](names pattern, names start) + { + return path_search (convert<path> (move (pattern)), + convert<dir_path> (move (start))); + }; + + // path_match + // + // Match a filesystem entry name against a name pattern (both are strings), + // or a filesystem entry path against a path pattern. For the latter case + // the start directory may also be required (see below). The semantics of + // the pattern and name/entry arguments is determined according to the + // following rules: + // + // - The arguments must be of the string or path types, or be untyped. + // + // - If one of the arguments is typed, then the other one must be of the + // same type or be untyped. In the later case, an untyped argument is + // converted to the type of the other argument. 
+ // + // - If both arguments are untyped and the start directory is specified, + // then the arguments are converted to the path type. + // + // - If both arguments are untyped and the start directory is not + // specified, then, if one of the arguments is syntactically a path (the + // value contains a directory separator), convert them to the path type, + // otherwise to the string type (match as names). + // + // If pattern and entry paths are both either absolute or relative and + // non-empty, and the first pattern component is not a self-matching + // wildcard (doesn't contain ***), then the start directory is not + // required, and is ignored if specified. Otherwise, the start directory + // must be specified and be an absolute path. + // + // Name matching. + // + f["path_match"] = [](string pattern, string name) + { + return path_match (pattern, name); + }; + + // Path matching. + // + f["path_match"] = [](path pat, path ent, optional<dir_path> start) + { + return path_match (pat, ent, start); + }; + + // The semantics depends on the presence of the start directory or the + // first two argument syntactic representation. + // + f["path_match"] = [](names pat, names ent, optional<names> start) + { + auto path_arg = [] (const names& a) -> bool + { + return a.size () == 1 && + (a[0].directory () || + a[0].value.find_first_of (path::traits_type::directory_separators) != + string::npos); + }; + + return start || path_arg (pat) || path_arg (ent) + ? path_match (convert<path> (move (pat)), // Match as paths. + convert<path> (move (ent)), + start + ? convert<dir_path> (move (*start)) + : optional<dir_path> ()) + : path_match (convert<string> (move (pat)), // Match as strings. + convert<string> (move (ent))); + }; + } +} diff --git a/libbuild2/functions-name.cxx b/libbuild2/functions-name.cxx new file mode 100644 index 0000000..a8e08b6 --- /dev/null +++ b/libbuild2/functions-name.cxx @@ -0,0 +1,109 @@ +// file : libbuild2/functions-name.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/scope.hxx> +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + // Convert name to target'ish name (see below for the 'ish part). Return + // raw/unprocessed data in case this is an unknown target type (or called + // out of scope). See scope::find_target_type() for details. + // + static pair<name, optional<string>> + to_target (const scope* s, name&& n) + { + optional<string> e; + + if (s != nullptr) + { + auto rp (s->find_target_type (n, location ())); + + if (rp.first != nullptr) + n.type = rp.first->name; + + e = move (rp.second); + } + + return make_pair (move (n), move (e)); + } + + void + name_functions () + { + function_family f ("name"); + + // These functions treat a name as a target/prerequisite name. + // + // While on one hand it feels like calling them target.name(), etc., would + // have been more appropriate, on the other hand they can also be called + // on prerequisite names. They also won't always return the same result as + // if we were interrogating an actual target (e.g., the directory may be + // relative). 
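+    //
+    // For example (a hypothetical prerequisite-style name),
+    // $name.name(dir/hello) would return 'hello' while
+    // $name.directory(dir/hello) would return 'dir/'.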
+ // + f["name"] = [](const scope* s, name n) + { + return to_target (s, move (n)).first.value; + }; + f["name"] = [](const scope* s, names ns) + { + return to_target (s, convert<name> (move (ns))).first.value; + }; + + // Note: returns NULL if extension is unspecified (default) and empty if + // specified as no extension. + // + f["extension"] = [](const scope* s, name n) + { + return to_target (s, move (n)).second; + }; + f["extension"] = [](const scope* s, names ns) + { + return to_target (s, convert<name> (move (ns))).second; + }; + + f["directory"] = [](const scope* s, name n) + { + return to_target (s, move (n)).first.dir; + }; + f["directory"] = [](const scope* s, names ns) + { + return to_target (s, convert<name> (move (ns))).first.dir; + }; + + f["target_type"] = [](const scope* s, name n) + { + return to_target (s, move (n)).first.type; + }; + f["target_type"] = [](const scope* s, names ns) + { + return to_target (s, convert<name> (move (ns))).first.type; + }; + + // Note: returns NULL if no project specified. + // + f["project"] = [](const scope* s, name n) + { + return to_target (s, move (n)).first.proj; + }; + f["project"] = [](const scope* s, names ns) + { + return to_target (s, convert<name> (move (ns))).first.proj; + }; + + // Name-specific overloads from builtins. + // + function_family b ("builtin"); + + b[".concat"] = [](dir_path d, name n) + { + d /= n.dir; + n.dir = move (d); + return n; + }; + } +} diff --git a/libbuild2/functions-path.cxx b/libbuild2/functions-path.cxx new file mode 100644 index 0000000..6e39812 --- /dev/null +++ b/libbuild2/functions-path.cxx @@ -0,0 +1,361 @@ +// file : libbuild2/functions-path.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + static value + path_thunk (const scope* base, + vector_view<value> args, + const function_overload& f) + try + { + return function_family::default_thunk (base, move (args), f); + } + catch (const invalid_path& e) + { + fail << "invalid path: '" << e.path << "'" << endf; + } + + static value + concat_path_string (path l, string sr) + { + if (path::traits_type::is_separator (sr[0])) // '\0' if empty. + { + sr.erase (0, 1); + path pr (move (sr)); + pr.canonicalize (); // Convert to canonical directory separators. + + // If RHS is syntactically a directory (ends with a trailing slash), + // then return it as dir_path, not path. + // + if (pr.to_directory () || pr.empty ()) + return value ( + path_cast<dir_path> (move (l)) /= path_cast<dir_path> (move (pr))); + else + l /= pr; + } + else + l += sr; + + return value (move (l)); + } + + static value + concat_dir_path_string (dir_path l, string sr) + { + if (path::traits_type::is_separator (sr[0])) // '\0' if empty. + sr.erase (0, 1); + + path pr (move (sr)); + pr.canonicalize (); // Convert to canonical directory separators. + + // If RHS is syntactically a directory (ends with a trailing slash), then + // return it as dir_path, not path. + // + return pr.to_directory () || pr.empty () + ? value (move (l /= path_cast<dir_path> (move (pr)))) + : value (path_cast<path> (move (l)) /= pr); + } + + // Return untyped value or NULL value if extension is not present. 
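+  //
+  // For example (hypothetically), $path.extension(foo.txt) would return
+  // 'txt' while $path.extension(foo) would return [null].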
+ // + static inline value + extension (path p) + { + const char* e (p.extension_cstring ()); + + if (e == nullptr) + return value (); + + names r; + r.emplace_back (e); + return value (move (r)); + } + + template <typename P> + static inline P + leaf (const P& p, const optional<dir_path>& d) + { + if (!d) + return p.leaf (); + + try + { + return p.leaf (*d); + } + catch (const invalid_path&) + { + fail << "'" << *d << "' is not a prefix of '" << p << "'" << endf; + } + } + + void + path_functions () + { + function_family f ("path", &path_thunk); + + // string + // + f["string"] = [](path p) {return move (p).string ();}; + + f["string"] = [](paths v) + { + strings r; + for (auto& p: v) + r.push_back (move (p).string ()); + return r; + }; + + f["string"] = [](dir_paths v) + { + strings r; + for (auto& p: v) + r.push_back (move (p).string ()); + return r; + }; + + // representation + // + f["representation"] = [](path p) {return move (p).representation ();}; + + f["representation"] = [](paths v) + { + strings r; + for (auto& p: v) + r.push_back (move (p).representation ()); + return r; + }; + + f["representation"] = [](dir_paths v) + { + strings r; + for (auto& p: v) + r.push_back (move (p).representation ()); + return r; + }; + + // canonicalize + // + f["canonicalize"] = [](path p) {p.canonicalize (); return p;}; + f["canonicalize"] = [](dir_path p) {p.canonicalize (); return p;}; + + f["canonicalize"] = [](paths v) + { + for (auto& p: v) + p.canonicalize (); + return v; + }; + + f["canonicalize"] = [](dir_paths v) + { + for (auto& p: v) + p.canonicalize (); + return v; + }; + + f[".canonicalize"] = [](names ns) + { + // For each path decide based on the presence of a trailing slash + // whether it is a directory. Return as untyped list of (potentially + // mixed) paths. + // + for (name& n: ns) + { + if (n.directory ()) + n.dir.canonicalize (); + else + n.value = convert<path> (move (n)).canonicalize ().string (); + } + return ns; + }; + + // normalize + // + f["normalize"] = [](path p, optional<value> a) + { + p.normalize (a && convert<bool> (move (*a))); + return p; + }; + + f["normalize"] = [](dir_path p, optional<value> a) + { + p.normalize (a && convert<bool> (move (*a))); + return p; + }; + + f["normalize"] = [](paths v, optional<value> a) + { + bool act (a && convert<bool> (move (*a))); + + for (auto& p: v) + p.normalize (act); + + return v; + }; + f["normalize"] = [](dir_paths v, optional<value> a) + { + bool act (a && convert<bool> (move (*a))); + + for (auto& p: v) + p.normalize (act); + return v; + }; + + f[".normalize"] = [](names ns, optional<value> a) + { + bool act (a && convert<bool> (move (*a))); + + // For each path decide based on the presence of a trailing slash + // whether it is a directory. Return as untyped list of (potentially + // mixed) paths. + // + for (name& n: ns) + { + if (n.directory ()) + n.dir.normalize (act); + else + n.value = convert<path> (move (n)).normalize (act).string (); + } + return ns; + }; + + // directory + // + f["directory"] = &path::directory; + + f["directory"] = [](paths v) + { + dir_paths r; + for (const path& p: v) + r.push_back (p.directory ()); + return r; + }; + + f["directory"] = [](dir_paths v) + { + for (dir_path& p: v) + p = p.directory (); + return v; + }; + + f[".directory"] = [](names ns) + { + // For each path decide based on the presence of a trailing slash + // whether it is a directory. Return as list of directory names. 
+ // + for (name& n: ns) + { + if (n.directory ()) + n.dir = n.dir.directory (); + else + n = convert<path> (move (n)).directory (); + } + return ns; + }; + + // base + // + f["base"] = &path::base; + + f["base"] = [](paths v) + { + for (path& p: v) + p = p.base (); + return v; + }; + + f["base"] = [](dir_paths v) + { + for (dir_path& p: v) + p = p.base (); + return v; + }; + + f[".base"] = [](names ns) + { + // For each path decide based on the presence of a trailing slash + // whether it is a directory. Return as untyped list of (potentially + // mixed) paths. + // + for (name& n: ns) + { + if (n.directory ()) + n.dir = n.dir.base (); + else + n.value = convert<path> (move (n)).base ().string (); + } + return ns; + }; + + // leaf + // + f["leaf"] = &path::leaf; + + f["leaf"] = [](path p, dir_path d) + { + return leaf (p, move (d)); + }; + + f["leaf"] = [](paths v, optional<dir_path> d) + { + for (path& p: v) + p = leaf (p, d); + return v; + }; + + f["leaf"] = [](dir_paths v, optional<dir_path> d) + { + for (dir_path& p: v) + p = leaf (p, d); + return v; + }; + + f[".leaf"] = [](names ns, optional<dir_path> d) + { + // For each path decide based on the presence of a trailing slash + // whether it is a directory. Return as untyped list of (potentially + // mixed) paths. + // + for (name& n: ns) + { + if (n.directory ()) + n.dir = leaf (n.dir, d); + else + n.value = leaf (convert<path> (move (n)), d).string (); + } + return ns; + }; + + // extension + // + f["extension"] = &extension; + + f[".extension"] = [](names ns) + { + return extension (convert<path> (move (ns))); + }; + + // Path-specific overloads from builtins. + // + function_family b ("builtin", &path_thunk); + + b[".concat"] = &concat_path_string; + b[".concat"] = &concat_dir_path_string; + + b[".concat"] = [](path l, names ur) + { + return concat_path_string (move (l), convert<string> (move (ur))); + }; + + b[".concat"] = [](dir_path l, names ur) + { + return concat_dir_path_string (move (l), convert<string> (move (ur))); + }; + } +} diff --git a/libbuild2/functions-process-path.cxx b/libbuild2/functions-process-path.cxx new file mode 100644 index 0000000..65e426b --- /dev/null +++ b/libbuild2/functions-process-path.cxx @@ -0,0 +1,25 @@ +// file : libbuild2/functions-process-path.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + void + process_path_functions () + { + function_family f ("process_path"); + + // As discussed in value_traits<process_path>, we always have recall. + // + f["recall"] = &process_path::recall; + f["effect"] = [](process_path p) + { + return move (p.effect.empty () ? p.recall : p.effect); + }; + } +} diff --git a/libbuild2/functions-process.cxx b/libbuild2/functions-process.cxx new file mode 100644 index 0000000..83188d3 --- /dev/null +++ b/libbuild2/functions-process.cxx @@ -0,0 +1,253 @@ +// file : libbuild2/functions-process.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbutl/regex.mxx> + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // Ideas for potential further improvements: + // + // - Use scope to query environment. + // - Mode to ignore error/suppress diagnostics and return NULL? 
+ // - Similar regex flags to regex.* functions (icase, etc)? + + // Process arguments. + // + static pair<process_path, strings> + process_args (names&& args, const char* fn) + { + if (args.empty () || args[0].empty ()) + fail << "executable name expected in process." << fn << "()"; + + process_path pp; + try + { + size_t erase; + + // This can be a process_path (pair) or just a path. + // + if (args[0].pair) + { + pp = convert<process_path> (move (args[0]), move (args[1])); + erase = 2; + } + else + { + pp = run_search (convert<path> (move (args[0]))); + erase = 1; + } + + args.erase (args.begin (), args.begin () + erase); + } + catch (const invalid_argument& e) + { + fail << "invalid process." << fn << "() executable path: " << e.what (); + } + + strings sargs; + try + { + sargs = convert<strings> (move (args)); + } + catch (const invalid_argument& e) + { + fail << "invalid process." << fn << "() argument: " << e.what (); + } + + return pair<process_path, strings> (move (pp), move (sargs)); + } + + static process + start (const scope*, + const process_path& pp, + const strings& args, + cstrings& cargs) + { + cargs.reserve (args.size () + 2); + cargs.push_back (pp.recall_string ()); + transform (args.begin (), + args.end (), + back_inserter (cargs), + [] (const string& s) {return s.c_str ();}); + cargs.push_back (nullptr); + + return run_start (3 /* verbosity */, + pp, + cargs.data (), + 0 /* stdin */, + -1 /* stdout */); + } + + static void + finish (cstrings& args, process& pr, bool io) + { + run_finish (args, pr); + + if (io) + fail << "error reading " << args[0] << " output"; + } + + static value + run (const scope* s, const process_path& pp, const strings& args) + { + cstrings cargs; + process pr (start (s, pp, args, cargs)); + + string v; + bool io (false); + try + { + ifdstream is (move (pr.in_ofd)); + + // Note that getline() will fail if there is no output. + // + if (is.peek () != ifdstream::traits_type::eof ()) + getline (is, v, '\0'); + + is.close (); // Detect errors. + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // finish() try to deal with that first. + // + io = true; + } + + finish (cargs, pr, io); + + names r; + r.push_back (to_name (move (trim (v)))); + return value (move (r)); + } + + regex + parse_regex (const string&, regex::flag_type); // functions-regex.cxx + + static value + run_regex (const scope* s, + const process_path& pp, + const strings& args, + const string& pat, + const optional<string>& fmt) + { + regex re (parse_regex (pat, regex::ECMAScript)); + + cstrings cargs; + process pr (start (s, pp, args, cargs)); + + names r; + bool io (false); + try + { + ifdstream is (move (pr.in_ofd), ifdstream::badbit); + + for (string l; !eof (getline (is, l)); ) + { + if (fmt) + { + pair<string, bool> p (regex_replace_match (l, re, *fmt)); + + if (p.second) + r.push_back (to_name (move (p.first))); + } + else + { + if (regex_match (l, re)) + r.push_back (to_name (move (l))); + } + } + + is.close (); // Detect errors. + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // finish() try to deal with that first. 
+ // + io = true; + } + + finish (cargs, pr, io); + + return value (move (r)); + } + + static inline value + run_regex (const scope* s, + names&& args, + const string& pat, + const optional<string>& fmt) + { + pair<process_path, strings> pa (process_args (move (args), "run_regex")); + return run_regex (s, pa.first, pa.second, pat, fmt); + } + + void + process_functions () + { + function_family f ("process"); + + // $process.run(<prog>[ <args>...]) + // + // Return trimmed stdout. + // + f[".run"] = [](const scope* s, names args) + { + pair<process_path, strings> pa (process_args (move (args), "run")); + return run (s, pa.first, pa.second); + }; + + f["run"] = [](const scope* s, process_path pp) + { + return run (s, pp, strings ()); + }; + + // $process.run_regex(<prog>[ <args>...], <pat> [, <fmt>]) + // + // Return stdout lines matched and optionally processed with regex. + // + // Each line of stdout (including the customary trailing blank) is matched + // (as a whole) against <pat> and, if successful, returned, optionally + // processed with <fmt>, as an element of a list. + // + f[".run_regex"] = [](const scope* s, names a, string p, optional<string> f) + { + return run_regex (s, move (a), p, f); + }; + + f[".run_regex"] = [] (const scope* s, names a, names p, optional<names> f) + { + return run_regex (s, + move (a), + convert<string> (move (p)), + f ? convert<string> (move (*f)) : nullopt_string); + }; + + f["run_regex"] = [](const scope* s, + process_path pp, + string p, + optional<string> f) + { + return run_regex (s, pp, strings (), p, f); + }; + + f["run_regex"] = [](const scope* s, + process_path pp, + names p, + optional<names> f) + { + return run_regex (s, + pp, strings (), + convert<string> (move (p)), + f ? convert<string> (move (*f)) : nullopt_string); + }; + } +} diff --git a/libbuild2/functions-project-name.cxx b/libbuild2/functions-project-name.cxx new file mode 100644 index 0000000..163e865 --- /dev/null +++ b/libbuild2/functions-project-name.cxx @@ -0,0 +1,63 @@ +// file : libbuild2/functions-project-name.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + void + project_name_functions () + { + function_family f ("project_name"); + + f["string"] = [](project_name p) {return move (p).string ();}; + + f["base"] = [](project_name p, optional<string> ext) + { + return ext ? p.base (ext->c_str ()) : p.base (); + }; + + f["base"] = [](project_name p, names ext) + { + return p.base (convert<string> (move (ext)).c_str ()); + }; + + f["extension"] = &project_name::extension; + f["variable"] = &project_name::variable; + + // Project name-specific overloads from builtins. 
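+    // These are presumably what make a buildfile concatenation such as
+    // "$project-tests" work when the project variable is of the
+    // project_name type.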
+ // + function_family b ("builtin"); + + b[".concat"] = [](project_name n, string s) + { + string r (move (n).string ()); + r += s; + return r; + }; + + b[".concat"] = [](string s, project_name n) + { + s += n.string (); + return s; + }; + + b[".concat"] = [](project_name n, names ns) + { + string r (move (n).string ()); + r += convert<string> (move (ns)); + return r; + }; + + b[".concat"] = [](names ns, project_name n) + { + string r (convert<string> (move (ns))); + r += n.string (); + return r; + }; + } +} diff --git a/libbuild2/functions-regex.cxx b/libbuild2/functions-regex.cxx new file mode 100644 index 0000000..2c478fe --- /dev/null +++ b/libbuild2/functions-regex.cxx @@ -0,0 +1,542 @@ +// file : libbuild2/functions-regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <sstream> + +#include <libbutl/regex.mxx> + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // Convert value of an arbitrary type to string. + // + static inline string + to_string (value&& v) + { + // Optimize for the string value type. + // + if (v.type != &value_traits<string>::value_type) + untypify (v); + + return convert<string> (move (v)); + } + + // Parse a regular expression. Throw invalid_argument if it is not valid. + // + // Note: also used in functions-process.cxx (thus not static). + // + regex + parse_regex (const string& s, regex::flag_type f) + { + try + { + return regex (s, f); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + ostringstream os; + os << "invalid regex '" << s << "'" << e; + throw invalid_argument (os.str ()); + } + } + + // Match value of an arbitrary type against the regular expression. See + // match() overloads (below) for details. + // + static value + match (value&& v, const string& re, optional<names>&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert<string> (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Match. + // + string s (to_string (move (v))); + + if (!subs) + return value (regex_match (s, rge)); // Return boolean value. + + names r; + match_results<string::const_iterator> m; + + if (regex_match (s, m, rge)) + { + assert (!m.empty ()); + + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + + return value (move (r)); + } + + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. See search() overloads (below) + // for details. + // + static value + search (value&& v, const string& re, optional<names>&& flags) + { + // Parse flags. + // + regex::flag_type rf (regex::ECMAScript); + bool match (false); + bool subs (false); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert<string> (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (s == "return_match") + match = true; + else if (s == "return_subs") + subs = true; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + // Parse regex. + // + regex rge (parse_regex (re, rf)); + + // Search. 
+ // + string s (to_string (move (v))); + + if (!match && !subs) + return value (regex_search (s, rge)); // Return boolean value. + + names r; + match_results<string::const_iterator> m; + + if (regex_search (s, m, rge)) + { + assert (!m.empty ()); + + if (match) + { + assert (m[0].matched); + r.emplace_back (m.str (0)); + } + + if (subs) + { + for (size_t i (1); i != m.size (); ++i) + { + if (m[i].matched) + r.emplace_back (m.str (i)); + } + } + } + + return value (move (r)); + } + + static pair<regex::flag_type, regex_constants::match_flag_type> + parse_replacement_flags (optional<names>&& flags, bool first_only = true) + { + regex::flag_type rf (regex::ECMAScript); + regex_constants::match_flag_type mf (regex_constants::match_default); + + if (flags) + { + for (auto& f: *flags) + { + string s (convert<string> (move (f))); + + if (s == "icase") + rf |= regex::icase; + else if (first_only && s == "format_first_only") + mf |= regex_constants::format_first_only; + else if (s == "format_no_copy") + mf |= regex_constants::format_no_copy; + else + throw invalid_argument ("invalid flag '" + s + "'"); + } + } + + return make_pair (rf, mf); + } + + // Replace matched parts in a value of an arbitrary type, using the format + // string. See replace() overloads (below) for details. + // + static names + replace (value&& v, + const string& re, + const string& fmt, + optional<names>&& flags) + { + auto fl (parse_replacement_flags (move (flags))); + regex rge (parse_regex (re, fl.first)); + + names r; + + try + { + r.emplace_back (regex_replace_search (to_string (move (v)), + rge, + fmt, + fl.second).first); + } + catch (const regex_error& e) + { + fail << "unable to replace" << e; + } + + return r; + } + + // Split a value of an arbitrary type into a list of unmatched value parts + // and replacements of the matched parts. See split() overloads (below) for + // details. + // + static names + split (value&& v, + const string& re, + const string& fmt, + optional<names>&& flags) + { + auto fl (parse_replacement_flags (move (flags), false)); + regex rge (parse_regex (re, fl.first)); + + names r; + + try + { + regex_replace_search (to_string (move (v)), rge, fmt, + [&r] (string::const_iterator b, + string::const_iterator e) + { + if (b != e) + r.emplace_back (string (b, e)); + }, + fl.second); + } + catch (const regex_error& e) + { + fail << "unable to split" << e; + } + + return r; + } + + // Replace matched parts of list elements using the format string. See + // apply() overloads (below) for details. + // + static names + apply (names&& s, + const string& re, + const string& fmt, + optional<names>&& flags) + { + auto fl (parse_replacement_flags (move (flags))); + regex rge (parse_regex (re, fl.first)); + + names r; + + try + { + for (auto& v: s) + { + string s (regex_replace_search (convert<string> (move (v)), + rge, + fmt, + fl.second).first); + + if (!s.empty ()) + r.emplace_back (move (s)); + } + } + catch (const regex_error& e) + { + fail << "unable to apply" << e; + } + + return r; + } + + // Replace matched parts of list elements using the format string and + // concatenate the transformed elements. See merge() overloads (below) for + // details. 
+ // + static names + merge (names&& s, + const string& re, + const string& fmt, + optional<string>&& delim, + optional<names>&& flags) + { + auto fl (parse_replacement_flags (move (flags))); + regex rge (parse_regex (re, fl.first)); + + string rs; + + try + { + for (auto& v: s) + { + string s (regex_replace_search (convert<string> (move (v)), + rge, + fmt, + fl.second).first); + + if (!s.empty ()) + { + if (!rs.empty () && delim) + rs.append (*delim); + + rs.append (s); + } + + } + } + catch (const regex_error& e) + { + fail << "unable to merge" << e; + } + + names r; + r.emplace_back (move (rs)); + return r; + } + + void + regex_functions () + { + function_family f ("regex"); + + // $regex.match(<val>, <pat> [, <flags>]) + // + // Match a value of an arbitrary type against the regular expression. + // Convert the value to string prior to matching. Return the boolean value + // unless return_subs flag is specified (see below), in which case return + // names (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + f[".match"] = [](value s, string re, optional<names> flags) + { + return match (move (s), re, move (flags)); + }; + + f[".match"] = [](value s, names re, optional<names> flags) + { + return match (move (s), convert<string> (move (re)), move (flags)); + }; + + // $regex.search(<val>, <pat> [, <flags>]) + // + // Determine if there is a match between the regular expression and some + // part of a value of an arbitrary type. Convert the value to string prior + // to searching. Return the boolean value unless return_match or + // return_subs flag is specified (see below) in which case return names + // (empty if no match). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // return_match - return names (rather than boolean), that contain a + // sub-string that matches the whole regular expression + // + // return_subs - return names (rather than boolean), that contain + // sub-strings that match the marked sub-expressions + // + // If both return_match and return_subs flags are specified then the + // sub-string that matches the whole regular expression comes first. + // + f[".search"] = [](value s, string re, optional<names> flags) + { + return search (move (s), re, move (flags)); + }; + + f[".search"] = [](value s, names re, optional<names> flags) + { + return search (move (s), convert<string> (move (re)), move (flags)); + }; + + // $regex.replace(<val>, <pat>, <fmt> [, <flags>]) + // + // Replace matched parts in a value of an arbitrary type, using the format + // string. Convert the value to string prior to matching. The result value + // is always untyped, regardless of the argument type. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see libbutl/regex.mxx for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_first_only - only replace the first match + // + // format_no_copy - do not copy unmatched value parts into the result + // + // If both format_first_only and format_no_copy flags are specified then + // the result will only contain the replacement of the first match. 
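+    //
+    // For example (a hypothetical call):
+    //
+    //   $regex.replace('foo.cxx', '\.cxx$', '.o')   # foo.o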
+ // + f[".replace"] = [](value s, string re, string fmt, optional<names> flags) + { + return replace (move (s), re, fmt, move (flags)); + }; + + f[".replace"] = [](value s, names re, names fmt, optional<names> flags) + { + return replace (move (s), + convert<string> (move (re)), + convert<string> (move (fmt)), + move (flags)); + }; + + // $regex.split(<val>, <pat>, <fmt> [, <flags>]) + // + // Split a value of an arbitrary type into a list of unmatched value parts + // and replacements of the matched parts, omitting empty ones. Convert the + // value to string prior to matching. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see libbutl/regex.mxx for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_no_copy - do not copy unmatched value parts into the result + // + f[".split"] = [](value s, string re, string fmt, optional<names> flags) + { + return split (move (s), re, fmt, move (flags)); + }; + + f[".split"] = [](value s, names re, names fmt, optional<names> flags) + { + return split (move (s), + convert<string> (move (re)), + convert<string> (move (fmt)), + move (flags)); + }; + + // $regex.merge(<vals>, <pat>, <fmt> [, <delim> [, <flags>]]) + // + // Replace matched parts in a list of elements using the regex format + // string. Convert the elements to string prior to matching. The result + // value is untyped and contains concatenation of transformed non-empty + // elements optionally separated with a delimiter. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see libbutl/regex.mxx for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_first_only - only replace the first match + // + // format_no_copy - do not copy unmatched value parts into the result + // + // If both format_first_only and format_no_copy flags are specified then + // the result will be a concatenation of only the first match + // replacements. + // + f[".merge"] = [](names s, + string re, + string fmt, + optional<string> delim, + optional<names> flags) + { + return merge (move (s), re, fmt, move (delim), move (flags)); + }; + + f[".merge"] = [](names s, + names re, + names fmt, + optional<names> delim, + optional<names> flags) + { + return merge (move (s), + convert<string> (move (re)), + convert<string> (move (fmt)), + delim + ? convert<string> (move (*delim)) + : optional<string> (), + move (flags)); + }; + + // $regex.apply(<vals>, <pat>, <fmt> [, <flags>]) + // + // Replace matched parts of each element in a list using the regex format + // string. Convert the elements to string prior to matching. Return a list + // of transformed elements, omitting the empty ones. + // + // Substitution escape sequences are extended with a subset of Perl + // sequences (see libbutl/regex.mxx for details). + // + // The following flags are supported: + // + // icase - match ignoring case + // + // format_first_only - only replace the first match + // + // format_no_copy - do not copy unmatched value parts into the result + // + // If both format_first_only and format_no_copy flags are specified then + // the result elements will only contain the replacement of the first + // match. 
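+    //
+    // For example (a hypothetical call):
+    //
+    //   $regex.apply(foo.cxx bar.cxx, '\.cxx$', '.o')   # foo.o bar.o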
+ // + f[".apply"] = [](names s, string re, string fmt, optional<names> flags) + { + return apply (move (s), re, fmt, move (flags)); + }; + + f[".apply"] = [](names s, names re, names fmt, optional<names> flags) + { + return apply (move (s), + convert<string> (move (re)), + convert<string> (move (fmt)), + move (flags)); + }; + } +} diff --git a/libbuild2/functions-string.cxx b/libbuild2/functions-string.cxx new file mode 100644 index 0000000..22860cb --- /dev/null +++ b/libbuild2/functions-string.cxx @@ -0,0 +1,43 @@ +// file : libbuild2/functions-string.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + void + string_functions () + { + function_family f ("string"); + + f["string"] = [](string s) {return s;}; + + // @@ Shouldn't it concatenate elements into the single string? + // @@ Doesn't seem to be used so far. Can consider removing. + // + // f["string"] = [](strings v) {return v;}; + + // String-specific overloads from builtins. + // + function_family b ("builtin"); + + b[".concat"] = [](string l, string r) {l += r; return l;}; + + b[".concat"] = [](string l, names ur) + { + l += convert<string> (move (ur)); + return l; + }; + + b[".concat"] = [](names ul, string r) + { + string l (convert<string> (move (ul))); + l += r; + return l; + }; + } +} diff --git a/libbuild2/functions-target-triplet.cxx b/libbuild2/functions-target-triplet.cxx new file mode 100644 index 0000000..4394c5a --- /dev/null +++ b/libbuild2/functions-target-triplet.cxx @@ -0,0 +1,36 @@ +// file : libbuild2/functions-target-triplet.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +using namespace std; + +namespace build2 +{ + void + target_triplet_functions () + { + function_family f ("target_triplet"); + + f["string"] = [](target_triplet t) {return t.string ();}; + + // Target triplet-specific overloads from builtins. 
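+    // These overloads are presumably what make a concatenation such as
+    // "someprefix-$cxx.target" work in a buildfile, where cxx.target is of
+    // the target_triplet type.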
+ // + function_family b ("builtin"); + + b[".concat"] = [](target_triplet l, string sr) {return l.string () + sr;}; + b[".concat"] = [](string sl, target_triplet r) {return sl + r.string ();}; + + b[".concat"] = [](target_triplet l, names ur) + { + return l.string () + convert<string> (move (ur)); + }; + + b[".concat"] = [](names ul, target_triplet r) + { + return convert<string> (move (ul)) + r.string (); + }; + } +} diff --git a/libbuild2/lexer+buildspec.test.testscript b/libbuild2/lexer+buildspec.test.testscript new file mode 100644 index 0000000..a80b2d5 --- /dev/null +++ b/libbuild2/lexer+buildspec.test.testscript @@ -0,0 +1,16 @@ +# file : libbuild2/lexer+buildspec.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = buildspec + +: punctuation +: +$* <:'x,x(x)' >>EOO +'x' +, +'x' + ( +'x' +) +EOO diff --git a/libbuild2/lexer+comment.test.testscript b/libbuild2/lexer+comment.test.testscript new file mode 100644 index 0000000..6ad1202 --- /dev/null +++ b/libbuild2/lexer+comment.test.testscript @@ -0,0 +1,139 @@ +# file : libbuild2/lexer+comment.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: single-line +: +{ + : only + : + $* <<EOI >>:EOO + # comment + EOI + EOO + + : first + : + $* <<EOI >>EOO + # comment + foo + EOI + 'foo' + <newline> + EOO + + : last + : + $* <<EOI >>EOO + foo + # comment + EOI + 'foo' + <newline> + EOO + + : few + : + $* <<EOI >>EOO + foo + # comment + # comment + EOI + 'foo' + <newline> + EOO + + : cont + : + $* <<EOI >>EOO + foo + # comment\\ + bar + EOI + 'foo' + <newline> + 'bar' + <newline> + EOO + + : same + : + $* <<EOI >>EOO + foo # comment + bar # comment + EOI + 'foo' + <newline> + 'bar' + <newline> + EOO +} + +: multi-line +: +{ + : only + : + $* <<EOI >>:EOO + #\ + comment + comment + #\ + EOI + EOO + + : empty + : + $* <<EOI >>:EOO + #\ + #\ + EOI + EOO + + : start-same + : + $* <<EOI >>EOO + foo #\ + comment + comment + #\ + EOI + 'foo' + <newline> + EOO + + : end-same + : + $* <<EOI >>EOO + #\ + comment + comment + foo #\ + bar + EOI + 'bar' + <newline> + EOO + + : end-not + : + $* <<EOI >>EOO + #\ + comment + #\ not an end + foo #\ + bar + EOI + 'bar' + <newline> + EOO + + : unterm + : + $* <<EOI 2>>EOE != 0 + #\ + comment + EOI + stdin:3:1: error: unterminated multi-line comment + EOE +} diff --git a/libbuild2/lexer+eval.test.testscript b/libbuild2/lexer+eval.test.testscript new file mode 100644 index 0000000..86f804a --- /dev/null +++ b/libbuild2/lexer+eval.test.testscript @@ -0,0 +1,76 @@ +# file : libbuild2/lexer+eval.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = eval + +: punctuation +: +$* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO +'x' +: +'x' +{ +'x' +} +'x' +[ +'x' +] +'x' +$ +'x' +? +'x' +, +'x' +( +'x' +) +EOO + +: logical +: +$* <:'x|x||x&x&&x!x!!x)' >>EOO +'x|x' +|| +'x&x' +&& +'x' +! +'x' +! +! 
+'x' +) +EOO + +: comparison +: +$* <:'x=x==x!=x<x<=x>x>=)' >>EOO +'x=x' +== +'x' +!= +'x' +< +'x' +<= +'x' +> +'x' +>= +) +EOO + +: newline +: +$* <'x' >- 2>>EOE != 0 +stdin:1:2: error: newline in evaluation context +EOE + +: eof +: +$* <:'' 2>>EOE != 0 +stdin:1:1: error: unterminated evaluation context +EOE diff --git a/libbuild2/lexer+quoting.test.testscript b/libbuild2/lexer+quoting.test.testscript new file mode 100644 index 0000000..043737f --- /dev/null +++ b/libbuild2/lexer+quoting.test.testscript @@ -0,0 +1,108 @@ +# file : libbuild2/lexer+quoting.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.options += -q + +: unquoted +: +$* <'foo' >>EOO +'foo' +<newline> +EOO + +: comp +: +{ + : single + : + $* <":'foo':" >>EOO + : + 'foo' [S/C] + : + <newline> + EOO + + : double + : + $* <':"foo":' >>EOO + : + 'foo' [D/C] + : + <newline> + EOO + + : single-empty + : + $* <"''" >>EOO + '' [S/C] + <newline> + EOO + + : double-empty + : + $* <'""' >>EOO + '' [D/C] + <newline> + EOO +} + +: part +{ + : quoted + { + : start + : Token start already quoted + : + $* <'"$foo"' >>EOO + '' [D/P] + $ [D/C] + 'foo' [D/P] + <newline> + EOO + + : end + : Token end still quoted + : + $* <'"foo$"' >>EOO + 'foo' [D/P] + $ [D/C] + '' [D/P] + <newline> + EOO + } + + : unquoted + { + : start + : Token starts with unquoted character + : + $* <'f"oo"' >>EOO + 'foo' [D/P] + <newline> + EOO + + : end + : Token continous with unquoted character + : + $* <'"fo"o' >>EOO + 'foo' [D/P] + <newline> + EOO + + : escape + : Token continous with unquoted escaped character + : + $* <'"fo"\"' >>EOO + 'fo"' [D/P] + <newline> + EOO + } +} + +: mixed +: +$* <"\"fo\"'o'" >>EOO +'foo' [M/P] +<newline> +EOO diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx new file mode 100644 index 0000000..fd13c31 --- /dev/null +++ b/libbuild2/lexer.cxx @@ -0,0 +1,720 @@ +// file : libbuild2/lexer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/lexer.hxx> + +#include <cstring> // strchr() + +using namespace std; + +namespace build2 +{ + using type = token_type; + + pair<char, bool> lexer:: + peek_char () + { + sep_ = skip_spaces (); + xchar c (peek ()); + return make_pair (eos (c) ? '\0' : char (c), sep_); + } + + void lexer:: + mode (lexer_mode m, char ps, optional<const char*> esc) + { + const char* s1 (nullptr); + const char* s2 (nullptr); + bool s (true); + bool n (true); + bool q (true); + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::normal: + { + s1 = ":<>=+ $(){}[]#\t\n"; + s2 = " = "; + break; + } + case lexer_mode::value: + { + s1 = " $(){}[]#\t\n"; + s2 = " "; + break; + } + case lexer_mode::attribute: + { + s1 = " $(]#\t\n"; + s2 = " "; + break; + } + case lexer_mode::eval: + { + s1 = ":<>=!&|?, $(){}[]#\t\n"; + s2 = " = &| "; + break; + } + case lexer_mode::buildspec: + { + // Like the value mode with these differences: + // + // 1. Returns '(' as a separated token provided the state stack depth + // is less than or equal to 3 (initial state plus two buildspec) + // (see parse_buildspec() for details). + // + // 2. Recognizes comma. + // + // 3. Treat newline as an ordinary space. + // + s1 = " $(){}[],\t\n"; + s2 = " "; + n = false; + break; + } + case lexer_mode::single_quoted: + case lexer_mode::double_quoted: + s = false; + // Fall through. 
+ case lexer_mode::variable: + { + // These are handled in an ad hoc way in word(). + assert (ps == '\0'); + break; + } + default: assert (false); // Unhandled custom mode. + } + + state_.push (state {m, ps, s, n, q, *esc, s1, s2}); + } + + token lexer:: + next () + { + const state& st (state_.top ()); + lexer_mode m (st.mode); + + // For some modes we have dedicated imlementations of next(). + // + switch (m) + { + case lexer_mode::normal: + case lexer_mode::value: + case lexer_mode::attribute: + case lexer_mode::variable: + case lexer_mode::buildspec: break; + case lexer_mode::eval: return next_eval (); + case lexer_mode::double_quoted: return next_quoted (); + default: assert (false); // Unhandled custom mode. + } + + bool sep (skip_spaces ()); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + auto make_token = [&sep, ln, cn] (type t, string v = string ()) + { + return token (t, move (v), + sep, quote_type::unquoted, false, + ln, cn, token_printer); + }; + + if (eos (c)) + return make_token (type::eos); + + // Handle pair separator. + // + if (c == st.sep_pair) + return make_token (type::pair_separator, string (1, c)); + + switch (c) + { + // NOTE: remember to update mode(), next_eval() if adding new special + // characters. + // + case '\n': + { + // Expire value mode at the end of the line. + // + if (m == lexer_mode::value) + state_.pop (); + + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + case '[': return make_token (type::lsbrace); + case ']': + { + // Expire attribute mode after closing ']'. + // + if (m == lexer_mode::attribute) + state_.pop (); + + return make_token (type::rsbrace); + } + case '$': return make_token (type::dollar); + case ')': return make_token (type::rparen); + case '(': + { + // Left paren is always separated in the buildspec mode. + // + if (m == lexer_mode::buildspec && state_.size () <= 3) + sep = true; + + return make_token (type::lparen); + } + } + + // The following characters are special in the normal and variable modes. + // + if (m == lexer_mode::normal || m == lexer_mode::variable) + { + switch (c) + { + // NOTE: remember to update mode(), next_eval() if adding new special + // characters. + // + case ':': return make_token (type::colon); + case '=': + { + if (peek () == '+') + { + get (); + return make_token (type::prepend); + } + else + return make_token (type::assign); + } + case '+': + { + if (peek () == '=') + { + get (); + return make_token (type::append); + } + } + } + } + + // The following characters are special in the normal mode. + // + if (m == lexer_mode::normal) + { + // NOTE: remember to update mode() if adding new special characters. + // + switch (c) + { + case '<': return make_token (type::labrace); + case '>': return make_token (type::rabrace); + } + } + + // The following characters are special in the buildspec mode. + // + if (m == lexer_mode::buildspec) + { + // NOTE: remember to update mode() if adding new special characters. + // + switch (c) + { + case ',': return make_token (type::comma); + } + } + + // Otherwise it is a word. 
+ // + unget (c); + return word (st, sep); + } + + token lexer:: + next_eval () + { + bool sep (skip_spaces ()); + xchar c (get ()); + + if (eos (c)) + fail (c) << "unterminated evaluation context"; + + const state& st (state_.top ()); + + uint64_t ln (c.line), cn (c.column); + + auto make_token = [sep, ln, cn] (type t, string v = string ()) + { + return token (t, move (v), + sep, quote_type::unquoted, false, + ln, cn, token_printer); + }; + + // This mode is quite a bit like the value mode when it comes to special + // characters, except that we have some of our own. + // + + // Handle pair separator. + // + if (c == st.sep_pair) + return make_token (type::pair_separator, string (1, c)); + + // Note: we don't treat [ and ] as special here. Maybe can use them for + // something later. + // + switch (c) + { + // NOTE: remember to update mode() if adding new special characters. + // + case '\n': fail (c) << "newline in evaluation context" << endf; + case ':': return make_token (type::colon); + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + case '[': return make_token (type::lsbrace); + case ']': return make_token (type::rsbrace); + case '$': return make_token (type::dollar); + case '?': return make_token (type::question); + case ',': return make_token (type::comma); + case '(': return make_token (type::lparen); + case ')': + { + state_.pop (); // Expire eval mode. + return make_token (type::rparen); + } + // Potentially two-character tokens. + // + case '=': + case '!': + case '<': + case '>': + case '|': + case '&': + { + xchar p (peek ()); + + type r (type::eos); + switch (c) + { + case '|': if (p == '|') r = type::log_or; break; + case '&': if (p == '&') r = type::log_and; break; + + case '<': r = (p == '=' ? type::less_equal : type::less); break; + case '>': r = (p == '=' ? type::greater_equal : type::greater); break; + + case '=': if (p == '=') r = type::equal; break; + + case '!': r = (p == '=' ? type::not_equal : type::log_not); break; + } + + if (r == type::eos) + break; + + switch (r) + { + case type::less: + case type::greater: + case type::log_not: break; + default: get (); + } + + return make_token (r); + } + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + token lexer:: + next_quoted () + { + xchar c (get ()); + + if (eos (c)) + fail (c) << "unterminated double-quoted sequence"; + + uint64_t ln (c.line), cn (c.column); + + auto make_token = [ln, cn] (type t) + { + return token (t, false, quote_type::double_, ln, cn, token_printer); + }; + + switch (c) + { + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + + // Otherwise it is a word. + // + unget (c); + return word (state_.top (), false); + } + + token lexer:: + word (state st, bool sep) + { + lexer_mode m (st.mode); + + xchar c (peek ()); + assert (!eos (c)); + + uint64_t ln (c.line), cn (c.column); + + string lexeme; + quote_type qtype (m == lexer_mode::double_quoted + ? quote_type::double_ + : quote_type::unquoted); + + // If we are already in the quoted mode then we didn't start with the + // quote character. + // + bool qcomp (false); + + auto append = [&lexeme, &m, &qcomp] (char c) + { + lexeme += c; + + // An unquoted character after a quoted fragment. + // + if (qcomp && m != lexer_mode::double_quoted) + qcomp = false; + }; + + for (; !eos (c); c = peek ()) + { + // First handle escape sequences. 
+ // + if (c == '\\') + { + // In the variable mode we treat the beginning of the escape sequence + // as a separator (think \"$foo\"). + // + if (m == lexer_mode::variable) + break; + + get (); + xchar p (peek ()); + + const char* esc (st.escapes); + + if (esc == nullptr || + (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr)) + { + get (); + + if (eos (p)) + fail (p) << "unterminated escape sequence"; + + if (p != '\n') // Ignore if line continuation. + append (p); + + continue; + } + else + unget (c); // Treat as a normal character. + } + + bool done (false); + + // Next take care of the double-quoted mode. This one is tricky since + // we push/pop modes while accumulating the same lexeme for example: + // + // foo" bar "baz + // + if (m == lexer_mode::double_quoted) + { + switch (c) + { + // Only these two characters are special in the double-quoted mode. + // + case '$': + case '(': + { + done = true; + break; + } + // End quote. + // + case '\"': + { + get (); + state_.pop (); + + st = state_.top (); + m = st.mode; + continue; + } + } + } + // We also handle the variable mode in an ad hoc way. + // + else if (m == lexer_mode::variable) + { + if (c != '_' && !(lexeme.empty () ? alpha (c) : alnum (c))) + { + if (c != '.') + done = true; + else + { + // Normally '.' is part of the variable (namespace separator) + // unless it is trailing (think $major.$minor). + // + get (); + xchar p (peek ()); + done = eos (p) || !(alpha (p) || p == '_'); + unget (c); + } + } + } + else + { + // First check if it's a pair separator. + // + if (c == st.sep_pair) + done = true; + else + { + // Then see if this character or character sequence is a separator. + // + for (const char* p (strchr (st.sep_first, c)); + p != nullptr; + p = done ? nullptr : strchr (p + 1, c)) + { + char s (st.sep_second[p - st.sep_first]); + + // See if it has a second. + // + if (s != ' ') + { + get (); + done = (peek () == s); + unget (c); + } + else + done = true; + } + } + + // Handle single and double quotes if enabled for this mode and unless + // they were considered separators. + // + if (st.quotes && !done) + { + switch (c) + { + case '\'': + { + // Enter the single-quoted mode in case the derived lexer needs + // to notice this. + // + mode (lexer_mode::single_quoted); + + switch (qtype) + { + case quote_type::unquoted: + qtype = quote_type::single; + qcomp = lexeme.empty (); + break; + case quote_type::single: + qcomp = false; // Non-contiguous. + break; + case quote_type::double_: + qtype = quote_type::mixed; + // Fall through. + case quote_type::mixed: + qcomp = false; + break; + } + + get (); + for (c = get (); !eos (c) && c != '\''; c = get ()) + lexeme += c; + + if (eos (c)) + fail (c) << "unterminated single-quoted sequence"; + + state_.pop (); + continue; + } + case '\"': + { + get (); + + mode (lexer_mode::double_quoted); + st = state_.top (); + m = st.mode; + + switch (qtype) + { + case quote_type::unquoted: + qtype = quote_type::double_; + qcomp = lexeme.empty (); + break; + case quote_type::double_: + qcomp = false; // Non-contiguous. + break; + case quote_type::single: + qtype = quote_type::mixed; + // Fall through. + case quote_type::mixed: + qcomp = false; + break; + } + + continue; + } + } + } + } + + if (done) + break; + + get (); + append (c); + } + + if (m == lexer_mode::double_quoted) + { + if (eos (c)) + fail (c) << "unterminated double-quoted sequence"; + + // If we are still in the quoted mode then we didn't end with the quote + // character. 
+ // + if (qcomp) + qcomp = false; + } + + // Expire variable mode at the end of the word. + // + if (m == lexer_mode::variable) + state_.pop (); + + return token (move (lexeme), sep, qtype, qcomp, ln, cn); + } + + bool lexer:: + skip_spaces () + { + bool r (sep_); + sep_ = false; + + const state& s (state_.top ()); + + // In some special modes we don't skip spaces. + // + if (!s.sep_space) + return r; + + xchar c (peek ()); + bool start (c.column == 1); + + for (; !eos (c); c = peek ()) + { + switch (c) + { + case ' ': + case '\t': + { + r = true; + break; + } + case '\n': + { + // In some modes we treat newlines as ordinary spaces. + // + if (!s.sep_newline) + { + r = true; + break; + } + + // Skip empty lines. + // + if (start) + { + r = false; + break; + } + + return r; + } + case '#': + { + r = true; + get (); + + // See if this is a multi-line comment in the form: + // + /* + #\ + ... + #\ + */ + auto ml = [&c, this] () -> bool + { + if ((c = peek ()) == '\\') + { + get (); + if ((c = peek ()) == '\n') + return true; + } + + return false; + }; + + if (ml ()) + { + // Scan until we see the closing one. + // + for (; !eos (c); c = peek ()) + { + get (); + if (c == '#' && ml ()) + break; + } + + if (eos (c)) + fail (c) << "unterminated multi-line comment"; + } + else + { + // Read until newline or eos. + // + for (; !eos (c) && c != '\n'; c = peek ()) + get (); + } + + continue; + } + case '\\': + { + get (); + + if (peek () == '\n') + break; // Ignore. + + unget (c); + } + // Fall through. + default: + return r; // Not a space. + } + + get (); + } + + return r; + } +} diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx new file mode 100644 index 0000000..f987071 --- /dev/null +++ b/libbuild2/lexer.hxx @@ -0,0 +1,207 @@ +// file : libbuild2/lexer.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_LEXER_HXX +#define LIBBUILD2_LEXER_HXX + +#include <stack> + +#include <libbutl/char-scanner.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/token.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Context-dependent lexing mode. In the value mode we don't treat certain + // characters (e.g., '+', '=') as special so that we can use them in the + // variable values, e.g., 'foo = g++'. In contrast, in the variable mode, we + // restrict certain character (e.g., '/') from appearing in the name. The + // attribute mode is like value except it doesn't treat '{' and '}' as + // special (so we cannot have name groups in attributes). The eval mode is + // used in the evaluation context. Quoted modes are internal and should not + // be set explicitly. + // + // Note that the normal, value, and eval modes split words separated by the + // pair character (to disable pairs one can pass '\0' as a pair character). + // + // The alternnative modes must be set manually. The value mode automatically + // expires after the end of the line. The attribute mode expires after the + // closing ']'. The variable mode expires after the word token. And the eval + // mode expires after the closing ')'. + // + // Note that normally it is only safe to switch mode when the current token + // is not quoted (or, more generally, when you are not in the double-quoted + // mode) unless the mode treats the double-quote as a separator (e.g., + // variable name mode). 
Failing that, your mode (which now will be the top of + // the mode stack) will prevent proper recognition of the closing quote. + // + + // Extendable/inheritable enum-like class. + // + struct lexer_mode: lexer_mode_base + { + using base_type = lexer_mode_base; + + enum + { + normal = base_type::value_next, + variable, + value, + attribute, + eval, + single_quoted, + double_quoted, + buildspec, + + value_next + }; + + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (base_type v): base_type (v) {} + }; + + class LIBBUILD2_SYMEXPORT lexer: public butl::char_scanner + { + public: + // If escape is not NULL then only escape sequences with characters from + // this string are considered "effective escapes" with all others passed + // through as is. Note that the escape string is not copied. + // + lexer (istream& is, + const path& name, + uint64_t line = 1, // Start line in the stream. + const char* escapes = nullptr) + : lexer (is, name, line, escapes, true /* set_mode */) {} + + const path& + name () const {return name_;} + + // Note: sets mode for the next token. The second argument can be used to + // specify the pair separator character (if the mode supports pairs). If + // escapes is not specified, then inherit the current mode's (though a mode + // can also override it). + // + virtual void + mode (lexer_mode, + char pair_separator = '\0', + optional<const char*> escapes = nullopt); + + // Expire the current mode early. + // + void + expire_mode () {state_.pop ();} + + lexer_mode + mode () const {return state_.top ().mode;} + + char + pair_separator () const {return state_.top ().sep_pair;} + + // Scanner. Note that it is ok to call next() again after getting eos. + // + // If you extend the lexer and add a custom lexer mode, then you must + // override next() and handle the custom mode there. + // + virtual token + next (); + + // Peek at the first character of the next token. Return the character + // or '\0' if the next token will be eos. Also return an indicator of + // whether the next token will be separated. + // + pair<char, bool> + peek_char (); + + protected: + struct state + { + lexer_mode mode; + + char sep_pair; + bool sep_space; // Are whitespaces separators (see skip_spaces())? + bool sep_newline; // Is newline special (see skip_spaces())? + bool quotes; // Recognize quoted fragments. + + const char* escapes; // Effective escape sequences to recognize. + + // Word separator characters. For a two-character sequence, put the first + // one in sep_first and the second one in the corresponding position of + // sep_second. If it's a single-character sequence, then put a space in + // sep_second. If there are multiple sequences that start with the same + // character, then repeat the first character in sep_first. + // + const char* sep_first; + const char* sep_second; + }; + + token + next_eval (); + + token + next_quoted (); + + // Lex a word assuming current is the top state (which may already have + // been "expired" from the top). + // + virtual token + word (state current, bool separated); + + // Return true if we have seen any spaces. Skipped empty lines + // don't count. In other words, we are only interested in spaces + // that are on the same line as the following non-space character. + // + bool + skip_spaces (); + + // Diagnostics. + // + protected: + fail_mark fail; + + // Lexer state.
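To make the sep_first/sep_second encoding described in the state struct above a bit more concrete, here is a small standalone sketch (editorial, not part of this diff); it mirrors the lookup that word() performs but uses only the standard library and a made-up miniature table.

// Editorial illustration: one- and two-character separator sequences encoded
// as two parallel strings. A space in sep_second means the sequence is a
// single character; otherwise it is the required second character.
//
#include <cassert>
#include <cstring> // strchr()

static bool
is_separator (const char* sep_first, const char* sep_second,
              char c, char next)
{
  for (const char* p (std::strchr (sep_first, c));
       p != nullptr;
       p = std::strchr (p + 1, c))
  {
    char s (sep_second[p - sep_first]);

    if (s == ' ' || s == next)
      return true;
  }

  return false;
}

int main ()
{
  // Made-up table: ':' and ',' always separate; '=' only as part of "==".
  //
  const char* s1 (":,=");
  const char* s2 ("  =");

  assert (is_separator (s1, s2, ':', 'x'));
  assert (is_separator (s1, s2, '=', '='));
  assert (!is_separator (s1, s2, '=', 'x'));
}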
+ // + protected: + lexer (istream& is, + const path& name, + uint64_t line, + const char* escapes, + bool set_mode) + : char_scanner (is, true /* crlf */, line), + fail ("error", &name_), + name_ (name), + sep_ (false) + { + if (set_mode) + mode (lexer_mode::normal, '@', escapes); + } + + const path name_; + std::stack<state> state_; + + bool sep_; // True if we skipped spaces in peek(). + }; +} + +// Diagnostics plumbing. +// +namespace butl // ADL +{ + inline build2::location + get_location (const butl::char_scanner::xchar& c, const void* data) + { + using namespace build2; + + assert (data != nullptr); // E.g., must be &lexer::name_. + return location (static_cast<const path*> (data), c.line, c.column); + } +} + +#endif // LIBBUILD2_LEXER_HXX diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx new file mode 100644 index 0000000..84520d1 --- /dev/null +++ b/libbuild2/lexer.test.cxx @@ -0,0 +1,98 @@ +// file : libbuild2/lexer.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/token.hxx> +#include <libbuild2/lexer.hxx> + +using namespace std; + +namespace build2 +{ + // Usage: argv[0] [-q] [<lexer-mode>] + // + int + main (int argc, char* argv[]) + { + bool quote (false); + lexer_mode m (lexer_mode::normal); + + for (int i (1); i != argc; ++i) + { + string a (argv[i]); + + if (a == "-q") + quote = true; + else + { + if (a == "normal") m = lexer_mode::normal; + else if (a == "variable") m = lexer_mode::variable; + else if (a == "value") m = lexer_mode::value; + else if (a == "attribute") m = lexer_mode::attribute; + else if (a == "eval") m = lexer_mode::eval; + else if (a == "buildspec") m = lexer_mode::buildspec; + else assert (false); + break; + } + } + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + // Most alternative modes auto-expire so we need something underneath. + // + lexer l (cin, path ("stdin")); + + if (m != lexer_mode::normal) + l.mode (m); + + // No use printing eos since we will either get it or loop forever. + // + for (token t (l.next ()); t.type != token_type::eos; t = l.next ()) + { + if (t.separated && t.type != token_type::newline) + cout << ' '; + + // Print each token on a separate line without quoting operators. + // + t.printer (cout, t, false); + + if (quote) + { + char q ('\0'); + switch (t.qtype) + { + case quote_type::single: q = 'S'; break; + case quote_type::double_: q = 'D'; break; + case quote_type::mixed: q = 'M'; break; + case quote_type::unquoted: break; + } + + if (q != '\0') + cout << " [" << q << (t.qcomp ? 
"/C" : "/P") << ']'; + } + + cout << endl; + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } +} + +int +main (int argc, char* argv[]) +{ + return build2::main (argc, argv); +} diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx new file mode 100644 index 0000000..50530f2 --- /dev/null +++ b/libbuild2/module.cxx @@ -0,0 +1,147 @@ +// file : libbuild2/module.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/module.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + available_module_map builtin_modules; + + void + boot_module (scope& rs, const string& name, const location& loc) + { + // First see if this modules has already been loaded for this project. + // + loaded_module_map& lm (rs.root_extra->modules); + auto i (lm.find (name)); + + if (i != lm.end ()) + { + module_state& s (i->second); + + // The only valid situation here is if the module has already been + // bootstrapped. + // + assert (s.boot); + return; + } + + // Otherwise search for this module. + // + auto j (builtin_modules.find (name)); + + if (j == builtin_modules.end ()) + fail (loc) << "unknown module " << name; + + const module_functions& mf (j->second); + + if (mf.boot == nullptr) + fail (loc) << "module " << name << " shouldn't be loaded in bootstrap"; + + i = lm.emplace (name, + module_state {true, false, mf.init, nullptr, loc}).first; + i->second.first = mf.boot (rs, loc, i->second.module); + + rs.assign (var_pool.rw (rs).insert (name + ".booted")) = true; + } + + bool + load_module (scope& rs, + scope& bs, + const string& name, + const location& loc, + bool opt, + const variable_map& hints) + { + // First see if this modules has already been loaded for this project. + // + loaded_module_map& lm (rs.root_extra->modules); + auto i (lm.find (name)); + bool f (i == lm.end ()); + + if (f) + { + // Otherwise search for this module. + // + auto j (builtin_modules.find (name)); + + if (j == builtin_modules.end ()) + { + if (!opt) + fail (loc) << "unknown module " << name; + } + else + { + const module_functions& mf (j->second); + + if (mf.boot != nullptr) + fail (loc) << "module " << name << " should be loaded in bootstrap"; + + i = lm.emplace ( + name, + module_state {false, false, mf.init, nullptr, loc}).first; + } + } + else + { + module_state& s (i->second); + + if (s.boot) + { + s.boot = false; + f = true; // This is a first call to init. + } + } + + // Note: pattern-typed in context.cxx:reset() as project-visibility + // variables of type bool. + // + auto& vp (var_pool.rw (rs)); + value& lv (bs.assign (vp.insert (name + ".loaded"))); + value& cv (bs.assign (vp.insert (name + ".configured"))); + + bool l; // Loaded. + bool c; // Configured. + + // Suppress duplicate init() calls for the same module in the same scope. + // + if (!lv.null) + { + assert (!cv.null); + + l = cast<bool> (lv); + c = cast<bool> (cv); + + if (!opt) + { + if (!l) + fail (loc) << "unknown module " << name; + + // We don't have original diagnostics. We could call init() again so + // that it can issue it. But that means optional modules must be + // prepared to be called again if configuring failed. Let's keep it + // simple for now. 
+ // + if (!c) + fail (loc) << "module " << name << " failed to configure"; + } + } + else + { + l = i != lm.end (); + c = l && i->second.init (rs, bs, loc, i->second.module, f, opt, hints); + + lv = l; + cv = c; + } + + return l && c; + } +} diff --git a/libbuild2/module.hxx b/libbuild2/module.hxx new file mode 100644 index 0000000..5fbed9c --- /dev/null +++ b/libbuild2/module.hxx @@ -0,0 +1,120 @@ +// file : libbuild2/module.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_MODULE_HXX +#define LIBBUILD2_MODULE_HXX + +#include <map> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class location; + + class module_base + { + public: + virtual + ~module_base () = default; + }; + + // Return true if the module should be initialized first (the order of + // initialization within each group is unspecified). + // + using module_boot_function = + bool (scope& root, + const location&, + unique_ptr<module_base>&); + + // Return false if the module configuration (normally based on the default + // values) was unsuccessful but this is not (yet) an error. One example + // would be the optional use of a module. Or a module might remain + // unconfigured for as long as it is actually not used (e.g., install, + // dist). The return value is used to set the <module>.configured variable. + // + using module_init_function = + bool (scope& root, + scope& base, + const location&, + unique_ptr<module_base>&, + bool first, // First time for this project. + bool optional, // Loaded with using? (optional module). + const variable_map& hints); // Configuration hints (see below). + + struct module_functions + { + module_boot_function* boot; + module_init_function* init; + }; + + // The register() function will be written in C++ and will be called from + // C++ but we need to suppress name mangling to be able to use dlsym() and + // equivalent. + // + extern "C" + using module_register_function = module_functions (); + + // Loaded modules state. + // + struct module_state + { + bool boot; // True if the module boot'ed but not yet init'ed. + bool first; // True if the boot'ed module must be init'ed first. + module_init_function* init; + unique_ptr<module_base> module; + const location loc; // Boot location. + }; + + struct loaded_module_map: std::map<string, module_state> + { + template <typename T> + T* + lookup (const string& name) const + { + auto i (find (name)); + return i != end () + ? static_cast<T*> (i->second.module.get ()) + : nullptr; + } + }; + + // Load and boot the specified module. + // + LIBBUILD2_SYMEXPORT void + boot_module (scope& root, const string& name, const location&); + + // Load (if not already loaded) and initialize the specified module. Used + // by the parser but also by some modules to load prerequisite modules. + // Return true if the module was both successfully loaded and configured + // (false can only be returned if optional). + // + // The config_hints variable map can be used to pass configuration hints + // from one module to another. For example, the cxx modude may pass the + // target platform (which was extracted from the C++ compiler) to the bin + // module (which may not always be able to extract the same information from + // its tools). 
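As a sketch of how the boot/init split described above fits together, here is a hypothetical "hello" module written against the module_boot_function and module_init_function signatures declared in this header; the module name and everything inside it are invented for illustration and are not part of this diff.

// Editorial illustration: a hypothetical built-in module.
//
#include <libbuild2/module.hxx>

namespace hello
{
  using namespace build2;

  class hello_module: public module_base
  {
  public:
    bool enabled = false; // Hypothetical module state.
  };

  // Bootstrap-time hook. Returning false means this module does not need to
  // be initialized before the others.
  //
  static bool
  boot (scope& /*root*/, const location&, unique_ptr<module_base>& m)
  {
    m.reset (new hello_module);
    return false;
  }

  // Load-time hook (called by load_module() in module.cxx above). Returning
  // true results in the hello.configured variable being set to true.
  //
  static bool
  init (scope& /*root*/,
        scope& /*base*/,
        const location&,
        unique_ptr<module_base>& m,
        bool /*first*/,
        bool /*optional*/,
        const variable_map& /*hints*/)
  {
    static_cast<hello_module&> (*m).enabled = true;
    return true;
  }

  // Registration as a built-in module would then amount to something like:
  //
  //   builtin_modules["hello"] = module_functions {&boot, &init};
}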
+ // + LIBBUILD2_SYMEXPORT bool + load_module (scope& root, + scope& base, + const string& name, + const location&, + bool optional = false, + const variable_map& config_hints = variable_map ()); + + // Builtin modules. + // + using available_module_map = std::map<string, module_functions>; + LIBBUILD2_SYMEXPORT extern available_module_map builtin_modules; +} + +#endif // LIBBUILD2_MODULE_HXX diff --git a/libbuild2/name.cxx b/libbuild2/name.cxx new file mode 100644 index 0000000..4aac32f --- /dev/null +++ b/libbuild2/name.cxx @@ -0,0 +1,187 @@ +// file : libbuild2/name.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/types.hxx> // Note: not <libbuild2/name.hxx> + +#include <string.h> // strchr() + +#include <libbuild2/diagnostics.hxx> + +namespace build2 +{ + const name empty_name; + const names empty_names; + + string + to_string (const name& n) + { + string r; + + // Note: similar to to_stream() below. + // + if (n.empty ()) + return r; + + if (n.proj) + { + r += n.proj->string (); + r += '%'; + } + + // If the value is empty, then we want to put the last component of the + // directory inside {}, e.g., dir{bar/}, not bar/dir{}. + // + bool v (!n.value.empty ()); + bool t (!n.type.empty ()); + + const dir_path& pd (v ? n.dir : + t ? n.dir.directory () : + dir_path ()); + + if (!pd.empty ()) + r += pd.representation (); + + if (t) + { + r += n.type; + r += '{'; + } + + if (v) + r += n.value; + else + r += (pd.empty () ? n.dir : n.dir.leaf ()).representation (); + + if (t) + r += '}'; + + return r; + } + + ostream& + to_stream (ostream& os, const name& n, bool quote, char pair) + { + auto write_string = [quote, pair, &os](const string& v) + { + char sc[] = { + '{', '}', '[', ']', '$', '(', ')', // Token endings. + ' ', '\t', '\n', '#', // Spaces. + '\\', '"', // Escaping and quoting. + '%', // Project name separator. + '*', '?', // Wildcard characters. + pair, // Pair separator, if any. + '\0'}; + + if (quote && v.find ('\'') != string::npos) + { + // Quote the string with the double quotes rather than with the single + // one. Escape some of the special characters. + // + os << '"'; + + for (auto c: v) + { + if (strchr ("\\$(\"", c) != nullptr) // Special inside double quotes. + os << '\\'; + + os << c; + } + + os << '"'; + } + else if (quote && v.find_first_of (sc) != string::npos) + os << "'" << v << "'"; + else + os << v; + }; + + uint16_t dv (stream_verb (os).path); // Directory verbosity. + + auto write_dir = [dv, quote, &os, &write_string] (const dir_path& d) + { + const string& s (dv < 1 + ? diag_relative (d) + : d.representation ()); + if (quote) + write_string (s); + else + os << s; + }; + + // Note: similar to to_string() below. + // + + // If quoted then print empty name as '' rather than {}. + // + if (quote && n.empty ()) + return os << "''"; + + if (n.proj) + { + write_string (n.proj->string ()); + os << '%'; + } + + // If the value is empty, then we want to print the last component of the + // directory inside {}, e.g., dir{bar/}, not bar/dir{}. We also want to + // print {} for an empty name (unless quoted, which is handled above). + // + bool d (!n.dir.empty ()); + bool v (!n.value.empty ()); + bool t (!n.type.empty ()); + + // Note: relative() may return empty. + // + const dir_path& rd (dv < 1 ? relative (n.dir) : n.dir); // Relative. + const dir_path& pd (v ? rd : + t ? 
rd.directory () : + dir_path ()); + + if (!pd.empty ()) + write_dir (pd); + + if (t || (!d && !v)) + { + if (t) + write_string (n.type); + + os << '{'; + } + + if (v) + write_string (n.value); + else if (d) + { + if (rd.empty ()) + write_string (dir_path (".").representation ()); + else if (!pd.empty ()) + write_string (rd.leaf ().representation ()); + else + write_dir (rd); + } + + if (t || (!d && !v)) + os << '}'; + + return os; + } + + ostream& + to_stream (ostream& os, const names_view& ns, bool quote, char pair) + { + for (auto i (ns.begin ()), e (ns.end ()); i != e; ) + { + const name& n (*i); + ++i; + to_stream (os, n, quote, pair); + + if (n.pair) + os << n.pair; + else if (i != e) + os << ' '; + } + + return os; + } +} diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx new file mode 100644 index 0000000..1ce073a --- /dev/null +++ b/libbuild2/name.hxx @@ -0,0 +1,172 @@ +// file : libbuild2/name.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +// Note: include <libbuild2/types.hxx> instead of this file directly. +// + +#ifndef LIBBUILD2_NAME_HXX +#define LIBBUILD2_NAME_HXX + +// We cannot include <libbuild2/utility.hxx> since it includes +// <libbuild2/types.hxx>. +// +#include <utility> // move() + +#include <libbuild2/export.hxx> + +namespace build2 +{ + using std::move; + + // A name is what we operate on by default. Depending on the context, it can + // be interpreted as a target or prerequisite name. A name without a type + // and directory can be used to represent any text. A name with directory + // and empty value represents a directory. + // + // A name may also be qualified with a project. If the project name is + // empty, then it means the name is in a project other than our own (e.g., + // it is installed). + // + // A type or project can only be specified if either directory or value are + // not empty. + // + // If pair is not '\0', then this name and the next in the list form a + // pair. Can be used as a bool flag. + // + struct name + { + optional<project_name> proj; + dir_path dir; + string type; + string value; + char pair = '\0'; + + name () {} // = default; Clang needs this to initialize const object. + name (string v): value (move (v)) {} + name (dir_path d): dir (move (d)) {} + name (string t, string v): type (move (t)), value (move (v)) {} + name (dir_path d, string v): dir (move (d)), value (move (v)) {} + + name (dir_path d, string t, string v) + : dir (move (d)), type (move (t)), value (move (v)) {} + + name (optional<project_name> p, dir_path d, string t, string v) + : proj (move (p)), dir (move (d)), type (move (t)), value (move (v)) {} + + bool + qualified () const {return proj.has_value ();} + + bool + unqualified () const {return !qualified ();} + + bool + typed () const {return !type.empty ();} + + bool + untyped () const {return type.empty ();} + + // Note: if dir and value are empty then there should be no proj or type. + // + bool + empty () const {return dir.empty () && value.empty ();} + + // Note that strictly speaking the following tests should be orthogonal + // to qualification. However, the vast majority of cases where we expect + // a simple or directory name, we also expect it to be unqualified. + // + // Note also that empty name is simple but not a directory. 
+ // + bool + simple (bool ignore_qual = false) const + { + return (ignore_qual || unqualified ()) && untyped () && dir.empty (); + } + + bool + directory (bool ignore_qual = false) const + { + return (ignore_qual || unqualified ()) && + untyped () && !dir.empty () && value.empty (); + } + + int + compare (const name&) const; + }; + + LIBBUILD2_SYMEXPORT extern const name empty_name; + + inline bool + operator== (const name& x, const name& y) {return x.compare (y) == 0;} + + inline bool + operator!= (const name& x, const name& y) {return !(x == y);} + + inline bool + operator< (const name& x, const name& y) {return x.compare (y) < 0;} + + // Return string representation of a name. + // + LIBBUILD2_SYMEXPORT string + to_string (const name&); + + // Store a string in a name in a reversible way. If the string ends with a + // trailing directory separator then it is stored as a directory, otherwise + // as a simple name. + // + name + to_name (string); + + // Serialize the name to the stream. If requested, the name components + // containing special characters are quoted. The special characters are: + // + // {}[]$() \t\n#\"'% + // + // If the pair argument is not '\0', then it is added to the above special + // characters set. If the quote character is present in the component then + // it is double quoted rather than single quoted. In this case the following + // characters are escaped: + // + // \$(" + // + // Note that in the quoted mode empty unqualified name is printed as '', + // not {}. + // + LIBBUILD2_SYMEXPORT ostream& + to_stream (ostream&, const name&, bool quote, char pair = '\0'); + + inline ostream& + operator<< (ostream& os, const name& n) {return to_stream (os, n, false);} + + // Vector of names. + // + // Quite often it will contain just one element so we use small_vector<1>. + // Note also that it must be a separate type rather than an alias for + // vector<name> in order to distinguish between untyped variable values + // (names) and typed ones (vector<name>). + // + using names = small_vector<name, 1>; + using names_view = vector_view<const name>; + + LIBBUILD2_SYMEXPORT extern const names empty_names; + + // The same semantics as to_stream(name). + // + LIBBUILD2_SYMEXPORT ostream& + to_stream (ostream&, const names_view&, bool quote, char pair = '\0'); + + inline ostream& + operator<< (ostream& os, const names_view& ns) { + return to_stream (os, ns, false);} + + inline ostream& + operator<< (ostream& os, const names& ns) {return os << names_view (ns);} + + // Pair of names. + // + using name_pair = pair<name, name>; +} + +#include <libbuild2/name.ixx> + +#endif // LIBBUILD2_NAME_HXX diff --git a/libbuild2/name.ixx b/libbuild2/name.ixx new file mode 100644 index 0000000..188126e --- /dev/null +++ b/libbuild2/name.ixx @@ -0,0 +1,40 @@ +// file : libbuild2/name.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + inline int name:: + compare (const name& x) const + { + int r (proj < x.proj ? -1 : (proj > x.proj ? 1 : 0)); + + if (r == 0) + r = dir.compare (x.dir); + + if (r == 0) + r = type.compare (x.type); + + if (r == 0) + r = value.compare (x.value); + + if (r == 0) + r = pair < x.pair ? -1 : (pair > x.pair ? 
1 : 0); + + return r; + } + + inline name + to_name (string s) + { + if (!s.empty () && path::traits_type::is_separator (s.back ())) + { + dir_path d (move (s), dir_path::exact); + + if (!d.empty ()) + return name (move (d)); + } + + return name (move (s)); + } +} diff --git a/libbuild2/name.test.cxx b/libbuild2/name.test.cxx new file mode 100644 index 0000000..09fb841 --- /dev/null +++ b/libbuild2/name.test.cxx @@ -0,0 +1,96 @@ +// file : libbuild2/name.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <sstream> + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> // Includes name. +#include <libbuild2/utility.hxx> + +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + int + main (int, char*[]) + { + using dir = dir_path; + + // Test string representation. + // + { + auto ts = [] (const name& n) {return to_string (n);}; + + assert (ts (name ()) == ""); + + assert (ts (name ("foo")) == "foo"); + + assert (ts (name (dir ("bar/"))) == "bar/"); + assert (ts (name (dir ("bar/baz/"))) == "bar/baz/"); + + assert (ts (name (dir ("bar/"), "dir", "")) == "dir{bar/}"); + assert (ts (name (dir ("bar/baz/"), "dir", "")) == "bar/dir{baz/}"); + + assert (ts (name (dir ("bar/"), "foo")) == "bar/foo"); + + assert (ts (name (dir ("bar/"), "dir", "foo")) == "bar/dir{foo}"); + assert (ts (name (dir ("bar/baz/"), "dir", "foo")) == "bar/baz/dir{foo}"); + } + + // Test stream representation. + // + { + auto ts = [] (const name& n, bool quote = true) + { + ostringstream os; + stream_verb (os, stream_verbosity (0, 1)); + to_stream (os, n, quote); + return os.str (); + }; + + assert (ts (name ()) == "''"); + assert (ts (name (), false) == "{}"); + + assert (ts (name ("foo")) == "foo"); + + assert (ts (name (dir ("bar/"))) == "bar/"); + assert (ts (name (dir ("bar/baz/"))) == "bar/baz/"); + + assert (ts (name (dir ("bar/"), "dir", "")) == "dir{bar/}"); + assert (ts (name (dir ("bar/baz/"), "dir", "")) == "bar/dir{baz/}"); + + assert (ts (name (dir ("bar/"), "foo")) == "bar/foo"); + + assert (ts (name (dir ("bar/"), "dir", "foo")) == "bar/dir{foo}"); + assert (ts (name (dir ("bar/baz/"), "dir", "foo")) == "bar/baz/dir{foo}"); + + // Quoting. + // + assert (ts (name (dir ("bar baz/"), "dir", "foo fox")) == "'bar baz/'dir{'foo fox'}"); + + // Relative logic. 
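The reversibility rule of to_name() defined just above (a trailing directory separator yields a directory name, anything else a simple name) could be exercised with a couple of extra checks along these lines; this is an editorial sketch and not part of the test file.

// Editorial illustration: to_name() stores a trailing-slash string as a
// directory and anything else as a simple name; to_string() round-trips it.
//
assert (to_name ("bar/").directory ());
assert (to_name ("bar").simple ());
assert (to_string (to_name ("bar/")) == "bar/");
assert (to_string (to_name ("bar")) == "bar");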
+ // +#ifndef _WIN32 + dir rb ("/bar/"); + relative_base = &rb; + + assert (ts (name (dir ("/bar/"), "dir", "")) == "dir{./}"); + assert (ts (name (dir ("/bar/"), "", "foo")) == "foo"); + assert (ts (name (dir ("/bar/baz/"), "dir", "")) == "dir{baz/}"); +#endif + } + + return 0; + } +} + +int +main (int argc, char* argv[]) +{ + return build2::main (argc, argv); +} diff --git a/libbuild2/operation.cxx b/libbuild2/operation.cxx new file mode 100644 index 0000000..9d84cc2 --- /dev/null +++ b/libbuild2/operation.cxx @@ -0,0 +1,617 @@ +// file : libbuild2/operation.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/operation.hxx> + +#include <iostream> // cout + +#include <libbuild2/file.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // action + // + ostream& + operator<< (ostream& os, action a) + { + uint16_t + m (a.meta_operation ()), + i (a.operation ()), + o (a.outer_operation ()); + + os << '(' << m << ','; + + if (o != 0) + os << o << '('; + + os << i; + + if (o != 0) + os << ')'; + + os << ')'; + + return os; + } + + // noop + // + const meta_operation_info mo_noop { + noop_id, + "noop", + "", // Presumably we will never need these since we are not going + "", // to do anything. + "", + "", + true, // bootstrap_outer + nullptr, // meta-operation pre + nullptr, // operation pre + &load, + nullptr, // search + nullptr, // match + nullptr, // execute + nullptr, // operation post + nullptr, // meta-operation post + nullptr // include + }; + + // perform + // + void + load (const values&, + scope& root, + const path& bf, + const dir_path& out_base, + const dir_path& src_base, + const location&) + { + // Load project's root.build. + // + load_root (root); + + // Create the base scope. Note that its existence doesn't mean it was + // already setup as a base scope; it can be the same as root. + // + auto i (scopes.rw (root).insert (out_base)); + scope& base (setup_base (i, out_base, src_base)); + + // Load the buildfile unless it is implied. + // + if (!bf.empty ()) + source_once (root, base, bf, root); + } + + void + search (const values&, + const scope&, + const scope& bs, + const path& bf, + const target_key& tk, + const location& l, + action_targets& ts) + { + tracer trace ("search"); + + phase_lock pl (run_phase::match); + + const target* t (targets.find (tk, trace)); + + // Only do the implied buildfile if we haven't loaded one. Failed that we + // may try go this route even though we've concluded the implied buildfile + // is implausible and have loaded an outer buildfile (see main() for + // details). + // + if (t == nullptr && tk.is_a<dir> () && bf.empty ()) + t = dir::search_implied (bs, tk, trace); + + if (t == nullptr) + { + diag_record dr (fail (l)); + + dr << "unknown target " << tk; + + if (!bf.empty ()) + dr << " in " << bf; + } + + ts.push_back (t); + } + + void + match (const values&, action a, action_targets& ts, uint16_t diag, bool prog) + { + tracer trace ("match"); + + { + phase_lock l (run_phase::match); + + // Setup progress reporting if requested. + // + string what; // Note: must outlive monitor_guard. + scheduler::monitor_guard mg; + + if (prog && show_progress (2 /* max_verb */)) + { + size_t incr (stderr_term ? 1 : 10); // Scale depending on output type. 
+ + what = " targets to " + diag_do (a); + + mg = sched.monitor ( + target_count, + incr, + [incr, &what] (size_t c) -> size_t + { + diag_progress_lock pl; + diag_progress = ' '; + diag_progress += to_string (c); + diag_progress += what; + return c + incr; + }); + } + + // Start asynchronous matching of prerequisites keeping track of how + // many we have started. Wait with unlocked phase to allow phase + // switching. + // + size_t i (0), n (ts.size ()); + { + atomic_count task_count (0); + wait_guard wg (task_count, true); + + for (; i != n; ++i) + { + const target& t (ts[i].as_target ()); + l5 ([&]{trace << diag_doing (a, t);}); + + target_state s (match_async (a, t, 0, task_count, false)); + + // Bail out if the target has failed and we weren't instructed to + // keep going. + // + if (s == target_state::failed && !keep_going) + { + ++i; + break; + } + } + + wg.wait (); + } + + // Clear the progress if present. + // + if (mg) + { + diag_progress_lock pl; + diag_progress.clear (); + } + + // We are now running serially. Re-examine targets that we have matched. + // + bool fail (false); + for (size_t j (0); j != n; ++j) + { + action_target& at (ts[j]); + const target& t (at.as_target ()); + + target_state s (j < i + ? match (a, t, false) + : target_state::postponed); + switch (s) + { + case target_state::postponed: + { + // We bailed before matching it (leave state in action_target as + // unknown). + // + if (verb != 0 && diag >= 1) + info << "not " << diag_did (a, t); + + break; + } + case target_state::unknown: + case target_state::unchanged: + { + break; // Matched successfully. + } + case target_state::failed: + { + // Things didn't go well for this target. + // + if (verb != 0 && diag >= 1) + info << "failed to " << diag_do (a, t); + + at.state = s; + fail = true; + break; + } + default: + assert (false); + } + } + + if (fail) + throw failed (); + } + + // Phase restored to load. + // + assert (phase == run_phase::load); + } + + void + execute (const values&, action a, action_targets& ts, + uint16_t diag, bool prog) + { + tracer trace ("execute"); + + // Reverse the order of targets if the execution mode is 'last'. + // + if (current_mode == execution_mode::last) + reverse (ts.begin (), ts.end ()); + + // Tune the scheduler. + // + switch (current_inner_oif->concurrency) + { + case 0: sched.tune (1); break; // Run serially. + case 1: break; // Run as is. + default: assert (false); // Not yet supported. + } + + phase_lock pl (run_phase::execute); // Never switched. + + // Set the dry-run flag. + // + dry_run = dry_run_option; + + // Setup progress reporting if requested. + // + string what; // Note: must outlive monitor_guard. + scheduler::monitor_guard mg; + + if (prog && show_progress (1 /* max_verb */)) + { + size_t init (target_count.load (memory_order_relaxed)); + size_t incr (init > 100 ? init / 100 : 1); // 1%. + + if (init != incr) + { + what = "% of targets " + diag_did (a); + + mg = sched.monitor ( + target_count, + init - incr, + [init, incr, &what] (size_t c) -> size_t + { + size_t p ((init - c) * 100 / init); + size_t s (skip_count.load (memory_order_relaxed)); + + diag_progress_lock pl; + diag_progress = ' '; + diag_progress += to_string (p); + diag_progress += what; + + if (s != 0) + { + diag_progress += " ("; + diag_progress += to_string (s); + diag_progress += " skipped)"; + } + + return c - incr; + }); + } + } + + // Similar logic to execute_members(): first start asynchronous execution + // of all the top-level targets. 
+ // + { + atomic_count task_count (0); + wait_guard wg (task_count); + + for (const action_target& at: ts) + { + const target& t (at.as_target ()); + + l5 ([&]{trace << diag_doing (a, t);}); + + target_state s (execute_async (a, t, 0, task_count, false)); + + // Bail out if the target has failed and we weren't instructed to keep + // going. + // + if (s == target_state::failed && !keep_going) + break; + } + + wg.wait (); + } + + // We are now running serially. + // + + sched.tune (0); // Restore original scheduler settings. + + // Clear the dry-run flag. + // + dry_run = false; + + // Clear the progress if present. + // + if (mg) + { + diag_progress_lock pl; + diag_progress.clear (); + } + + // Print skip count if not zero. Note that we print it regardless of the + // diag level since this is essentially a "summary" of all the commands + // that we did not (and, in fact, used to originally) print. + // + if (verb != 0) + { + if (size_t s = skip_count.load (memory_order_relaxed)) + { + text << "skipped " << diag_doing (a) << ' ' << s << " targets"; + } + } + + // Re-examine all the targets and print diagnostics. + // + bool fail (false); + for (action_target& at: ts) + { + const target& t (at.as_target ()); + + switch ((at.state = t.executed_state (a, false))) + { + case target_state::unknown: + { + // We bailed before executing it (leave state in action_target as + // unknown). + // + if (verb != 0 && diag >= 1) + info << "not " << diag_did (a, t); + + break; + } + case target_state::unchanged: + { + // Nothing had to be done. + // + if (verb != 0 && diag >= 2) + info << diag_done (a, t); + + break; + } + case target_state::changed: + { + // Something has been done. + // + break; + } + case target_state::failed: + { + // Things didn't go well for this target. + // + if (verb != 0 && diag >= 1) + info << "failed to " << diag_do (a, t); + + fail = true; + break; + } + default: + assert (false); + } + } + + if (fail) + throw failed (); + + // We should have executed every target that we matched, provided we + // haven't failed (in which case we could have bailed out early). + // + assert (target_count.load (memory_order_relaxed) == 0); + assert (dependency_count.load (memory_order_relaxed) == 0); + } + + const meta_operation_info mo_perform { + perform_id, + "perform", + "", + "", + "", + "", + true, // bootstrap_outer + nullptr, // meta-operation pre + nullptr, // operation pre + &load, + &search, + &match, + &execute, + nullptr, // operation post + nullptr, // meta-operation post + nullptr // include + }; + + // info + // + static operation_id + info_operation_pre (const values&, operation_id o) + { + if (o != default_id) + fail << "explicit operation specified for meta-operation info"; + + return o; + } + + void + info_load (const values&, + scope& rs, + const path&, + const dir_path& out_base, + const dir_path& src_base, + const location& l) + { + // For info we don't want to go any further than bootstrap so that it can + // be used in pretty much any situation (unresolved imports, etc). We do + // need to setup root as base though. + + if (rs.out_path () != out_base || rs.src_path () != src_base) + fail (l) << "meta-operation info target must be project root directory"; + + setup_base (scopes.rw (rs).insert (out_base), out_base, src_base); + } + + void + info_search (const values&, + const scope& rs, + const scope&, + const path&, + const target_key& tk, + const location& l, + action_targets& ts) + { + // Collect all the projects we need to print information about. 
+ + // We've already verified the target is in the project root. Now verify + // it is dir{}. + // + if (!tk.type->is_a<dir> ()) + fail (l) << "meta-operation info target must be project root directory"; + + ts.push_back (&rs); + } + + static void + info_execute (const values&, action, action_targets& ts, uint16_t, bool) + { + for (size_t i (0); i != ts.size (); ++i) + { + // Separate projects with blank lines. + // + if (i != 0) + cout << endl; + + const scope& rs (*static_cast<const scope*> (ts[i].target)); + + // Print [meta_]operation names. Due to the way our aliasing works, we + // have to go through the [meta_]operation_table. + // + auto print_ops = [] (const auto& ov, const auto& ot) + { + // This is a sparse vector with NULL holes. id 0 is invalid while 1 is + // the noop meta-operation and the default operation; we omit printing + // both. + // + for (uint8_t id (2); id < ov.size (); ++id) + { + if (ov[id] != nullptr) + cout << ' ' << ot[id]; + } + }; + + // This could be a simple project that doesn't set project name. + // + cout + << "project: " << cast_empty<project_name> (rs[var_project]) << endl + << "version: " << cast_empty<string> (rs[var_version]) << endl + << "summary: " << cast_empty<string> (rs[var_project_summary]) << endl + << "url: " << cast_empty<string> (rs[var_project_url]) << endl + << "src_root: " << cast<dir_path> (rs[var_src_root]) << endl + << "out_root: " << cast<dir_path> (rs[var_out_root]) << endl + << "amalgamation: " << cast_empty<dir_path> (rs[var_amalgamation]) << endl + << "subprojects: " << cast_empty<subprojects> (rs[var_subprojects]) << endl + << "operations:"; print_ops (rs.root_extra->operations, operation_table); cout << endl + << "meta-operations:"; print_ops (rs.root_extra->meta_operations, meta_operation_table); cout << endl; + } + } + + const meta_operation_info mo_info { + info_id, + "info", + "", + "", + "", + "", + false, // bootstrap_outer + nullptr, // meta-operation pre + &info_operation_pre, + &info_load, + &info_search, + nullptr, // match + &info_execute, + nullptr, // operation post + nullptr, // meta-operation post + nullptr // include + }; + + // operations + // + const operation_info op_default { + default_id, + 0, + "<default>", + "", + "", + "", + "", + execution_mode::first, + 1, + nullptr, + nullptr + }; + +#ifndef _MSC_VER + constexpr +#else + // VC doesn't "see" this can be const-initialized so we have to hack around + // to ensure correct initialization order. + // + #pragma warning(disable: 4073) + #pragma init_seg(lib) + const +#endif + operation_info op_update { + update_id, + 0, + "update", + "update", + "updating", + "updated", + "is up to date", + execution_mode::first, + 1, + nullptr, + nullptr + }; + + const operation_info op_clean { + clean_id, + 0, + "clean", + "clean", + "cleaning", + "cleaned", + "is clean", + execution_mode::last, + 1, + nullptr, + nullptr + }; + + // Tables. 
+ // + string_table<meta_operation_id, meta_operation_data> meta_operation_table; + string_table<operation_id> operation_table; +} diff --git a/libbuild2/operation.hxx b/libbuild2/operation.hxx new file mode 100644 index 0000000..86f93c6 --- /dev/null +++ b/libbuild2/operation.hxx @@ -0,0 +1,361 @@ +// file : libbuild2/operation.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_OPERATION_HXX +#define LIBBUILD2_OPERATION_HXX + +#include <libbutl/string-table.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/prerequisite.hxx> +#include <libbuild2/target-state.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class location; + class scope; + class target_key; + class target; + struct prerequisite_member; + + struct opspec; + + // Meta-operation info. + // + + // Normally a list of resolved and matched targets to execute. But can be + // something else, depending on the meta-operation. + // + // The state is used to print structured result state. If it is not unknown, + // then this is assumed to be a target. + // + struct action_target + { + using target_type = build2::target; + + const void* target = nullptr; + target_state state = target_state::unknown; + + action_target () = default; + action_target (const void* t): target (t) {} + + const target_type& + as_target () const {return *static_cast<const target_type*> (target);} + }; + + class action_targets: public vector<action_target> + { + public: + using vector<action_target>::vector; + + void + reset () {for (auto& x: *this) x.state = target_state::unknown;} + }; + + struct meta_operation_info + { + const meta_operation_id id; + const string name; + + // Name derivatives for diagnostics. If empty, then the meta- + // operation need not be mentioned. + // + const string name_do; // E.g., [to] 'configure'. + const string name_doing; // E.g., [while] 'configuring'. + const string name_did; // E.g., 'configured'. + const string name_done; // E.g., 'is configured'. + + // Whether to bootstrap outer projects. If load() below calls load_root(), + // then this must be true. Note that this happens before + // meta_operation_pre() is called. + // + const bool bootstrap_outer; + + // The first argument in all the callback is the meta-operation + // parameters. + // + // If the meta-operation expects parameters, then it should have a + // non-NULL meta_operation_pre(). Failed that, any parameters will be + // diagnosed as unexpected. + + // Start of meta-operation and operation batches. + // + // If operation_pre() is not NULL, then it may translate default_id + // (and only default_id) to some other operation. If not translated, + // then default_id is used. If, however, operation_pre() is NULL, + // then default_id is translated to update_id. + // + void (*meta_operation_pre) (const values&, const location&); + operation_id (*operation_pre) (const values&, operation_id); + + // Meta-operation-specific logic to load the buildfile, search and match + // the targets, and execute the action on the targets. 
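For comparison with the mo_perform initializer earlier in operation.cxx, here is a hypothetical sketch of a meta-operation description that simply reuses the default load/search/match/execute callbacks whose declarations follow just below in this header. mo_trace and trace_id are invented; a real meta-operation would register its name to obtain an id.

    // Hypothetical; not part of build2.
    //
    const meta_operation_info mo_trace {
      trace_id,        // Invented; assumed to come from registration.
      "trace",
      "trace",         // name_do
      "tracing",       // name_doing
      "traced",        // name_did
      "is traced",     // name_done
      true,            // bootstrap_outer
      nullptr,         // meta-operation pre
      nullptr,         // operation pre
      &load,           // Default implementations declared below.
      &search,
      &match,
      &execute,
      nullptr,         // operation post
      nullptr,         // meta-operation post
      nullptr          // include
    };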
+ // + void (*load) (const values&, + scope& root, + const path& buildfile, + const dir_path& out_base, + const dir_path& src_base, + const location&); + + void (*search) (const values&, + const scope& root, + const scope& base, + const path& buildfile, + const target_key&, + const location&, + action_targets&); + + // Diagnostics levels: + // + // 0 - none (for structured result). + // 1 - failures only (for pre-operations). + // 2 - all (for normal operations). + // + // The false progress argument can be used to suppress progress. If it is + // true, then whether the progress is shown is meta operation-specific (in + // other words, you can suppress it but not force it). + // + void (*match) (const values&, action, action_targets&, + uint16_t diag, bool progress); + + void (*execute) (const values&, action, action_targets&, + uint16_t diag, bool progress); + + // End of operation and meta-operation batches. + // + void (*operation_post) (const values&, operation_id); + void (*meta_operation_post) (const values&); + + // Optional prerequisite inclusion/exclusion override callback. See + // include() for details. + // + include_type (*include) (action, + const target&, + const prerequisite_member&, + include_type); + }; + + // Built-in meta-operations. + // + + // perform + // + + // Load the buildfile. This is the default implementation that first + // calls root_pre(), then creates the scope for out_base, and, finally, + // loads the buildfile unless it has already been loaded for the root + // scope. + // + LIBBUILD2_SYMEXPORT void + load (const values&, + scope&, + const path&, + const dir_path&, + const dir_path&, + const location&); + + // Search and match the target. This is the default implementation + // that does just that and adds a pointer to the target to the list. + // + LIBBUILD2_SYMEXPORT void + search (const values&, + const scope&, + const scope&, + const path&, + const target_key&, + const location&, + action_targets&); + + LIBBUILD2_SYMEXPORT void + match (const values&, action, action_targets&, + uint16_t diag, bool prog); + + // Execute the action on the list of targets. This is the default + // implementation that does just that while issuing appropriate + // diagnostics (unless quiet). + // + LIBBUILD2_SYMEXPORT void + execute (const values&, action, const action_targets&, + uint16_t diag, bool prog); + + LIBBUILD2_SYMEXPORT extern const meta_operation_info mo_noop; + LIBBUILD2_SYMEXPORT extern const meta_operation_info mo_perform; + LIBBUILD2_SYMEXPORT extern const meta_operation_info mo_info; + + // Operation info. + // + // NOTE: keep POD-like to ensure can be constant-initialized in order to + // sidestep static initialization order (relied upon in operation + // aliasing). + // + struct operation_info + { + // If outer_id is not 0, then use that as the outer part of the + // action. + // + const operation_id id; + const operation_id outer_id; + const char* name; + + // Name derivatives for diagnostics. Note that unlike meta-operations, + // these can only be empty for the default operation (id 1), And + // meta-operations that make use of the default operation shall not + // have empty derivatives (failed which only target name will be + // printed). + // + const char* name_do; // E.g., [to] 'update'. + const char* name_doing; // E.g., [while] 'updating'. + const char* name_did; // E.g., [not] 'updated'. + const char* name_done; // E.g., 'is up to date'. + + const execution_mode mode; + + // This is the operation's concurrency multiplier. 
0 means run serially, + // 1 means run at hardware concurrency (unless overridden by the user). + // + const size_t concurrency; + + // The first argument in all the callback is the operation parameters. + // + // If the operation expects parameters, then it should have a non-NULL + // pre(). Failed that, any parameters will be diagnosed as unexpected. + + // If the returned operation_id's are not 0, then they are injected + // as pre/post operations for this operation. Can be NULL if unused. + // The returned operation_id shall not be default_id. + // + operation_id (*pre) (const values&, meta_operation_id, const location&); + operation_id (*post) (const values&, meta_operation_id); + }; + + // Built-in operations. + // + LIBBUILD2_SYMEXPORT extern const operation_info op_default; + LIBBUILD2_SYMEXPORT extern const operation_info op_update; + LIBBUILD2_SYMEXPORT extern const operation_info op_clean; + + // Global meta/operation tables. Each registered meta/operation + // is assigned an id which is used as an index in the per-project + // registered meta/operation lists. + // + // We have three types of meta/operations: built-in (e.g., perform, + // update), pre-defined (e.g., configure, test), and dynamically- + // defined. For built-in ones, both the id and implementation are + // part of the build2 core. For pre-defined, the id is registered + // as part of the core but the implementation is loaded as part of + // a module. The idea with pre-defined operations is that they have + // common, well-established semantics but could still be optional. + // Another aspect of pre-defined operations is that often rules + // across multiple modules need to know their ids. Finally, + // dynamically-defined meta/operations have their ids registered + // as part of a module load. In this case, the meta/operation is + // normally (but not necessarily) fully implemented by this module. + // + // Note also that the name of a meta/operation in a sense defines + // its semantics. It would be strange to have an operation called + // test that does two very different things in different projects. + // + // A built-in/pre-defined meta-operation can also provide a pre-processor + // callback that will be called for operation-specs before any project + // discovery/bootstrap is performed. + // + struct meta_operation_data + { + // The processor may modify the parameters, opspec, and change the + // meta-operation by returning a different name. + // + // If lifted is true then the operation name in opspec is bogus (has + // been lifted) and the default/empty name should be assumed instead. + // + using process_func = const string& (const variable_overrides&, + values&, + vector_view<opspec>&, + bool lifted, + const location&); + + meta_operation_data () = default; + meta_operation_data (const char* n, process_func p = nullptr) + : name (n), process (p) {} + + string name; + process_func* process; + }; + + inline ostream& + operator<< (ostream& os, const meta_operation_data& d) + { + return os << d.name; + } + + LIBBUILD2_SYMEXPORT extern butl::string_table<meta_operation_id, + meta_operation_data> + meta_operation_table; + + LIBBUILD2_SYMEXPORT extern butl::string_table<operation_id> operation_table; + + // These are "sparse" in the sense that we may have "holes" that + // are represented as NULL pointers. Also, lookup out of bounds + // is treated as a hole. 
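The sparse_vector template that implements these tables follows immediately below. A hypothetical usage sketch of its hole semantics, assuming the update_id/clean_id constants and the op_update/op_clean objects declared above:

    // Hypothetical usage; assumes <cassert>.
    //
    inline void
    sparse_vector_example ()
    {
      sparse_vector<const operation_info> ops;

      ops.insert (update_id, op_update); // Back-fills [0, update_id) with holes.
      ops.insert (clean_id,  op_clean);

      assert (ops[update_id] == &op_update);
      assert (ops[0]   == nullptr);      // Hole (id 0 is invalid).
      assert (ops[100] == nullptr);      // Out of bounds reads as a hole.
    }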
+ // + template <typename T> + struct sparse_vector + { + using base_type = vector<T*>; + using size_type = typename base_type::size_type; + + void + insert (size_type i, T& x) + { + size_type n (v_.size ()); + + if (i < n) + v_[i] = &x; + else + { + if (n != i) + v_.resize (i, nullptr); // Add holes. + v_.push_back (&x); + } + } + + T* + operator[] (size_type i) const + { + return i < v_.size () ? v_[i] : nullptr; + } + + bool + empty () const {return v_.empty ();} + + // Note that this is more of a "max index" rather than size. + // + size_type + size () const {return v_.size ();} + + private: + base_type v_; + }; + + using meta_operations = sparse_vector<const meta_operation_info>; + using operations = sparse_vector<const operation_info>; +} + +namespace butl +{ + template <> + struct string_table_traits<build2::meta_operation_data> + { + static const std::string& + key (const build2::meta_operation_data& d) {return d.name;} + }; +} + +#endif // LIBBUILD2_OPERATION_HXX diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx new file mode 100644 index 0000000..4e8ad23 --- /dev/null +++ b/libbuild2/parser.cxx @@ -0,0 +1,5526 @@ +// file : libbuild2/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/parser.hxx> + +#include <sstream> +#include <iostream> // cout + +#include <libbutl/filesystem.mxx> // path_search(), path_match() + +#include <libbuild2/dump.hxx> +#include <libbuild2/file.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/module.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> +#include <libbuild2/prerequisite.hxx> + +using namespace std; + +namespace build2 +{ + using type = token_type; + + class parser::enter_scope + { + public: + enter_scope (): p_ (nullptr), r_ (nullptr), s_ (nullptr), b_ (nullptr) {} + + enter_scope (parser& p, dir_path&& d) + : p_ (&p), r_ (p.root_), s_ (p.scope_), b_ (p.pbase_) + { + // Try hard not to call normalize(). Most of the time we will go just + // one level deeper. + // + bool n (true); + + if (d.relative ()) + { + // Relative scopes are opened relative to out, not src. + // + if (d.simple () && !d.current () && !d.parent ()) + { + d = dir_path (p.scope_->out_path ()) /= d.string (); + n = false; + } + else + d = p.scope_->out_path () / d; + } + + if (n) + d.normalize (); + + p.switch_scope (d); + } + + ~enter_scope () + { + if (p_ != nullptr) + { + p_->scope_ = s_; + p_->root_ = r_; + p_->pbase_ = b_; + } + } + + explicit operator bool () const {return p_ != nullptr;} + + // Note: move-assignable to empty only. + // + enter_scope (enter_scope&& x) {*this = move (x);} + enter_scope& operator= (enter_scope&& x) + { + if (this != &x) + { + p_ = x.p_; + r_ = x.r_; + s_ = x.s_; + b_ = x.b_; + x.p_ = nullptr; + } + return *this; + } + + enter_scope (const enter_scope&) = delete; + enter_scope& operator= (const enter_scope&) = delete; + + private: + parser* p_; + scope* r_; + scope* s_; + const dir_path* b_; // Pattern base. + }; + + class parser::enter_target + { + public: + enter_target (): p_ (nullptr), t_ (nullptr) {} + + enter_target (parser& p, target& t) + : p_ (&p), t_ (p.target_) + { + p.target_ = &t; + } + + enter_target (parser& p, + name&& n, // If n.pair, then o is out dir. 
+ name&& o, + bool implied, + const location& loc, + tracer& tr) + : p_ (&p), t_ (p.target_) + { + p.target_ = &insert_target (p, move (n), move (o), implied, loc, tr); + } + + // Find or insert. + // + static target& + insert_target (parser& p, + name&& n, // If n.pair, then o is out dir. + name&& o, + bool implied, + const location& loc, + tracer& tr) + { + auto r (process_target (p, n, o, loc)); + return targets.insert (*r.first, // target type + move (n.dir), + move (o.dir), + move (n.value), + move (r.second), // extension + implied, + tr).first; + } + + // Only find. + // + static const target* + find_target (parser& p, + name& n, // If n.pair, then o is out dir. + name& o, + const location& loc, + tracer& tr) + { + auto r (process_target (p, n, o, loc)); + return targets.find (*r.first, // target type + n.dir, + o.dir, + n.value, + r.second, // extension + tr); + } + + static pair<const target_type*, optional<string>> + process_target (parser& p, + name& n, // If n.pair, then o is out dir. + name& o, + const location& loc) + { + auto r (p.scope_->find_target_type (n, loc)); + + if (r.first == nullptr) + p.fail (loc) << "unknown target type " << n.type; + + bool src (n.pair); // If out-qualified, then it is from src. + if (src) + { + assert (n.pair == '@'); + + if (!o.directory ()) + p.fail (loc) << "expected directory after '@'"; + } + + dir_path& d (n.dir); + + const dir_path& sd (p.scope_->src_path ()); + const dir_path& od (p.scope_->out_path ()); + + if (d.empty ()) + d = src ? sd : od; // Already dormalized. + else + { + if (d.relative ()) + d = (src ? sd : od) / d; + + d.normalize (); + } + + dir_path out; + if (src && sd != od) // If in-source build, then out must be empty. + { + out = o.dir.relative () ? od / o.dir : move (o.dir); + out.normalize (); + } + o.dir = move (out); // Result. + + return r; + } + + ~enter_target () + { + if (p_ != nullptr) + p_->target_ = t_; + } + + // Note: move-assignable to empty only. + // + enter_target (enter_target&& x) {*this = move (x);} + enter_target& operator= (enter_target&& x) { + p_ = x.p_; t_ = x.t_; x.p_ = nullptr; return *this;} + + enter_target (const enter_target&) = delete; + enter_target& operator= (const enter_target&) = delete; + + private: + parser* p_; + target* t_; + }; + + class parser::enter_prerequisite + { + public: + enter_prerequisite (): p_ (nullptr), r_ (nullptr) {} + + enter_prerequisite (parser& p, prerequisite& r) + : p_ (&p), r_ (p.prerequisite_) + { + assert (p.target_ != nullptr); + p.prerequisite_ = &r; + } + + ~enter_prerequisite () + { + if (p_ != nullptr) + p_->prerequisite_ = r_; + } + + // Note: move-assignable to empty only. + // + enter_prerequisite (enter_prerequisite&& x) {*this = move (x);} + enter_prerequisite& operator= (enter_prerequisite&& x) { + p_ = x.p_; r_ = x.r_; x.p_ = nullptr; return *this;} + + enter_prerequisite (const enter_prerequisite&) = delete; + enter_prerequisite& operator= (const enter_prerequisite&) = delete; + + private: + parser* p_; + prerequisite* r_; + }; + + void parser:: + parse_buildfile (istream& is, const path& p, scope& root, scope& base) + { + path_ = &p; + + lexer l (is, *path_); + lexer_ = &l; + root_ = &root; + scope_ = &base; + pbase_ = scope_->src_path_; + target_ = nullptr; + prerequisite_ = nullptr; + default_target_ = nullptr; + + enter_buildfile (p); // Needs scope_. 
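The enter_scope, enter_target, and enter_prerequisite helpers above all follow the same save-set-restore discipline so that nested parsing contexts unwind correctly, including on failure. A distilled, generic sketch of that pattern (swap_guard and its members are invented; the real classes additionally support being moved into an empty state):

    // Save the current value of a "current X" slot, install a new one, and
    // restore the saved value on scope exit.
    //
    template <typename T>
    class swap_guard
    {
    public:
      swap_guard (T*& slot, T& value)
        : slot_ (&slot), saved_ (slot)
      {
        slot = &value;
      }

      ~swap_guard ()
      {
        if (slot_ != nullptr)
          *slot_ = saved_;
      }

      swap_guard (swap_guard&& x): slot_ (x.slot_), saved_ (x.saved_)
      {
        x.slot_ = nullptr; // A moved-from guard restores nothing.
      }

      swap_guard (const swap_guard&) = delete;
      swap_guard& operator= (const swap_guard&) = delete;

    private:
      T** slot_;
      T*  saved_;
    };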
+ + token t; + type tt; + next (t, tt); + + parse_clause (t, tt); + + if (tt != type::eos) + fail (t) << "unexpected " << t; + + process_default_target (t); + } + + token parser:: + parse_variable (lexer& l, scope& s, const variable& var, type kind) + { + path_ = &l.name (); + lexer_ = &l; + scope_ = &s; + pbase_ = scope_->src_path_; // Normally NULL. + target_ = nullptr; + prerequisite_ = nullptr; + + token t; + type tt; + parse_variable (t, tt, var, kind); + return t; + } + + pair<value, token> parser:: + parse_variable_value (lexer& l, + scope& s, + const dir_path* b, + const variable& var) + { + path_ = &l.name (); + lexer_ = &l; + scope_ = &s; + pbase_ = b; + target_ = nullptr; + prerequisite_ = nullptr; + + token t; + type tt; + value rhs (parse_variable_value (t, tt)); + + value lhs; + apply_value_attributes (&var, lhs, move (rhs), type::assign); + + return make_pair (move (lhs), move (t)); + } + + // Test if a string is a wildcard pattern. + // + static inline bool + pattern (const string& s) + { + return s.find_first_of ("*?") != string::npos; + }; + + bool parser:: + parse_clause (token& t, type& tt, bool one) + { + tracer trace ("parser::parse_clause", &path_); + + // clause() should always stop at a token that is at the beginning of + // the line (except for eof). That is, if something is called to parse + // a line, it should parse it until newline (or fail). This is important + // for if-else blocks, directory scopes, etc., that assume the '}' token + // they see is on the new line. + // + bool parsed (false); + + while (tt != type::eos && !(one && parsed)) + { + // Extract attributes if any. + // + assert (attributes_.empty ()); + auto at (attributes_push (t, tt)); + + // We should always start with one or more names, potentially + // <>-grouped. + // + if (!(start_names (tt) || tt == type::labrace)) + { + // Something else. Let our caller handle that. + // + if (at.first) + fail (at.second) << "attributes before " << t; + else + attributes_pop (); + + break; + } + + // Now we will either parse something or fail. + // + if (!parsed) + parsed = true; + + // See if this is one of the directives. + // + if (tt == type::word && keyword (t)) + { + const string& n (t.value); + void (parser::*f) (token&, type&) = nullptr; + + // @@ Is this the only place where some of these are valid? Probably + // also in the var namespace? + // + if (n == "assert" || + n == "assert!") + { + f = &parser::parse_assert; + } + else if (n == "print") // Unlike text goes to stdout. + { + f = &parser::parse_print; + } + else if (n == "fail" || + n == "warn" || + n == "info" || + n == "text") + { + f = &parser::parse_diag; + } + else if (n == "dump") + { + f = &parser::parse_dump; + } + else if (n == "source") + { + f = &parser::parse_source; + } + else if (n == "include") + { + f = &parser::parse_include; + } + else if (n == "run") + { + f = &parser::parse_run; + } + else if (n == "import") + { + f = &parser::parse_import; + } + else if (n == "export") + { + f = &parser::parse_export; + } + else if (n == "using" || + n == "using?") + { + f = &parser::parse_using; + } + else if (n == "define") + { + f = &parser::parse_define; + } + else if (n == "if" || + n == "if!") + { + f = &parser::parse_if_else; + } + else if (n == "else" || + n == "elif" || + n == "elif!") + { + // Valid ones are handled in if_else(). 
+ // + fail (t) << n << " without if"; + } + else if (n == "for") + { + f = &parser::parse_for; + } + + if (f != nullptr) + { + if (at.first) + fail (at.second) << "attributes before " << n; + else + attributes_pop (); + + (this->*f) (t, tt); + continue; + } + } + + location nloc (get_location (t)); + names ns; + + if (tt != type::labrace) + { + ns = parse_names (t, tt, pattern_mode::ignore); + + // Allow things like function calls that don't result in anything. + // + if (tt == type::newline && ns.empty ()) + { + if (at.first) + fail (at.second) << "standalone attributes"; + else + attributes_pop (); + + next (t, tt); + continue; + } + } + + // Handle ad hoc target group specification (<...>). + // + // We keep an "optional" (empty) vector of names parallel to ns. + // + adhoc_names ans; + if (tt == type::labrace) + { + while (tt == type::labrace) + { + // Parse target names inside < >. + // + next (t, tt); + + auto at (attributes_push (t, tt)); + + if (at.first) + fail (at.second) << "attributes before ad hoc target"; + else + attributes_pop (); + + // Allow empty case (<>). + // + if (tt != type::rabrace) + { + location aloc (get_location (t)); + + // The first name (or a pair) is the primary target which we need + // to keep in ns. The rest, if any, are ad hoc members that we + // should move to ans. + // + size_t m (ns.size ()); + parse_names (t, tt, ns, pattern_mode::ignore); + size_t n (ns.size ()); + + // Another empty case (<$empty>). + // + if (m != n) + { + m = n - m - (ns[m].pair ? 2 : 1); // Number of names to move. + + // Allow degenerate case with just the primary target. + // + if (m != 0) + { + n -= m; // Number of names in ns we should end up with. + + ans.resize (n); // Catch up with the names vector. + adhoc_names_loc& a (ans.back ()); + + a.loc = move (aloc); + a.ns.insert (a.ns.end (), + make_move_iterator (ns.begin () + n), + make_move_iterator (ns.end ())); + ns.resize (n); + } + } + } + + if (tt != type::rabrace) + fail (t) << "expected '>' instead of " << t; + + // Parse the next chunk of target names after >, if any. + // + next (t, tt); + if (start_names (tt)) + parse_names (t, tt, ns, pattern_mode::ignore); + } + + if (!ans.empty ()) + ans.resize (ns.size ()); // Catch up with the final chunk. + + if (tt != type::colon) + fail (t) << "expected ':' instead of " << t; + + if (ns.empty ()) + fail (t) << "expected target before ':'"; + } + + // If we have a colon, then this is target-related. + // + if (tt == type::colon) + { + // While '{}:' means empty name, '{$x}:' where x is empty list + // means empty list. + // + if (ns.empty ()) + fail (t) << "expected target before ':'"; + + if (at.first) + fail (at.second) << "attributes before target"; + else + attributes_pop (); + + // Call the specified parsing function (either variable or block) for + // each target. We handle multiple targets by replaying the tokens + // since the value/block may contain variable expansions that would be + // sensitive to the target context in which they are evaluated. The + // function signature is: + // + // void (token& t, type& tt, const target_type* type, string pat) + // + auto for_each = [this, &trace, + &t, &tt, + &ns, &nloc, &ans] (auto&& f) + { + // Note: watch out for an out-qualified single target (two names). 
+ // + replay_guard rg (*this, + ns.size () > 2 || (ns.size () == 2 && !ns[0].pair)); + + for (size_t i (0), e (ns.size ()); i != e; ) + { + name& n (ns[i]); + + if (n.qualified ()) + fail (nloc) << "project name in target " << n; + + // Figure out if this is a target or a target type/pattern (yeah, + // it can be a mixture). + // + if (pattern (n.value)) + { + if (n.pair) + fail (nloc) << "out-qualified target type/pattern"; + + if (!ans.empty () && !ans[i].ns.empty ()) + fail (ans[i].loc) << "ad hoc member in target type/pattern"; + + // If we have the directory, then it is the scope. + // + enter_scope sg; + if (!n.dir.empty ()) + sg = enter_scope (*this, move (n.dir)); + + // Resolve target type. If none is specified or if it is '*', + // use the root of the hierarchy. So these are all equivalent: + // + // *: foo = bar + // {*}: foo = bar + // *{*}: foo = bar + // + const target_type* ti ( + n.untyped () || n.type == "*" + ? &target::static_type + : scope_->find_target_type (n.type)); + + if (ti == nullptr) + fail (nloc) << "unknown target type " << n.type; + + f (t, tt, ti, move (n.value)); + } + else + { + name o (n.pair ? move (ns[++i]) : name ()); + enter_target tg (*this, + move (n), + move (o), + true /* implied */, + nloc, + trace); + + // Enter ad hoc members. + // + if (!ans.empty ()) + { + // Note: index after the pair increment. + // + enter_adhoc_members (move (ans[i]), true /* implied */); + } + + f (t, tt, nullptr, string ()); + } + + if (++i != e) + rg.play (); // Replay. + } + }; + + if (next (t, tt) == type::newline) + { + // See if this is a target block. + // + // Note that we cannot just let parse_dependency() handle this case + // because we can have (a mixture of) target type/patterns. + // + if (next (t, tt) == type::lcbrace && peek () == type::newline) + { + next (t, tt); // Newline. + + // Parse the block for each target. + // + for_each ([this] (token& t, type& tt, + const target_type* type, string pat) + { + next (t, tt); // First token inside the block. + + parse_variable_block (t, tt, type, move (pat)); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; + }); + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + } + else + { + // If not followed by a block, then it's a target without any + // prerequisites. We, however, cannot just fall through to the + // parse_dependency() call because we have already seen the next + // token. + // + // Note also that we treat this as an explicit dependency + // declaration (i.e., not implied). + // + enter_targets (move (ns), nloc, move (ans), 0); + } + + continue; + } + + // Target-specific variable assignment or dependency declaration, + // including a dependency chain and/or prerequisite-specific variable + // assignment. + // + auto at (attributes_push (t, tt)); + + if (!start_names (tt)) + fail (t) << "unexpected " << t; + + // @@ PAT: currently we pattern-expand target-specific vars. + // + const location ploc (get_location (t)); + names pns (parse_names (t, tt, pattern_mode::expand)); + + // Target-specific variable assignment. 
+ // + if (tt == type::assign || tt == type::prepend || tt == type::append) + { + type akind (tt); + const location aloc (get_location (t)); + + const variable& var (parse_variable_name (move (pns), ploc)); + apply_variable_attributes (var); + + if (var.visibility > variable_visibility::target) + { + fail (nloc) << "variable " << var << " has " << var.visibility + << " visibility but is assigned on a target"; + } + + // Parse the assignment for each target. + // + for_each ([this, &var, akind, &aloc] (token& t, type& tt, + const target_type* type, + string pat) + { + if (type == nullptr) + parse_variable (t, tt, var, akind); + else + parse_type_pattern_variable (t, tt, + *type, move (pat), + var, akind, aloc); + }); + + next_after_newline (t, tt); + } + // Dependency declaration potentially followed by a chain and/or a + // prerequisite-specific variable assignment/block. + // + else + { + if (at.first) + fail (at.second) << "attributes before prerequisites"; + else + attributes_pop (); + + bool r (parse_dependency (t, tt, + move (ns), nloc, + move (ans), + move (pns), ploc)); + assert (r); // Block must have been claimed. + } + + continue; + } + + // Variable assignment. + // + // This can take any of the following forms: + // + // x = y + // foo/ x = y (ns will have two elements) + // foo/ [attrs] x = y (tt will be '[') + // + // In the future we may also want to support: + // + // foo/ bar/ x = y + // + if (tt == type::assign || tt == type::prepend || tt == type::append || + tt == type::lsbrace) + { + // Detect and handle the directory scope. If things look off, then we + // let parse_variable_name() complain. + // + dir_path d; + + if ((ns.size () == 2 && ns[0].directory ()) || + (ns.size () == 1 && ns[0].directory () && tt == type::lsbrace)) + { + if (at.first) + fail (at.second) << "attributes before scope directory"; + + if (tt == type::lsbrace) + { + attributes_pop (); + attributes_push (t, tt); + + d = move (ns[0].dir); + nloc = get_location (t); + ns = parse_names (t, tt, pattern_mode::ignore); + + // It got to be a variable assignment. + // + if (tt != type::assign && + tt != type::prepend && + tt != type::append) + fail (t) << "expected variable assignment instead of " << t; + } + else + { + d = move (ns[0].dir); + ns.erase (ns.begin ()); + } + } + + // Make sure not a pattern (see also the target case above and scope + // below). + // + if (pattern (d.string ())) + fail (nloc) << "pattern in directory " << d.representation (); + + if (tt != type::lsbrace) + { + const variable& var (parse_variable_name (move (ns), nloc)); + apply_variable_attributes (var); + + if (var.visibility >= variable_visibility::target) + { + diag_record dr (fail (nloc)); + + dr << "variable " << var << " has " << var.visibility + << " visibility but is assigned on a scope"; + + if (var.visibility == variable_visibility::target) + dr << info << "consider changing it to '*: " << var << "'"; + } + + { + enter_scope sg (d.empty () + ? enter_scope () + : enter_scope (*this, move (d))); + parse_variable (t, tt, var, tt); + } + + next_after_newline (t, tt); + continue; + } + + // Not "our" attribute, see if anyone else likes it. + } + + // See if this is a directory scope. + // + // Note: must be last since we are going to get the next token. 
+ // + if (ns.size () == 1 && ns[0].directory () && tt == type::newline) + { + token ot (t); + + if (next (t, tt) == type::lcbrace && peek () == type::newline) + { + dir_path&& d (move (ns[0].dir)); + + // Make sure not a pattern (see also the target and directory cases + // above). + // + if (pattern (d.string ())) + fail (nloc) << "pattern in directory " << d.representation (); + + next (t, tt); // Newline. + next (t, tt); // First token inside the block. + + if (at.first) + fail (at.second) << "attributes before scope directory"; + else + attributes_pop (); + + // Can contain anything that a top level can. + // + { + enter_scope sg (*this, move (d)); + parse_clause (t, tt); + } + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + continue; + } + + t = ot; + // Fall through to fail. + } + + fail (t) << "unexpected " << t << " after " << ns; + } + + return parsed; + } + + void parser:: + parse_variable_block (token& t, type& tt, + const target_type* type, string pat) + { + // Parse a target or prerequisite-specific variable block. If type is not + // NULL, then this is a target type/pattern-specific block. + // + // enter: first token of first line in the block + // leave: rcbrace + // + // This is a more restricted variant of parse_clause() that only allows + // variable assignments. + // + tracer trace ("parser::parse_variable_block", &path_); + + while (tt != type::rcbrace && tt != type::eos) + { + attributes_push (t, tt); + + location nloc (get_location (t)); + names ns (parse_names (t, tt, + pattern_mode::ignore, + false /* chunk */, + "variable name")); + + if (tt != type::assign && + tt != type::prepend && + tt != type::append) + fail (t) << "expected variable assignment instead of " << t; + + const variable& var (parse_variable_name (move (ns), nloc)); + apply_variable_attributes (var); + + if (prerequisite_ != nullptr && + var.visibility > variable_visibility::target) + { + fail (t) << "variable " << var << " has " << var.visibility + << " visibility but is assigned on a target"; + } + + if (type == nullptr) + parse_variable (t, tt, var, tt); + else + parse_type_pattern_variable (t, tt, + *type, pat, // Note: can't move. + var, tt, get_location (t)); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + next (t, tt); + } + } + + void parser:: + enter_adhoc_members (adhoc_names_loc&& ans, bool implied) + { + tracer trace ("parser::enter_adhoc_members", &path_); + + names& ns (ans.ns); + const location& loc (ans.loc); + + for (size_t i (0); i != ns.size (); ++i) + { + name&& n (move (ns[i])); + name&& o (n.pair ? move (ns[++i]) : name ()); + + if (n.qualified ()) + fail (loc) << "project name in target " << n; + + // We derive the path unless the target name ends with the '...' escape + // which here we treat as the "let the rule derive the path" indicator + // (see target::split_name() for details). This will only be useful for + // referring to ad hoc members that are managed by the group's matching + // rule. Note also that omitting '...' for such a member could be used + // to override the file name, provided the rule checks if the path has + // already been derived before doing it itself. + // + bool escaped; + { + const string& v (n.value); + size_t p (v.size ()); + + escaped = (p > 3 && + v[--p] == '.' && v[--p] == '.' && v[--p] == '.' 
&& + v[--p] != '.'); + } + + target& at ( + enter_target::insert_target (*this, + move (n), move (o), + implied, + loc, trace)); + + if (target_ == &at) + fail (loc) << "ad hoc group member " << at << " is primary target"; + + // Add as an ad hoc member at the end of the chain skipping duplicates. + // + { + const_ptr<target>* mp (&target_->member); + for (; *mp != nullptr; mp = &(*mp)->member) + { + if (*mp == &at) + { + mp = nullptr; + break; + } + } + + if (mp != nullptr) + { + *mp = &at; + at.group = target_; + } + } + + if (!escaped) + { + if (file* ft = at.is_a<file> ()) + ft->derive_path (); + } + } + } + + small_vector<reference_wrapper<target>, 1> parser:: + enter_targets (names&& tns, const location& tloc, // Target names. + adhoc_names&& ans, // Ad hoc target names. + size_t prereq_size) + { + // Enter all the targets (normally we will have just one) and their ad hoc + // groups. + // + tracer trace ("parser::enter_targets", &path_); + + small_vector<reference_wrapper<target>, 1> tgs; + + for (size_t i (0); i != tns.size (); ++i) + { + name&& n (move (tns[i])); + name&& o (n.pair ? move (tns[++i]) : name ()); + + if (n.qualified ()) + fail (tloc) << "project name in target " << n; + + // Make sure none of our targets are patterns (maybe we will allow + // quoting later). + // + if (pattern (n.value)) + fail (tloc) << "pattern in target " << n; + + enter_target tg (*this, + move (n), move (o), + false /* implied */, + tloc, trace); + + // Enter ad hoc members. + // + if (!ans.empty ()) + { + // Note: index after the pair increment. + // + enter_adhoc_members (move (ans[i]), false /* implied */); + } + + if (default_target_ == nullptr) + default_target_ = target_; + + target_->prerequisites_state_.store (2, memory_order_relaxed); + target_->prerequisites_.reserve (prereq_size); + tgs.push_back (*target_); + } + + return tgs; + } + + bool parser:: + parse_dependency (token& t, token_type& tt, + names&& tns, const location& tloc, // Target names. + adhoc_names&& ans, // Ad hoc target names. + names&& pns, const location& ploc, // Prereq names. + bool chain) + { + // Parse a dependency chain and/or a target/prerequisite-specific variable + // assignment/block. Return true if the following block (if any) has been + // "claimed" (the block "belongs" to targets/prerequisites before the last + // colon). + // + // enter: colon (anything else is not handled) + // leave: - first token on the next line if returning true + // - newline (presumably, must be verified) if returning false + // + // Note that top-level call (with chain == false) is expected to always + // return true. + // + // This dual-return "complication" is necessary to handle non-block cases + // like this: + // + // foo: bar + // {hxx ixx}: install = true + // + tracer trace ("parser::parse_dependency", &path_); + + // First enter all the targets. + // + small_vector<reference_wrapper<target>, 1> tgs ( + enter_targets (move (tns), tloc, move (ans), pns.size ())); + + // Now enter each prerequisite into each target. + // + for (name& pn: pns) + { + // We cannot reuse the names if we (potentially) may need to pass them + // as targets in case of a chain (see below). + // + name n (tt != type::colon ? move (pn) : pn); + + auto rp (scope_->find_target_type (n, ploc)); + const target_type* tt (rp.first); + optional<string>& e (rp.second); + + if (tt == nullptr) + fail (ploc) << "unknown target type " << n.type; + + // Current dir collapses to an empty one. 
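The escape test in enter_adhoc_members() above walks p backwards through the string in place, which makes the intent easy to miss: the name must end in exactly three dots, with at least one non-dot character before them. An equivalent standalone restatement (the helper name is hypothetical):

    #include <cstddef>
    #include <string>

    static bool
    ends_with_escape (const std::string& v)
    {
      std::size_t n (v.size ());

      return n > 3                            && // Room for "X...".
             v.compare (n - 3, 3, "...") == 0 && // Ends in three dots...
             v[n - 4] != '.';                    // ...but not in four or more.
    }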
+ // + if (!n.dir.empty ()) + n.dir.normalize (false, true); + + // @@ OUT: for now we assume the prerequisite's out is undetermined. The + // only way to specify an src prerequisite will be with the explicit + // @-syntax. + // + // Perhaps use @file{foo} as a way to specify it is in the out tree, + // e.g., to suppress any src searches? The issue is what to use for such + // a special indicator. Also, one can easily and natually suppress any + // searches by specifying the absolute path. + // + prerequisite p (move (n.proj), + *tt, + move (n.dir), + dir_path (), + move (n.value), + move (e), + *scope_); + + for (auto i (tgs.begin ()), e (tgs.end ()); i != e; ) + { + // Move last prerequisite (which will normally be the only one). + // + target& t (*i); + t.prerequisites_.push_back (++i == e + ? move (p) + : prerequisite (p, memory_order_relaxed)); + } + } + + // Call the specified parsing function (either variable or block) for each + // target in tgs (for_each_t) or for the last pns.size() prerequisites of + // each target (for_each_p). + // + // We handle multiple targets and/or prerequisites by replaying the tokens + // (see the target-specific case for details). The function signature is: + // + // void (token& t, type& tt) + // + auto for_each_t = [this, &t, &tt, &tgs] (auto&& f) + { + replay_guard rg (*this, tgs.size () > 1); + + for (auto ti (tgs.begin ()), te (tgs.end ()); ti != te; ) + { + target& tg (*ti); + enter_target tgg (*this, tg); + + f (t, tt); + + if (++ti != te) + rg.play (); // Replay. + } + }; + + auto for_each_p = [this, &t, &tt, &tgs, &pns] (auto&& f) + { + replay_guard rg (*this, tgs.size () > 1 || pns.size () > 1); + + for (auto ti (tgs.begin ()), te (tgs.end ()); ti != te; ) + { + target& tg (*ti); + enter_target tgg (*this, tg); + + for (size_t pn (tg.prerequisites_.size ()), pi (pn - pns.size ()); + pi != pn; ) + { + enter_prerequisite pg (*this, tg.prerequisites_[pi]); + + f (t, tt); + + if (++pi != pn) + rg.play (); // Replay. + } + + if (++ti != te) + rg.play (); // Replay. + } + }; + + // Do we have a dependency chain and/or prerequisite-specific variable + // assignment? If not, check for the target-specific variable block unless + // this is a chained call (in which case the block, if any, "belongs" to + // prerequisites). + // + if (tt != type::colon) + { + if (chain) + return false; + + next_after_newline (t, tt); // Must be a newline then. + + if (tt == type::lcbrace && peek () == type::newline) + { + next (t, tt); // Newline. + + // Parse the block for each target. + // + for_each_t ([this] (token& t, token_type& tt) + { + next (t, tt); // First token inside the block. + + parse_variable_block (t, tt, nullptr, string ()); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; + }); + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + } + + return true; // Claimed or isn't any. + } + + // What should we do if there are no prerequisites (for example, because + // of an empty wildcard result)? We can fail or we can ignore. In most + // cases, however, this is probably an error (for example, forgetting to + // checkout a git submodule) so let's not confuse the user and fail (one + // can always handle the optional prerequisites case with a variable and + // an if). 
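The prerequisite distribution loop above uses a small idiom worth spelling out: the prerequisite is copied into every target except the last one, which takes it by move, so the common single-target case costs no copy at all. A distilled, generic sketch of the same idiom (not build2 code):

    #include <string>
    #include <utility>
    #include <vector>

    static void
    distribute (std::string p, std::vector<std::vector<std::string>>& targets)
    {
      for (auto i (targets.begin ()), e (targets.end ()); i != e; )
      {
        std::vector<std::string>& t (*i);

        // Copy for every target but the last, which steals the value.
        //
        t.push_back (++i == e ? std::move (p) : std::string (p));
      }
    }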
+ // + if (pns.empty ()) + fail (ploc) << "no prerequisites in dependency chain or prerequisite-" + << "specific variable assignment"; + + next (t, tt); + auto at (attributes_push (t, tt)); + + // @@ PAT: currently we pattern-expand prerequisite-specific vars. + // + const location loc (get_location (t)); + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, pattern_mode::expand) + : names ()); + + // Prerequisite-specific variable assignment. + // + if (tt == type::assign || tt == type::prepend || tt == type::append) + { + type at (tt); + + const variable& var (parse_variable_name (move (ns), loc)); + apply_variable_attributes (var); + + // Parse the assignment for each prerequisites of each target. + // + for_each_p ([this, &var, at] (token& t, token_type& tt) + { + parse_variable (t, tt, var, at); + }); + + // Pretend that we have claimed the block to cause an error if there is + // one. Failed that, the following would result in a valid (target- + // specific) block: + // + // foo: bar: x = y + // { + // ... + // } + // + next_after_newline (t, tt); + return true; + } + // + // Dependency chain. + // + else + { + if (at.first) + fail (at.second) << "attributes before prerequisites"; + else + attributes_pop (); + + // Note that we could have "pre-resolved" these prerequisites to actual + // targets or, at least, made their directories absolute. We don't do it + // for ease of documentation: with the current semantics we can just say + // that the dependency chain is equivalent to specifying each dependency + // separately. + // + // Also note that supporting ad hoc target group specification in chains + // will be complicated. For example, what if prerequisites that have ad + // hoc targets don't end up being chained? Do we just silently drop + // them? Also, these are prerequsites first that happened to be reused + // as target names so perhaps it is the right thing not to support, + // conceptually. + // + if (parse_dependency (t, tt, + names (pns), ploc, // Note: can't move. + {} /* ad hoc target name */, + move (ns), loc, + true /* chain */)) + return true; + + // Claim the block (if any) for these prerequisites if it hasn't been + // claimed by the inner ones. + // + next_after_newline (t, tt); // Must be a newline. + + if (tt == type::lcbrace && peek () == type::newline) + { + next (t, tt); // Newline. + + // Parse the block for each prerequisites of each target. + // + for_each_p ([this] (token& t, token_type& tt) + { + next (t, tt); // First token inside the block. + + parse_variable_block (t, tt, nullptr, string ()); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; + }); + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + } + + return true; // Claimed or isn't any. 
+ } + } + + void parser:: + source (istream& is, + const path& p, + const location& loc, + bool enter, + bool deft) + { + tracer trace ("parser::source", &path_); + + l5 ([&]{trace (loc) << "entering " << p;}); + + if (enter) + enter_buildfile (p); + + const path* op (path_); + path_ = &p; + + lexer l (is, *path_); + lexer* ol (lexer_); + lexer_ = &l; + + target* odt; + if (deft) + { + odt = default_target_; + default_target_ = nullptr; + } + + token t; + type tt; + next (t, tt); + parse_clause (t, tt); + + if (tt != type::eos) + fail (t) << "unexpected " << t; + + if (deft) + { + process_default_target (t); + default_target_ = odt; + } + + lexer_ = ol; + path_ = op; + + l5 ([&]{trace (loc) << "leaving " << p;}); + } + + void parser:: + parse_source (token& t, type& tt) + { + // The rest should be a list of buildfiles. Parse them as names in the + // value mode to get variable expansion and directory prefixes. + // + mode (lexer_mode::value, '@'); + next (t, tt); + const location l (get_location (t)); + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, + pattern_mode::expand, + false, + "path", + nullptr) + : names ()); + + for (name& n: ns) + { + if (n.pair || n.qualified () || n.typed () || n.value.empty ()) + fail (l) << "expected buildfile instead of " << n; + + // Construct the buildfile path. + // + path p (move (n.dir)); + p /= path (move (n.value)); + + // If the path is relative then use the src directory corresponding + // to the current directory scope. + // + if (scope_->src_path_ != nullptr && p.relative ()) + p = scope_->src_path () / p; + + p.normalize (); + + try + { + ifdstream ifs (p); + source (ifs, + p, + get_location (t), + true /* enter */, + false /* default_target */); + } + catch (const io_error& e) + { + fail (l) << "unable to read buildfile " << p << ": " << e; + } + } + + next_after_newline (t, tt); + } + + void parser:: + parse_include (token& t, type& tt) + { + tracer trace ("parser::parse_include", &path_); + + if (root_->src_path_ == nullptr) + fail (t) << "inclusion during bootstrap"; + + // The rest should be a list of buildfiles. Parse them as names in the + // value mode to get variable expansion and directory prefixes. + // + mode (lexer_mode::value, '@'); + next (t, tt); + const location l (get_location (t)); + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, + pattern_mode::expand, + false, + "path", + nullptr) + : names ()); + + for (name& n: ns) + { + if (n.pair || n.qualified () || n.typed () || n.empty ()) + fail (l) << "expected buildfile instead of " << n; + + // Construct the buildfile path. If it is a directory, then append + // 'buildfile'. + // + path p (move (n.dir)); + + bool a; + if (n.value.empty ()) + a = true; + else + { + a = path::traits_type::is_separator (n.value.back ()); + p /= path (move (n.value)); + } + + if (a) + { + // This shouldn't happen but let's make sure. + // + if (root_->root_extra == nullptr) + fail (l) << "buildfile naming scheme is not yet known"; + + p /= root_->root_extra->buildfile_file; + } + + l6 ([&]{trace (l) << "relative path " << p;}); + + // Determine new out_base. + // + dir_path out_base; + + if (p.relative ()) + { + out_base = scope_->out_path () / p.directory (); + out_base.normalize (); + } + else + { + p.normalize (); + + // Make sure the path is in this project. Include is only meant + // to be used for intra-project inclusion (plus amalgamation). 
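One detail of parse_include() above that is easy to gloss over: a name that is empty or ends with a directory separator refers to a directory, and the buildfile name is appended to it before the path is resolved. A hypothetical string-based restatement (the real code uses the path class and takes the file name from root_extra->buildfile_file; a literal "buildfile" and '/'-only separators are assumed here):

    #include <string>

    // Join with a single '/' (stand-in for the path class).
    //
    static std::string
    join (std::string d, const std::string& n)
    {
      if (!d.empty () && d.back () != '/' && !n.empty ())
        d += '/';
      d += n;
      return d;
    }

    static std::string
    include_file (const std::string& dir, const std::string& name)
    {
      // An empty name or one ending with a separator refers to a directory.
      //
      bool is_dir (name.empty () || name.back () == '/');

      std::string p (join (dir, name));

      return is_dir ? join (p, "buildfile") : p;
    }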
+ // + bool in_out (false); + if (!p.sub (root_->src_path ()) && + !(in_out = p.sub (root_->out_path ()))) + fail (l) << "out of project include " << p; + + out_base = in_out + ? p.directory () + : out_src (p.directory (), *root_); + } + + // Switch the scope. Note that we need to do this before figuring + // out the absolute buildfile path since we may switch the project + // root and src_root with it (i.e., include into a sub-project). + // + scope* ors (root_); + scope* ocs (scope_); + const dir_path* opb (pbase_); + switch_scope (out_base); + + if (root_ == nullptr) + fail (l) << "out of project include from " << out_base; + + // Use the new scope's src_base to get absolute buildfile path if it is + // relative. + // + if (p.relative ()) + p = scope_->src_path () / p.leaf (); + + l6 ([&]{trace (l) << "absolute path " << p;}); + + if (!root_->buildfiles.insert (p).second) // Note: may be "new" root. + { + l5 ([&]{trace (l) << "skipping already included " << p;}); + pbase_ = opb; + scope_ = ocs; + root_ = ors; + continue; + } + + try + { + ifdstream ifs (p); + source (ifs, + p, + get_location (t), + true /* enter */, + true /* default_target */); + } + catch (const io_error& e) + { + fail (l) << "unable to read buildfile " << p << ": " << e; + } + + pbase_ = opb; + scope_ = ocs; + root_ = ors; + } + + next_after_newline (t, tt); + } + + void parser:: + parse_run (token& t, type& tt) + { + // run <name> [<arg>...] + // + + // Parse the command line as names in the value mode to get variable + // expansion, etc. + // + mode (lexer_mode::value); + next (t, tt); + const location l (get_location (t)); + + strings args; + try + { + args = convert<strings> (tt != type::newline && tt != type::eos + ? parse_names (t, tt, + pattern_mode::ignore, + false, + "argument", + nullptr) + : names ()); + } + catch (const invalid_argument& e) + { + fail (l) << "invalid run argument: " << e.what (); + } + + if (args.empty () || args[0].empty ()) + fail (l) << "expected executable name after run"; + + cstrings cargs; + cargs.reserve (args.size () + 1); + transform (args.begin (), + args.end (), + back_inserter (cargs), + [] (const string& s) {return s.c_str ();}); + cargs.push_back (nullptr); + + process pr (run_start (3 /* verbosity */, + cargs, + 0 /* stdin */, + -1 /* stdout */, + true /* error */, + empty_dir_path /* cwd */, + l)); + bool bad (false); + try + { + // While a failing process could write garbage to stdout, for simplicity + // let's assume it is well behaved. + // + ifdstream is (move (pr.in_ofd), fdstream_mode::skip); + + // If there is an error in the output, our diagnostics will look like + // this: + // + // <stdout>:2:3 error: unterminated single quote + // buildfile:3:4 info: while parsing foo output + // + { + auto df = make_diag_frame ( + [&args, &l](const diag_record& dr) + { + dr << info (l) << "while parsing " << args[0] << " output"; + }); + + source (is, + path ("<stdout>"), + l, + false /* enter */, + false /* default_target */); + } + + is.close (); // Detect errors. + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // run_finish() try to deal with that first. 
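parse_run() above encodes a deliberate ordering of diagnostics: if reading the child's output fails, run_finish() is given the first chance to report the child's own failure, and the read error is reported only if the child actually succeeded. A generic POSIX sketch of that shape using popen()/pclose() instead of build2's process API (run_and_read and its messages are invented):

    #include <cstddef>
    #include <cstdio>     // popen(), pclose() (POSIX).
    #include <stdexcept>
    #include <string>

    static std::string
    run_and_read (const char* cmd)
    {
      FILE* f (popen (cmd, "r"));
      if (f == nullptr)
        throw std::runtime_error ("unable to start command");

      std::string out;
      char buf[4096];
      std::size_t n;

      while ((n = fread (buf, 1, sizeof (buf), f)) != 0)
        out.append (buf, n);

      bool bad (ferror (f) != 0); // Remember, but don't report yet.

      // Prefer the child's exit status: a failed child has presumably
      // already issued its own diagnostics.
      //
      if (pclose (f) != 0)
        throw std::runtime_error ("command failed");

      if (bad)
        throw std::runtime_error ("error reading command output");

      return out;
    }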
+ // + bad = true; + } + + run_finish (cargs, pr, l); + + if (bad) + fail (l) << "error reading " << args[0] << " output"; + + next_after_newline (t, tt); + } + + void parser:: + parse_import (token& t, type& tt) + { + tracer trace ("parser::parse_import", &path_); + + if (root_->src_path_ == nullptr) + fail (t) << "import during bootstrap"; + + // General import format: + // + // import [<var>=](<project>|<project>/<target>])+ + // + type atype; // Assignment type. + value* val (nullptr); + const build2::variable* var (nullptr); + + // We are now in the normal lexing mode and here is the problem: we need + // to switch to the value mode so that we don't treat certain characters + // as separators (e.g., + in 'libstdc++'). But at the same time we need + // to detect if we have the <var>= part. So what we are going to do is + // switch to the value mode, get the first token, and then re-parse it + // manually looking for =/=+/+=. + // + mode (lexer_mode::value, '@'); + next (t, tt); + + // Get variable attributes, if any (note that here we will go into a + // nested value mode with a different pair character). + // + auto at (attributes_push (t, tt)); + + const location vloc (get_location (t)); + + if (tt == type::word) + { + // Split the token into the variable name and value at position (p) of + // '=', taking into account leading/trailing '+'. The variable name is + // returned while the token is set to value. If the resulting token + // value is empty, get the next token. Also set assignment type (at). + // + auto split = [&atype, &t, &tt, this] (size_t p) -> string + { + string& v (t.value); + size_t e; + + if (p != 0 && v[p - 1] == '+') // += + { + e = p--; + atype = type::append; + } + else if (p + 1 != v.size () && v[p + 1] == '+') // =+ + { + e = p + 1; + atype = type::prepend; + } + else // = + { + e = p; + atype = type::assign; + } + + string nv (v, e + 1); // value + v.resize (p); // var name + v.swap (nv); + + if (v.empty ()) + next (t, tt); + + return nv; + }; + + // Is this the 'foo=...' case? + // + size_t p (t.value.find ('=')); + auto& vp (var_pool.rw (*scope_)); + + if (p != string::npos) + var = &vp.insert (split (p), true /* overridable */); + // + // This could still be the 'foo =...' case. + // + else if (peek () == type::word) + { + const string& v (peeked ().value); + size_t n (v.size ()); + + // We should start with =/+=/=+. + // + if (n > 0 && + (v[p = 0] == '=' || + (n > 1 && v[0] == '+' && v[p = 1] == '='))) + { + var = &vp.insert (move (t.value), true /* overridable */); + next (t, tt); // Get the peeked token. + split (p); // Returned name should be empty. + } + } + } + + if (var != nullptr) + { + apply_variable_attributes (*var); + + if (var->visibility >= variable_visibility::target) + { + fail (vloc) << "variable " << *var << " has " << var->visibility + << " visibility but is assigned in import"; + } + + val = atype == type::assign + ? &scope_->assign (*var) + : &scope_->append (*var); + } + else + { + if (at.first) + fail (at.second) << "attributes without variable"; + else + attributes_pop (); + } + + // The rest should be a list of projects and/or targets. Parse them as + // names to get variable expansion and directory prefixes. Note: doesn't + // make sense to expand patterns (what's the base directory?) + // + const location l (get_location (t)); + names ns (tt != type::newline && tt != type::eos + ? 
parse_names (t, tt, pattern_mode::ignore) + : names ()); + + for (name& n: ns) + { + if (n.pair) + fail (l) << "unexpected pair in import"; + + // build2::import() will check the name, if required. + // + names r (build2::import (*scope_, move (n), l)); + + if (val != nullptr) + { + if (atype == type::assign) + { + val->assign (move (r), var); + atype = type::append; // Append subsequent values. + } + else if (atype == type::prepend) + { + // Note: multiple values will be prepended in reverse. + // + val->prepend (move (r), var); + } + else + val->append (move (r), var); + } + } + + next_after_newline (t, tt); + } + + void parser:: + parse_export (token& t, type& tt) + { + tracer trace ("parser::parse_export", &path_); + + scope* ps (scope_->parent_scope ()); + + // This should be temp_scope. + // + if (ps == nullptr || ps->out_path () != scope_->out_path ()) + fail (t) << "export outside export stub"; + + // The rest is a value. Parse it as a variable value to get expansion, + // attributes, etc. build2::import() will check the names, if required. + // + location l (get_location (t)); + value rhs (parse_variable_value (t, tt)); + + // While it may seem like supporting attributes is a good idea here, + // there is actually little benefit in being able to type them or to + // return NULL. + // + // export_value_ = value (); // Reset to untyped NULL value. + // value_attributes (nullptr, + // export_value_, + // move (rhs), + // type::assign); + if (attributes& a = attributes_top ()) + fail (a.loc) << "attributes in export"; + else + attributes_pop (); + + if (!rhs) + fail (l) << "null value in export"; + + if (rhs.type != nullptr) + untypify (rhs); + + export_value_ = move (rhs).as<names> (); + + if (export_value_.empty ()) + fail (l) << "empty value in export"; + + next_after_newline (t, tt); + } + + void parser:: + parse_using (token& t, type& tt) + { + tracer trace ("parser::parse_using", &path_); + + bool optional (t.value.back () == '?'); + + if (optional && boot_) + fail (t) << "optional module in bootstrap"; + + // The rest should be a list of module names. Parse them as names in the + // value mode to get variable expansion, etc. + // + mode (lexer_mode::value, '@'); + next (t, tt); + const location l (get_location (t)); + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, + pattern_mode::ignore, + false, + "module", + nullptr) + : names ()); + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + string n; + standard_version v; + + if (!i->simple ()) + fail (l) << "expected module name instead of " << *i; + + n = move (i->value); + + if (i->pair) + try + { + if (i->pair != '@') + fail (l) << "unexpected pair style in using directive"; + + ++i; + if (!i->simple ()) + fail (l) << "expected module version instead of " << *i; + + v = standard_version (i->value, standard_version::allow_earliest); + } + catch (const invalid_argument& e) + { + fail (l) << "invalid module version '" << i->value << "': " << e; + } + + // Handle the special 'build' module. + // + if (n == "build") + { + standard_version_constraint c (move (v), false, nullopt, true); // >= + + if (!v.empty ()) + check_build_version (c, l); + } + else + { + assert (v.empty ()); // Module versioning not yet implemented. + + if (boot_) + boot_module (*root_, n, l); + else + load_module (*root_, *scope_, n, l, optional); + } + } + + next_after_newline (t, tt); + } + + void parser:: + parse_define (token& t, type& tt) + { + // define <derived>: <base> + // + // See tests/define. 
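The split lambda in parse_import() above does two things at once: it classifies the assignment operator ('=', '+=', or '=+') and splits the token into the variable name (returned) and the value (left in the token). A hypothetical standalone restatement of the same rules, given the position p of '=':

    #include <cstddef>
    #include <string>

    enum class assign_kind {assign, append, prepend};

    struct split_result
    {
      assign_kind kind;
      std::string name;
      std::string value;
    };

    static split_result
    split_assignment (const std::string& v, std::size_t p) // p: position of '='.
    {
      split_result r;
      std::size_t b; // Start of the value part.

      if (p != 0 && v[p - 1] == '+')                  // name+=value
      {
        r.kind = assign_kind::append;
        r.name = v.substr (0, p - 1);
        b = p + 1;
      }
      else if (p + 1 != v.size () && v[p + 1] == '+') // name=+value
      {
        r.kind = assign_kind::prepend;
        r.name = v.substr (0, p);
        b = p + 2;
      }
      else                                            // name=value
      {
        r.kind = assign_kind::assign;
        r.name = v.substr (0, p);
        b = p + 1;
      }

      r.value = v.substr (b);
      return r;
    }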
+ // + if (next (t, tt) != type::word) + fail (t) << "expected name instead of " << t << " in target type " + << "definition"; + + string dn (move (t.value)); + const location dnl (get_location (t)); + + if (next (t, tt) != type::colon) + fail (t) << "expected ':' instead of " << t << " in target type " + << "definition"; + + next (t, tt); + + if (tt == type::word) + { + // Target. + // + const string& bn (t.value); + const target_type* bt (scope_->find_target_type (bn)); + + if (bt == nullptr) + fail (t) << "unknown target type " << bn; + + if (!scope_->derive_target_type (move (dn), *bt).second) + fail (dnl) << "target type " << dn << " already define in this scope"; + + next (t, tt); // Get newline. + } + else + fail (t) << "expected name instead of " << t << " in target type " + << "definition"; + + next_after_newline (t, tt); + } + + void parser:: + parse_if_else (token& t, type& tt) + { + // Handle the whole if-else chain. See tests/if-else. + // + bool taken (false); // One of the branches has been taken. + + for (;;) + { + string k (move (t.value)); + next (t, tt); + + bool take (false); // Take this branch? + + if (k != "else") + { + // Should we evaluate the expression if one of the branches has + // already been taken? On the one hand, evaluating it is a waste + // of time. On the other, it can be invalid and the only way for + // the user to know their buildfile is valid is to test every + // branch. There could also be side effects. We also have the same + // problem with ignored branch blocks except there evaluating it + // is not an option. So let's skip it. + // + if (taken) + skip_line (t, tt); + else + { + if (tt == type::newline || tt == type::eos) + fail (t) << "expected " << k << "-expression instead of " << t; + + // Parse as names to get variable expansion, evaluation, etc. Note + // that we also expand patterns (could be used in nested contexts, + // etc; e.g., "if pattern expansion is empty" condition). + // + const location l (get_location (t)); + + try + { + // Should evaluate to 'true' or 'false'. + // + bool e ( + convert<bool> ( + parse_value (t, tt, + pattern_mode::expand, + "expression", + nullptr))); + + take = (k.back () == '!' ? !e : e); + } + catch (const invalid_argument& e) { fail (l) << e; } + } + } + else + take = !taken; + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t << " after " << k + << (k != "else" ? "-expression" : ""); + + // This can be a block or a single line. The block part is a bit + // tricky, consider: + // + // else + // {hxx cxx}{options}: install = false + // + // So we treat it as a block if it's followed immediately by newline. + // + if (next (t, tt) == type::lcbrace && peek () == type::newline) + { + next (t, tt); // Get newline. + next (t, tt); + + if (take) + { + parse_clause (t, tt); + taken = true; + } + else + skip_block (t, tt); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t << " at the end of " << k + << "-block"; + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + } + else + { + if (take) + { + if (!parse_clause (t, tt, true)) + fail (t) << "expected " << k << "-line instead of " << t; + + taken = true; + } + else + { + skip_line (t, tt); + + if (tt == type::newline) + next (t, tt); + } + } + + // See if we have another el* keyword. 
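A distilled sketch of the branch-selection policy parse_if_else() above implements: once a branch has been taken, later conditions are not evaluated at all (they may have side effects or may not even be valid), yet their bodies are still consumed and discarded. run_chain and its types are invented for illustration; a plain else is modeled as a branch whose condition always holds.

    #include <functional>
    #include <utility>
    #include <vector>

    static void
    run_chain (const std::vector<std::pair<std::function<bool ()>,
                                           std::function<void ()>>>& branches)
    {
      bool taken (false);

      for (const auto& b: branches)
      {
        bool take (false);

        if (!taken)
          take = b.first (); // Evaluate the condition only while still needed.

        if (take)
        {
          b.second ();       // Take this branch.
          taken = true;
        }

        // An untaken branch body is simply skipped (skip_line()/skip_block()
        // in the parser), so its side effects never happen.
      }
    }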
+ // + if (k != "else" && tt == type::word && keyword (t)) + { + const string& n (t.value); + + if (n == "else" || n == "elif" || n == "elif!") + continue; + } + + break; + } + } + + void parser:: + parse_for (token& t, type& tt) + { + // for <varname>: <value> + // <line> + // + // for <varname>: <value> + // { + // <block> + // } + // + + // First take care of the variable name. There is no reason not to + // support variable attributes. + // + next (t, tt); + attributes_push (t, tt); + + // @@ PAT: currently we pattern-expand for var. + // + const location vloc (get_location (t)); + names vns (parse_names (t, tt, pattern_mode::expand)); + + if (tt != type::colon) + fail (t) << "expected ':' instead of " << t << " after variable name"; + + const variable& var (parse_variable_name (move (vns), vloc)); + apply_variable_attributes (var); + + if (var.visibility >= variable_visibility::target) + { + fail (vloc) << "variable " << var << " has " << var.visibility + << " visibility but is assigned in for-loop"; + } + + // Now the value (list of names) to iterate over. Parse it as a variable + // value to get expansion, attributes, etc. + // + value val; + apply_value_attributes ( + nullptr, val, parse_variable_value (t, tt), type::assign); + + // If this value is a vector, then save its element type so that we + // can typify each element below. + // + const value_type* etype (nullptr); + + if (val && val.type != nullptr) + { + etype = val.type->element_type; + untypify (val); + } + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t << " after for"; + + // Finally the body. The initial thought was to use the token replay + // facility but on closer inspection this didn't turn out to be a good + // idea (no support for nested replays, etc). So instead we are going to + // do a full-blown re-lex. Specifically, we will first skip the line/block + // just as we do for non-taken if/else branches while saving the character + // sequence that comprises the body. Then we re-lex/parse it on each + // iteration. + // + string body; + uint64_t line (lexer_->line); // Line of the first character to be saved. + lexer::save_guard sg (*lexer_, body); + + // This can be a block or a single line, similar to if-else. + // + bool block (next (t, tt) == type::lcbrace && peek () == type::newline); + + if (block) + { + next (t, tt); // Get newline. + next (t, tt); + + skip_block (t, tt); + sg.stop (); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t << " at the end of " + << "for-block"; + + next (t, tt); // Presumably newline after '}'. + next_after_newline (t, tt, '}'); // Should be on its own line. + } + else + { + skip_line (t, tt); + sg.stop (); + + if (tt == type::newline) + next (t, tt); + } + + // Iterate. + // + names& ns (val.as<names> ()); + + if (ns.empty ()) + return; + + value& v (scope_->assign (var)); + + istringstream is (move (body)); + + for (auto i (ns.begin ()), e (ns.end ());; ) + { + // Set the variable value. + // + bool pair (i->pair); + names n; + n.push_back (move (*i)); + if (pair) n.push_back (move (*++i)); + v = value (move (n)); + + if (etype != nullptr) + typify (v, *etype, &var); + + lexer l (is, *path_, line); + lexer* ol (lexer_); + lexer_ = &l; + + token t; + type tt; + next (t, tt); + + if (block) + { + next (t, tt); // { + next (t, tt); // <newline> + } + parse_clause (t, tt); + assert (tt == (block ? type::rcbrace : type::eos)); + + lexer_ = ol; + + if (++i == e) + break; + + // Rewind the stream. 
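+      //
+      // An assumed example: for a loop like
+      //
+      //   for n: foo bar baz
+      //     exe{$n}: cxx{$n}
+      //
+      // the saved body is re-lexed from the rewound stream once per
+      // element of the value.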
+ // + is.clear (); + is.seekg (0); + } + } + + void parser:: + parse_assert (token& t, type& tt) + { + bool neg (t.value.back () == '!'); + const location al (get_location (t)); + + // Parse the next chunk as names to get variable expansion, evaluation, + // etc. Do it in the value mode so that we don't treat ':', etc., as + // special. + // + mode (lexer_mode::value); + next (t, tt); + + const location el (get_location (t)); + + try + { + // Should evaluate to 'true' or 'false'. + // + bool e ( + convert<bool> ( + parse_value (t, tt, + pattern_mode::expand, + "expression", + nullptr, + true))); + e = (neg ? !e : e); + + if (e) + { + skip_line (t, tt); + + if (tt != type::eos) + next (t, tt); // Swallow newline. + + return; + } + } + catch (const invalid_argument& e) { fail (el) << e; } + + // Being here means things didn't end up well. Parse the description, if + // any, with expansion. Then fail. + // + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, + pattern_mode::ignore, + false, + "description", + nullptr) + : names ()); + + diag_record dr (fail (al)); + + if (ns.empty ()) + dr << "assertion failed"; + else + dr << ns; + } + + void parser:: + parse_print (token& t, type& tt) + { + // Parse the rest as a variable value to get expansion, attributes, etc. + // + value rhs (parse_variable_value (t, tt)); + + value lhs; + apply_value_attributes (nullptr, lhs, move (rhs), type::assign); + + if (lhs) + { + names storage; + cout << reverse (lhs, storage) << endl; + } + else + cout << "[null]" << endl; + + if (tt != type::eos) + next (t, tt); // Swallow newline. + } + + void parser:: + parse_diag (token& t, type& tt) + { + diag_record dr; + const location l (get_location (t)); + + switch (t.value[0]) + { + case 'f': dr << fail (l); break; + case 'w': dr << warn (l); break; + case 'i': dr << info (l); break; + case 't': dr << text (l); break; + default: assert (false); + } + + // Parse the rest as a variable value to get expansion, attributes, etc. + // + value rhs (parse_variable_value (t, tt)); + + value lhs; + apply_value_attributes (nullptr, lhs, move (rhs), type::assign); + + if (lhs) + { + names storage; + dr << reverse (lhs, storage); + } + + if (tt != type::eos) + next (t, tt); // Swallow newline. + } + + void parser:: + parse_dump (token& t, type& tt) + { + // dump [<target>...] + // + // If there are no targets, then we dump the current scope. + // + tracer trace ("parser::parse_dump", &path_); + + const location l (get_location (t)); + next (t, tt); + names ns (tt != type::newline && tt != type::eos + ? parse_names (t, tt, pattern_mode::ignore) + : names ()); + + text (l) << "dump:"; + + // Dump directly into diag_stream. + // + ostream& os (*diag_stream); + + if (ns.empty ()) + { + if (scope_ != nullptr) + dump (*scope_, " "); // Indent two spaces. + else + os << " <no current scope>" << endl; + } + else + { + for (auto i (ns.begin ()), e (ns.end ()); i != e; ) + { + name& n (*i++); + name o (n.pair ? move (*i++) : name ()); + + const target* t (enter_target::find_target (*this, n, o, l, trace)); + + if (t != nullptr) + dump (*t, " "); // Indent two spaces. + else + { + os << " <no target " << n; + if (n.pair && !o.dir.empty ()) os << '@' << o.dir; + os << '>' << endl; + } + + if (i != e) + os << endl; + } + } + + if (tt != type::eos) + next (t, tt); // Swallow newline. + } + + const variable& parser:: + parse_variable_name (names&& ns, const location& l) + { + // The list should contain a single, simple name. 
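+    //
+    // Assumed examples: a simple name such as 'foo' or
+    // 'config.hello.fancy' is accepted here while a directory
+    // ('foo/') or a multi-name list is not.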
+ // + if (ns.size () != 1 || !ns[0].simple () || ns[0].empty ()) + fail (l) << "expected variable name instead of " << ns; + + string& n (ns[0].value); + + //@@ OLD + if (n.front () == '.') // Fully qualified name. + n.erase (0, 1); + else + { + //@@ TODO: append namespace if any. + } + + return var_pool.rw (*scope_).insert (move (n), true /* overridable */); + } + + void parser:: + parse_variable (token& t, type& tt, const variable& var, type kind) + { + value rhs (parse_variable_value (t, tt)); + + value& lhs ( + kind == type::assign + + ? (prerequisite_ != nullptr ? prerequisite_->assign (var) : + target_ != nullptr ? target_->assign (var) : + /* */ scope_->assign (var)) + + : (prerequisite_ != nullptr ? prerequisite_->append (var, *target_) : + target_ != nullptr ? target_->append (var) : + /* */ scope_->append (var))); + + apply_value_attributes (&var, lhs, move (rhs), kind); + } + + void parser:: + parse_type_pattern_variable (token& t, token_type& tt, + const target_type& type, string pat, + const variable& var, token_type kind, + const location& loc) + { + // Parse target type/pattern-specific variable assignment. + // + // See old-tests/variable/type-pattern. + + // Note: expanding the value in the current scope context. + // + value rhs (parse_variable_value (t, tt)); + + // Leave the value untyped unless we are assigning. + // + pair<reference_wrapper<value>, bool> p ( + scope_->target_vars[type][move (pat)].insert ( + var, kind == type::assign)); + + value& lhs (p.first); + + // We store prepend/append values untyped (similar to overrides). + // + if (rhs.type != nullptr && kind != type::assign) + untypify (rhs); + + if (p.second) + { + // Note: we are always using assign and we don't pass the variable in + // case of prepend/append in order to keep the value untyped. + // + apply_value_attributes (kind == type::assign ? &var : nullptr, + lhs, + move (rhs), + type::assign); + + // Map assignment type to the value::extra constant. + // + lhs.extra = (kind == type::prepend ? 1 : + kind == type::append ? 2 : + 0); + } + else + { + // Existing value. What happens next depends on what we are trying to do + // and what's already there. + // + // Assignment is the easy one: we simply overwrite what's already + // there. Also, if we are appending/prepending to a previously assigned + // value, then we simply append or prepend normally. + // + if (kind == type::assign || lhs.extra == 0) + { + // Above we've instructed insert() not to type the value so we have to + // compensate for that now. + // + if (kind != type::assign) + { + if (var.type != nullptr && lhs.type != var.type) + typify (lhs, *var.type, &var); + } + else + lhs.extra = 0; // Change to assignment. + + apply_value_attributes (&var, lhs, move (rhs), kind); + } + else + { + // This is an append/prepent to a previously appended or prepended + // value. We can handle it as long as things are consistent. + // + if (kind == type::prepend && lhs.extra == 2) + fail (loc) << "prepend to a previously appended target type/pattern-" + << "specific variable " << var; + + if (kind == type::append && lhs.extra == 1) + fail (loc) << "append to a previously prepended target type/pattern-" + << "specific variable " << var; + + // Do untyped prepend/append. 
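+      //
+      // An assumed example of what ends up here: two consecutive
+      // appends to the same target type/pattern-specific variable,
+      // as in
+      //
+      //   cxx{*}: cxx.poptions += -DA
+      //   cxx{*}: cxx.poptions += -DB
+      //
+      // Both values end up stored untyped.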
+ // + apply_value_attributes (nullptr, lhs, move (rhs), kind); + } + } + + if (lhs.extra != 0 && lhs.type != nullptr) + fail (loc) << "typed prepend/append to target type/pattern-specific " + << "variable " << var; + } + + value parser:: + parse_variable_value (token& t, type& tt) + { + mode (lexer_mode::value, '@'); + next (t, tt); + + // Parse value attributes if any. Note that it's ok not to have anything + // after the attributes (e.g., foo=[null]). + // + attributes_push (t, tt, true); + + return tt != type::newline && tt != type::eos + ? parse_value (t, tt, pattern_mode::expand) + : value (names ()); + } + + static const value_type* + map_type (const string& n) + { + auto ptr = [] (const value_type& vt) {return &vt;}; + + return + n == "bool" ? ptr (value_traits<bool>::value_type) : + n == "uint64" ? ptr (value_traits<uint64_t>::value_type) : + n == "string" ? ptr (value_traits<string>::value_type) : + n == "path" ? ptr (value_traits<path>::value_type) : + n == "dir_path" ? ptr (value_traits<dir_path>::value_type) : + n == "abs_dir_path" ? ptr (value_traits<abs_dir_path>::value_type) : + n == "name" ? ptr (value_traits<name>::value_type) : + n == "name_pair" ? ptr (value_traits<name_pair>::value_type) : + n == "target_triplet" ? ptr (value_traits<target_triplet>::value_type) : + n == "project_name" ? ptr (value_traits<project_name>::value_type) : + + n == "uint64s" ? ptr (value_traits<uint64s>::value_type) : + n == "strings" ? ptr (value_traits<strings>::value_type) : + n == "paths" ? ptr (value_traits<paths>::value_type) : + n == "dir_paths" ? ptr (value_traits<dir_paths>::value_type) : + n == "names" ? ptr (value_traits<vector<name>>::value_type) : + + nullptr; + } + + void parser:: + apply_variable_attributes (const variable& var) + { + attributes a (attributes_pop ()); + + if (!a) + return; + + const location& l (a.loc); + const value_type* type (nullptr); + + for (auto& p: a.ats) + { + string& k (p.first); + string& v (p.second); + + if (const value_type* t = map_type (k)) + { + if (type != nullptr && t != type) + fail (l) << "multiple variable types: " << k << ", " << type->name; + + type = t; + // Fall through. + } + else + { + diag_record dr (fail (l)); + dr << "unknown variable attribute " << k; + + if (!v.empty ()) + dr << '=' << v; + } + + if (!v.empty ()) + fail (l) << "unexpected value for attribute " << k << ": " << v; + } + + if (type != nullptr) + { + if (var.type == nullptr) + { + const bool o (true); // Allow overrides. + var_pool.update (const_cast<variable&> (var), type, nullptr, &o); + } + else if (var.type != type) + fail (l) << "changing variable " << var << " type from " + << var.type->name << " to " << type->name; + } + } + + void parser:: + apply_value_attributes (const variable* var, + value& v, + value&& rhs, + type kind) + { + attributes a (attributes_pop ()); + const location& l (a.loc); + + // Essentially this is an attribute-augmented assign/append/prepend. + // + bool null (false); + const value_type* type (nullptr); + + for (auto& p: a.ats) + { + string& k (p.first); + string& v (p.second); + + if (k == "null") + { + if (rhs && !rhs.empty ()) // Note: null means we had an expansion. + fail (l) << "value with null attribute"; + + null = true; + // Fall through. + } + else if (const value_type* t = map_type (k)) + { + if (type != nullptr && t != type) + fail (l) << "multiple value types: " << k << ", " << type->name; + + type = t; + // Fall through. 
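+        //
+        // Assumed examples of values with a type attribute that take
+        // this branch: 'x = [uint64] 01' or 'y = [strings] a b c'.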
+ } + else + { + diag_record dr (fail (l)); + dr << "unknown value attribute " << k; + + if (!v.empty ()) + dr << '=' << v; + } + + if (!v.empty ()) + fail (l) << "unexpected value for attribute " << k << ": " << v; + } + + // When do we set the type and when do we keep the original? This gets + // tricky for append/prepend where both values contribute. The guiding + // rule here is that if the user specified the type, then they reasonable + // expect the resulting value to be of that type. So for assign we always + // override the type since it's a new value. For append/prepend we + // override if the LHS value is NULL (which also covers undefined). We + // also override if LHS is untyped. Otherwise, we require that the types + // be the same. Also check that the requested value type doesn't conflict + // with the variable type. + // + if (var != nullptr && var->type != nullptr) + { + if (type == nullptr) + { + type = var->type; + } + else if (var->type != type) + { + fail (l) << "conflicting variable " << var->name << " type " + << var->type->name << " and value type " << type->name; + } + } + + // What if both LHS and RHS are typed? For now we do lexical conversion: + // if this specific value can be converted, then all is good. The + // alternative would be to do type conversion: if any value of RHS type + // can be converted to LHS type, then we are good. This may be a better + // option in the future but currently our parse_names() implementation + // untypifies everything if there are multiple names. And having stricter + // rules just for single-element values would be strange. + // + // We also have "weaker" type propagation for the RHS type. + // + bool rhs_type (false); + if (rhs.type != nullptr) + { + // Only consider RHS type if there is no explicit or variable type. + // + if (type == nullptr) + { + type = rhs.type; + rhs_type = true; + } + + // Reduce this to the untyped value case for simplicity. + // + untypify (rhs); + } + + if (kind == type::assign) + { + if (type != v.type) + { + v = nullptr; // Clear old value. + v.type = type; + } + } + else if (type != nullptr) + { + if (!v) + v.type = type; + else if (v.type == nullptr) + typify (v, *type, var); + else if (v.type != type && !rhs_type) + fail (l) << "conflicting original value type " << v.type->name + << " and append/prepend value type " << type->name; + } + + if (null) + { + if (kind == type::assign) // Ignore for prepend/append. + v = nullptr; + } + else + { + if (kind == type::assign) + { + if (rhs) + v.assign (move (rhs).as<names> (), var); + else + v = nullptr; + } + else if (rhs) // Don't append/prepent NULL. + { + if (kind == type::prepend) + v.prepend (move (rhs).as<names> (), var); + else + v.append (move (rhs).as<names> (), var); + } + } + } + + values parser:: + parse_eval (token& t, type& tt, pattern_mode pmode) + { + // enter: lparen + // leave: rparen + + mode (lexer_mode::eval, '@'); // Auto-expires at rparen. + next (t, tt); + + if (tt == type::rparen) + return values (); + + values r (parse_eval_comma (t, tt, pmode, true)); + + if (tt != type::rparen) + fail (t) << "unexpected " << t; // E.g., stray ':'. + + return r; + } + + values parser:: + parse_eval_comma (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of LHS + // leave: next token after last RHS + + // Left-associative: parse in a loop for as long as we can. 
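+    //
+    // An assumed example: in a function call such as
+    // $regex.replace($p, 'a+', 'b') the comma-separated arguments
+    // are parsed here into a values list.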
+ // + values r; + value lhs (parse_eval_ternary (t, tt, pmode, first)); + + if (!pre_parse_) + r.push_back (move (lhs)); + + while (tt == type::comma) + { + next (t, tt); + value rhs (parse_eval_ternary (t, tt, pmode)); + + if (!pre_parse_) + r.push_back (move (rhs)); + } + + return r; + } + + value parser:: + parse_eval_ternary (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of LHS + // leave: next token after last RHS + + // Right-associative (kind of): we parse what's between ?: without + // regard for priority and we recurse on what's after :. Here is an + // example: + // + // a ? x ? y : z : b ? c : d + // + // This should be parsed/evaluated as: + // + // a ? (x ? y : z) : (b ? c : d) + // + location l (get_location (t)); + value lhs (parse_eval_or (t, tt, pmode, first)); + + if (tt != type::question) + return lhs; + + // Use the pre-parse mechanism to implement short-circuit. + // + bool pp (pre_parse_); + + bool q; + try + { + q = pp ? true : convert<bool> (move (lhs)); + } + catch (const invalid_argument& e) { fail (l) << e << endf; } + + if (!pp) + pre_parse_ = !q; // Short-circuit middle? + + next (t, tt); + value mhs (parse_eval_ternary (t, tt, pmode)); + + if (tt != type::colon) + fail (t) << "expected ':' instead of " << t; + + if (!pp) + pre_parse_ = q; // Short-circuit right? + + next (t, tt); + value rhs (parse_eval_ternary (t, tt, pmode)); + + pre_parse_ = pp; + return q ? move (mhs) : move (rhs); + } + + value parser:: + parse_eval_or (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of LHS + // leave: next token after last RHS + + // Left-associative: parse in a loop for as long as we can. + // + location l (get_location (t)); + value lhs (parse_eval_and (t, tt, pmode, first)); + + // Use the pre-parse mechanism to implement short-circuit. + // + bool pp (pre_parse_); + + while (tt == type::log_or) + { + try + { + if (!pre_parse_ && convert<bool> (move (lhs))) + pre_parse_ = true; + + next (t, tt); + l = get_location (t); + value rhs (parse_eval_and (t, tt, pmode)); + + if (pre_parse_) + continue; + + // Store the result as bool value. + // + lhs = convert<bool> (move (rhs)); + } + catch (const invalid_argument& e) { fail (l) << e; } + } + + pre_parse_ = pp; + return lhs; + } + + value parser:: + parse_eval_and (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of LHS + // leave: next token after last RHS + + // Left-associative: parse in a loop for as long as we can. + // + location l (get_location (t)); + value lhs (parse_eval_comp (t, tt, pmode, first)); + + // Use the pre-parse mechanism to implement short-circuit. + // + bool pp (pre_parse_); + + while (tt == type::log_and) + { + try + { + if (!pre_parse_ && !convert<bool> (move (lhs))) + pre_parse_ = true; + + next (t, tt); + l = get_location (t); + value rhs (parse_eval_comp (t, tt, pmode)); + + if (pre_parse_) + continue; + + // Store the result as bool value. + // + lhs = convert<bool> (move (rhs)); + } + catch (const invalid_argument& e) { fail (l) << e; } + } + + pre_parse_ = pp; + return lhs; + } + + value parser:: + parse_eval_comp (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of LHS + // leave: next token after last RHS + + // Left-associative: parse in a loop for as long as we can. 
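+    //
+    // Assumed examples of what ends up here: ($x == $y) or
+    // ($build.version >= 13000), possibly combined with && and ||
+    // which are handled by the callers above.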
+ // + value lhs (parse_eval_value (t, tt, pmode, first)); + + while (tt == type::equal || + tt == type::not_equal || + tt == type::less || + tt == type::less_equal || + tt == type::greater || + tt == type::greater_equal) + { + type op (tt); + location l (get_location (t)); + + next (t, tt); + value rhs (parse_eval_value (t, tt, pmode)); + + if (pre_parse_) + continue; + + // Use (potentially typed) comparison via value. If one of the values is + // typed while the other is not, then try to convert the untyped one to + // the other's type instead of complaining. This seems like a reasonable + // thing to do and will allow us to write: + // + // if ($build.version > 30000) + // + // Rather than having to write: + // + // if ($build.version > [uint64] 30000) + // + if (lhs.type != rhs.type) + { + // @@ Would be nice to pass location for diagnostics. + // + if (lhs.type == nullptr) + { + if (lhs) + typify (lhs, *rhs.type, nullptr); + } + else if (rhs.type == nullptr) + { + if (rhs) + typify (rhs, *lhs.type, nullptr); + } + else + fail (l) << "comparison between " << lhs.type->name << " and " + << rhs.type->name; + } + + bool r; + switch (op) + { + case type::equal: r = lhs == rhs; break; + case type::not_equal: r = lhs != rhs; break; + case type::less: r = lhs < rhs; break; + case type::less_equal: r = lhs <= rhs; break; + case type::greater: r = lhs > rhs; break; + case type::greater_equal: r = lhs >= rhs; break; + default: r = false; assert (false); + } + + // Store the result as a bool value. + // + lhs = value (r); + } + + return lhs; + } + + value parser:: + parse_eval_value (token& t, type& tt, pattern_mode pmode, bool first) + { + // enter: first token of value + // leave: next token after value + + // Parse value attributes if any. Note that it's ok not to have anything + // after the attributes, as in, ($foo == [null]), or even ([null]) + // + auto at (attributes_push (t, tt, true)); + + const location l (get_location (t)); + + value v; + switch (tt) + { + case type::log_not: + { + next (t, tt); + v = parse_eval_value (t, tt, pmode); + + if (pre_parse_) + break; + + try + { + // Store the result as bool value. + // + v = !convert<bool> (move (v)); + } + catch (const invalid_argument& e) { fail (l) << e; } + break; + } + default: + { + // If parse_value() gets called, it expects to see a value. Note that + // it will also handle nested eval contexts. + // + v = (tt != type::colon && + tt != type::question && + tt != type::comma && + + tt != type::rparen && + + tt != type::equal && + tt != type::not_equal && + tt != type::less && + tt != type::less_equal && + tt != type::greater && + tt != type::greater_equal && + + tt != type::log_or && + tt != type::log_and + + ? parse_value (t, tt, pmode) + : value (names ())); + } + } + + // If this is the first expression then handle the eval-qual special case + // (target-qualified name represented as a special ':'-style pair). + // + if (first && tt == type::colon) + { + if (at.first) + fail (at.second) << "attributes before target-qualified variable name"; + + if (!pre_parse_) + attributes_pop (); + + const location nl (get_location (t)); + next (t, tt); + value n (parse_value (t, tt, pattern_mode::ignore)); + + if (tt != type::rparen) + fail (t) << "expected ')' after variable name"; + + if (pre_parse_) + return v; // Empty. 
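+      //
+      // An assumed example of this form: $(exe{hello}: install),
+      // where the name before ':' is the target and the one after
+      // is the variable.
+      //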
+ + if (v.type != nullptr || !v || v.as<names> ().size () != 1) + fail (l) << "expected target before ':'"; + + if (n.type != nullptr || !n || n.as<names> ().size () != 1) + fail (nl) << "expected variable name after ':'"; + + names& ns (v.as<names> ()); + ns.back ().pair = ':'; + ns.push_back (move (n.as<names> ().back ())); + return v; + } + else + { + if (pre_parse_) + return v; // Empty. + + // Process attributes if any. + // + if (!at.first) + { + attributes_pop (); + return v; + } + + value r; + apply_value_attributes (nullptr, r, move (v), type::assign); + return r; + } + } + + pair<bool, location> parser:: + attributes_push (token& t, type& tt, bool standalone) + { + location l (get_location (t)); + bool has (tt == type::lsbrace); + + if (!pre_parse_) + attributes_.push (attributes {has, l, {}}); + + if (!has) + return make_pair (false, l); + + // Using '@' for attribute key-value pairs would be just too ugly. Seeing + // that we control what goes into keys/values, let's use a much nicer '='. + // + mode (lexer_mode::attribute, '='); + next (t, tt); + + has = (tt != type::rsbrace); + if (has) + { + names ns ( + parse_names ( + t, tt, pattern_mode::ignore, false, "attribute", nullptr)); + + if (!pre_parse_) + { + attributes& a (attributes_.top ()); + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + string k, v; + + try + { + k = convert<string> (move (*i)); + } + catch (const invalid_argument&) + { + fail (l) << "invalid attribute key '" << *i << "'"; + } + + if (i->pair) + { + if (i->pair != '=') + fail (l) << "unexpected pair style in attributes"; + + try + { + v = convert<string> (move (*++i)); + } + catch (const invalid_argument&) + { + fail (l) << "invalid attribute value '" << *i << "'"; + } + } + + a.ats.emplace_back (move (k), move (v)); + } + } + } + + if (tt != type::rsbrace) + fail (t) << "expected ']' instead of " << t; + + next (t, tt); + + if (!standalone && (tt == type::newline || tt == type::eos)) + fail (t) << "standalone attributes"; + + return make_pair (has, l); + } + + // Splice names from the name view into the destination name list while + // doing sensible things with pairs, types, etc. Return the number of + // the names added. + // + // If nv points to nv_storage then the names can be moved. + // + size_t parser:: + splice_names (const location& loc, + const names_view& nv, + names&& nv_storage, + names& ns, + const char* what, + size_t pairn, + const optional<project_name>& pp, + const dir_path* dp, + const string* tp) + { + // We could be asked to splice 0 elements (see the name pattern + // expansion). In this case may need to pop the first half of the + // pair. + // + if (nv.size () == 0) + { + if (pairn != 0) + ns.pop_back (); + + return 0; + } + + size_t start (ns.size ()); + + // Move if nv points to nv_storage, + // + bool m (nv.data () == nv_storage.data ()); + + for (const name& cn: nv) + { + name* n (m ? const_cast<name*> (&cn) : nullptr); + + // Project. + // + optional<project_name> p; + if (cn.proj) + { + if (pp) + fail (loc) << "nested project name " << *cn.proj << " in " << what; + + p = m ? move (n->proj) : cn.proj; + } + else if (pp) + p = pp; + + // Directory. + // + dir_path d; + if (!cn.dir.empty ()) + { + if (dp != nullptr) + { + if (cn.dir.absolute ()) + fail (loc) << "nested absolute directory " << cn.dir << " in " + << what; + + d = *dp / cn.dir; + } + else + d = m ? move (n->dir) : cn.dir; + } + else if (dp != nullptr) + d = *dp; + + // Type. 
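+        //
+        // An assumed example: when expanding the pattern in cxx{f*},
+        // the 'cxx' type passed as tp is attached here to every
+        // matched name, while a name that already carries its own
+        // type is diagnosed below as nested.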
+ // + string t; + if (!cn.type.empty ()) + { + if (tp != nullptr) + fail (loc) << "nested type name " << cn.type << " in " << what; + + t = m ? move (n->type) : cn.type; + } + else if (tp != nullptr) + t = *tp; + + // Value. + // + string v (m ? move (n->value) : cn.value); + + // If we are a second half of a pair. + // + if (pairn != 0) + { + // Check that there are no nested pairs. + // + if (cn.pair) + fail (loc) << "nested pair in " << what; + + // And add another first half unless this is the first instance. + // + if (pairn != ns.size ()) + ns.push_back (ns[pairn - 1]); + } + + ns.emplace_back (move (p), move (d), move (t), move (v)); + ns.back ().pair = cn.pair; + } + + return ns.size () - start; + } + + // Expand a name pattern. Note that the result can be empty (as in "no + // elements"). + // + size_t parser:: + expand_name_pattern (const location& l, + names&& pat, + names& ns, + const char* what, + size_t pairn, + const dir_path* dp, + const string* tp, + const target_type* tt) + { + assert (!pat.empty () && (tp == nullptr || tt != nullptr)); + + // We are going to accumulate the result in a vector which can result in + // quite a few linear searches. However, thanks to a few optimizations, + // this shouldn't be an issue for the common cases (e.g., a pattern plus + // a few exclusions). + // + names r; + bool dir (false); + + // Figure out the start directory. + // + const dir_path* sp; + dir_path s; + if (dp != nullptr) + { + if (dp->absolute ()) + sp = dp; + else + { + s = *pbase_ / *dp; + sp = &s; + } + } + else + sp = pbase_; + + // Compare string to name as paths and according to dir. + // + auto equal = [&dir] (const string& v, const name& n) -> bool + { + // Use path comparison (which may be slash/case-insensitive). + // + return path::traits_type::compare ( + v, dir ? n.dir.representation () : n.value) == 0; + }; + + // Compare name to pattern as paths and according to dir. + // + auto match = [&dir, sp] (const path& pattern, const name& n) -> bool + { + const path& p (dir ? path_cast<path> (n.dir) : path (n.value)); + return butl::path_match (pattern, p, *sp); + }; + + // Append name/extension to result according to dir. Store an indication + // of whether it was amended as well as whether the extension is present + // in the pair flag. The extension itself is stored in name::type. + // + auto append = [&r, &dir] (string&& v, optional<string>&& e, bool a) + { + name n (dir ? name (dir_path (move (v))) : name (move (v))); + + if (a) + n.pair |= 0x01; + + if (e) + { + n.type = move (*e); + n.pair |= 0x02; + } + + r.push_back (move (n)); + }; + + auto include_match = [&r, &equal, &append] (string&& m, + optional<string>&& e, + bool a) + { + auto i (find_if ( + r.begin (), + r.end (), + [&m, &equal] (const name& n) {return equal (m, n);})); + + if (i == r.end ()) + append (move (m), move (e), a); + }; + + auto include_pattern = + [&r, &append, &include_match, sp, &l, this] (string&& p, + optional<string>&& e, + bool a) + { + // If we don't already have any matches and our pattern doesn't contain + // multiple recursive wildcards, then the result will be unique and we + // can skip checking for duplicated. This should help quite a bit in the + // common cases where we have a pattern plus maybe a few exclusions. 
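+      //
+      // An assumed example: in cxx{f* +*oo} the name foo can match
+      // both patterns so the second inclusion has to check for
+      // duplicates, while a single pattern such as cxx{f*} cannot
+      // match the same file twice.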
+ // + bool unique (false); + if (r.empty ()) + { + size_t i (p.find ("**")); + unique = (i == string::npos || p.find ("**", i + 2) == string::npos); + } + + function<void (string&&, optional<string>&&)> appf; + if (unique) + appf = [a, &append] (string&& v, optional<string>&& e) + { + append (move (v), move (e), a); + }; + else + appf = [a, &include_match] (string&& v, optional<string>&& e) + { + include_match (move (v), move (e), a); + }; + + auto process = [this, &e, &appf, sp] (path&& m, + const string& p, + bool interm) + { + // Ignore entries that start with a dot unless the pattern that + // matched them also starts with a dot. Also ignore directories + // containing the .buildignore file (ignoring the test if we don't + // have a sufficiently setup project root). + // + const string& s (m.string ()); + if ((p[0] != '.' && s[path::traits_type::find_leaf (s)] == '.') || + (root_ != nullptr && + root_->root_extra != nullptr && + m.to_directory () && + exists (*sp / m / root_->root_extra->buildignore_file))) + return !interm; + + // Note that we have to make copies of the extension since there will + // multiple entries for each pattern. + // + if (!interm) + appf (move (m).representation (), optional<string> (e)); + + return true; + }; + + try + { + butl::path_search (path (move (p)), process, *sp); + } + catch (const system_error& e) + { + fail (l) << "unable to scan " << *sp << ": " << e; + } + }; + + auto exclude_match = [&r, &equal] (const string& m) + { + // We know there can only be one element so we use find_if() instead of + // remove_if() for efficiency. + // + auto i (find_if ( + r.begin (), + r.end (), + [&m, &equal] (const name& n) {return equal (m, n);})); + + if (i != r.end ()) + r.erase (i); + }; + + auto exclude_pattern = [&r, &match] (string&& p) + { + path pattern (move (p)); + + for (auto i (r.begin ()); i != r.end (); ) + { + if (match (pattern, *i)) + i = r.erase (i); + else + ++i; + } + }; + + // Process the pattern and inclusions/exclusions. + // + for (auto b (pat.begin ()), i (b), end (pat.end ()); i != end; ++i) + { + name& n (*i); + bool first (i == b); + + char s ('\0'); // Inclusion/exclusion sign (+/-). + + // Reduce inclusions/exclusions group (-/+{foo bar}) to simple name/dir. + // + if (n.typed () && n.type.size () == 1) + { + if (!first) + { + s = n.type[0]; + + if (s == '-' || s == '+') + n.type.clear (); + } + else + { + assert (n.type[0] == '+'); // Can only belong to inclusion group. + n.type.clear (); + } + } + + if (n.empty () || !(n.simple () || n.directory ())) + fail (l) << "invalid '" << n << "' in " << what << " pattern"; + + string v (n.simple () ? move (n.value) : move (n.dir).representation ()); + + // Figure out if this is inclusion or exclusion. + // + if (first) + s = '+'; // Treat as inclusion. + else if (s == '\0') + { + s = v[0]; + + assert (s == '-' || s == '+'); // Validated at the token level. + v.erase (0, 1); + + if (v.empty ()) + fail (l) << "empty " << what << " pattern"; + } + + // Amend the pattern or match in a target type-specific manner. + // + // Name splitting must be consistent with scope::find_target_type(). + // Since we don't do it for directories, we have to delegate it to the + // target_type::pattern() call. + // + bool a (false); // Amended. + optional<string> e; // Extension. 
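+      //
+      // An assumed example: for cxx{f*} the target type's pattern
+      // callback typically amends the value to f*.cxx so that
+      // matching is done on actual file names; the amendment is
+      // undone during post-processing below.
+      //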
+ { + bool d; + + if (tt != nullptr && tt->pattern != nullptr) + { + a = tt->pattern (*tt, *scope_, v, e, l, false); + d = path::traits_type::is_separator (v.back ()); + } + else + { + d = path::traits_type::is_separator (v.back ()); + + if (!d) + e = target::split_name (v, l); + } + + // Based on the first pattern verify inclusions/exclusions are + // consistently file/directory. + // + if (first) + dir = d; + else if (d != dir) + fail (l) << "inconsistent file/directory result in " << what + << " pattern"; + } + + // Factor non-empty extension back into the name for searching. + // + // Note that doing it at this stage means we don't support extension + // patterns. + // + if (e && !e->empty ()) + { + v += '.'; + v += *e; + } + + try + { + if (s == '+') + include_pattern (move (v), move (e), a); + else + { + if (v.find_first_of ("*?") != string::npos) + exclude_pattern (move (v)); + else + exclude_match (move (v)); + } + } + catch (const invalid_path& e) + { + fail (l) << "invalid path '" << e.path << "' in " << what + << " pattern"; + } + } + + // Post-process the result: remove extension, reverse target type-specific + // pattern/match amendments (essentially: cxx{*} -> *.cxx -> foo.cxx -> + // cxx{foo}), and recombined the result. + // + for (name& n: r) + { + string v; + optional<string> e; + + if (dir) + v = move (n.dir).representation (); + else + { + v = move (n.value); + + if ((n.pair & 0x02) != 0) + { + e = move (n.type); + + // Remove non-empty extension from the name (it got to be there, see + // above). + // + if (!e->empty ()) + v.resize (v.size () - e->size () - 1); + } + } + + bool de (false); // Default extension. + if ((n.pair & 0x01) != 0) + { + de = static_cast<bool> (e); + tt->pattern (*tt, *scope_, v, e, l, true); + de = de && !e; + } + + if (dir) + n.dir = dir_path (move (v)); + else + { + target::combine_name (v, e, de); + n.value = move (v); + } + + n.pair = '\0'; + } + + return splice_names ( + l, names_view (r), move (r), ns, what, pairn, nullopt, dp, tp); + } + + // Parse names inside {} and handle the following "crosses" (i.e., + // {a b}{x y}) if any. Return the number of names added to the list. + // + size_t parser:: + parse_names_trailer (token& t, type& tt, + names& ns, + pattern_mode pmode, + const char* what, + const string* separators, + size_t pairn, + const optional<project_name>& pp, + const dir_path* dp, + const string* tp, + bool cross) + { + assert (!pre_parse_); + + if (pp) + pmode = pattern_mode::ignore; + + next (t, tt); // Get what's after '{'. + const location loc (get_location (t)); // Start of names. + + size_t start (ns.size ()); + + if (pairn == 0 && start != 0 && ns.back ().pair) + pairn = start; + + names r; + + // Parse names until closing '}' expanding patterns. + // + auto parse = [&r, &t, &tt, pmode, what, separators, this] ( + const optional<project_name>& pp, + const dir_path* dp, + const string* tp) + { + const location loc (get_location (t)); + + size_t start (r.size ()); + + // This can be an ordinary name group or a pattern (with inclusions and + // exclusions). We want to detect which one it is since for patterns we + // want just the list of simple names without pair/dir/type added (those + // are added after the pattern expansion in parse_names_pattern()). + // + // Detecting which one it is is tricky. We cannot just peek at the token + // and look for some wildcards since the pattern can be the result of an + // expansion (or, worse, concatenation). 
Thus pattern_mode::detect: we + // are going to ask parse_names() to detect for us if the first name is + // a pattern. And if it is, to refrain from adding pair/dir/type. + // + optional<const target_type*> pat_tt ( + parse_names ( + t, tt, + r, + pmode == pattern_mode::expand ? pattern_mode::detect : pmode, + false /* chunk */, + what, + separators, + 0, // Handled by the splice_names() call below. + pp, dp, tp, + false /* cross */, + true /* curly */).pattern); + + if (tt != type::rcbrace) + fail (t) << "expected '}' instead of " << t; + + // See if this is a pattern. + // + if (pat_tt) + { + // Move the pattern names our of the result. + // + names ps; + if (start == 0) + ps = move (r); + else + ps.insert (ps.end (), + make_move_iterator (r.begin () + start), + make_move_iterator (r.end ())); + r.resize (start); + + expand_name_pattern (loc, move (ps), r, what, 0, dp, tp, *pat_tt); + } + }; + + // Parse and expand the first group. + // + parse (pp, dp, tp); + + // Handle crosses. The overall plan is to take what's in r, cross each + // element with the next group using the re-parse machinery, and store the + // result back to r. + // + while (cross && peek () == type::lcbrace && !peeked ().separated) + { + next (t, tt); // Get '{'. + + names ln (move (r)); + r.clear (); + + // Cross with empty LHS/RHS is empty. Handle the LHS case now by parsing + // and discaring RHS (empty RHS is handled "naturally" below). + // + if (ln.size () == 0) + { + parse (nullopt, nullptr, nullptr); + r.clear (); + continue; + } + + //@@ This can be a nested replay (which we don't support), for example, + // via target-specific var assignment. Add support for nested (2-level + // replay)? Why not use replay_guard for storage? Alternatively, don't + // use it here (see parse_for() for an alternative approach). + // + replay_guard rg (*this, ln.size () > 1); + for (auto i (ln.begin ()), e (ln.end ()); i != e; ) + { + next (t, tt); // Get what's after '{'. + const location loc (get_location (t)); + + name& l (*i); + + // "Promote" the lhs value to type. + // + if (!l.value.empty ()) + { + if (!l.type.empty ()) + fail (loc) << "nested type name " << l.value; + + l.type.swap (l.value); + } + + parse (l.proj, + l.dir.empty () ? nullptr : &l.dir, + l.type.empty () ? nullptr : &l.type); + + if (++i != e) + rg.play (); // Replay. + } + } + + // Splice the names into the result. Note that we have already handled + // project/dir/type qualification but may still have a pair. Fast-path + // common cases. + // + if (pairn == 0) + { + if (start == 0) + ns = move (r); + else + ns.insert (ns.end (), + make_move_iterator (r.begin ()), + make_move_iterator (r.end ())); + } + else + splice_names (loc, + names_view (r), move (r), + ns, what, + pairn, + nullopt, nullptr, nullptr); + + return ns.size () - start; + } + + bool parser:: + start_names (type& tt, bool lp) + { + return (tt == type::word || + tt == type::lcbrace || // Untyped name group: '{foo ...'. + tt == type::dollar || // Variable expansion: '$foo ...'. + (tt == type::lparen && lp) || // Eval context: '(foo) ...'. + tt == type::pair_separator); // Empty pair LHS: '@foo ...'. + } + + // Slashe(s) plus '%'. Note that here we assume '/' is there since that's + // in our buildfile "syntax". 
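+  //
+  // Assumed examples: in dir/file{foo} the '/' separates the
+  // directory and in libhello%lib{hello} the '%' separates the
+  // project name.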
+ // + const string parser::name_separators ( + string (path::traits_type::directory_separators) + '%'); + + auto parser:: + parse_names (token& t, type& tt, + names& ns, + pattern_mode pmode, + bool chunk, + const char* what, + const string* separators, + size_t pairn, + const optional<project_name>& pp, + const dir_path* dp, + const string* tp, + bool cross, + bool curly) -> parse_names_result + { + // Note that support for pre-parsing is partial, it does not handle + // groups ({}). + // + // If pairn is not 0, then it is an index + 1 of the first half of the + // pair for which we are parsing the second halves, for example: + // + // a@{b c d{e f} {}} + + tracer trace ("parser::parse_names", &path_); + + if (pp) + pmode = pattern_mode::ignore; + + // Returned value NULL/type and pattern (see below). + // + bool vnull (false); + const value_type* vtype (nullptr); + optional<const target_type*> rpat; + + // Buffer that is used to collect the complete name in case of an + // unseparated variable expansion or eval context, e.g., foo$bar($baz)fox. + // The idea is to concatenate all the individual parts in this buffer and + // then re-inject it into the loop as a single token. + // + // If the concatenation is untyped (see below), then the name should be + // simple (i.e., just a string). + // + bool concat (false); + bool concat_quoted (false); + name concat_data; + + auto concat_typed = [&vnull, &vtype, &concat, &concat_data, this] + (value&& rhs, const location& loc) + { + // If we have no LHS yet, then simply copy value/type. + // + if (concat) + { + small_vector<value, 2> a; + + // Convert LHS to value. + // + a.push_back (value (vtype)); // Potentially typed NULL value. + + if (!vnull) + a.back ().assign (move (concat_data), nullptr); + + // RHS. + // + a.push_back (move (rhs)); + + const char* l ((a[0].type != nullptr ? a[0].type->name : "<untyped>")); + const char* r ((a[1].type != nullptr ? a[1].type->name : "<untyped>")); + + pair<value, bool> p; + { + // Print the location information in case the function fails. + // + auto g ( + make_exception_guard ( + [&loc, l, r] () + { + if (verb != 0) + info (loc) << "while concatenating " << l << " to " << r << + info << "use quoting to force untyped concatenation"; + })); + + p = functions.try_call ( + scope_, "builtin.concat", vector_view<value> (a), loc); + } + + if (!p.second) + fail (loc) << "no typed concatenation of " << l << " to " << r << + info << "use quoting to force untyped concatenation"; + + rhs = move (p.first); + + // It seems natural to expect that a typed concatenation result + // is also typed. + // + assert (rhs.type != nullptr); + } + + vnull = rhs.null; + vtype = rhs.type; + + if (!vnull) + { + if (vtype != nullptr) + untypify (rhs); + + names& d (rhs.as<names> ()); + + // If the value is empty, then untypify() will (typically; no pun + // intended) represent it as an empty sequence of names rather than + // a sequence of one empty name. This is usually what we need (see + // simple_reverse() for details) but not in this case. + // + if (!d.empty ()) + { + assert (d.size () == 1); // Must be a single value. + concat_data = move (d[0]); + } + } + }; + + // Set the result pattern target type and switch to the ignore mode. + // + // The goal of the detect mode is to assemble the "raw" list (the pattern + // itself plus inclusions/exclusions) that will then be passed to + // parse_names_pattern(). 
So clear pair, directory, and type (they will be + // added during pattern expansion) and change the mode to ignore (to + // prevent any expansions in inclusions/exclusions). + // + auto pattern_detected = + [&pairn, &dp, &tp, &rpat, &pmode] (const target_type* ttp) + { + assert (pmode == pattern_mode::detect); + + pairn = 0; + dp = nullptr; + tp = nullptr; + pmode = pattern_mode::ignore; + rpat = ttp; + }; + + // Return '+' or '-' if a token can start an inclusion or exclusion + // (pattern or group), '\0' otherwise. The result can be used as bool. + // + // @@ Note that we only need to make sure that the leading '+' or '-' + // characters are unquoted. We could consider some partially quoted + // tokens as starting inclusion or exclusion as well, for example + // +'foo*'. However, currently we can not determine which part of a + // token is quoted, and so can't distinguish the above token from + // '+'foo*. This is why we end up with a criteria that is stricter than + // is really required. + // + auto pattern_prefix = [] (const token& t) -> char + { + char c; + return t.type == type::word && ((c = t.value[0]) == '+' || c == '-') && + t.qtype == quote_type::unquoted + ? c + : '\0'; + }; + + // A name sequence potentially starts with a pattern if it starts with a + // literal unquoted plus character. + // + bool ppat (pmode == pattern_mode::detect && pattern_prefix (t) == '+'); + + // Potential pattern inclusion group. To be recognized as such it should + // start with the literal unquoted '+{' string and expand into a non-empty + // name sequence. + // + // The first name in such a group is a pattern, regardless of whether it + // contains wildcard characters or not. The trailing names are inclusions. + // For example the following pattern groups are equivalent: + // + // cxx{+{f* *oo}} + // cxx{f* +*oo} + // + bool pinc (ppat && t.value == "+" && + peek () == type::lcbrace && !peeked ().separated); + + // Number of names in the last group. This is used to detect when + // we need to add an empty first pair element (e.g., @y) or when + // we have a (for now unsupported) multi-name LHS (e.g., {x y}@z). + // + size_t count (0); + size_t start (ns.size ()); + + for (bool first (true);; first = false) + { + // Note that here we assume that, except for the first iterartion, + // tt contains the type of the peeked token. + + // Automatically reset the detect pattern mode to expand after the + // first element. + // + if (pmode == pattern_mode::detect && start != ns.size ()) + pmode = pattern_mode::expand; + + // Return true if the next token (which should be peeked at) won't be + // part of the name. + // + auto last_token = [chunk, this] () + { + const token& t (peeked ()); + type tt (t.type); + + return ((chunk && t.separated) || !start_names (tt)); + }; + + // Return true if the next token (which should be peeked at) won't be + // part of this concatenation. The et argument can be used to recognize + // an extra (unseparated) token type as being concatenated. + // + auto last_concat = [this] (type et = type::eos) + { + const token& t (peeked ()); + type tt (t.type); + + return (t.separated || + (tt != type::word && + tt != type::dollar && + tt != type::lparen && + (et == type::eos ? true : tt != et))); + }; + + // If we have accumulated some concatenations, then we have two options: + // continue accumulating or inject. We inject if the next token is not a + // word, var expansion, or eval context or if it is separated. 
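+      //
+      // An assumed example: in foo$bar the expansion is accumulated
+      // into the concatenation buffer while in foo $bar the
+      // separation ends the first name and the expansion starts a
+      // new one.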
+ // + if (concat && last_concat ()) + { + // Concatenation does not affect the tokens we get, only what we do + // with them. As a result, we never set the concat flag during pre- + // parsing. + // + assert (!pre_parse_); + + bool quoted (concat_quoted); + + concat = false; + concat_quoted = false; + + // If this is a result of typed concatenation, then don't inject. For + // one we don't want any of the "interpretations" performed in the + // word parsing code below. + // + // And if this is the only name, then we also want to preserve the + // type in the result. + // + // There is one exception, however: if the type is path, dir_path, or + // string and what follows is an unseparated '{', then we need to + // untypify it and inject in order to support our directory/target- + // type syntax (this means that a target type must be a valid path + // component). For example: + // + // $out_root/foo/lib{bar} + // $out_root/$libtype{bar} + // + // And here is another exception: if we have a project, directory, or + // type, then this is a name and we should also untypify it (let's for + // now do it for the same set of types as the first exception). For + // example: + // + // dir/{$str} + // file{$str} + // + vnull = false; // A concatenation cannot produce NULL. + + if (vtype != nullptr) + { + bool e1 (tt == type::lcbrace && !peeked ().separated); + bool e2 (pp || dp != nullptr || tp != nullptr); + + if (e1 || e2) + { + if (vtype == &value_traits<path>::value_type || + vtype == &value_traits<string>::value_type) + ; // Representation is already in concat_data.value. + else if (vtype == &value_traits<dir_path>::value_type) + concat_data.value = move (concat_data.dir).representation (); + else + { + diag_record dr (fail (t)); + + if (e1) dr << "expected directory and/or target type"; + else if (e2) dr << "expected name"; + + dr << " instead of " << vtype->name << endf; + } + + vtype = nullptr; + // Fall through to injection. + } + else + { + ns.push_back (move (concat_data)); + + // Clear the type information if that's not the only name. + // + if (start != ns.size () || !last_token ()) + vtype = nullptr; + + // Restart the loop (but now with concat mode off) to handle + // chunking, etc. + // + continue; + } + } + + // Replace the current token with our injection (after handling it we + // will peek at the current token again). + // + // We don't know what exactly was quoted so approximating as partially + // mixed quoted. + // + tt = type::word; + t = token (move (concat_data.value), + true, + quoted ? quote_type::mixed : quote_type::unquoted, + false, + t.line, t.column); + } + else if (!first) + { + // If we are chunking, stop at the next separated token. + // + next (t, tt); + + if (chunk && t.separated) + break; + + // If we are parsing the pattern group, then space-separated tokens + // must start inclusions or exclusions (see above). + // + if (rpat && t.separated && tt != type::rcbrace && !pattern_prefix (t)) + fail (t) << "expected name pattern inclusion or exclusion"; + } + + // Name. + // + // A user may specify a value that is an invalid name (e.g., it contains + // '%' but the project name is invalid). While it may seem natural to + // expect quoting/escaping to be the answer, we may need to quote names + // (e.g., spaces in paths) and so in our model quoted values are still + // treated as names and we rely on reversibility if we need to treat + // them as values. The reasonable solution to the invalid name problem is + // then to treat them as values if they are quoted. 
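+      //
+      // An assumed example: an unquoted 50%off fails with an invalid
+      // project name error while '50%off' is accepted as a quoted
+      // value.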
+ // + if (tt == type::word) + { + tt = peek (); + + if (pre_parse_) + continue; + + string val (move (t.value)); + bool quoted (t.qtype != quote_type::unquoted); + + // Should we accumulate? If the buffer is not empty, then we continue + // accumulating (the case where we are separated should have been + // handled by the injection code above). If the next token is a var + // expansion or eval context and it is not separated, then we need to + // start accumulating. + // + if (concat || // Continue. + !last_concat ()) // Start. + { + // If LHS is typed then do typed concatenation. + // + if (concat && vtype != nullptr) + { + // Create untyped RHS. + // + names ns; + ns.push_back (name (move (val))); + concat_typed (value (move (ns)), get_location (t)); + } + else + { + auto& v (concat_data.value); + + if (v.empty ()) + v = move (val); + else + v += val; + } + + concat = true; + concat_quoted = quoted || concat_quoted; + + continue; + } + + // Find a separator (slash or %). + // + string::size_type p (separators != nullptr + ? val.find_last_of (*separators) + : string::npos); + + // First take care of project. A project-qualified name is not very + // common, so we can afford some copying for the sake of simplicity. + // + optional<project_name> p1; + const optional<project_name>* pp1 (&pp); + + if (p != string::npos) + { + bool last (val[p] == '%'); + string::size_type q (last ? p : val.rfind ('%', p - 1)); + + for (; q != string::npos; ) // Breakout loop. + { + // Process the project name. + // + string proj (val, 0, q); + + try + { + p1 = !proj.empty () + ? project_name (move (proj)) + : project_name (); + } + catch (const invalid_argument& e) + { + if (quoted) // See above. + break; + + fail (t) << "invalid project name '" << proj << "': " << e; + } + + if (pp) + fail (t) << "nested project name " << *p1; + + pp1 = &p1; + + // Now fix the rest of the name. + // + val.erase (0, q + 1); + p = last ? string::npos : p - (q + 1); + + break; + } + } + + string::size_type n (p != string::npos ? val.size () - 1 : 0); + + // See if this is a type name, directory prefix, or both. That + // is, it is followed by an un-separated '{'. + // + if (tt == type::lcbrace && !peeked ().separated) + { + next (t, tt); + + // Resolve the target, if there is one, for the potential pattern + // inclusion group. If we fail, then this is not an inclusion group. + // + const target_type* ttp (nullptr); + + if (pinc) + { + assert (val == "+"); + + if (tp != nullptr && scope_ != nullptr) + { + ttp = scope_->find_target_type (*tp); + + if (ttp == nullptr) + ppat = pinc = false; + } + } + + if (p != n && tp != nullptr && !pinc) + fail (t) << "nested type name " << val; + + dir_path d1; + const dir_path* dp1 (dp); + + string t1; + const string* tp1 (tp); + + try + { + if (p == string::npos) // type + tp1 = &val; + else if (p == n) // directory + { + if (dp == nullptr) + d1 = dir_path (val); + else + d1 = *dp / dir_path (val); + + dp1 = &d1; + } + else // both + { + t1.assign (val, p + 1, n - p); + + if (dp == nullptr) + d1 = dir_path (val, 0, p + 1); + else + d1 = *dp / dir_path (val, 0, p + 1); + + dp1 = &d1; + tp1 = &t1; + } + } + catch (const invalid_path& e) + { + fail (t) << "invalid path '" << e.path << "'"; + } + + count = parse_names_trailer ( + t, tt, ns, pmode, what, separators, pairn, *pp1, dp1, tp1, cross); + + // If empty group or empty name, then this is not a pattern inclusion + // group (see above). 
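+        //
+        // An assumed example: in cxx{+{$pats}} where $pats expands
+        // to nothing, the group is not treated as a pattern
+        // inclusion group.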
+ // + if (pinc) + { + if (count != 0 && (count > 1 || !ns.back ().empty ())) + pattern_detected (ttp); + + ppat = pinc = false; + } + + tt = peek (); + + continue; + } + + // See if this is a wildcard pattern. + // + // It should either contain a wildcard character or, in a curly + // context, start with unquoted '+'. + // + if (pmode != pattern_mode::ignore && + !*pp1 && // Cannot be project-qualified. + !quoted && // Cannot be quoted. + ((dp != nullptr && dp->absolute ()) || pbase_ != nullptr) && + ((val.find_first_of ("*?") != string::npos) || + (curly && val[0] == '+'))) + { + // Resolve the target if there is one. If we fail, then this is not + // a pattern. + // + const target_type* ttp (tp != nullptr && scope_ != nullptr + ? scope_->find_target_type (*tp) + : nullptr); + + if (tp == nullptr || ttp != nullptr) + { + if (pmode == pattern_mode::detect) + { + // Strip the literal unquoted plus character for the first + // pattern in the group. + // + if (ppat) + { + assert (val[0] == '+'); + + val.erase (0, 1); + ppat = pinc = false; + } + + // Reset the detect pattern mode to expand if the pattern is not + // followed by the inclusion/exclusion pattern/match. Note that + // if it is '}' (i.e., the end of the group), then it is a single + // pattern and the expansion is what we want. + // + if (!pattern_prefix (peeked ())) + pmode = pattern_mode::expand; + } + + if (pmode == pattern_mode::expand) + { + count = expand_name_pattern (get_location (t), + names {name (move (val))}, + ns, + what, + pairn, + dp, tp, ttp); + continue; + } + + pattern_detected (ttp); + + // Fall through. + } + } + + // If we are a second half of a pair, add another first half + // unless this is the first instance. + // + if (pairn != 0 && pairn != ns.size ()) + ns.push_back (ns[pairn - 1]); + + count = 1; + + // If it ends with a directory separator, then it is a directory. + // Note that at this stage we don't treat '.' and '..' as special + // (unless they are specified with a directory separator) because + // then we would have ended up treating '.: ...' as a directory + // scope. Instead, this is handled higher up the processing chain, + // in scope::find_target_type(). This would also mess up + // reversibility to simple name. + // + if (p == n) + { + // For reversibility to simple name, only treat it as a directory + // if the string is an exact representation. + // + dir_path dir (move (val), dir_path::exact); + + if (!dir.empty ()) + { + if (dp != nullptr) + dir = *dp / dir; + + ns.emplace_back (*pp1, + move (dir), + (tp != nullptr ? *tp : string ()), + string ()); + continue; + } + } + + ns.emplace_back (*pp1, + (dp != nullptr ? *dp : dir_path ()), + (tp != nullptr ? *tp : string ()), + move (val)); + continue; + } + + // Variable expansion, function call, or eval context. + // + if (tt == type::dollar || tt == type::lparen) + { + // These cases are pretty similar in that in both we quickly end up + // with a list of names that we need to splice into the result. + // + location loc; + value result_data; + const value* result (&result_data); + const char* what; // Variable, function, or evaluation context. + bool quoted (t.qtype != quote_type::unquoted); + + if (tt == type::dollar) + { + // Switch to the variable name mode. We want to use this mode for + // $foo but not for $(foo). Since we don't know whether the next + // token is a paren or a word, we turn it on and switch to the eval + // mode if what we get next is a paren. 
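+          //
+          // Assumed examples: $foo is lexed in the variable name
+          // mode, $(foo) switches to the eval mode below, and an
+          // unseparated paren after the name, as in $type($foo),
+          // makes it a function call further down.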
+ // + mode (lexer_mode::variable); + next (t, tt); + loc = get_location (t); + + name qual; + string name; + + if (t.separated) + ; // Leave the name empty to fail below. + else if (tt == type::word) + { + if (!pre_parse_) + name = move (t.value); + } + else if (tt == type::lparen) + { + expire_mode (); + values vs (parse_eval (t, tt, pmode)); //@@ OUT will parse @-pair and do well? + + if (!pre_parse_) + { + if (vs.size () != 1) + fail (loc) << "expected single variable/function name"; + + value& v (vs[0]); + + if (!v) + fail (loc) << "null variable/function name"; + + names storage; + vector_view<build2::name> ns (reverse (v, storage)); // Movable. + size_t n (ns.size ()); + + // We cannot handle scope-qualification in the eval context as + // we do for target-qualification (see eval-qual) since then we + // would be treating all paths as qualified variables. So we + // have to do it here. + // + if (n == 2 && ns[0].pair == ':') // $(foo: x) + { + qual = move (ns[0]); + + if (qual.empty ()) + fail (loc) << "empty variable/function qualification"; + } + else if (n == 2 && ns[0].directory ()) // $(foo/ x) + { + qual = move (ns[0]); + qual.pair = '/'; + } + else if (n > 1) + fail (loc) << "expected variable/function name instead of '" + << ns << "'"; + + // Note: checked for empty below. + // + if (!ns[n - 1].simple ()) + fail (loc) << "expected variable/function name instead of '" + << ns[n - 1] << "'"; + + name = move (ns[n - 1].value); + } + } + else + fail (t) << "expected variable/function name instead of " << t; + + if (!pre_parse_ && name.empty ()) + fail (loc) << "empty variable/function name"; + + // Figure out whether this is a variable expansion or a function + // call. + // + tt = peek (); + + // Note that we require function call opening paren to be + // unseparated; consider: $x ($x == 'foo' ? 'FOO' : 'BAR'). + // + if (tt == type::lparen && !peeked ().separated) + { + // Function call. + // + + next (t, tt); // Get '('. + + // @@ Should we use (target/scope) qualification (of name) as the + // context in which to call the function? Hm, interesting... + // + values args (parse_eval (t, tt, pmode)); + tt = peek (); + + if (pre_parse_) + continue; // As if empty result. + + // Note that we "move" args to call(). + // + result_data = functions.call (scope_, name, args, loc); + what = "function call"; + } + else + { + // Variable expansion. + // + + if (pre_parse_) + continue; // As if empty value. + + lookup l (lookup_variable (move (qual), move (name), loc)); + + if (l.defined ()) + result = l.value; // Otherwise leave as NULL result_data. + + what = "variable expansion"; + } + } + else + { + // Context evaluation. + // + + loc = get_location (t); + values vs (parse_eval (t, tt, pmode)); + tt = peek (); + + if (pre_parse_) + continue; // As if empty result. + + switch (vs.size ()) + { + case 0: result_data = value (names ()); break; + case 1: result_data = move (vs[0]); break; + default: fail (loc) << "expected single value"; + } + + what = "context evaluation"; + } + + // We never end up here during pre-parsing. + // + assert (!pre_parse_); + + // Should we accumulate? If the buffer is not empty, then we continue + // accumulating (the case where we are separated should have been + // handled by the injection code above). If the next token is a word + // or an expansion and it is not separated, then we need to start + // accumulating. We also reduce the $var{...} case to concatention + // and injection. + // + if (concat || // Continue. 
+ !last_concat (type::lcbrace)) // Start.
+ {
+ // This can be a typed or untyped concatenation. The rules that
+ // determine which one it is are as follows:
+ //
+ // 1. Determine whether to preserve the type of RHS: if its first
+ // token is quoted, then we do not.
+ //
+ // 2. Given LHS (if any) and RHS we do typed concatenation if
+ // either is typed.
+ //
+ // Here are some interesting corner cases to meditate on:
+ //
+ // $dir/"foo bar"
+ // $dir"/foo bar"
+ // "foo"$dir
+ // "foo""$dir"
+ // ""$dir
+ //
+
+ // First if RHS is typed but quoted then convert it to an untyped
+ // string.
+ //
+ // Conversion to an untyped string happens differently, depending
+ // on whether we are in a quoted or unquoted context. In an
+ // unquoted context we use $representation() which must return a
+ // "round-trippable representation" (and if that is not possible,
+ // then it should not be overloaded for a type). In a quoted
+ // context we use $string() which returns a "canonical
+ // representation" (e.g., a directory path without a trailing
+ // slash).
+ //
+ if (result->type != nullptr && quoted)
+ {
+ // RHS is already a value but it could be a const reference (to
+ // the variable value) while we need to move things around. So in
+ // this case we make a copy.
+ //
+ if (result != &result_data)
+ result = &(result_data = *result);
+
+ const char* t (result_data.type->name);
+
+ pair<value, bool> p;
+ {
+ // Print the location information in case the function fails.
+ //
+ auto g (
+ make_exception_guard (
+ [&loc, t] ()
+ {
+ if (verb != 0)
+ info (loc) << "while converting " << t << " to string";
+ }));
+
+ p = functions.try_call (
+ scope_, "string", vector_view<value> (&result_data, 1), loc);
+ }
+
+ if (!p.second)
+ fail (loc) << "no string conversion for " << t;
+
+ result_data = move (p.first);
+ untypify (result_data); // Convert to untyped simple name.
+ }
+
+ if ((concat && vtype != nullptr) || // LHS typed.
+ (result->type != nullptr)) // RHS typed.
+ {
+ if (result != &result_data) // Same reason as above.
+ result = &(result_data = *result);
+
+ concat_typed (move (result_data), loc);
+ }
+ //
+ // Untyped concatenation. Note that if RHS is NULL/empty, we still
+ // set the concat flag.
+ //
+ else if (!result->null && !result->empty ())
+ {
+ // This can only be an untyped value.
+ //
+ // @@ Could move if result == &result_data.
+ //
+ const names& lv (cast<names> (*result));
+
+ // This should be a simple value or a simple directory.
+ //
+ if (lv.size () > 1)
+ fail (loc) << "concatenating " << what << " contains multiple "
+ << "values";
+
+ const name& n (lv[0]);
+
+ if (n.qualified ())
+ fail (loc) << "concatenating " << what << " contains project "
+ << "name";
+
+ if (n.typed ())
+ fail (loc) << "concatenating " << what << " contains type";
+
+ if (!n.dir.empty ())
+ {
+ if (!n.value.empty ())
+ fail (loc) << "concatenating " << what << " contains "
+ << "directory";
+
+ // Note that here we cannot assume what's in dir is really a
+ // path (think s/foo/bar/) so we have to reverse it exactly.
+ //
+ concat_data.value += n.dir.representation ();
+ }
+ else
+ concat_data.value += n.value;
+ }
+
+ concat = true;
+ concat_quoted = quoted || concat_quoted;
+ }
+ else
+ {
+ // See if we should propagate the value NULL/type. We only do this
+ // if this is the only expansion, that is, it is the first and the
+ // next token is not part of the name.
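+ //
+ // For example (made-up variable names):
+ //
+ // x = $y # Sole expansion: y's NULL-ness and type propagate to x.
+ // x = $y z # Part of a list: reversed to plain untyped names.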
+ // + if (first && last_token ()) + { + vnull = result->null; + vtype = result->type; + } + + // Nothing else to do here if the result is NULL or empty. + // + if (result->null || result->empty ()) + continue; + + // @@ Could move if nv is result_data; see untypify(). + // + names nv_storage; + names_view nv (reverse (*result, nv_storage)); + + count = splice_names ( + loc, nv, move (nv_storage), ns, what, pairn, pp, dp, tp); + } + + continue; + } + + // Untyped name group without a directory prefix, e.g., '{foo bar}'. + // + if (tt == type::lcbrace) + { + count = parse_names_trailer ( + t, tt, ns, pmode, what, separators, pairn, pp, dp, tp, cross); + tt = peek (); + continue; + } + + // A pair separator. + // + if (tt == type::pair_separator) + { + if (pairn != 0) + fail (t) << "nested pair on the right hand side of a pair"; + + tt = peek (); + + if (!pre_parse_) + { + // Catch double pair separator ('@@'). Maybe we can use for + // something later (e.g., escaping). + // + if (!ns.empty () && ns.back ().pair) + fail (t) << "double pair separator"; + + if (t.separated || count == 0) + { + // Empty LHS, (e.g., @y), create an empty name. The second test + // will be in effect if we have something like v=@y. + // + ns.emplace_back (pp, + (dp != nullptr ? *dp : dir_path ()), + (tp != nullptr ? *tp : string ()), + string ()); + count = 1; + } + else if (count > 1) + fail (t) << "multiple " << what << "s on the left hand side " + << "of a pair"; + + ns.back ().pair = t.value[0]; + + // If the next token is separated, then we have an empty RHS. Note + // that the case where it is not a name/group (e.g., a newline/eos) + // is handled below, once we are out of the loop. + // + if (peeked ().separated) + { + ns.emplace_back (pp, + (dp != nullptr ? *dp : dir_path ()), + (tp != nullptr ? *tp : string ()), + string ()); + count = 0; + } + } + + continue; + } + + // Note: remember to update last_token() test if adding new recognized + // tokens. + + if (!first) + break; + + if (tt == type::rcbrace) // Empty name, e.g., dir{}. + { + // If we are a second half of a pair, add another first half + // unless this is the first instance. + // + if (pairn != 0 && pairn != ns.size ()) + ns.push_back (ns[pairn - 1]); + + ns.emplace_back (pp, + (dp != nullptr ? *dp : dir_path ()), + (tp != nullptr ? *tp : string ()), + string ()); + break; + } + else + // Our caller expected this to be something. + // + fail (t) << "expected " << what << " instead of " << t; + } + + // Handle the empty RHS in a pair, (e.g., y@). + // + if (!ns.empty () && ns.back ().pair) + { + ns.emplace_back (pp, + (dp != nullptr ? *dp : dir_path ()), + (tp != nullptr ? *tp : string ()), + string ()); + } + + return parse_names_result {!vnull, vtype, rpat}; + } + + void parser:: + skip_line (token& t, type& tt) + { + for (; tt != type::newline && tt != type::eos; next (t, tt)) ; + } + + void parser:: + skip_block (token& t, type& tt) + { + // Skip until } or eos, keeping track of the {}-balance. + // + for (size_t b (0); tt != type::eos; ) + { + if (tt == type::lcbrace || tt == type::rcbrace) + { + type ptt (peek ()); + if (ptt == type::newline || ptt == type::eos) // Block { or }. + { + if (tt == type::lcbrace) + ++b; + else + { + if (b == 0) + break; + + --b; + } + } + } + + skip_line (t, tt); + + if (tt != type::eos) + next (t, tt); + } + } + + bool parser:: + keyword (token& t) + { + assert (replay_ == replay::stop); // Can't be used in a replay. 
+ assert (t.type == type::word);
+
+ // The goal here is to allow using keywords as variable names and
+ // target types without imposing ugly restrictions/decorators on
+ // keywords (e.g., '.using' or 'USING'). A name is considered a
+ // potential keyword if:
+ //
+ // - it is not quoted [so a keyword can always be escaped] and
+ // - next token is '\n' (or eos) or '(' [so if(...) will work] or
+ // - next token is separated and is not '=', '=+', or '+=' [which
+ // means a "directive trailer" can never start with one of them].
+ //
+ // See tests/keyword.
+ //
+ if (t.qtype == quote_type::unquoted)
+ {
+ // We cannot peek at the whole token here since it might have to be
+ // lexed in a different mode. So peek at its first character.
+ //
+ pair<char, bool> p (lexer_->peek_char ());
+ char c (p.first);
+
+ // @@ Just checking for leading '+' is not sufficient, for example:
+ //
+ // print +foo
+ //
+ return c == '\n' || c == '\0' || c == '(' ||
+ (p.second && c != '=' && c != '+');
+ }
+
+ return false;
+ }
+
+ // Buildspec parsing.
+ //
+
+ // Here is the problem: we "overload" '(' and ')' to mean operation
+ // application rather than the eval context. At the same time we want to use
+ // parse_names() to parse names, get variable expansion/function calls,
+ // quoting, etc. We just need to disable the eval context. The way this is
+ // done has two parts: First, we parse names in chunks and detect and
+ // handle the opening paren ourselves. In other words, a buildspec like
+ // 'clean (./)' is "chunked" as 'clean', '(', etc. While this is fairly
+ // straightforward, there is one snag: concatenating eval contexts, as in
+ // 'clean(./)'. Normally, this will be treated as a single chunk and we
+ // don't want that. So here comes the trick (or hack, if you like): the
+ // buildspec lexer mode makes every opening paren token "separated" (i.e.,
+ // as if it was preceded by a space). This will disable concatenating
+ // eval.
+ //
+ // In fact, because this is only done in the buildspec mode, we can still
+ // use eval contexts provided that we quote them: '"cle(an)"'. Note that
+ // function calls also need quoting (since a separated '(' is not treated
+ // as a function call): '"$identity(update)"'.
+ //
+ // This poses a problem, though: if it's quoted then it is a concatenated
+ // expansion and therefore cannot contain multiple values, for example,
+ // $identity(foo/ bar/). So what we do is disable this chunking/separation
+ // after both the meta-operation and operation have been specified. So if we
+ // specify both explicitly, then we can use eval context, function calls,
+ // etc., normally: perform(update($identity(foo/ bar/))).
+ //
+ buildspec parser::
+ parse_buildspec (istream& is, const path& name)
+ {
+ path_ = &name;
+
+ // We do "effective escaping" and only for ['"\$(] (basically what's
+ // necessary inside a double-quoted literal plus the single quote).
+ //
+ lexer l (is, *path_, 1 /* line */, "\'\"\\$(");
+ lexer_ = &l;
+ scope_ = root_ = scope::global_;
+ pbase_ = &work; // Use current working directory.
+ target_ = nullptr;
+ prerequisite_ = nullptr;
+
+ // Turn on the buildspec mode/pairs recognition with '@' as the pair
+ // separator (e.g., src_root/@out_root/exe{foo bar}).
+ //
+ mode (lexer_mode::buildspec, '@');
+
+ token t;
+ type tt;
+ next (t, tt);
+
+ buildspec r (tt != type::eos
+ ?
parse_buildspec_clause (t, tt, 0) + : buildspec ()); + + if (tt != type::eos) + fail (t) << "expected operation or target instead of " << t; + + return r; + } + + static bool + opname (const name& n) + { + // First it has to be a non-empty simple name. + // + if (n.pair || !n.simple () || n.empty ()) + return false; + + // Like C identifier but with '-' instead of '_' as the delimiter. + // + for (size_t i (0); i != n.value.size (); ++i) + { + char c (n.value[i]); + if (c != '-' && !(i != 0 ? alnum (c) : alpha (c))) + return false; + } + + return true; + } + + buildspec parser:: + parse_buildspec_clause (token& t, type& tt, size_t depth) + { + buildspec bs; + + for (bool first (true);; first = false) + { + // We always start with one or more names. Eval context (lparen) only + // allowed if quoted. + // + if (!start_names (tt, mode () == lexer_mode::double_quoted)) + { + if (first) + fail (t) << "expected operation or target instead of " << t; + + break; + } + + const location l (get_location (t)); // Start of names. + + // This call will parse the next chunk of output and produce zero or + // more names. + // + names ns (parse_names (t, tt, pattern_mode::expand, depth < 2)); + + if (ns.empty ()) // Can happen if pattern expansion. + fail (l) << "expected operation or target"; + + // What these names mean depends on what's next. If it is an opening + // paren, then they are operation/meta-operation names. Otherwise they + // are targets. + // + if (tt == type::lparen) // Got by parse_names(). + { + if (ns.empty ()) + fail (t) << "expected operation name before '('"; + + for (const name& n: ns) + if (!opname (n)) + fail (l) << "expected operation name instead of '" << n << "'"; + + // Inside '(' and ')' we have another, nested, buildspec. Push another + // mode to keep track of the depth (used in the lexer implementation + // to decide when to stop separating '('). + // + mode (lexer_mode::buildspec, '@'); + + next (t, tt); // Get what's after '('. + const location l (get_location (t)); // Start of nested names. + buildspec nbs (parse_buildspec_clause (t, tt, depth + 1)); + + // Parse additional operation/meta-operation parameters. + // + values params; + while (tt == type::comma) + { + next (t, tt); + + // Note that for now we don't expand patterns. If it turns out we + // need this, then will probably have to be (meta-) operation- + // specific (via pre-parse or some such). + // + params.push_back (tt != type::rparen + ? parse_value (t, tt, pattern_mode::ignore) + : value (names ())); + } + + if (tt != type::rparen) + fail (t) << "expected ')' instead of " << t; + + expire_mode (); + next (t, tt); // Get what's after ')'. + + // Merge the nested buildspec into ours. But first determine if we are + // an operation or meta-operation and do some sanity checks. + // + bool meta (false); + for (const metaopspec& nms: nbs) + { + // We definitely shouldn't have any meta-operations. + // + if (!nms.name.empty ()) + fail (l) << "nested meta-operation " << nms.name; + + if (!meta) + { + // If we have any operations in the nested spec, then this mean + // that our names are meta-operation names. + // + for (const opspec& nos: nms) + { + if (!nos.name.empty ()) + { + meta = true; + break; + } + } + } + } + + // No nested meta-operations means we should have a single + // metaopspec object with empty meta-operation name. 
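+ //
+ // For example, given the buildspec 'perform(update(foo/))' the nested
+ // clause is 'update(foo/)' which contains an operation, so our names
+ // (here 'perform') are meta-operation names. Given 'update(foo/ bar/)'
+ // the nested clause contains only targets, so 'update' is an operation
+ // name.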
+ // + assert (nbs.size () == 1); + const metaopspec& nmo (nbs.back ()); + + if (meta) + { + for (name& n: ns) + { + bs.push_back (nmo); + bs.back ().name = move (n.value); + bs.back ().params = params; + } + } + else + { + // Since we are not a meta-operation, the nested buildspec should be + // just a bunch of targets. + // + assert (nmo.size () == 1); + const opspec& nos (nmo.back ()); + + if (bs.empty () || !bs.back ().name.empty ()) + bs.push_back (metaopspec ()); // Empty (default) meta operation. + + for (name& n: ns) + { + bs.back ().push_back (nos); + bs.back ().back ().name = move (n.value); + bs.back ().back ().params = params; + } + } + } + else if (!ns.empty ()) + { + // Group all the targets into a single operation. In other + // words, 'foo bar' is equivalent to 'update(foo bar)'. + // + if (bs.empty () || !bs.back ().name.empty ()) + bs.push_back (metaopspec ()); // Empty (default) meta operation. + + metaopspec& ms (bs.back ()); + + for (auto i (ns.begin ()), e (ns.end ()); i != e; ++i) + { + // @@ We may actually want to support this at some point. + // + if (i->qualified ()) + fail (l) << "expected target name instead of " << *i; + + if (opname (*i)) + ms.push_back (opspec (move (i->value))); + else + { + // Do we have the src_base? + // + dir_path src_base; + if (i->pair) + { + if (i->pair != '@') + fail << "unexpected pair style in buildspec"; + + if (i->typed ()) + fail (l) << "expected target src_base instead of " << *i; + + src_base = move (i->dir); + + if (!i->value.empty ()) + src_base /= dir_path (move (i->value)); + + ++i; + assert (i != e); // Got to have the second half of the pair. + } + + if (ms.empty () || !ms.back ().name.empty ()) + ms.push_back (opspec ()); // Empty (default) operation. + + opspec& os (ms.back ()); + os.emplace_back (move (src_base), move (*i)); + } + } + } + } + + return bs; + } + + lookup parser:: + lookup_variable (name&& qual, string&& name, const location& loc) + { + tracer trace ("parser::lookup_variable", &path_); + + // Process variable name. @@ OLD + // + if (name.front () == '.') // Fully namespace-qualified name. + name.erase (0, 1); + else + { + //@@ TODO : append namespace if any. + } + + const scope* s (nullptr); + const target* t (nullptr); + const prerequisite* p (nullptr); + + // If we are qualified, it can be a scope or a target. + // + enter_scope sg; + enter_target tg; + + if (qual.empty ()) + { + s = scope_; + t = target_; + p = prerequisite_; + } + else + { + switch (qual.pair) + { + case '/': + { + assert (qual.directory ()); + sg = enter_scope (*this, move (qual.dir)); + s = scope_; + break; + } + case ':': + { + qual.pair = '\0'; + + // @@ OUT TODO + // + tg = enter_target ( + *this, move (qual), build2::name (), true, loc, trace); + t = target_; + break; + } + default: assert (false); + } + } + + // Lookup. + // + const auto& var (var_pool.rw (*scope_).insert (move (name), true)); + + if (p != nullptr) + { + // The lookup depth is a bit of a hack but should be harmless since + // unused. + // + pair<lookup, size_t> r (p->vars[var], 1); + + if (!r.first.defined ()) + r = t->find_original (var); + + return var.overrides == nullptr + ? 
r.first + : t->base_scope ().find_override (var, move (r), true).first; + } + + if (t != nullptr) + { + if (var.visibility > variable_visibility::target) + { + fail (loc) << "variable " << var << " has " << var.visibility + << " visibility but is expanded in target context"; + } + + return (*t)[var]; + } + + if (s != nullptr) + { + if (var.visibility > variable_visibility::scope) + { + fail (loc) << "variable " << var << " has " << var.visibility + << " visibility but is expanded in scope context"; + } + + return (*s)[var]; + } + + // Undefined/NULL namespace variables are not allowed. + // + // @@ TMP this isn't proving to be particularly useful. + // + // if (!l) + // { + // if (var.name.find ('.') != string::npos) + // fail (loc) << "undefined/null namespace variable " << var; + // } + + return lookup (); + } + + void parser:: + switch_scope (const dir_path& d) + { + tracer trace ("parser::switch_scope", &path_); + + auto p (build2::switch_scope (*root_, d)); + scope_ = &p.first; + pbase_ = scope_->src_path_ != nullptr ? scope_->src_path_ : &d; + + if (p.second != root_) + { + root_ = p.second; + l5 ([&] + { + if (root_ != nullptr) + trace << "switching to root scope " << *root_; + else + trace << "switching to out of project scope"; + }); + } + } + + void parser:: + process_default_target (token& t) + { + tracer trace ("parser::process_default_target", &path_); + + // The logic is as follows: if we have an explicit current directory + // target, then that's the default target. Otherwise, we take the + // first target and use it as a prerequisite to create an implicit + // current directory target, effectively making it the default + // target via an alias. If there are no targets in this buildfile, + // then we don't do anything. + // + if (default_target_ == nullptr) // No targets in this buildfile. + return; + + target& dt (*default_target_); + + target* ct ( + const_cast<target*> ( // Ok (serial execution). + targets.find (dir::static_type, // Explicit current dir target. + scope_->out_path (), + dir_path (), // Out tree target. + string (), + nullopt, + trace))); + + if (ct == nullptr) + { + l5 ([&]{trace (t) << "creating current directory alias for " << dt;}); + + // While this target is not explicitly mentioned in the buildfile, we + // say that we behave as if it were. Thus not implied. + // + ct = &targets.insert (dir::static_type, + scope_->out_path (), + dir_path (), + string (), + nullopt, + false, + trace).first; + // Fall through. + } + else if (ct->implied) + { + ct->implied = false; + // Fall through. + } + else + return; // Existing and not implied. + + ct->prerequisites_state_.store (2, memory_order_relaxed); + ct->prerequisites_.emplace_back (prerequisite (dt)); + } + + void parser:: + enter_buildfile (const path& p) + { + tracer trace ("parser::enter_buildfile", &path_); + + dir_path d (p.directory ()); + + // Figure out if we need out. + // + dir_path out; + if (scope_->src_path_ != nullptr && + scope_->src_path () != scope_->out_path () && + d.sub (scope_->src_path ())) + { + out = out_src (d, *root_); + } + + targets.insert<buildfile> ( + move (d), + move (out), + p.leaf ().base ().string (), + p.extension (), // Always specified. + trace); + } + + type parser:: + next (token& t, type& tt) + { + replay_token r; + + if (peeked_) + { + r = move (peek_); + peeked_ = false; + } + else + r = replay_ != replay::play ? 
lexer_next () : replay_next (); + + if (replay_ == replay::save) + replay_data_.push_back (r); + + t = move (r.token); + tt = t.type; + return tt; + } + + inline type parser:: + next_after_newline (token& t, type& tt, char e) + { + if (tt == type::newline) + next (t, tt); + else if (tt != type::eos) + { + if (e == '\0') + fail (t) << "expected newline instead of " << t; + else + fail (t) << "expected newline after '" << e << "'"; + } + + return tt; + } + + type parser:: + peek () + { + if (!peeked_) + { + peek_ = (replay_ != replay::play ? lexer_next () : replay_next ()); + peeked_ = true; + } + + return peek_.token.type; + } +} diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx new file mode 100644 index 0000000..658f266 --- /dev/null +++ b/libbuild2/parser.hxx @@ -0,0 +1,673 @@ +// file : libbuild2/parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_PARSER_HXX +#define LIBBUILD2_PARSER_HXX + +#include <stack> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/spec.hxx> +#include <libbuild2/lexer.hxx> +#include <libbuild2/token.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class target; + class prerequisite; + + class LIBBUILD2_SYMEXPORT parser + { + public: + // If boot is true, then we are parsing bootstrap.build and modules + // should only be bootstrapped. + // + explicit + parser (bool boot = false): fail ("error", &path_), boot_ (boot) {} + + // Issue diagnostics and throw failed in case of an error. + // + void + parse_buildfile (istream&, const path& name, scope& root, scope& base); + + buildspec + parse_buildspec (istream&, const path& name); + + token + parse_variable (lexer&, scope&, const variable&, token_type kind); + + pair<value, token> + parse_variable_value (lexer&, scope&, const dir_path*, const variable&); + + names + parse_export_stub (istream& is, const path& p, scope& r, scope& b) + { + parse_buildfile (is, p, r, b); + return move (export_value_); + } + + // Recursive descent parser. + // + protected: + + // Pattern expansion mode. + // + enum class pattern_mode + { + ignore, // Treat as ordinary names. + detect, // Ignore pair/dir/type if the first name is a pattern. + expand // Expand to ordinary names. + }; + + // If one is true then parse a single (logical) line (logical means it + // can actually be several lines, e.g., an if-block). Return false if + // nothing has been parsed (i.e., we are still on the same token). + // + // Note that after this function returns, the token is the first token of + // the next line (or eos). + // + bool + parse_clause (token&, token_type&, bool one = false); + + void + parse_variable_block (token&, token_type&, const target_type*, string); + + // Ad hoc target names inside < ... >. 
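+ //
+ // For example, in a declaration along these lines (made-up names):
+ //
+ // <exe{foo} file{foo.map}>: cxx{foo}
+ //
+ // exe{foo} and file{foo.map} are parsed as one ad hoc group.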
+ // + struct adhoc_names_loc + { + names ns; + location loc; + }; + + using adhoc_names = small_vector<adhoc_names_loc, 1>; + + void + enter_adhoc_members (adhoc_names_loc&&, bool); + + small_vector<reference_wrapper<target>, 1> + enter_targets (names&&, const location&, adhoc_names&&, size_t); + + bool + parse_dependency (token&, token_type&, + names&&, const location&, + adhoc_names&&, + names&&, const location&, + bool = false); + + void + parse_assert (token&, token_type&); + + void + parse_print (token&, token_type&); + + void + parse_diag (token&, token_type&); + + void + parse_dump (token&, token_type&); + + void + parse_source (token&, token_type&); + + void + parse_include (token&, token_type&); + + void + parse_run (token&, token_type&); + + void + parse_import (token&, token_type&); + + void + parse_export (token&, token_type&); + + void + parse_using (token&, token_type&); + + void + parse_define (token&, token_type&); + + void + parse_if_else (token&, token_type&); + + void + parse_for (token&, token_type&); + + void + parse_variable (token&, token_type&, const variable&, token_type); + + void + parse_type_pattern_variable (token&, token_type&, + const target_type&, string, + const variable&, token_type, const location&); + + const variable& + parse_variable_name (names&&, const location&); + + // Note: calls attributes_push() that the caller must pop. + // + value + parse_variable_value (token&, token_type&); + + void + apply_variable_attributes (const variable&); + + void + apply_value_attributes (const variable*, // Optional. + value& lhs, + value&& rhs, + token_type assign_kind); + + // Return the value pack (values can be NULL/typed). Note that for an + // empty eval context ('()' potentially with whitespaces in between) the + // result is an empty pack, not a pack of one empty. + // + values + parse_eval (token&, token_type&, pattern_mode); + + values + parse_eval_comma (token&, token_type&, pattern_mode, bool = false); + + value + parse_eval_ternary (token&, token_type&, pattern_mode, bool = false); + + value + parse_eval_or (token&, token_type&, pattern_mode, bool = false); + + value + parse_eval_and (token&, token_type&, pattern_mode, bool = false); + + value + parse_eval_comp (token&, token_type&, pattern_mode, bool = false); + + value + parse_eval_value (token&, token_type&, pattern_mode, bool = false); + + // Attributes stack. We can have nested attributes, for example: + // + // x = [bool] ([uint64] $x == [uint64] $y) + // + // In this example we only apply the value attributes after evaluating + // the context, which has its own attributes. + // + struct attributes + { + bool has; // Has attributes flag. + location loc; // Start of attributes location. + vector<pair<string, string>> ats; // Attributes. + + explicit operator bool () const {return has;} + }; + + // Push a new entry into the attributes_ stack. If the next token is '[' + // parse the attribute sequence until ']' storing the result in the new + // stack entry and setting the 'has' flag (unless the attribute list is + // empty). Then get the next token and, if standalone is false, verify + // it is not newline/eos (i.e., there is something after it). Return the + // indication of whether there are any attributes and their location. + // + // Note that during pre-parsing nothing is pushed into the stack and + // the returned attributes object indicates there are no attributes. 
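+ //
+ // For example, in an assignment like
+ //
+ // x = [uint64] 123
+ //
+ // attributes_push() consumes '[uint64]' and the value is parsed next;
+ // the later apply_value_attributes() call is then expected to use the
+ // popped entry to type the resulting value.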
+ //
+ pair<bool, location>
+ attributes_push (token&, token_type&, bool standalone = false);
+
+ attributes
+ attributes_pop ()
+ {
+ assert (!pre_parse_);
+ attributes r (move (attributes_.top ()));
+ attributes_.pop ();
+ return r;
+ }
+
+ attributes&
+ attributes_top () {return attributes_.top ();}
+
+ // Source a stream optionally entering it as a buildfile and performing
+ // the default target processing.
+ //
+ void
+ source (istream&,
+ const path&,
+ const location&,
+ bool enter,
+ bool default_target);
+
+ // If chunk is true, then parse the smallest but complete, name-wise,
+ // chunk of input. Note that in this case you may still end up with
+ // multiple names, for example, {foo bar} or $foo. In the pre-parse mode
+ // always return an empty list of names.
+ //
+ // The what argument is used in diagnostics (e.g., "expected <what>
+ // instead of ...").
+ //
+ // The separators argument specifies the special characters to recognize
+ // inside the name. These can be the directory separators and the '%'
+ // project separator. Note that even if it is NULL, the result may still
+ // contain non-simple names due to variable expansions.
+ //
+
+ static const string name_separators;
+
+ names
+ parse_names (token& t, token_type& tt,
+ pattern_mode pmode,
+ bool chunk = false,
+ const char* what = "name",
+ const string* separators = &name_separators)
+ {
+ names ns;
+ parse_names (t, tt,
+ ns,
+ pmode,
+ chunk,
+ what,
+ separators,
+ 0,
+ nullopt, nullptr, nullptr);
+ return ns;
+ }
+
+ // Return true if this token starts a name. Or, to put it another way,
+ // calling parse_names() on this token won't fail with the "expected name
+ // instead of <this-token>" error. Only consider '(' if the second
+ // argument is true.
+ //
+ bool
+ start_names (token_type&, bool lparen = true);
+
+ // As above but return the result as a value, which can be typed and NULL.
+ //
+ value
+ parse_value (token& t, token_type& tt,
+ pattern_mode pmode,
+ const char* what = "name",
+ const string* separators = &name_separators,
+ bool chunk = false)
+ {
+ names ns;
+ auto r (parse_names (t, tt,
+ ns,
+ pmode,
+ chunk,
+ what,
+ separators,
+ 0,
+ nullopt, nullptr, nullptr));
+
+ value v (r.type); // Potentially typed NULL value.
+
+ // This should not fail since we are typing the result of reversal from
+ // the typed value.
+ //
+ if (r.not_null)
+ v.assign (move (ns), nullptr);
+
+ return v;
+ }
+
+ // Append names and return an indication of whether the parsed value is
+ // not NULL and whether it is typed (and whether it is a pattern if
+ // pattern_mode is detect).
+ //
+ // You may have noticed that what we return here is essentially a value
+ // and doing it this way (i.e., reversing it to untyped names and
+ // returning its type so that it can potentially be "typed back") is kind
+ // of backwards. The reason we do it this way is that in many places we
+ // expect things untyped and if we were to always return a (potentially
+ // typed) value, then we would have to reverse it in all those places.
+ // Still it may make sense to look into redesigning the whole thing one
+ // day.
+ //
+ // Currently the only way for the result to be NULL or have a type is if
+ // it is the result of a sole, unquoted variable expansion, function call,
+ // or context evaluation.
+ // + struct parse_names_result + { + bool not_null; + const value_type* type; + optional<const target_type*> pattern; + }; + + parse_names_result + parse_names (token&, token_type&, + names&, + pattern_mode, + bool chunk = false, + const char* what = "name", + const string* separators = &name_separators, + size_t pairn = 0, + const optional<project_name>& prj = nullopt, + const dir_path* dir = nullptr, + const string* type = nullptr, + bool cross = true, + bool curly = false); + + size_t + parse_names_trailer (token&, token_type&, + names&, + pattern_mode, + const char* what, + const string* separators, + size_t pairn, + const optional<project_name>& prj, + const dir_path* dir, + const string* type, + bool cross); + + size_t + expand_name_pattern (const location&, + names&&, + names&, + const char* what, + size_t pairn, + const dir_path* dir, + const string* type, + const target_type*); + + size_t + splice_names (const location&, + const names_view&, + names&&, + names&, + const char* what, + size_t pairn, + const optional<project_name>& prj, + const dir_path* dir, + const string* type); + + // Skip until newline or eos. + // + void + skip_line (token&, token_type&); + + // Skip until block-closing } or eos, taking into account nested blocks. + // + void + skip_block (token&, token_type&); + + // Return true if the name token can be considered a directive keyword. + // + bool + keyword (token&); + + // Buildspec. + // + buildspec + parse_buildspec_clause (token&, token_type&, size_t); + + // Customization hooks. + // + protected: + // If qual is not empty, then its pair member should indicate the kind + // of qualification: ':' -- target, '/' -- scope. + // + virtual lookup + lookup_variable (name&& qual, string&& name, const location&); + + // Utilities. + // + protected: + class enter_scope; + class enter_target; + class enter_prerequisite; + + // Switch to a new current scope. Note that this function might also have + // to switch to a new root scope if the new current scope is in another + // project. So both must be saved and restored. + // + void + switch_scope (const dir_path&); + + void + process_default_target (token&); + + // Enter buildfile as a target. + // + void + enter_buildfile (const path&); + + // Lexer. + // + protected: + location + get_location (const token& t) const + { + return build2::get_location (t, *path_); + } + + token_type + next (token&, token_type&); + + // If the current token is newline, then get the next token. Otherwise, + // fail unless the current token is eos (i.e., optional newline at the end + // of stream). If the after argument is not \0, use it in diagnostics as + // the token after which the newline was expectd. + // + token_type + next_after_newline (token&, token_type&, char after = '\0'); + + // Be careful with peeking and switching the lexer mode. See keyword() + // for more information. + // + token_type + peek (); + + token_type + peek (lexer_mode m, char ps = '\0') + { + // The idea is that if we already have something peeked, then it should + // be in the same mode. We also don't re-set the mode since it may have + // expired after the first token. + // + if (peeked_) + { + assert (peek_.mode == m); + return peek_.token.type; + } + + mode (m, ps); + return peek (); + } + + const token& + peeked () const + { + assert (peeked_); + return peek_.token; + } + + void + mode (lexer_mode m, char ps = '\0') + { + if (replay_ != replay::play) + lexer_->mode (m, ps); + else + // As a sanity check, make sure the mode matches the next token. 
Note + // that we don't check the pair separator since it can be overriden by + // the lexer's mode() implementation. + // + assert (replay_i_ != replay_data_.size () && + replay_data_[replay_i_].mode == m); + } + + lexer_mode + mode () const + { + if (replay_ != replay::play) + return lexer_->mode (); + else + { + assert (replay_i_ != replay_data_.size ()); + return replay_data_[replay_i_].mode; + } + } + + void + expire_mode () + { + if (replay_ != replay::play) + lexer_->expire_mode (); + } + + // Token saving and replaying. Note that it can only be used in certain + // contexts. Specifically, the code that parses a replay must not interact + // with the lexer directly (e.g., the keyword() test). Replays also cannot + // nest. For now we don't enforce any of this. + // + // Note also that the peeked token is not part of the replay, until it + // is "got". + // + void + replay_save () + { + assert (replay_ == replay::stop); + replay_ = replay::save; + } + + void + replay_play () + { + assert ((replay_ == replay::save && !replay_data_.empty ()) || + (replay_ == replay::play && replay_i_ == replay_data_.size ())); + + if (replay_ == replay::save) + replay_path_ = path_; // Save old path. + + replay_i_ = 0; + replay_ = replay::play; + } + + void + replay_stop () + { + if (replay_ == replay::play) + path_ = replay_path_; // Restore old path. + + replay_data_.clear (); + replay_ = replay::stop; + } + + struct replay_guard + { + replay_guard (parser& p, bool start = true) + : p_ (start ? &p : nullptr) + { + if (p_ != nullptr) + p_->replay_save (); + } + + void + play () + { + if (p_ != nullptr) + p_->replay_play (); + } + + ~replay_guard () + { + if (p_ != nullptr) + p_->replay_stop (); + } + + private: + parser* p_; + }; + + // Stop saving and get the data. + // + replay_tokens + replay_data () + { + assert (replay_ == replay::save); + + replay_tokens r (move (replay_data_)); + replay_data_.clear (); + replay_ = replay::stop; + return r; + } + + // Set the data and start playing. + // + void + replay_data (replay_tokens&& d) + { + assert (replay_ == replay::stop); + + replay_path_ = path_; // Save old path. + + replay_data_ = move (d); + replay_i_ = 0; + replay_ = replay::play; + } + + // Implementation details, don't call directly. + // + replay_token + lexer_next () + { + lexer_mode m (lexer_->mode ()); // Get it first since it may expire. + return replay_token {lexer_->next (), path_, m}; + } + + const replay_token& + replay_next () + { + assert (replay_i_ != replay_data_.size ()); + const replay_token& rt (replay_data_[replay_i_++]); + + // Update the path. Note that theoretically it is possible that peeking + // at the next token will "change" the path of the current token. The + // workaround would be to call get_location() before peeking. + // + path_ = rt.file; + + return rt; + } + + // Diagnostics. + // + protected: + const fail_mark fail; + + protected: + bool pre_parse_ = false; + bool boot_; + + const path* path_; // Current path. + lexer* lexer_; + + prerequisite* prerequisite_ = nullptr; // Current prerequisite, if any. + target* target_ = nullptr; // Current target, if any. + scope* scope_ = nullptr; // Current base scope (out_base). + scope* root_ = nullptr; // Current root scope (out_root). + + const dir_path* pbase_ = nullptr; // Current pattern base directory. 
+ + std::stack<attributes> attributes_; + + target* default_target_; + names export_value_; + + replay_token peek_; + bool peeked_ = false; + + enum class replay {stop, save, play} replay_ = replay::stop; + replay_tokens replay_data_; + size_t replay_i_; // Position of the next token during replay. + const path* replay_path_; // Path before replay began (to be restored). + }; +} + +#endif // LIBBUILD2_PARSER_HXX diff --git a/libbuild2/prerequisite.cxx b/libbuild2/prerequisite.cxx new file mode 100644 index 0000000..7355323 --- /dev/null +++ b/libbuild2/prerequisite.cxx @@ -0,0 +1,120 @@ +// file : libbuild2/prerequisite.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/prerequisite.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + // prerequisite_key + // + ostream& + operator<< (ostream& os, const prerequisite_key& pk) + { + if (pk.proj) + os << *pk.proj << '%'; + // + // Don't print scope if we are project-qualified or the prerequisite's + // directory is absolute. In both these cases the scope is not used to + // resolve it to target. + // + else if (!pk.tk.dir->absolute ()) + { + // Avoid printing './' in './:...', similar to what we do for the + // directory in target_key. + // + const dir_path& s (pk.scope->out_path ()); + + if (stream_verb (os).path < 1) + { + const string& r (diag_relative (s, false)); + + if (!r.empty ()) + os << r << ':'; + } + else + os << s << ':'; + } + + return os << pk.tk; + } + + // prerequisite + // + static inline optional<string> + to_ext (const string* e) + { + return e != nullptr ? optional<string> (*e) : nullopt; + } + + prerequisite:: + prerequisite (const target_type& t) + : proj (nullopt), + type (t.type ()), + dir (t.dir), + out (t.out), // @@ If it's empty, then we treat as undetermined? + name (t.name), + ext (to_ext (t.ext ())), + scope (t.base_scope ()), + target (&t), + vars (false /* global */) + { + } + + bool prerequisite:: + belongs (const target_type& t) const + { + const auto& p (t.prerequisites ()); + return !(p.empty () || this < &p.front () || this > &p.back ()); + } + + value& prerequisite:: + append (const variable& var, const target_type& t) + { + if (value* r = vars.find_to_modify (var).first) + return *r; + + value& r (assign (var)); // NULL. + + // Note: pretty similar logic to target::append(). + // + lookup l (t.find_original (var).first); + + if (l.defined ()) + r = *l; // Copy value (and type) from the target/outer scope. + + return r; + } + + // include() + // + include_type + include_impl (action a, + const target& t, + const string& v, + const prerequisite& p, + const target* m) + { + include_type r (false); + + if (v == "false") r = include_type::excluded; + else if (v == "adhoc") r = include_type::adhoc; + else if (v == "true") r = include_type::normal; + else + fail << "invalid " << var_include->name << " variable value " + << "'" << v << "' specified for prerequisite " << p; + + // Call the meta-operation override, if any (currently used by dist). + // + return current_mif->include == nullptr + ? 
r + : current_mif->include (a, t, prerequisite_member {p, m}, r); + } +} diff --git a/libbuild2/prerequisite.hxx b/libbuild2/prerequisite.hxx new file mode 100644 index 0000000..f79ce04 --- /dev/null +++ b/libbuild2/prerequisite.hxx @@ -0,0 +1,229 @@ +// file : libbuild2/prerequisite.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_PREREQUISITE_HXX +#define LIBBUILD2_PREREQUISITE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/target-key.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class target; + + // Light-weight (by being shallow-pointing) prerequisite key, similar + // to (and based on) target key. + // + // Note that unlike prerequisite, the key is not (necessarily) owned by a + // target. So for the key we instead have the base scope of the target that + // (would) own it. Note that we assume keys to be ephemeral enough for the + // base scope to remain unchanged. + // + class prerequisite_key + { + public: + typedef build2::scope scope_type; + + const optional<project_name>& proj; + target_key tk; // The .dir and .out members can be relative. + const scope_type* scope; // Can be NULL if tk.dir is absolute. + + template <typename T> + bool is_a () const {return tk.is_a<T> ();} + bool is_a (const target_type& tt) const {return tk.is_a (tt);} + }; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const prerequisite_key&); + + // Note that every data member except for the target is immutable (const). + // + class LIBBUILD2_SYMEXPORT prerequisite + { + public: + using scope_type = build2::scope; + using target_type = build2::target; + using target_type_type = build2::target_type; + + // Note that unlike targets, for prerequisites an empty out directory + // means undetermined rather than being definitely in the out tree. + // + // It might seem natural to keep the reference to the owner target instead + // of to the scope. But that's not the semantics that we have, consider: + // + // foo/obj{x}: bar/cxx{y} + // + // bar/ here is relative to the scope, not to foo/. Plus, bar/ can resolve + // to either src or out. + // + const optional<project_name> proj; + const target_type_type& type; + const dir_path dir; // Normalized absolute or relative (to scope). + const dir_path out; // Empty, normalized absolute, or relative. + const string name; + const optional<string> ext; // Absent if unspecified. + const scope_type& scope; + + // NULL if not yet resolved. Note that this should always be the "primary + // target", not a member of a target group. + // + // While normally only a matching rule should change this, if the + // prerequisite comes from the group, then it's possible that several + // rules will try to update it simultaneously. Thus the atomic. + // + mutable atomic<const target_type*> target {nullptr}; + + // Prerequisite-specific variables. + // + // Note that the lookup is often ad hoc (see bin.whole as an example). + // But see also parser::lookup_variable() if adding something here. + // + public: + variable_map vars; + + // Return a value suitable for assignment. See target for details. + // + value& + assign (const variable& var) {return vars.assign (var);} + + // Return a value suitable for appending. See target for details. 
Note + // that we have to explicitly pass the target that this prerequisite + // belongs to. + // + value& + append (const variable&, const target_type&); + + public: + prerequisite (optional<project_name> p, + const target_type_type& t, + dir_path d, + dir_path o, + string n, + optional<string> e, + const scope_type& s) + : proj (move (p)), + type (t), + dir (move (d)), + out (move (o)), + name (move (n)), + ext (move (e)), + scope (s), + vars (false /* global */) {} + + // Make a prerequisite from a target. + // + explicit + prerequisite (const target_type&); + + // Note that the returned key "tracks" the prerequisite; that is, any + // updates to the prerequisite's members will be reflected in the key. + // + prerequisite_key + key () const + { + return prerequisite_key {proj, {&type, &dir, &out, &name, ext}, &scope}; + } + + // As above but remap the target type to the specified. + // + prerequisite_key + key (const target_type_type& tt) const + { + return prerequisite_key {proj, {&tt, &dir, &out, &name, ext}, &scope}; + } + + // Return true if this prerequisite instance (physically) belongs to the + // target's prerequisite list. Note that this test only works if you use + // references to the container elements and the container hasn't been + // resized since such a reference was obtained. Normally this function is + // used when iterating over a combined prerequisites range to detect if + // the prerequisite came from the group (see group_prerequisites). + // + bool + belongs (const target_type&) const; + + // Prerequisite (target) type. + // + public: + template <typename T> + bool + is_a () const {return type.is_a<T> ();} + + bool + is_a (const target_type_type& tt) const {return type.is_a (tt);} + + public: + prerequisite (prerequisite&& x) + : proj (move (x.proj)), + type (x.type), + dir (move (x.dir)), + out (move (x.out)), + name (move (x.name)), + ext (move (x.ext)), + scope (x.scope), + target (x.target.load (memory_order_relaxed)), + vars (move (x.vars)) {} + + prerequisite (const prerequisite& x, memory_order o = memory_order_consume) + : proj (x.proj), + type (x.type), + dir (x.dir), + out (x.out), + name (x.name), + ext (x.ext), + scope (x.scope), + target (x.target.load (o)), + vars (x.vars) {} + }; + + inline ostream& + operator<< (ostream& os, const prerequisite& p) + { + return os << p.key (); + } + + using prerequisites = vector<prerequisite>; + + // Helpers for dealing with the prerequisite inclusion/exclusion (the + // 'include' buildfile variable, see var_include in context.hxx). + // + // Note that the include(prerequisite_member) overload is also provided. + // + // @@ Maybe this filtering should be incorporated into *_prerequisites() and + // *_prerequisite_members() logic? Could make normal > adhoc > excluded and + // then pass the "threshold". + // + class include_type + { + public: + enum value {excluded, adhoc, normal}; + + include_type (value v): v_ (v) {} + include_type (bool v): v_ (v ? 
normal : excluded) {} + + operator value () const {return v_;} + explicit operator bool () const {return v_ != excluded;} + + private: + value v_; + }; + + include_type + include (action, + const target&, + const prerequisite&, + const target* = nullptr); +} + +#include <libbuild2/prerequisite.ixx> + +#endif // LIBBUILD2_PREREQUISITE_HXX diff --git a/libbuild2/prerequisite.ixx b/libbuild2/prerequisite.ixx new file mode 100644 index 0000000..d62af49 --- /dev/null +++ b/libbuild2/prerequisite.ixx @@ -0,0 +1,34 @@ +// file : libbuild2/prerequisite.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/export.hxx> + +namespace build2 +{ + LIBBUILD2_SYMEXPORT include_type + include_impl (action, + const target&, + const string&, + const prerequisite&, + const target*); + + LIBBUILD2_SYMEXPORT extern const variable* var_include; // context.cxx + + inline include_type + include (action a, const target& t, const prerequisite& p, const target* m) + { + // Most of the time this variable will not be specified, so let's optimize + // for that. + // + if (p.vars.empty ()) + return true; + + const string* v (cast_null<string> (p.vars[var_include])); + + if (v == nullptr) + return true; + + return include_impl (a, t, *v, p, m); + } +} diff --git a/libbuild2/rule-map.hxx b/libbuild2/rule-map.hxx new file mode 100644 index 0000000..26f80a8 --- /dev/null +++ b/libbuild2/rule-map.hxx @@ -0,0 +1,123 @@ +// file : libbuild2/rule-map.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_RULE_MAP_HXX +#define LIBBUILD2_RULE_MAP_HXX + +#include <map> + +#include <libbutl/prefix-map.mxx> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> + +namespace build2 +{ + class rule; + + using hint_rule_map = + butl::prefix_map<string, reference_wrapper<const rule>, '.'>; + + using target_type_rule_map = std::map<const target_type*, hint_rule_map>; + + // This is an "indexed map" with operation_id being the index. Entry + // with id 0 is a wildcard. + // + // Note that while we may resize some vectors during non-serial load, this + // is MT-safe since we never cache any references to their elements. + // + class operation_rule_map + { + public: + template <typename T> + void + insert (operation_id oid, const char* hint, const rule& r) + { + // 3 is the number of builtin operations. + // + if (oid >= map_.size ()) + map_.resize ((oid < 3 ? 3 : oid) + 1); + + map_[oid][&T::static_type].emplace (hint, r); + } + + // Return NULL if not found. + // + const target_type_rule_map* + operator[] (operation_id oid) const + { + return map_.size () > oid ? &map_[oid] : nullptr; + } + + bool + empty () const {return map_.empty ();} + + private: + vector<target_type_rule_map> map_; + }; + + // This is another indexed map but this time meta_operation_id is the + // index. The implementation is different, however: here we use a linked + // list with the first, statically-allocated node corresponding to the + // perform meta-operation. The idea is to try and get away with a dynamic + // allocation for the common cases since most rules will be registered + // for perform, at least on non-root scopes. + // + // @@ Redo using small_vector? 
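+ //
+ // A rough usage sketch (the rule object, target type, and hint are
+ // illustrative): a module would normally register its rules on a
+ // scope's rule map along these lines, with operation id 0 meaning
+ // "any operation":
+ //
+ // rule_map& rm (...); // For example, scope::rules.
+ //
+ // rm.insert<file> (perform_update_id, "cxx.compile", r);
+ // rm.insert<file> (perform_id, 0, "cxx.compile", r);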
+ // + class rule_map + { + public: + + template <typename T> + void + insert (action_id a, const char* hint, const rule& r) + { + insert<T> (a >> 4, a & 0x0F, hint, r); + } + + // 0 oid is a wildcard. + // + template <typename T> + void + insert (meta_operation_id mid, + operation_id oid, + const char* hint, + const rule& r) + { + if (mid_ == mid) + map_.insert<T> (oid, hint, r); + else + { + if (next_ == nullptr) + next_.reset (new rule_map (mid)); + + next_->insert<T> (mid, oid, hint, r); + } + } + + // Return NULL if not found. + // + const operation_rule_map* + operator[] (meta_operation_id mid) const + { + return mid == mid_ ? &map_ : next_ == nullptr ? nullptr : (*next_)[mid]; + } + + explicit + rule_map (meta_operation_id mid = perform_id): mid_ (mid) {} + + bool + empty () const {return map_.empty () && next_ == nullptr;} + + private: + meta_operation_id mid_; + operation_rule_map map_; + unique_ptr<rule_map> next_; + }; +} + +#endif // LIBBUILD2_RULE_MAP_HXX diff --git a/libbuild2/rule.cxx b/libbuild2/rule.cxx new file mode 100644 index 0000000..0ade8a3 --- /dev/null +++ b/libbuild2/rule.cxx @@ -0,0 +1,309 @@ +// file : libbuild2/rule.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/rule.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // file_rule + // + // Note that this rule is special. It is the last, fallback rule. If + // it doesn't match, then no other rule can possibly match and we have + // an error. It also cannot be ambigious with any other rule. As a + // result the below implementation bends or ignores quite a few rules + // that normal implementations should follow. So you probably shouldn't + // use it as a guide to implement your own, normal, rules. + // + bool file_rule:: + match (action a, target& t, const string&) const + { + tracer trace ("file_rule::match"); + + // While strictly speaking we should check for the file's existence + // for every action (because that's the condition for us matching), + // for some actions this is clearly a waste. Say, perform_clean: we + // are not doing anything for this action so not checking if the file + // exists seems harmless. + // + switch (a) + { + case perform_clean_id: + return true; + default: + { + // While normally we shouldn't do any of this in match(), no other + // rule should ever be ambiguous with the fallback one and path/mtime + // access is atomic. In other words, we know what we are doing but + // don't do this in normal rules. + + // First check the timestamp. This takes care of the special "trust + // me, this file exists" situations (used, for example, for installed + // stuff where we know it's there, just not exactly where). + // + mtime_target& mt (t.as<mtime_target> ()); + + timestamp ts (mt.mtime ()); + + if (ts != timestamp_unknown) + return ts != timestamp_nonexistent; + + // Otherwise, if this is not a path_target, then we don't match. + // + path_target* pt (mt.is_a<path_target> ()); + if (pt == nullptr) + return false; + + const path* p (&pt->path ()); + + // Assign the path. 
+ // + if (p->empty ()) + { + // Since we cannot come up with an extension, ask the target's + // derivation function to treat this as prerequisite (just like in + // search_existing_file()). + // + if (pt->derive_extension (true) == nullptr) + { + l4 ([&]{trace << "no default extension for target " << *pt;}); + return false; + } + + p = &pt->derive_path (); + } + + ts = mtime (*p); + pt->mtime (ts); + + if (ts != timestamp_nonexistent) + return true; + + l4 ([&]{trace << "no existing file for target " << *pt;}); + return false; + } + } + } + + recipe file_rule:: + apply (action a, target& t) const + { + /* + @@ outer + return noop_recipe; + */ + + // Update triggers the update of this target's prerequisites so it would + // seem natural that we should also trigger their cleanup. However, this + // possibility is rather theoretical so until we see a real use-case for + // this functionality, we simply ignore the clean operation. + // + if (a.operation () == clean_id) + return noop_recipe; + + // If we have no prerequisites, then this means this file is up to date. + // Return noop_recipe which will also cause the target's state to be set + // to unchanged. This is an important optimization on which quite a few + // places that deal with predominantly static content rely. + // + if (!t.has_group_prerequisites ()) // Group as in match_prerequisites(). + return noop_recipe; + + // Match all the prerequisites. + // + match_prerequisites (a, t); + + // Note that we used to provide perform_update() which checked that this + // target is not older than any of its prerequisites. However, later we + // realized this is probably wrong: consider a script with a testscript as + // a prerequisite; chances are the testscript will be newer than the + // script and there is nothing wrong with that. + // + return default_recipe; + } + + const file_rule file_rule::instance; + + // alias_rule + // + bool alias_rule:: + match (action, target&, const string&) const + { + return true; + } + + recipe alias_rule:: + apply (action a, target& t) const + { + // Inject dependency on our directory (note: not parent) so that it is + // automatically created on update and removed on clean. + // + inject_fsdir (a, t, false); + + match_prerequisites (a, t); + return default_recipe; + } + + const alias_rule alias_rule::instance; + + // fsdir_rule + // + bool fsdir_rule:: + match (action, target&, const string&) const + { + return true; + } + + recipe fsdir_rule:: + apply (action a, target& t) const + { + // Inject dependency on the parent directory. Note that it must be first + // (see perform_update_direct()). + // + inject_fsdir (a, t); + + match_prerequisites (a, t); + + switch (a) + { + case perform_update_id: return &perform_update; + case perform_clean_id: return &perform_clean; + default: assert (false); return default_recipe; + } + } + + static bool + fsdir_mkdir (const target& t, const dir_path& d) + { + // Even with the exists() check below this can still be racy so only print + // things if we actually did create it (similar to build2::mkdir()). + // + auto print = [&t, &d] () + { + if (verb >= 2) + text << "mkdir " << d; + else if (verb && current_diag_noise) + text << "mkdir " << t; + }; + + // Note: ignoring the dry_run flag. 
+ // + mkdir_status ms; + + try + { + ms = try_mkdir (d); + } + catch (const system_error& e) + { + print (); + fail << "unable to create directory " << d << ": " << e << endf; + } + + if (ms == mkdir_status::success) + { + print (); + return true; + } + + return false; + } + + target_state fsdir_rule:: + perform_update (action a, const target& t) + { + target_state ts (target_state::unchanged); + + // First update prerequisites (e.g. create parent directories) then create + // this directory. + // + // @@ outer: should we assume for simplicity its only prereqs are fsdir{}? + // + if (!t.prerequisite_targets[a].empty ()) + ts = straight_execute_prerequisites (a, t); + + // The same code as in perform_update_direct() below. + // + const dir_path& d (t.dir); // Everything is in t.dir. + + // Generally, it is probably correct to assume that in the majority of + // cases the directory will already exist. If so, then we are going to get + // better performance by first checking if it indeed exists. See + // butl::try_mkdir() for details. + // + // @@ Also skip prerequisites? Can't we return noop in apply? + // + if (!exists (d) && fsdir_mkdir (t, d)) + ts |= target_state::changed; + + return ts; + } + + void fsdir_rule:: + perform_update_direct (action a, const target& t) + { + // First create the parent directory. If present, it is always first. + // + const target* p (t.prerequisite_targets[a].empty () + ? nullptr + : t.prerequisite_targets[a][0]); + + if (p != nullptr && p->is_a<fsdir> ()) + perform_update_direct (a, *p); + + // The same code as in perform_update() above. + // + const dir_path& d (t.dir); + + if (!exists (d)) + fsdir_mkdir (t, d); + } + + target_state fsdir_rule:: + perform_clean (action a, const target& t) + { + // The reverse order of update: first delete this directory, then clean + // prerequisites (e.g., delete parent directories). + // + // Don't fail if we couldn't remove the directory because it is not empty + // (or is current working directory). In this case rmdir() will issue a + // warning when appropriate. + // + target_state ts (rmdir (t.dir, t, current_diag_noise ? 1 : 2) + ? target_state::changed + : target_state::unchanged); + + if (!t.prerequisite_targets[a].empty ()) + ts |= reverse_execute_prerequisites (a, t); + + return ts; + } + + const fsdir_rule fsdir_rule::instance; + + // noop_rule + // + bool noop_rule:: + match (action, target&, const string&) const + { + return true; + } + + recipe noop_rule:: + apply (action, target&) const + { + return noop_recipe; + } + + const noop_rule noop_rule::instance; +} diff --git a/libbuild2/rule.hxx b/libbuild2/rule.hxx new file mode 100644 index 0000000..abd754e --- /dev/null +++ b/libbuild2/rule.hxx @@ -0,0 +1,107 @@ +// file : libbuild2/rule.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_RULE_HXX +#define LIBBUILD2_RULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/action.hxx> +#include <libbuild2/target.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Once a rule is registered (for a scope), it is treated as immutable. If + // you need to modify some state (e.g., counters or some such), then make + // sure it is MT-safe. + // + // Note: match() is only called once but may not be followed by apply(). 
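  // As an illustration, a minimal rule implementing the interface declared
  // below might look along these lines (a sketch only: hello_rule and its
  // diagnostics are hypothetical and not part of this header; the built-in
  // rules further down follow the same pattern). A real rule would normally
  // also match its prerequisites in apply(), for example via
  // match_prerequisites().
  //
  //   class hello_rule: public rule
  //   {
  //   public:
  //     virtual bool
  //     match (action, target&, const string&) const override
  //     {
  //       return true; // Match any target we are registered for.
  //     }
  //
  //     virtual recipe
  //     apply (action a, target&) const override
  //     {
  //       switch (a)
  //       {
  //       case perform_update_id: return &perform_update;
  //       default:                return noop_recipe;
  //       }
  //     }
  //
  //     static target_state
  //     perform_update (action, const target& t)
  //     {
  //       text << "hello " << t;
  //       return target_state::changed;
  //     }
  //   };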
+ // + class rule + { + public: + virtual bool + match (action, target&, const string& hint) const = 0; + + virtual recipe + apply (action, target&) const = 0; + }; + + // Fallback rule that only matches if the file exists. It will also match + // an mtime_target provided it has a set timestamp. + // + class LIBBUILD2_SYMEXPORT file_rule: public rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + file_rule () {} + static const file_rule instance; + }; + + class LIBBUILD2_SYMEXPORT alias_rule: public rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + alias_rule () {} + static const alias_rule instance; + }; + + // Note that this rule ignores the dry_run flag; see mkdir() in filesystem + // for the rationale. + // + class LIBBUILD2_SYMEXPORT fsdir_rule: public rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + static target_state + perform_update (action, const target&); + + static target_state + perform_clean (action, const target&); + + // Sometimes, as an optimization, we want to emulate execute_direct() + // of fsdir{} without the overhead of switching to the execute phase. + // + static void + perform_update_direct (action, const target&); + + fsdir_rule () {} + static const fsdir_rule instance; + }; + + // Fallback rule that always matches and does nothing. + // + class LIBBUILD2_SYMEXPORT noop_rule: public rule + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + noop_rule () {} + static const noop_rule instance; + }; +} + +#endif // LIBBUILD2_RULE_HXX diff --git a/libbuild2/scheduler.cxx b/libbuild2/scheduler.cxx new file mode 100644 index 0000000..8ac2b97 --- /dev/null +++ b/libbuild2/scheduler.cxx @@ -0,0 +1,820 @@ +// file : libbuild2/scheduler.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/scheduler.hxx> + +#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) +# include <pthread.h> +# ifdef __FreeBSD__ +# include <pthread_np.h> // pthread_attr_get_np() +# endif +#endif + +#ifndef _WIN32 +# include <thread> // this_thread::sleep_for() +#else +# include <libbutl/win32-utility.hxx> + +# include <chrono> +#endif + +#include <cerrno> +#include <exception> // std::terminate() + +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + // TLS cache of thread's task queue. + // + // Note that scheduler::task_queue struct is private. + // + static +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + void* scheduler_queue = nullptr; + + scheduler::task_queue* scheduler:: + queue () noexcept + { + return static_cast<scheduler::task_queue*> (scheduler_queue); + } + + void scheduler:: + queue (scheduler::task_queue* q) noexcept + { + scheduler_queue = q; + } + + size_t scheduler:: + wait (size_t start_count, const atomic_count& task_count, work_queue wq) + { + // Note that task_count is a synchronization point. + // + size_t tc; + + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; + + assert (max_active_ != 1); // Serial execution, nobody to wait for. + + // See if we can run some of our own tasks. 
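    // (Recap of the work_queue modes for the code below: with work_none we
    // go straight to suspend(); with work_one we recheck the task count
    // after every task we pop; with work_all we drain our own queue before
    // rechecking it. See the work_queue enum in scheduler.hxx.)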
+ // + if (wq != work_none) + { + // If we are waiting on someone else's task count then there migh still + // be no queue (set by async()). + // + if (task_queue* tq = queue ()) + { + for (lock ql (tq->mutex); !tq->shutdown && !empty_back (*tq); ) + { + pop_back (*tq, ql); + + if (wq == work_one) + { + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; + } + } + + // Note that empty task queue doesn't automatically mean the task + // count has been decremented (some might still be executing + // asynchronously). + // + if ((tc = task_count.load (memory_order_acquire)) <= start_count) + return tc; + } + } + + return suspend (start_count, task_count); + } + + void scheduler:: + deactivate () + { + if (max_active_ == 1) // Serial execution. + return; + + lock l (mutex_); + + active_--; + waiting_++; + progress_++; + + if (waiting_ > stat_max_waiters_) + stat_max_waiters_ = waiting_; + + // A spare active thread has become available. If there are ready masters + // or eager helpers, wake someone up. + // + if (ready_ != 0) + { + ready_condv_.notify_one (); + } + else if (queued_task_count_.load (std::memory_order_consume) != 0) + { + activate_helper (l); + } + // @@ TODO: Redo as a separate "monitoring" thread. + // + // This still doesn't work for the phase lock case where we call + // deactivate and then go wait on a condition variable: we are doing + // deadlock detection while holding the lock that prevents other + // threads from making progress! + // +#if 0 + else if (active_ == 0) + { + // We may have a deadlock which can happen because of dependency cycles. + // + // Relying on the active_ count alone is not precise enough, however: + // some threads might be transitioning between the active/waiting/ready + // states. Carefully accounting for this is not trivial, to say the + // least (especially in the face of spurious wakeups). So we are going + // to do a "fuzzy" deadlock detection by measuring "progress". The idea + // is that those transitions should be pretty short-lived and so if we + // wait for a couple of hundreds context switches, then we should be + // able to distinguish a real deadlock from the transition case. + // + size_t p (progress_); + + for (size_t i (0); i != 100; ++i) + { + l.unlock (); + this_thread::yield () is not enough. + l.lock (); + + if (p != progress_) + break; + } + + if (p == progress_) + { + // Reactivate and fail. + // + waiting_--; + active_++; + + // Shutting things down cleanly is tricky: we could have handled it in + // the scheduler (e.g., by setting a flag and then waking everyone up, + // similar to shutdown). But there could also be "external waiters" + // that have called deactivate() -- we have no way to wake those up. + // So for now we are going to abort (the nice thing about abort is if + // this is not a dependency cycle, then we have a core to examine). + // + error << "deadlock detected, can be caused by a dependency cycle" << + info << "re-run with -s to diagnose dependency cycles"; + + std::terminate (); + } + } +#endif + } + + void scheduler:: + activate (bool collision) + { + if (max_active_ == 1) // Serial execution. + return; + + lock l (mutex_); + + if (collision) + stat_wait_collisions_++; + + // If we have spare active threads, then become active. Otherwise it + // enters the ready queue. 
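    // (That is, wait in the ready queue until an active slot frees up. Note
    // that each leg of the waiting -> ready -> active transition bumps
    // progress_, which feeds the fuzzy deadlock detection sketched in
    // deactivate().)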
+ // + waiting_--; + ready_++; + progress_++; + + while (!shutdown_ && active_ >= max_active_) + ready_condv_.wait (l); + + ready_--; + active_++; + progress_++; + + if (shutdown_) + throw_generic_error (ECANCELED); + } + + void scheduler:: + sleep (const duration& d) + { + deactivate (); + + // MINGW GCC 4.9 doesn't implement this_thread so use Win32 Sleep(). + // +#ifndef _WIN32 + this_thread::sleep_for (d); +#else + using namespace chrono; + + Sleep (static_cast<DWORD> (duration_cast<milliseconds> (d).count ())); +#endif + + activate (); + } + + size_t scheduler:: + suspend (size_t start_count, const atomic_count& task_count) + { + wait_slot& s ( + wait_queue_[ + hash<const atomic_count*> () (&task_count) % wait_queue_size_]); + + // This thread is no longer active. + // + deactivate (); + + // Note that the task count is checked while holding the lock. We also + // have to notify while holding the lock (see resume()). The aim here + // is not to end up with a notification that happens between the check + // and the wait. + // + size_t tc (0); + bool collision; + { + lock l (s.mutex); + + // We have a collision if there is already a waiter for a different + // task count. + // + collision = (s.waiters++ != 0 && s.task_count != &task_count); + + // This is nuanced: we want to always have the task count of the last + // thread to join the queue. Otherwise, if threads are leaving and + // joining the queue simultaneously, we may end up with a task count of + // a thread group that is no longer waiting. + // + s.task_count = &task_count; + + // We could probably relax the atomic access since we use a mutex for + // synchronization though this has a different tradeoff (calling wait + // because we don't see the count). + // + while (!(s.shutdown || + (tc = task_count.load (memory_order_acquire)) <= start_count)) + s.condv.wait (l); + + s.waiters--; + } + + // This thread is no longer waiting. + // + activate (collision); + + return tc; + } + + void scheduler:: + resume (const atomic_count& tc) + { + if (max_active_ == 1) // Serial execution, nobody to wakeup. + return; + + wait_slot& s ( + wait_queue_[hash<const atomic_count*> () (&tc) % wait_queue_size_]); + + // See suspend() for why we must hold the lock. + // + lock l (s.mutex); + + if (s.waiters != 0) + s.condv.notify_all (); + } + + scheduler:: + ~scheduler () + { + try { shutdown (); } catch (system_error&) {} + } + + auto scheduler:: + wait_idle () -> lock + { + lock l (mutex_); + + assert (waiting_ == 0); + assert (ready_ == 0); + + while (active_ != init_active_ || starting_ != 0) + { + l.unlock (); + this_thread::yield (); + l.lock (); + } + + return l; + } + + size_t scheduler:: + shard_size (size_t mul, size_t div) const + { + size_t n (max_threads_ == 1 ? 0 : max_threads_ * mul / div / 4); + + // Experience shows that we want something close to 2x for small numbers, + // then reduce to 1.5x in-between, and 1x for large ones. + // + // Note that Intel Xeons are all over the map when it comes to cores (6, + // 8, 10, 12, 14, 16, 18, 20, 22). + // + return // HW threads x arch-bits (see max_threads below) + n == 0 ? 1 : // serial + // + // 2x + // + n == 1 ? 3 : + n == 2 ? 5 : + n == 4 ? 11 : + n == 6 ? 13 : + n == 8 ? 17 : // 2 x 4 + n == 16 ? 31 : // 4 x 4, 2 x 8 + // + // 1.5x + // + n == 32 ? 47 : // 4 x 8 + n == 48 ? 53 : // 6 x 8 + n == 64 ? 67 : // 8 x 8 + n == 80 ? 89 : // 10 x 8 + // + // 1x + // + n == 96 ? 101 : // 12 x 8 + n == 112 ? 127 : // 14 x 8 + n == 128 ? 131 : // 16 x 8 + n == 144 ? 139 : // 18 x 8 + n == 160 ? 
157 : // 20 x 8 + n == 176 ? 173 : // 22 x 8 + n == 192 ? 191 : // 24 x 8 + n == 224 ? 223 : // 28 x 8 + n == 256 ? 251 : // 32 x 8 + n == 288 ? 271 : // 36 x 8 + n == 320 ? 313 : // 40 x 8 + n == 352 ? 331 : // 44 x 8 + n == 384 ? 367 : // 48 x 8 + n == 512 ? 499 : // 64 x 8 + n - 1; // Assume it is even. + } + + void scheduler:: + startup (size_t max_active, + size_t init_active, + size_t max_threads, + size_t queue_depth, + optional<size_t> max_stack) + { + // Lock the mutex to make sure our changes are visible in (other) active + // threads. + // + lock l (mutex_); + + max_stack_ = max_stack; + + // Use 8x max_active on 32-bit and 32x max_active on 64-bit. Unless we + // were asked to run serially. + // + if (max_threads == 0) + max_threads = (max_active == 1 ? 1 : + sizeof (void*) < 8 ? 8 : 32) * max_active; + + assert (shutdown_ && + init_active != 0 && + init_active <= max_active && + max_active <= max_threads); + + active_ = init_active_ = init_active; + max_active_ = orig_max_active_ = max_active; + max_threads_ = max_threads; + + // This value should be proportional to the amount of hardware concurrency + // we have (no use queing things up if helpers cannot keep up). Note that + // the queue entry is quite sizable. + // + // The relationship is as follows: we want to have a deeper queue if the + // tasks take long (e.g., compilation) and shorter if they are quick (e.g, + // test execution). If the tasks are quick then the synchronization + // overhead required for queuing/dequeuing things starts to dominate. + // + task_queue_depth_ = queue_depth != 0 + ? queue_depth + : max_active * 4; + + queued_task_count_.store (0, memory_order_relaxed); + + if ((wait_queue_size_ = max_threads == 1 ? 0 : shard_size ()) != 0) + wait_queue_.reset (new wait_slot[wait_queue_size_]); + + // Reset counters. + // + stat_max_waiters_ = 0; + stat_wait_collisions_ = 0; + + progress_ = 0; + + for (size_t i (0); i != wait_queue_size_; ++i) + wait_queue_[i].shutdown = false; + + shutdown_ = false; + } + + void scheduler:: + tune (size_t max_active) + { + if (max_active == 0) + max_active = orig_max_active_; + + assert (max_active >= init_active_ && + max_active <= orig_max_active_); + + // The scheduler must not be active though some threads might still be + // comming off from finishing a task. So we busy-wait for them. + // + lock l (wait_idle ()); + + max_active_ = max_active; + } + + auto scheduler:: + shutdown () -> stat + { + // Our overall approach to shutdown is not to try and stop everything as + // quickly as possible but rather to avoid performing any tasks. This + // avoids having code littered with if(shutdown) on every other line. + + stat r; + lock l (mutex_); + + if (!shutdown_) + { + // Collect statistics. + // + r.thread_helpers = helpers_; + + // Signal shutdown. + // + shutdown_ = true; + + for (size_t i (0); i != wait_queue_size_; ++i) + { + wait_slot& ws (wait_queue_[i]); + lock l (ws.mutex); + ws.shutdown = true; + } + + for (task_queue& tq: task_queues_) + { + lock ql (tq.mutex); + r.task_queue_full += tq.stat_full; + tq.shutdown = true; + } + + // Wait for all the helpers to terminate waking up any thread that + // sleeps. 
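      // (The loop below unlocks before notifying so that the woken threads
      // don't immediately block on mutex_, then busy-waits with yield()
      // until every helper has observed the shutdown flag and exited.)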
+ // + while (helpers_ != 0) + { + bool i (idle_ != 0); + bool r (ready_ != 0); + bool w (waiting_ != 0); + + l.unlock (); + + if (i) + idle_condv_.notify_all (); + + if (r) + ready_condv_.notify_all (); + + if (w) + for (size_t i (0); i != wait_queue_size_; ++i) + wait_queue_[i].condv.notify_all (); + + this_thread::yield (); + l.lock (); + } + + // Free the memory. + // + wait_queue_.reset (); + task_queues_.clear (); + + r.thread_max_active = orig_max_active_; + r.thread_max_total = max_threads_; + r.thread_max_waiting = stat_max_waiters_; + + r.task_queue_depth = task_queue_depth_; + r.task_queue_remain = queued_task_count_.load (memory_order_consume); + + r.wait_queue_slots = wait_queue_size_; + r.wait_queue_collisions = stat_wait_collisions_; + } + + return r; + } + + scheduler::monitor_guard scheduler:: + monitor (atomic_count& c, size_t t, function<size_t (size_t)> f) + { + assert (monitor_count_ == nullptr && t != 0); + + // While the scheduler must not be active, some threads might still be + // comming off from finishing a task and trying to report progress. So we + // busy-wait for them (also in ~monitor_guard()). + // + lock l (wait_idle ()); + + monitor_count_ = &c; + monitor_tshold_.store (t, memory_order_relaxed); + monitor_init_ = c.load (memory_order_relaxed); + monitor_func_ = move (f); + + return monitor_guard (this); + } + + void scheduler:: + activate_helper (lock& l) + { + if (!shutdown_) + { + if (idle_ != 0) + { + idle_condv_.notify_one (); + } + // + // Ignore the max_threads value if we have queued tasks but no active + // threads. This means everyone is waiting for something to happen but + // nobody is doing anything (e.g., working the queues). This, for + // example, can happen if a thread waits for a task that is in its queue + // but is below the mark. + // + else if (init_active_ + helpers_ < max_threads_ || + (active_ == 0 && + queued_task_count_.load (memory_order_consume) != 0)) + { + create_helper (l); + } + } + } + + void scheduler:: + create_helper (lock& l) + { + helpers_++; + starting_++; + l.unlock (); + + // Restore the counters if the thread creation fails. + // + struct guard + { + lock* l; + size_t& h; + size_t& s; + + ~guard () {if (l != nullptr) {l->lock (); h--; s--;}} + + } g {&l, helpers_, starting_}; + + // For some platforms/compilers the default stack size for newly created + // threads may differ from that of the main thread. Here are the default + // main/new thread sizes (in KB) for some of them: + // + // Linux : 8192 / 8196 + // FreeBSD : 524288 / 2048 + // MacOS : 8192 / 512 + // MinGW : 2048 / 2048 + // VC : 1024 / 1024 + // + // Provided the main thread size is less-equal than + // LIBBUILD2_SANE_STACK_SIZE (which defaults to + // sizeof(void*)*LIBBUILD2_DEFAULT_STACK_SIZE), we make sure that the new + // thread stack is the same as for the main thread. Otherwise, we cap it + // at LIBBUILD2_DEFAULT_STACK_SIZE (default: 8MB). This can also be + // overridden at runtime with the --max-stack build2 driver option + // (remember to update its documentation of changing anything here). + // + // On Windows the stack size is the same for all threads and is customized + // at the linking stage (see build2/buildfile). Thus neither *_STACK_SIZE + // nor --max-stack have any effect here. + // + // On Linux, FreeBSD and MacOS there is no way to change it once and for + // all newly created threads. Thus we will use pthreads, creating threads + // with the stack size of the current thread. 
This way all threads will + // inherit the main thread's stack size (since the first helper is always + // created by the main thread). + // + // Note also the interaction with our backtrace functionality: in order to + // get the complete stack trace we let unhandled exceptions escape the + // thread function expecting the runtime to still call std::terminate. In + // particular, having a noexcept function anywhere on the exception's path + // causes the stack trace to be truncated, at least on Linux. + // +#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + +#ifndef LIBBUILD2_DEFAULT_STACK_SIZE +# define LIBBUILD2_DEFAULT_STACK_SIZE 8388608 // 8MB +#endif + +#ifndef LIBBUILD2_SANE_STACK_SIZE +# define LIBBUILD2_SANE_STACK_SIZE (sizeof(void*) * LIBBUILD2_DEFAULT_STACK_SIZE) +#endif + + // Auto-deleter. + // + struct attr_deleter + { + void + operator() (pthread_attr_t* a) const + { + int r (pthread_attr_destroy (a)); + + // We should be able to destroy the valid attributes object, unless + // something is severely damaged. + // + assert (r == 0); + } + }; + + // Calculate the current thread stack size. Don't forget to update #if + // conditions above when adding the stack size customization for a new + // platforms/compilers. + // + size_t stack_size; + { +#ifdef __linux__ + // Note that the attributes must not be initialized. + // + pthread_attr_t attr; + int r (pthread_getattr_np (pthread_self (), &attr)); + + if (r != 0) + throw_system_error (r); + + unique_ptr<pthread_attr_t, attr_deleter> ad (&attr); + r = pthread_attr_getstacksize (&attr, &stack_size); + + if (r != 0) + throw_system_error (r); + +#elif defined(__FreeBSD__) + pthread_attr_t attr; + int r (pthread_attr_init (&attr)); + + if (r != 0) + throw_system_error (r); + + unique_ptr<pthread_attr_t, attr_deleter> ad (&attr); + r = pthread_attr_get_np (pthread_self (), &attr); + + if (r != 0) + throw_system_error (r); + + r = pthread_attr_getstacksize (&attr, &stack_size); + + if (r != 0) + throw_system_error (r); + +#else // defined(__APPLE__) + stack_size = pthread_get_stacksize_np (pthread_self ()); +#endif + } + + // Cap the size if necessary. + // + if (max_stack_) + { + if (*max_stack_ != 0 && stack_size > *max_stack_) + stack_size = *max_stack_; + } + else if (stack_size > LIBBUILD2_SANE_STACK_SIZE) + stack_size = LIBBUILD2_DEFAULT_STACK_SIZE; + + pthread_attr_t attr; + int r (pthread_attr_init (&attr)); + + if (r != 0) + throw_system_error (r); + + unique_ptr<pthread_attr_t, attr_deleter> ad (&attr); + + // Create the thread already detached. + // + r = pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + + if (r != 0) + throw_system_error (r); + + r = pthread_attr_setstacksize (&attr, stack_size); + + if (r != 0) + throw_system_error (r); + + pthread_t t; + r = pthread_create (&t, &attr, helper, this); + + if (r != 0) + throw_system_error (r); +#else + thread t (helper, this); + t.detach (); +#endif + + g.l = nullptr; // Disarm. + } + + void* scheduler:: + helper (void* d) + { + scheduler& s (*static_cast<scheduler*> (d)); + + // Note that this thread can be in an in-between state (not active or + // idle) but only while holding the lock. Which means that if we have the + // lock then we can account for all of them (this is important during + // shutdown). Except when the thread is just starting, before acquiring + // the lock for the first time, which we handle with the starting count. 
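    // (Helper loop below: if an active slot is free, become active and work
    // the task queues; otherwise, and once the queues are drained, go idle
    // on idle_condv_ until activate_helper() or shutdown() wakes us up.)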
+ // + lock l (s.mutex_); + s.starting_--; + + while (!s.shutdown_) + { + // If there is a spare active thread, become active and go looking for + // some work. + // + if (s.active_ < s.max_active_) + { + s.active_++; + + while (s.queued_task_count_.load (memory_order_consume) != 0) + { + // Queues are never removed which means we can get the current range + // and release the main lock while examining each of them. + // + auto it (s.task_queues_.begin ()); + size_t n (s.task_queues_.size ()); // Different to end(). + l.unlock (); + + // Note: we have to be careful not to advance the iterator past the + // last element (since what's past could be changing). + // + for (size_t i (0);; ++it) + { + task_queue& tq (*it); + + for (lock ql (tq.mutex); !tq.shutdown && !s.empty_front (tq); ) + s.pop_front (tq, ql); + + if (++i == n) + break; + } + + l.lock (); + } + + s.active_--; + + // While executing the tasks a thread might have become ready. + // + if (s.ready_ != 0) + s.ready_condv_.notify_one (); + } + + // Become idle and wait for a notification. + // + s.idle_++; + s.idle_condv_.wait (l); + s.idle_--; + } + + s.helpers_--; + return nullptr; + } + + auto scheduler:: + create_queue () -> task_queue& + { + // Note that task_queue_depth is immutable between startup() and + // shutdown() (but see join()). + // + task_queue* tq; + { + lock l (mutex_); + task_queues_.emplace_back (task_queue_depth_); + tq = &task_queues_.back (); + tq->shutdown = shutdown_; + } + + queue (tq); + return *tq; + } +} diff --git a/libbuild2/scheduler.hxx b/libbuild2/scheduler.hxx new file mode 100644 index 0000000..09c9e02 --- /dev/null +++ b/libbuild2/scheduler.hxx @@ -0,0 +1,709 @@ +// file : libbuild2/scheduler.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCHEDULER_HXX +#define LIBBUILD2_SCHEDULER_HXX + +#include <list> +#include <mutex> +#include <tuple> +#include <atomic> +#include <type_traits> // aligned_storage, etc +#include <condition_variable> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Scheduler of tasks and threads. Works best for "substantial" tasks (e.g., + // running a process), where in comparison thread synchronization overhead + // is negligible. + // + // A thread (called "master") may need to perform several tasks which can be + // done in parallel (e.g., update all the prerequisites or run all the + // tests). To acomplish this, the master, via a call to async(), can ask the + // scheduler to run a task in another thread (called "helper"). If a helper + // is available, then the task is executed asynchronously by such a helper. + // Otherwise, the task is (normally) executed synchronously as part of the + // wait() call below. However, in certain cases (serial execution or full + // queue), the task may be executed synchronously as part of the async() + // call itself. Once the master thread has scheduled all the tasks, it calls + // wait() to await for their completion. + // + // The scheduler makes sure that only a certain number of threads (for + // example, the number of available hardware threads) are "active" at any + // given time. When a master thread calls wait(), it is "suspended" until + // all its asynchronous tasks are completed (at which point it becomes + // "ready"). A suspension of a master results in either another ready master + // being "resumed" or another helper thread becoming available. 
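  // For example, a typical master looks along these lines (a minimal sketch
  // with hypothetical names; assumes scheduler s has been started up and
  // that f does not throw):
  //
  //   scheduler::atomic_count task_count (0);
  //
  //   for (size_t i (0); i != n; ++i)
  //     s.async (task_count, f, i, ref (results[i])); // i copied, results[i] by reference.
  //
  //   s.wait (task_count); // Also works our own queue while waiting.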
+ // + // On completion of a task a helper thread returns to the scheduler which + // can again lead either to a ready master being resumed (in which case the + // helper is suspended) or the helper becoming available to perform another + // task. + // + // Note that suspended threads are not reused as helpers. Rather, a new + // helper thread is always created if none is available. This is done to + // allow a ready master to continue as soon as possible. If it were reused + // as a helper, then it could be blocked on a nested wait() further down the + // stack. All this means that the number of threads created by the scheduler + // will normally exceed the maximum active allowed. + // + class LIBBUILD2_SYMEXPORT scheduler + { + public: + using atomic_count = std::atomic<size_t>; + + // F should return void and not throw any exceptions. The way the result + // of a task is communicated back to the master thread is ad hoc, usually + // via "out" arguments. Such result(s) can only be retrieved by the master + // once its task count reaches the start count. + // + // The argument passing semantics is the same as for std::thread. In + // particular, lvalue-references are passed as copies (use ref()/cref() + // for the by-reference semantics), except the case where the task is + // executed synchronously and as part of the async() call itself (this + // subtlety can become important when passing shared locks; you would + // only want it to be copied if the task is queued). + // + // Return true if the task was queued and false if it was executed + // synchronously. + // + // If the scheduler is shutdown, throw system_error(ECANCELED). + // + template <typename F, typename... A> + bool + async (size_t start_count, atomic_count& task_count, F&&, A&&...); + + template <typename F, typename... A> + bool + async (atomic_count& task_count, F&& f, A&&... a) + { + return async (0, task_count, forward<F> (f), forward<A> (a)...); + } + + // Wait until the task count reaches the start count or less. If the + // scheduler is shutdown while waiting, throw system_error(ECANCELED). + // Return the value of task count. Note that this is a synchronizaiton + // point (i.e., the task count is checked with memory_order_acquire). + // + // Note that it is valid to wait on another thread's task count (that is, + // without making any async() calls in this thread). However, if the start + // count differs from the one passed to async(), then whomever sets the + // start count to this alternative value must also call resume() below + // in order to signal waiting threads. + // + // Note also that in this case (waiting on someone else's start count), + // the async() call could execute the tasks synchronously without ever + // incrementing the task count. Thus if waiting on another thread's start + // count starts before/during async() calls, then it must be "gated" with + // an alternative (lower) start count. + // + // Finally, if waiting on someone else's start count, it may be unsafe + // (from the deadlock's point of view) to continue working through our own + // queue (i.e., we may block waiting on a task that has been queued before + // us which in turn may end up waiting on "us"). + // + enum work_queue + { + work_none, // Don't work own queue. + work_one, // Work own queue rechecking the task count after every task. + work_all // Work own queue before rechecking the task count. 
+ }; + + size_t + wait (size_t start_count, + const atomic_count& task_count, + work_queue = work_all); + + size_t + wait (const atomic_count& task_count, work_queue wq = work_all) + { + return wait (0, task_count, wq); + } + + // Resume threads waiting on this task count. + // + void + resume (const atomic_count& task_count); + + // An active thread that is about to wait for potentially significant time + // on something other than task_count (e.g., mutex, condition variable) + // should deactivate itself with the scheduler and then reactivate once + // done waiting. + // + void + deactivate (); + + void + activate (bool collision = false); + + // Sleep for the specified duration, deactivating the thread before going + // to sleep and re-activating it after waking up (which means this + // function may sleep potentially significantly longer than requested). + // + void + sleep (const duration&); + + // Startup and shutdown. + // + public: + // Unless already shut down, call shutdown() but ignore errors. + // + ~scheduler (); + + // Create a shut down scheduler. + // + scheduler () = default; + + // Create a started up scheduler. + // + // The initial active argument is the number of threads to assume are + // already active (e.g., the calling thread). It must not be 0 (since + // someone has to schedule the first task). + // + // If the maximum threads or task queue depth arguments are unspecified, + // then appropriate defaults are used. + // + explicit + scheduler (size_t max_active, + size_t init_active = 1, + size_t max_threads = 0, + size_t queue_depth = 0, + optional<size_t> max_stack = nullopt) + { + startup (max_active, init_active, max_threads, queue_depth, max_stack); + } + + // Start the scheduler. + // + void + startup (size_t max_active, + size_t init_active = 1, + size_t max_threads = 0, + size_t queue_depth = 0, + optional<size_t> max_stack = nullopt); + + // Return true if the scheduler was started up. + // + // Note: can only be called from threads that have observed creation, + // startup, or shutdown. + // + bool + started () const {return !shutdown_;} + + // Tune a started up scheduler. + // + // Currently one cannot increase the number of max_active. Pass 0 to + // restore the initial value. + // + // Note that tuning can only be done while the scheduler is inactive, that + // is, no threads are executing a task or are suspended. For example, in a + // setup with a single initial active thread that would be after a return + // from the top-level wait() call. + // + void + tune (size_t max_active); + + // Return true if the scheduler is configured to run tasks serially. + // + // Note: can only be called from threads that have observed startup. + // + bool + serial () const {return max_active_ == 1;} + + // Wait for all the helper threads to terminate. Throw system_error on + // failure. Note that the initially active threads are not waited for. + // Return scheduling statistics. + // + struct stat + { + size_t thread_max_active = 0; // max # of active threads allowed. + size_t thread_max_total = 0; // max # of total threads allowed. + size_t thread_helpers = 0; // # of helper threads created. + size_t thread_max_waiting = 0; // max # of waiters at any time. + + size_t task_queue_depth = 0; // # of entries in a queue (capacity). + size_t task_queue_full = 0; // # of times task queue was full. + size_t task_queue_remain = 0; // # of tasks remaining in queue. + + size_t wait_queue_slots = 0; // # of wait slots (buckets). 
+ size_t wait_queue_collisions = 0; // # of times slot had been occupied. + }; + + stat + shutdown (); + + // Progress monitoring. + // + // Setting and clearing of the monitor is not thread-safe. That is, it + // should be set before any tasks are queued and cleared after all of + // them have completed. + // + // The counter must go in one direction, either increasing or decreasing, + // and should contain the initial value during the call. Zero threshold + // value is reserved. + // + struct monitor_guard + { + explicit + monitor_guard (scheduler* s = nullptr): s_ (s) {} + monitor_guard (monitor_guard&& x): s_ (x.s_) {x.s_ = nullptr;} + monitor_guard& operator= (monitor_guard&& x) + { + if (&x != this) + { + s_ = x.s_; + x.s_ = nullptr; + } + return *this; + } + + ~monitor_guard () + { + if (s_ != nullptr) + { + lock l (s_->wait_idle ()); // See monitor() for details. + s_->monitor_count_ = nullptr; + s_->monitor_func_ = nullptr; + } + } + + explicit operator bool () const {return s_ != nullptr;} + + private: + scheduler* s_; + }; + + monitor_guard + monitor (atomic_count&, size_t threshold, function<size_t (size_t)>); + + // If initially active thread(s) (besides the one that calls startup()) + // exist before the call to startup(), then they must call join() before + // executing any tasks. The two common cases where you don't have to call + // join are a single active thread that calls startup()/shutdown() or + // active thread(s) that are created after startup(). + // + void + join () + { + assert (queue () == nullptr); + + // Lock the mutex to make sure the values set in startup() are visible + // in this thread. + // + lock l (mutex_); + } + + // If initially active thread(s) participate in multiple schedulers and/or + // sessions (intervals between startup() and shutdown()), then they must + // call leave() before joining another scheduler/session. Note that this + // applies to the active thread that calls shutdown(). Note that a thread + // can only participate in one scheduler at a time. + // + void + leave () + { + queue (nullptr); + } + + // Return the number of hardware threads or 0 if unable to determine. + // + static size_t + hardware_concurrency () + { + return std::thread::hardware_concurrency (); + } + + // Return a prime number that can be used as a lock shard size that's + // appropriate for the scheduler's concurrency. Use power of two values + // for mul for higher-contention shards and for div for lower-contention + // ones. Always return 1 for serial execution. + // + // Note: can only be called from threads that have observed startup. + // + size_t + shard_size (size_t mul = 1, size_t div = 1) const; + + // Assuming all the task have been executed, busy-wait for all the threads + // to become idle. Return the lock over the scheduler mutex. Normally you + // don't need to call this function directly. + // + using lock = std::unique_lock<std::mutex>; + + lock + wait_idle (); + + private: + void + activate_helper (lock&); + + void + create_helper (lock&); + + // We restrict ourselves to a single pointer as an argument in hope of + // a small object optimization. Return NULL. + // + // Note that the return type is void* to make the function usable with + // pthreads (see scheduler.cxx for details). + // + static void* + helper (void*); + + size_t + suspend (size_t start_count, const atomic_count& task_count); + + // Task encapsulation. + // + template <typename F, typename... 
A> + struct task_type + { + using func_type = std::decay_t<F>; + using args_type = std::tuple<std::decay_t<A>...>; + + atomic_count* task_count; + size_t start_count; + func_type func; + args_type args; + + template <size_t... i> + void + thunk (std::index_sequence<i...>) + { + move (func) (std::get<i> (move (args))...); + } + }; + + template <typename F, typename... A> + static void + task_thunk (scheduler&, lock&, void*); + + template <typename T> + static std::decay_t<T> + decay_copy (T&& x) {return forward<T> (x);} + + private: + // Monitor. + // + atomic_count* monitor_count_ = nullptr; // NULL if not used. + atomic_count monitor_tshold_; // 0 means locked. + size_t monitor_init_; // Initial count. + function<size_t (size_t)> monitor_func_; + + std::mutex mutex_; + bool shutdown_ = true; // Shutdown flag. + + optional<size_t> max_stack_; + + // The constraints that we must maintain: + // + // active <= max_active + // (init_active + helpers) <= max_threads (soft; see activate_helper()) + // + // Note that the first three are immutable between startup() and + // shutdown() so can be accessed without a lock (but see join()). + // + size_t init_active_ = 0; // Initially active threads. + size_t max_active_ = 0; // Maximum number of active threads. + size_t max_threads_ = 0; // Maximum number of total threads. + + size_t helpers_ = 0; // Number of helper threads created so far. + + // Every thread that we manage must be accounted for in one of these + // counters. And their sum should equal (init_active + helpers). + // + size_t active_ = 0; // Active master threads executing a task. + size_t idle_ = 0; // Idle helper threads waiting for a task. + size_t waiting_ = 0; // Suspended master threads waiting for their tasks. + size_t ready_ = 0; // Ready master thread waiting to become active. + size_t starting_ = 0; // Helper threads starting up. + + // Original values (as specified during startup) that can be altered via + // tuning. + // + size_t orig_max_active_ = 0; + + std::condition_variable idle_condv_; // Idle helpers queue. + std::condition_variable ready_condv_; // Ready masters queue. + + // Statistics counters. + // + size_t stat_max_waiters_; + size_t stat_wait_collisions_; + + // Progress counter. + // + // We increment it for each active->waiting->ready->active transition + // and it is used for deadlock detection (see deactivate()). + // + size_t progress_; + + // Wait queue. + // + // A wait slot blocks a bunch of threads. When they are (all) unblocked, + // they re-examine their respective conditions and either carry on or + // block again. + // + // The wait queue is a shard of slots. A thread picks a slot based on the + // address of its task count variable. How many slots do we need? This + // depends on the number of waiters that we can have which cannot be + // greater than the total number of threads. + // + // The pointer to the task count is used to identify the already waiting + // group of threads for collision statistics. + // + struct wait_slot + { + std::mutex mutex; + std::condition_variable condv; + size_t waiters = 0; + const atomic_count* task_count; + bool shutdown = true; + }; + + size_t wait_queue_size_; // Proportional to max_threads. + unique_ptr<wait_slot[]> wait_queue_; + + // Task queue. + // + // Each queue has its own mutex plus we have an atomic total count of the + // queued tasks. Note that it should only be modified while holding one + // of the queue locks. 
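    // (It still has to be atomic since it is also read without holding any
    // queue lock: lock-free in async() and under the scheduler mutex in
    // deactivate(), activate_helper(), and helper().)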
+ // + atomic_count queued_task_count_; + + // For now we only support trivially-destructible tasks. + // + struct task_data + { + std::aligned_storage<sizeof (void*) * 8>::type data; + void (*thunk) (scheduler&, lock&, void*); + }; + + // We have two requirements: Firstly, we want to keep the master thread + // (the one that called wait()) busy working though its own queue for as + // long as possible before (if at all) it "reincarnates" as a helper. The + // main reason for this is the limited number of helpers we can create. + // + // Secondly, we don't want to block wait() longer than necessary since the + // master thread can do some work with the result. Plus, overall, we want + // to "unwind" task hierarchies as soon as possible since they hold up + // resources such as thread's stack. All this means that the master thread + // can only work through tasks that it has queued at this "level" of the + // async()/wait() calls since we know that wait() cannot return until + // they are done. + // + // To satisfy the first requirement, the master and helper threads get the + // tasks from different ends of the queue: master from the back while + // helpers from the front. And the master always adds new tasks to the + // back. + // + // To satisfy the second requirement, the master thread stores the index + // of the first task it has queued at this "level" and makes sure it + // doesn't try to deque any task beyond that. + // + size_t task_queue_depth_; // Multiple of max_active. + + struct task_queue + { + std::mutex mutex; + bool shutdown = false; + + size_t stat_full = 0; // Number of times push() returned NULL. + + // Our task queue is circular with head being the index of the first + // element and tail -- of the last. Since this makes the empty and one + // element cases indistinguishable, we also keep the size. + // + // The mark is an index somewhere between (figuratively speaking) head + // and tail, if enabled. If the mark is hit, then it is disabled until + // the queue becomes empty or it is reset by a push. + // + size_t head = 0; + size_t mark = 0; + size_t tail = 0; + size_t size = 0; + + unique_ptr<task_data[]> data; + + task_queue (size_t depth): data (new task_data[depth]) {} + }; + + // Task queue API. Expects the queue mutex to be locked. + // + + // Push a new task to the queue returning a pointer to the task data to be + // filled or NULL if the queue is full. + // + task_data* + push (task_queue& tq) + { + size_t& s (tq.size); + size_t& t (tq.tail); + size_t& m (tq.mark); + + if (s != task_queue_depth_) + { + // normal wrap empty + // | | | + t = s != 0 ? (t != task_queue_depth_ - 1 ? t + 1 : 0) : t; + s++; + + if (m == task_queue_depth_) // Enable the mark if first push. + m = t; + + queued_task_count_.fetch_add (1, std::memory_order_release); + return &tq.data[t]; + } + + return nullptr; + } + + bool + empty_front (task_queue& tq) const {return tq.size == 0;} + + void + pop_front (task_queue& tq, lock& ql) + { + size_t& s (tq.size); + size_t& h (tq.head); + size_t& m (tq.mark); + + bool a (h == m); // Adjust mark? + task_data& td (tq.data[h]); + + // normal wrap empty + // | | | + h = s != 1 ? (h != task_queue_depth_ - 1 ? h + 1 : 0) : h; + + if (--s == 0 || a) + m = h; // Reset or adjust the mark. 
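      // (Note that execute() releases ql while the task runs and re-acquires
      // it before returning.)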
+ + execute (ql, td); + } + + bool + empty_back (task_queue& tq) const + { + return tq.size == 0 || tq.mark == task_queue_depth_; + } + + void + pop_back (task_queue& tq, lock& ql) + { + size_t& s (tq.size); + size_t& t (tq.tail); + size_t& m (tq.mark); + + bool a (t == m); // Adjust mark? + + task_data& td (tq.data[t]); + + // Save the old queue mark and disable it in case the task we are about + // to run adds sub-tasks. The first push(), if any, will reset it. + // + size_t om (m); + m = task_queue_depth_; + + // normal wrap empty + // | | | + t = s != 1 ? (t != 0 ? t - 1 : task_queue_depth_ - 1) : t; + --s; + + execute (ql, td); + + // Restore the old mark (which we might have to adjust). + // + if (s == 0) + m = t; // Reset the mark. + else if (a) + m = task_queue_depth_; // Disable the mark. + else + // What happens if head goes past the old mark? In this case we will + // get into the empty queue state before we end up making any (wrong) + // decisions based on this value. Unfortunately there is no way to + // detect this (and do some sanity asserts) since things can wrap + // around. + // + // To put it another way, the understanding here is that after the + // task returns we will either have an empty queue or there will still + // be tasks between the old mark and the current tail, something along + // these lines: + // + // OOOOOXXXXOOO + // | | | + // m h t + // + m = om; + } + + void + execute (lock& ql, task_data& td) + { + queued_task_count_.fetch_sub (1, std::memory_order_release); + + // The thunk moves the task data to its stack, releases the lock, + // and continues to execute the task. + // + td.thunk (*this, ql, &td.data); + + // See if we need to call the monitor (see also the serial version + // in async()). + // + if (monitor_count_ != nullptr) + { + // Note that we don't care if we don't see the updated values right + // away. + // + if (size_t t = monitor_tshold_.load (memory_order_relaxed)) + { + // "Lock" the monitor by setting threshold to 0. + // + if (monitor_tshold_.compare_exchange_strong ( + t, + 0, + memory_order_release, + memory_order_relaxed)) + { + // Now we are the only ones messing with this. + // + size_t v (monitor_count_->load (memory_order_relaxed)); + + if (v != monitor_init_) + { + // See which direction we are going. + // + if (v > monitor_init_ ? (v >= t) : (v <= t)) + t = monitor_func_ (v); + } + + monitor_tshold_.store (t, memory_order_release); + } + } + } + + ql.lock (); + } + + // Each thread has its own queue which are stored in this list. 
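    // (A std::list so that queues never move once created: helper() grabs an
    // iterator and the current size under mutex_ and then walks the queues
    // with mutex_ released, and each thread caches a raw pointer to its own
    // queue -- see queue() below.)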
+ // + std::list<task_queue> task_queues_; + + task_queue& + create_queue (); + + static task_queue* + queue () noexcept; + + static void + queue (task_queue*) noexcept; + }; +} + +#include <libbuild2/scheduler.txx> + +#endif // LIBBUILD2_SCHEDULER_HXX diff --git a/libbuild2/scheduler.test.cxx b/libbuild2/scheduler.test.cxx new file mode 100644 index 0000000..1252575 --- /dev/null +++ b/libbuild2/scheduler.test.cxx @@ -0,0 +1,187 @@ +// file : libbuild2/scheduler.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <chrono> +#include <thread> + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/scheduler.hxx> + +using namespace std; + +namespace build2 +{ + // Usage argv[0] [-v <volume>] [-d <difficulty>] [-c <concurrency>] + // [-q <queue-depth>] + // + // -v task tree volume (affects both depth and width), for example 100 + // -d computational difficulty of each task, for example 10 + // -c max active threads, if unspecified or 0, then hardware concurrency + // -q task queue depth, if unspecified or 0, then appropriate default used + // + // Specifying any option also turns on the verbose mode. + // + // Notes on testing: + // + // 1. Ideally you would want to test things on an SMP machine. + // + // 2. When need to compare performance, disable turbo boost since its + // availability depends on CPU utilization/temperature: + // + // # echo '1' >/sys/devices/system/cpu/intel_pstate/no_turbo + // + // 3. Use turbostat(1) to see per-CPU details (utlization, frequency): + // + // $ sudo turbostat --interval 1 ./driver -d 8 -v 300 + // + static bool + prime (uint64_t); + + // Find # of primes in the [x, y) range. + // + static void + inner (uint64_t x, uint64_t y, uint64_t& r) + { + for (; x != y; ++x) + if (prime (x)) + r++; + }; + + int + main (int argc, char* argv[]) + { + bool verb (false); + + // Adjust assert() below if changing these defaults. + // + size_t volume (100); + uint32_t difficulty (10); + + size_t max_active (0); + size_t queue_depth (0); + + for (int i (1); i != argc; ++i) + { + string a (argv[i]); + + if (a == "-v") + volume = stoul (argv[++i]); + else if (a == "-d") + difficulty = stoul (argv[++i]); + else if (a == "-c") + max_active = stoul (argv[++i]); + else if (a == "-q") + queue_depth = stoul (argv[++i]); + else + assert (false); + + verb = true; + } + + if (max_active == 0) + max_active = scheduler::hardware_concurrency (); + + scheduler s (max_active, 1, 0, queue_depth); + + // Find # prime counts of primes in [i, d*i*i) ranges for i in (0, n]. + // + auto outer = [difficulty, &s] (size_t n, vector<uint64_t>& o, uint64_t& r) + { + scheduler::atomic_count task_count (0); + + for (size_t i (1); i <= n; ++i) + { + o[i - 1] = 0; + s.async (task_count, + inner, + i, + i * i * difficulty, + ref (o[i - 1])); + } + + s.wait (task_count); + assert (task_count == 0); + + for (uint64_t v: o) + r += prime (v) ? 
1 : 0; + }; + + vector<uint64_t> r (volume, 0); + vector<vector<uint64_t>> o (volume, vector<uint64_t> ()); + + scheduler::atomic_count task_count (0); + + for (size_t i (0); i != volume; ++i) + { + o[i].resize (i); + s.async (task_count, + outer, + i, + ref (o[i]), + ref (r[i])); + } + + s.wait (task_count); + assert (task_count == 0); + + uint64_t n (0); + for (uint64_t v: r) + n += v; + + if (volume == 100 && difficulty == 10) + assert (n == 580); + + scheduler::stat st (s.shutdown ()); + + if (verb) + { + cerr << "result " << n << endl + << endl; + + cerr << "thread_max_active " << st.thread_max_active << endl + << "thread_max_total " << st.thread_max_total << endl + << "thread_helpers " << st.thread_helpers << endl + << "thread_max_waiting " << st.thread_max_waiting << endl + << endl + << "task_queue_depth " << st.task_queue_depth << endl + << "task_queue_full " << st.task_queue_full << endl + << endl + << "wait_queue_slots " << st.wait_queue_slots << endl + << "wait_queue_collisions " << st.wait_queue_collisions << endl; + } + + return 0; + } + + static bool + prime (uint64_t x) + { + if (x == 2 || x == 3) + return true; + + if (x < 2 || x % 2 == 0 || x % 3 == 0) + return false; + + // Test divisors starting from 5 and incrementing alternatively by 2/4. + // + for (uint64_t d (5), i (2); d * d <= x; d += i, i = 6 - i) + { + if (x % d == 0) + return false; + } + + return true; + } +} + +int +main (int argc, char* argv[]) +{ + return build2::main (argc, argv); +} diff --git a/libbuild2/scheduler.txx b/libbuild2/scheduler.txx new file mode 100644 index 0000000..805a072 --- /dev/null +++ b/libbuild2/scheduler.txx @@ -0,0 +1,138 @@ +// file : libbuild2/scheduler.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cerrno> + +namespace build2 +{ + template <typename F, typename... A> + bool scheduler:: + async (size_t start_count, atomic_count& task_count, F&& f, A&&... a) + { + using task = task_type<F, A...>; + + static_assert (sizeof (task) <= sizeof (task_data::data), + "insufficient space"); + + static_assert (std::is_trivially_destructible<task>::value, + "not trivially destructible"); + + // If running serially, then run the task synchronously. In this case + // there is no need to mess with task count. + // + if (max_active_ == 1) + { + forward<F> (f) (forward<A> (a)...); + + // See if we need to call the monitor (see the concurrent version in + // execute() for details). + // + if (monitor_count_ != nullptr) + { + size_t v (monitor_count_->load (memory_order_relaxed)); + if (v != monitor_init_) + { + size_t t (monitor_tshold_.load (memory_order_relaxed)); + if (v > monitor_init_ ? (v >= t) : (v <= t)) + monitor_tshold_.store (monitor_func_ (v), memory_order_relaxed); + } + } + + return false; + } + + // Try to push the task into the queue falling back to running serially + // if the queue is full. + // + task_queue* tq (queue ()); // Single load. + if (tq == nullptr) + tq = &create_queue (); + + { + lock ql (tq->mutex); + + if (tq->shutdown) + throw_generic_error (ECANCELED); + + if (task_data* td = push (*tq)) + { + // Package the task (under lock). + // + new (&td->data) task { + &task_count, + start_count, + decay_copy (forward<F> (f)), + typename task::args_type (decay_copy (forward<A> (a))...)}; + + td->thunk = &task_thunk<F, A...>; + + // Increment the task count. This has to be done under lock to prevent + // the task from decrementing the count before we had a chance to + // increment it. 
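        // (The release here, together with the release fetch_sub() in
        // task_thunk(), pairs with the acquire loads of the task count in
        // wait()/suspend(), which is what makes the task's results visible
        // to the waiting master.)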
+ // + task_count.fetch_add (1, std::memory_order_release); + } + else + { + tq->stat_full++; + + // We have to perform the same mark adjust/restore as in pop_back() + // since the task we are about to execute synchronously may try to + // work the queue. + // + // It would have been cleaner to package all this logic into push() + // but that would require dragging function/argument types into it. + // + size_t& s (tq->size); + size_t& t (tq->tail); + size_t& m (tq->mark); + + size_t om (m); + m = task_queue_depth_; + + ql.unlock (); + forward<F> (f) (forward<A> (a)...); // Should not throw. + + if (om != task_queue_depth_) + { + ql.lock (); + m = s == 0 ? t : om; + } + + return false; + } + } + + // If there is a spare active thread, wake up (or create) the helper + // (unless someone already snatched the task). + // + if (queued_task_count_.load (std::memory_order_consume) != 0) + { + lock l (mutex_); + + if (active_ < max_active_) + activate_helper (l); + } + + return true; + } + + template <typename F, typename... A> + void scheduler:: + task_thunk (scheduler& s, lock& ql, void* td) + { + using task = task_type<F, A...>; + + // Move the data and release the lock. + // + task t (move (*static_cast<task*> (td))); + ql.unlock (); + + t.thunk (std::index_sequence_for<A...> ()); + + atomic_count& tc (*t.task_count); + if (tc.fetch_sub (1, memory_order_release) - 1 <= t.start_count) + s.resume (tc); // Resume waiters, if any. + } +} diff --git a/libbuild2/scope.cxx b/libbuild2/scope.cxx new file mode 100644 index 0000000..1ad7455 --- /dev/null +++ b/libbuild2/scope.cxx @@ -0,0 +1,911 @@ +// file : libbuild2/scope.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/scope.hxx> + +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> + +using namespace std; + +namespace build2 +{ + // scope + // + pair<lookup, size_t> scope:: + find_original (const variable& var, + const target_type* tt, const string* tn, + const target_type* gt, const string* gn, + size_t start_d) const + { + assert (tt != nullptr || var.visibility != variable_visibility::target); + + size_t d (0); + + if (var.visibility == variable_visibility::prereq) + return make_pair (lookup (), d); + + // Process target type/pattern-specific prepend/append values. + // + auto pre_app = [&var] (lookup& l, + const scope* s, + const target_type* tt, const string* tn, + const target_type* gt, const string* gn) + { + const value& v (*l); + assert ((v.extra == 1 || v.extra == 2) && v.type == nullptr); + + // First we need to look for the stem value starting from the "next + // lookup point". That is, if we have the group, then from the + // s->target_vars (for the group), otherwise from s->vars, and then + // continuing looking in the outer scopes (for both target and group). + // Note that this may have to be repeated recursively, i.e., we may have + // prepents/appends in outer scopes. Also, if the value is for the + // group, then we shouldn't be looking for stem in the target's + // variables. In other words, once we "jump" to group, we stay there. + // + lookup stem (s->find_original (var, tt, tn, gt, gn, 2).first); + + // Check the cache. + // + pair<value&, ulock> entry ( + s->target_vars.cache.insert ( + make_tuple (&v, tt, *tn), + stem, + static_cast<const variable_map::value_data&> (v).version, + var)); + + value& cv (entry.first); + + // If cache miss/invalidation, update the value. 
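      // (insert() only returns an owned lock on a miss or when the cached
      // version is out of date; otherwise the entry is already populated and
      // current, and we use it as is.)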
+ // + if (entry.second.owns_lock ()) + { + // Un-typify the cache. This can be necessary, for example, if we are + // changing from one value-typed stem to another. + // + // Note: very similar logic as in the override cache population code + // below. + // + if (!stem.defined () || cv.type != stem->type) + { + cv = nullptr; + cv.type = nullptr; // Un-typify. + } + + // Copy the stem. + // + if (stem.defined ()) + cv = *stem; + + // Typify the cache value in case there is no stem (we still want to + // prepend/append things in type-aware way). + // + if (cv.type == nullptr && var.type != nullptr) + typify (cv, *var.type, &var); + + // Now prepend/append the value, unless it is NULL. + // + if (v) + { + if (v.extra == 1) + cv.prepend (names (cast<names> (v)), &var); + else + cv.append (names (cast<names> (v)), &var); + } + } + + // Return cache as the resulting value but retain l.var/vars, so it + // looks as if the value came from s->target_vars. + // + l.value = &cv; + }; + + for (const scope* s (this); s != nullptr; ) + { + if (tt != nullptr) // This started from the target. + { + bool f (!s->target_vars.empty ()); + + // Target. + // + if (++d >= start_d) + { + if (f) + { + lookup l (s->target_vars.find (*tt, *tn, var)); + + if (l.defined ()) + { + if (l->extra != 0) // Prepend/append? + pre_app (l, s, tt, tn, gt, gn); + + return make_pair (move (l), d); + } + } + } + + // Group. + // + if (++d >= start_d) + { + if (f && gt != nullptr) + { + lookup l (s->target_vars.find (*gt, *gn, var)); + + if (l.defined ()) + { + if (l->extra != 0) // Prepend/append? + pre_app (l, s, gt, gn, nullptr, nullptr); + + return make_pair (move (l), d); + } + } + } + } + + // Note that we still increment the lookup depth so that we can compare + // depths of variables with different visibilities. + // + if (++d >= start_d && var.visibility != variable_visibility::target) + { + auto p (s->vars.find (var)); + if (p.first != nullptr) + return make_pair (lookup (*p.first, p.second, s->vars), d); + } + + switch (var.visibility) + { + case variable_visibility::scope: + s = nullptr; + break; + case variable_visibility::target: + case variable_visibility::project: + s = s->root () ? nullptr : s->parent_scope (); + break; + case variable_visibility::normal: + s = s->parent_scope (); + break; + case variable_visibility::prereq: + assert (false); + } + } + + return make_pair (lookup (), size_t (~0)); + } + + pair<lookup, size_t> scope:: + find_override (const variable& var, + pair<lookup, size_t> original, + bool target, + bool rule) const + { + assert (!rule || target); // Rule-specific is target-specific. + + // Normally there would be no overrides and if there are, there will only + // be a few of them. As a result, here we concentrate on keeping the logic + // as straightforward as possible without trying to optimize anything. + // + // Note also that we rely (e.g., in the config module) on the fact that if + // no overrides apply, then we return the original value and not its copy + // in the cache (this is used to detect if the value was overriden). + // + assert (var.overrides != nullptr); + + const lookup& orig (original.first); + size_t orig_depth (original.second); + + // The first step is to find out where our cache will reside. After some + // meditation you will see it should be next to the innermost (scope-wise) + // value of this variable (override or original). + // + // We also keep track of the root scope of the project from which this + // innermost value comes. 
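The switch at the bottom of the lookup loop above is what gives each visibility its meaning: scope visibility stops the walk immediately, project (and target) visibility stops at the project root, and normal visibility continues into the parent scope. A minimal model of that walk, assuming toy scope/variable types rather than the real ones:

#include <cassert>
#include <map>
#include <string>

enum class visibility {normal, project, scope};

struct toy_scope
{
  const toy_scope* parent = nullptr;
  bool root = false;                        // Project root?
  std::map<std::string, std::string> vars;
};

// Walk outwards looking for the variable, terminating the walk according
// to its visibility (cf. the switch in scope::find_original() above).
//
const std::string*
find (const toy_scope* s, const std::string& var, visibility v)
{
  while (s != nullptr)
  {
    auto i (s->vars.find (var));
    if (i != s->vars.end ())
      return &i->second;

    switch (v)
    {
    case visibility::scope:   s = nullptr;                       break;
    case visibility::project: s = s->root ? nullptr : s->parent; break;
    case visibility::normal:  s = s->parent;                     break;
    }
  }

  return nullptr;
}

int
main ()
{
  toy_scope global, project, base;
  project.parent = &global;  project.root = true;
  base.parent = &project;

  global.vars["config"] = "outer";

  assert (find (&base, "config", visibility::normal) != nullptr);
  assert (find (&base, "config", visibility::project) == nullptr); // Stops at root.
  assert (find (&base, "config", visibility::scope) == nullptr);
}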
This is used to decide whether a non-recursive + // project-wise override applies. And also where our variable cache is. + // + const variable_map* inner_vars (nullptr); + const scope* inner_proj (nullptr); + + // One special case is if the original is target/rule-specific, which is + // the most innermost. Or is it innermostest? + // + bool targetspec (false); + if (target) + { + targetspec = orig.defined () && (orig_depth == 1 || + orig_depth == 2 || + (rule && orig_depth == 3)); + if (targetspec) + { + inner_vars = orig.vars; + inner_proj = root_scope (); + } + } + + const scope* s; + + // Return true if the override applies to a value from vars/proj. Note + // that it expects vars and proj to be not NULL; if there is nothing "more + // inner", then any override will still be "visible". + // + auto applies = [&s] (const variable* o, + const variable_map* vars, + const scope* proj) -> bool + { + switch (o->visibility) + { + case variable_visibility::scope: + { + // Does not apply if in a different scope. + // + if (vars != &s->vars) + return false; + + break; + } + case variable_visibility::project: + { + // Does not apply if in a subproject. + // + // Note that before we used to require the same project but that + // missed values that are "visible" from the outer projects. + // + // If root scope is NULL, then we are looking at the global scope. + // + const scope* rs (s->root_scope ()); + if (rs != nullptr && rs->sub_root (*proj)) + return false; + + break; + } + case variable_visibility::normal: + break; + case variable_visibility::target: + case variable_visibility::prereq: + assert (false); + } + + return true; + }; + + // Return the override value if present in scope s and (optionally) of + // the specified kind (__override, __prefix, etc). + // + auto find = [&s, &var] (const variable* o, + const char* k = nullptr) -> lookup + { + if (k != nullptr && !o->override (k)) + return lookup (); + + // Note: using the original as storage variable. + // + return lookup (s->vars.find (*o).first, &var, &s->vars); + }; + + // Return true if a value is from this scope (either target type/pattern- + // specific or ordinary). + // + auto belongs = [&s, target] (const lookup& l) -> bool + { + if (target) + { + for (auto& p1: s->target_vars) + for (auto& p2: p1.second) + if (l.vars == &p2.second) + return true; + } + + return l.vars == &s->vars; + }; + + // While looking for the cache we also detect if none of the overrides + // apply. In this case the result is simply the original value (if any). + // + bool apply (false); + + for (s = this; s != nullptr; s = s->parent_scope ()) + { + // If we are still looking for the cache, see if the original comes from + // this scope. We check this before the overrides since it can come from + // the target type/patter-specific variables, which is "more inner" than + // normal scope variables (see find_original()). + // + if (inner_vars == nullptr && orig.defined () && belongs (orig)) + { + inner_vars = orig.vars; + inner_proj = s->root_scope (); + } + + for (const variable* o (var.overrides.get ()); + o != nullptr; + o = o->overrides.get ()) + { + if (inner_vars != nullptr && !applies (o, inner_vars, inner_proj)) + continue; + + auto l (find (o)); + + if (l.defined ()) + { + if (inner_vars == nullptr) + { + inner_vars = l.vars; + inner_proj = s->root_scope (); + } + + apply = true; + break; + } + } + + // We can stop if we found the cache and at least one override applies. 
+ // + if (inner_vars != nullptr && apply) + break; + } + + if (!apply) + return original; + + assert (inner_vars != nullptr); + + // If for some reason we are not in a project, use the cache from the + // global scope. + // + if (inner_proj == nullptr) + inner_proj = global_scope; + + // Now find our "stem", that is, the value to which we will be appending + // suffixes and prepending prefixes. This is either the original or the + // __override, provided it applies. We may also not have either. + // + lookup stem; + size_t stem_depth (0); + const scope* stem_proj (nullptr); + const variable* stem_ovr (nullptr); // __override if found and applies. + + // Again the special case of a target/rule-specific variable. + // + if (targetspec) + { + stem = orig; + stem_depth = orig_depth; + stem_proj = root_scope (); + } + + // Depth at which we found the override (with implied target/rule-specific + // lookup counts). + // + size_t ovr_depth (target ? (rule ? 3 : 2) : 0); + + for (s = this; s != nullptr; s = s->parent_scope ()) + { + bool done (false); + + // First check if the original is from this scope. + // + if (orig.defined () && belongs (orig)) + { + stem = orig; + stem_depth = orig_depth; + stem_proj = s->root_scope (); + // Keep searching. + } + + ++ovr_depth; + + // Then look for an __override that applies. + // + // Note that the override list is in the reverse order of appearance and + // so we will naturally see the most recent override first. + // + for (const variable* o (var.overrides.get ()); + o != nullptr; + o = o->overrides.get ()) + { + // If we haven't yet found anything, then any override will still be + // "visible" even if it doesn't apply. + // + if (stem.defined () && !applies (o, stem.vars, stem_proj)) + continue; + + auto l (find (o, "__override")); + + if (l.defined ()) + { + stem = move (l); + stem_depth = ovr_depth; + stem_proj = s->root_scope (); + stem_ovr = o; + done = true; + break; + } + } + + if (done) + break; + } + + // Check the cache. + // + variable_override_cache& cache ( + inner_proj == global_scope + ? global_override_cache + : inner_proj->root_extra->override_cache); + + pair<value&, ulock> entry ( + cache.insert ( + make_pair (&var, inner_vars), + stem, + 0, // Overrides are immutable. + var)); + + value& cv (entry.first); + bool cl (entry.second.owns_lock ()); + + // If cache miss/invalidation, update the value. + // + if (cl) + { + // Note: very similar logic as in the target type/pattern specific cache + // population code above. + // + + // Un-typify the cache. This can be necessary, for example, if we are + // changing from one value-typed stem to another. + // + if (!stem.defined () || cv.type != stem->type) + { + cv = nullptr; + cv.type = nullptr; // Un-typify. + } + + if (stem.defined ()) + cv = *stem; + + // Typify the cache value. If the stem is the original, then the type + // would get propagated automatically. But the stem could also be the + // override, which is kept untyped. Or the stem might not be there at + // all while we still need to apply prefixes/suffixes in the type-aware + // way. + // + if (cv.type == nullptr && var.type != nullptr) + typify (cv, *var.type, &var); + } + + // Now apply override prefixes and suffixes (if updating the cache). Also + // calculate the vars and depth of the result, which will be those of the + // stem or prefix/suffix that applies, whichever is the innermost. + // + // Note: we could probably cache this information instead of recalculating + // it every time. 
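The loop that follows assembles the final value from the stem plus any __prefix/__suffix overrides in command-line order, with append/prepend entries that precede the __override skipped when it comes from the same scope. The sketch below shows only that ordering aspect on plain strings (kind and override_entry are hypothetical names; real override values are untyped name lists applied per scope):

#include <iostream>
#include <string>
#include <vector>

enum class kind {set, prepend, append};

struct override_entry
{
  kind k;
  std::string value;
};

// Fold a command line-ordered list of overrides into a single value. A later
// 'set' replaces everything accumulated so far, so prepends/appends that came
// before it have no effect on the result, which mirrors the skip-until-after-
// __override logic in find_override() above (simplified).
//
std::string
apply (std::string stem, const std::vector<override_entry>& os)
{
  for (const override_entry& o: os)
  {
    switch (o.k)
    {
    case kind::set:     stem = o.value;              break;
    case kind::prepend: stem = o.value + ' ' + stem; break;
    case kind::append:  stem = stem + ' ' + o.value; break;
    }
  }

  return stem;
}

int
main ()
{
  std::vector<override_entry> os {
    {kind::append,  "ignored"},  // Superseded by the later set.
    {kind::set,     "base"},
    {kind::prepend, "pre"},
    {kind::append,  "post"}};

  std::cout << apply ("buildfile-value", os) << '\n'; // pre base post
}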
+ // + size_t depth (stem_depth); + const variable_map* vars (stem.vars); + const scope* proj (stem_proj); + + ovr_depth = target ? (rule ? 3 : 2) : 0; + + for (s = this; s != nullptr; s = s->parent_scope ()) + { + ++ovr_depth; + + // The override list is in the reverse order of appearance so we need to + // iterate backwards in order to apply things in the correct order. + // + // We also need to skip any append/prepend overrides that appear before + // __override (in the command line order), provided it is from this + // scope. + // + bool skip (stem_ovr != nullptr && stem_depth == ovr_depth); + + for (const variable* o (var.overrides->aliases); // Last override. + o != nullptr; + o = (o->aliases != var.overrides->aliases ? o->aliases : nullptr)) + { + if (skip) + { + if (stem_ovr == o) // Keep skipping until after we see __override. + skip = false; + + continue; + } + + // First see if this override applies. This is tricky: what if the + // stem is a "visible" override from an outer project? Shouldn't its + // overrides apply? Sure sounds logical. So we use the project of the + // stem's scope. + // + if (vars != nullptr && !applies (o, vars, proj)) + continue; + + // Note that we keep override values as untyped names even if the + // variable itself is typed. We also pass the original variable for + // diagnostics. + // + auto lp (find (o, "__prefix")); + auto ls (find (o, "__suffix")); + + if (cl) + { + // Note: if we have both, then one is already in the stem. + // + if (lp) // No sense to prepend/append if NULL. + { + cv.prepend (names (cast<names> (lp)), &var); + } + else if (ls) + { + cv.append (names (cast<names> (ls)), &var); + } + } + + if (lp.defined () || ls.defined ()) + { + // If we had no stem, use the first override as a surrogate stem. + // + if (vars == nullptr) + { + depth = ovr_depth; + vars = &s->vars; + proj = s->root_scope (); + } + // Otherwise, pick the innermost location between the stem and + // prefix/suffix. + // + else if (ovr_depth < depth) + { + depth = ovr_depth; + vars = &s->vars; + } + } + } + } + + // Use the location of the innermost value that contributed as the + // location of the result. + // + return make_pair (lookup (&cv, &var, vars), depth); + } + + value& scope:: + append (const variable& var) + { + // Note that here we want the original value without any overrides + // applied. + // + lookup l (find_original (var).first); + + if (l.defined () && l.belongs (*this)) // Existing var in this scope. + return vars.modify (l); // Ok since this is original. + + value& r (assign (var)); // NULL. + + if (l.defined ()) + r = *l; // Copy value (and type) from the outer scope. + + return r; + } + + const target_type* scope:: + find_target_type (const string& tt, const scope** rs) const + { + // Search scopes outwards, stopping at the project root. + // + for (const scope* s (this); + s != nullptr; + s = s->root () ? global_scope : s->parent_scope ()) + { + if (s->target_types.empty ()) + continue; + + if (const target_type* r = s->target_types.find (tt)) + { + if (rs != nullptr) + *rs = s; + + return r; + } + } + + return nullptr; + } + + // Find target type from file name. + // + static const target_type* + find_file_target_type (const scope* s, const string& n) + { + // Pretty much the same logic as in find_target_type() above. + // + for (; s != nullptr; s = s->root () ? 
global_scope : s->parent_scope ()) + { + if (s->target_types.empty ()) + continue; + + if (const target_type* r = s->target_types.find_file (n)) + return r; + } + + return nullptr; + } + + pair<const target_type*, optional<string>> scope:: + find_target_type (name& n, const location& loc) const + { + const target_type* tt (nullptr); + optional<string> ext; + + string& v (n.value); + + // If the target type is specified, resolve it and bail out if not found. + // Otherwise, we know in the end it will resolve to something (if nothing + // else, either dir{} or file{}), so we can go ahead and process the name. + // + if (n.typed ()) + { + tt = find_target_type (n.type); + + if (tt == nullptr) + return make_pair (tt, move (ext)); + } + else + { + // Empty name as well as '.' and '..' signify a directory. Note that + // this logic must be consistent with other places (grep for ".."). + // + if (v.empty () || v == "." || v == "..") + tt = &dir::static_type; + } + + // Directories require special name processing. If we find that more + // targets deviate, then we should make this target type-specific. + // + if (tt != nullptr && (tt->is_a<dir> () || tt->is_a<fsdir> ())) + { + // The canonical representation of a directory name is with empty + // value. + // + if (!v.empty ()) + { + n.dir /= dir_path (v); // Move name value to dir. + v.clear (); + } + } + else if (!v.empty ()) + { + // Split the path into its directory part (if any) the name part, and + // the extension (if any). We cannot assume the name part is a valid + // filesystem name so we will have to do the splitting manually. + // + // See also parser::expand_name_pattern() if changing anything here. + // + size_t p (path::traits_type::rfind_separator (v)); + + if (p != string::npos) + { + try + { + n.dir /= dir_path (v, p != 0 ? p : 1); // Special case: "/". + } + catch (const invalid_path& e) + { + fail (loc) << "invalid path '" << e.path << "'"; + } + + // This is probably too general of a place to ignore multiple trailing + // slashes and treat it as a directory (e.g., we don't want to + // encourage this sloppiness in buildfiles). We could, however, do it + // for certain contexts, such as buildspec. Maybe a lax flag? + // + if (++p == v.size ()) + fail (loc) << "invalid name '" << v << "'"; + + v.erase (0, p); + } + + // Extract the extension. + // + ext = target::split_name (v, loc); + } + + // If the target type is still unknown, map it using the name/extension, + // falling back to file{}. + // + if (tt == nullptr) + { + // We only consider files without extension for file name mapping. + // + if (!ext) + tt = find_file_target_type (this, v); + + //@@ TODO: derive type from extension. + + if (tt == nullptr) + tt = &file::static_type; + } + + // If the target type does not use extensions but one was specified, + // factor it back into the name (this way we won't assert when printing + // diagnostics; see to_stream(target_key) for details). + // + if (ext && + tt->fixed_extension == nullptr && + tt->default_extension == nullptr) + { + v += '.'; + v += *ext; + ext = nullopt; + } + + return make_pair (tt, move (ext)); + } + + static target* + derived_tt_factory (const target_type& t, dir_path d, dir_path o, string n) + { + // Pass our type to the base factory so that it can detect that it is + // being called to construct a derived target. This can be used, for + // example, to decide whether to "link up" to the group. 
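The recursion guard in derived_tt_factory() just below is easy to see in isolation: when a target type is derived from an already-derived type, the factory walks the base chain until it reaches a factory other than itself and delegates there. A self-contained sketch with toy types (not build2's target_type):

#include <cassert>
#include <string>

// Toy model of a target type with a base link and a factory function.
//
struct toy_type
{
  const char* name;
  const toy_type* base;
  std::string (*factory) (const toy_type&);
};

std::string
file_factory (const toy_type& t)
{
  return std::string ("file-backed ") + t.name;
}

// Analog of derived_tt_factory(): delegate to the ultimate non-derived base
// to avoid recursing into ourselves when deriving from a derived type.
//
std::string
derived_factory (const toy_type& t)
{
  const toy_type* b (t.base);
  for (; b->factory == &derived_factory; b = b->base) ;
  return b->factory (t);
}

int
main ()
{
  toy_type file {"file", nullptr, &file_factory};
  toy_type cli  {"cli",  &file,   &derived_factory}; // define cli: file
  toy_type cli2 {"cli2", &cli,    &derived_factory}; // define cli2: cli

  assert (derived_factory (cli2) == "file-backed cli2");
}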
+ // + // One exception: if we are derived from a derived target type, then this + // logic would lead to infinite recursion. So in this case get the + // ultimate base. + // + const target_type* bt (t.base); + for (; bt->factory == &derived_tt_factory; bt = bt->base) ; + + target* r (bt->factory (t, move (d), move (o), move (n))); + r->derived_type = &t; + return r; + } + + pair<reference_wrapper<const target_type>, bool> scope:: + derive_target_type (const string& name, const target_type& base) + { + // Base target type uses extensions. + // + bool ext (base.fixed_extension != nullptr || + base.default_extension != nullptr); + + // @@ Looks like we may need the ability to specify a fixed extension + // (which will be used to compare existing targets and not just + // search for existing files that is handled by the target_type:: + // extension hook). See the file_factory() for details. We will + // probably need to specify it as part of the define directive (and + // have the ability to specify empty and NULL). + // + // Currently, if we define myfile{}: file{}, then myfile{foo} and + // myfile{foo.x} are the same target. + // + unique_ptr<target_type> dt (new target_type (base)); + dt->base = &base; + dt->factory = &derived_tt_factory; + + // @@ We should probably inherit the fixed extension unless overriden with + // another fixed? But then any derivation from file{} will have to specify + // (or override) the fixed extension? But what is the use of deriving from + // a fixed extension target and not overriding its extension? Some kind of + // alias. Fuzzy. + // + dt->fixed_extension = nullptr /*&target_extension_fix<???>*/; // @@ TODO + + // Override default extension/pattern derivation function: we most likely + // don't want to use the same default as our base (think cli: file). But, + // if our base doesn't use extensions, then most likely neither do we + // (think foo: alias). + // + dt->default_extension = + ext && dt->fixed_extension == nullptr + ? &target_extension_var<var_extension, nullptr> + : nullptr; + + dt->pattern = + dt->fixed_extension != nullptr ? nullptr /*&target_pattern_fix<???>*/ : + dt->default_extension != nullptr ? &target_pattern_var<var_extension, nullptr> : + nullptr; + + // There is actually a difference between "fixed fixed" (like man1{}) and + // "fixed but overridable" (like file{}). Fuzzy: feels like there are + // different kinds of "fixed" (file{} vs man{} vs man1{}). + // + dt->print = + dt->fixed_extension != nullptr + ? &target_print_0_ext_verb // Fixed extension, no use printing. + : nullptr; // Normal. + + return target_types.insert (name, move (dt)); + } + + scope* scope::global_; + scope::variable_override_cache scope::global_override_cache; + + // scope_map + // + scope_map scope_map::instance; + const scope_map& scope_map::cinstance = scope_map::instance; + const scope_map& scopes = scope_map::cinstance; + + const scope* global_scope; + + auto scope_map:: + insert (const dir_path& k, bool root) -> iterator + { + scope_map_base& m (*this); + + auto er (m.emplace (k, scope (true))); // Global. + scope& s (er.first->second); + + // If this is a new scope, update the parent chain. + // + if (er.second) + { + scope* p (nullptr); + + // Update scopes of which we are a new parent/root (unless this is the + // global scope). Also find our parent while at it. + // + if (m.size () > 1) + { + // The first entry is ourselves. 
+ // + auto r (m.find_sub (k)); + for (++r.first; r.first != r.second; ++r.first) + { + scope& c (r.first->second); + + // The first scope of which we are a parent is the least (shortest) + // one which means there is no other scope between it and our + // parent. + // + if (p == nullptr) + p = c.parent_; + + if (root && c.root_ == p->root_) // No intermediate root. + c.root_ = &s; + + if (p == c.parent_) // No intermediate parent. + c.parent_ = &s; + } + + // We couldn't get the parent from one of its old children so we have + // to find it ourselves. + // + if (p == nullptr) + p = &find (k.directory ()); + } + + s.parent_ = p; + s.root_ = root ? &s : (p != nullptr ? p->root_ : nullptr); + } + else if (root && !s.root ()) + { + // Upgrade to root scope. + // + auto r (m.find_sub (k)); + for (++r.first; r.first != r.second; ++r.first) + { + scope& c (r.first->second); + + if (c.root_ == s.root_) // No intermediate root. + c.root_ = &s; + } + + s.root_ = &s; + } + + return er.first; + } + + scope& scope_map:: + find (const dir_path& k) + { + assert (k.normalized (false)); // Allow non-canonical dir separators. + + scope_map_base& m (*this); + auto i (m.find_sup (k)); + assert (i != m.end ()); // Should have global scope. + return i->second; + } +} diff --git a/libbuild2/scope.hxx b/libbuild2/scope.hxx new file mode 100644 index 0000000..7b4fec5 --- /dev/null +++ b/libbuild2/scope.hxx @@ -0,0 +1,471 @@ +// file : libbuild2/scope.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SCOPE_HXX +#define LIBBUILD2_SCOPE_HXX + +#include <map> +#include <unordered_set> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/module.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/target-key.hxx> +#include <libbuild2/target-type.hxx> +#include <libbuild2/target-state.hxx> +#include <libbuild2/rule-map.hxx> +#include <libbuild2/operation.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class dir; + + class LIBBUILD2_SYMEXPORT scope + { + public: + // Absolute and normalized. + // + const dir_path& out_path () const {return *out_path_;} + const dir_path& src_path () const {return *src_path_;} + + // The first is a pointer to the key in scope_map. The second is a pointer + // to the src_root/base variable value, if any (i.e., it can be NULL). + // + const dir_path* out_path_ = nullptr; + const dir_path* src_path_ = nullptr; + + bool + root () const {return root_ == this;} + + scope* parent_scope () {return parent_;} + const scope* parent_scope () const {return parent_;} + + // Root scope of this scope or NULL if this scope is not (yet) + // in any (known) project. Note that if the scope itself is + // root, then this function return this. To get to the outer + // root, query the root scope of the parent. + // + scope* root_scope () {return root_;} + const scope* root_scope () const {return root_;} + + // Root scope of a strong amalgamation of this scope or NULL if + // this scope is not (yet) in any (known) project. If there is + // no strong amalgamation, then this function returns the root + // scope of the project (in other words, in this case a project + // is treated as its own strong amalgamation). + // + scope* strong_scope (); + const scope* strong_scope () const; + + // Root scope of the outermost amalgamation or NULL if this scope is not + // (yet) in any (known) project. 
If there is no amalgamation, then this + // function returns the root scope of the project (in other words, in this + // case a project is treated as its own amalgamation). + // + scope* weak_scope (); + const scope* weak_scope () const; + + // Return true if the specified root scope is a sub-scope of this root + // scope. Note that both scopes must be root. + // + bool + sub_root (const scope&) const; + + // Variables. + // + public: + variable_map vars; + + // Lookup, including in outer scopes. If you only want to lookup in this + // scope, do it on the the variables map directly (and note that there + // will be no overrides). + // + lookup + operator[] (const variable& var) const + { + return find (var).first; + } + + lookup + operator[] (const variable* var) const // For cached variables. + { + assert (var != nullptr); + return operator[] (*var); + } + + lookup + operator[] (const string& name) const + { + const variable* var (var_pool.find (name)); + return var != nullptr ? operator[] (*var) : lookup (); + } + + // As above, but include target type/pattern-specific variables. + // + lookup + find (const variable& var, const target_key& tk) const + { + return find (var, tk.type, tk.name).first; + } + + lookup + find (const variable& var, const target_type& tt, const string& tn) const + { + return find (var, &tt, &tn).first; + } + + pair<lookup, size_t> + find (const variable& var, + const target_type* tt = nullptr, + const string* tn = nullptr) const + { + auto p (find_original (var, tt, tn)); + return var.overrides == nullptr ? p : find_override (var, move (p)); + } + + // Implementation details (used by scope target lookup). The start_depth + // can be used to skip a number of initial lookups. + // + pair<lookup, size_t> + find_original ( + const variable&, + const target_type* tt = nullptr, const string* tn = nullptr, + const target_type* gt = nullptr, const string* gn = nullptr, + size_t start_depth = 1) const; + + pair<lookup, size_t> + find_override (const variable&, + pair<lookup, size_t> original, + bool target = false, + bool rule = false) const; + + // Return a value suitable for assignment (or append if you only want to + // append to the value from this scope). If the value does not exist in + // this scope's map, then a new one with the NULL value is added and + // returned. Otherwise the existing value is returned. + // + value& + assign (const variable& var) {return vars.assign (var);} + + value& + assign (const variable* var) {return vars.assign (var);} // For cached. + + value& + assign (string name) + { + return assign (variable_pool::instance.insert (move (name))); + } + + // Assign a typed non-overridable variable with normal visibility. + // + template <typename T> + value& + assign (string name) + { + return vars.assign (variable_pool::instance.insert<T> (move (name))); + } + + // Return a value suitable for appending. If the variable does not + // exist in this scope's map, then outer scopes are searched for + // the same variable. If found then a new variable with the found + // value is added to this scope and returned. Otherwise this + // function proceeds as assign(). + // + value& + append (const variable&); + + // Target type/pattern-specific variables. + // + variable_type_map target_vars; + + // Variable override caches. Only on project roots (in root_extra) plus a + // global one for the global scope. + // + // The key is the variable plus the innermost (scope-wise) variable map to + // which this override applies. See find_override() for details. 
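The difference between assign() and append() declared above is that append() first pulls a copy of the value visible from the outer scopes into this scope, so that subsequent appending extends rather than replaces it. A rough model with string-valued variables (toy_scope is hypothetical; real values are typed and subject to overrides):

#include <cassert>
#include <map>
#include <string>

struct toy_scope
{
  const toy_scope* parent = nullptr;
  std::map<std::string, std::string> vars;

  // assign(): return this scope's slot, creating an empty one if needed.
  //
  std::string&
  assign (const std::string& var)
  {
    return vars[var];
  }

  // append(): like assign() but, if the variable is only defined in an outer
  // scope, seed this scope's slot with a copy of that value first.
  //
  std::string&
  append (const std::string& var)
  {
    auto i (vars.find (var));
    if (i != vars.end ())
      return i->second;

    std::string& r (assign (var));

    for (const toy_scope* s (parent); s != nullptr; s = s->parent)
    {
      auto j (s->vars.find (var));
      if (j != s->vars.end ())
      {
        r = j->second; // Copy the outer value.
        break;
      }
    }

    return r;
  }
};

int
main ()
{
  toy_scope outer, inner;
  inner.parent = &outer;

  outer.vars["cxx.poptions"] = "-DNDEBUG";

  inner.append ("cxx.poptions") += " -DEXTRA";

  assert (inner.vars["cxx.poptions"] == "-DNDEBUG -DEXTRA");
  assert (outer.vars["cxx.poptions"] == "-DNDEBUG"); // Outer value untouched.
}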
+ // + // Note: since it can be modified on any lookup (including during the + // execute phase), the cache is protected by its own mutex shard. + // + using variable_override_cache = variable_cache<pair<const variable*, + const variable_map*>>; + + static variable_override_cache global_override_cache; + + // Set of buildfiles already loaded for this scope. The included + // buildfiles are checked against the project's root scope while + // imported -- against the global scope (global_scope). + // + public: + std::unordered_set<path> buildfiles; + + // Target types. + // + public: + target_type_map target_types; + + const target_type* + find_target_type (const string&, const scope** = nullptr) const; + + // Given a target name, figure out its type, taking into account + // extensions, special names (e.g., '.' and '..'), or anything else that + // might be relevant. Process the name (in place) by extracting (and + // returning) extension, adjusting dir/leaf, etc., (note that the dir is + // not necessarily normalized). Return NULL if not found. + // + pair<const target_type*, optional<string>> + find_target_type (name&, const location&) const; + + // Dynamically derive a new target type from an existing one. Return the + // reference to the target type and an indicator of whether it was + // actually created. + // + pair<reference_wrapper<const target_type>, bool> + derive_target_type (const string& name, const target_type& base); + + template <typename T> + pair<reference_wrapper<const target_type>, bool> + derive_target_type (const string& name) + { + return derive_target_type (name, T::static_type); + } + + // Rules. + // + public: + rule_map rules; + + // Operation callbacks. + // + // An entity (module, core) can register a function that will be called + // when an action is executed on the dir{} target that corresponds to this + // scope. The pre callback is called just before the recipe and the post + // -- immediately after. The callbacks are only called if the recipe + // (including noop recipe) is executed for the corresponding target. The + // callbacks should only be registered during the load phase. + // + // It only makes sense for callbacks to return target_state changed or + // unchanged and to throw failed in case of an error. These pre/post + // states will be merged with the recipe state and become the target + // state. See execute_recipe() for details. + // + public: + struct operation_callback + { + using callback = target_state (action, const scope&, const dir&); + + function<callback> pre; + function<callback> post; + }; + + using operation_callback_map = std::multimap<action_id, + operation_callback>; + + operation_callback_map operation_callbacks; + + // Extra root scope-only data. + // + public: + struct root_data + { + bool altn; // True if using alternative build file/directory naming. + + // Build file/directory naming scheme used by this project. 
+ // + const string& build_ext; // build or build2 (no dot) + const dir_path& build_dir; // build/ or build2/ + const path& buildfile_file; // buildfile or build2file + const path& buildignore_file; // buildignore or build2ignore + + const dir_path& root_dir; // build[2]/root/ + const dir_path& bootstrap_dir; // build[2]/bootstrap/ + + const path& bootstrap_file; // build[2]/bootstrap.build[2] + const path& root_file; // build[2]/root.build[2] + const path& export_file; // build[2]/export.build[2] + const path& src_root_file; // build[2]/bootstrap/src-root.build[2] + const path& out_root_file; // build[2]/bootstrap/src-root.build[2] + + // Meta/operations supported by this project. + // + build2::meta_operations meta_operations; + build2::operations operations; + + // Modules. + // + loaded_module_map modules; + + // Variable override cache (see above). + // + mutable variable_override_cache override_cache; + }; + + unique_ptr<root_data> root_extra; + + void + insert_operation (operation_id id, const operation_info& in) + { + root_extra->operations.insert (id, in); + } + + void + insert_meta_operation (meta_operation_id id, const meta_operation_info& in) + { + root_extra->meta_operations.insert (id, in); + } + + template <typename T> + T* + lookup_module (const string& name) const + { + return root_extra->modules.lookup<T> (name); + } + + public: + // RW access. + // + scope& + rw () const + { + assert (phase == run_phase::load); + return const_cast<scope&> (*this); + } + + // RW access to global scope (RO via global global_scope below). + // + scope& + global () {return *global_;} + + public: + static scope* global_; // Normally not accessed directly. + + private: + friend class parser; + friend class scope_map; + friend class temp_scope; + + // These two from <libbuild2/file.hxx> set strong_. + // + friend LIBBUILD2_SYMEXPORT void create_bootstrap_outer (scope&); + friend LIBBUILD2_SYMEXPORT scope& create_bootstrap_inner (scope&, + const dir_path&); + + explicit + scope (bool global): vars (global), target_vars (global) {} + + scope* parent_; + scope* root_; + scope* strong_ = nullptr; // Only set on root scopes. + // NULL means no strong amalgamtion. + }; + + inline ostream& + operator<< (ostream& os, const scope& s) + { + return os << s.out_path ().string (); // Always absolute. + } + + // Temporary scope. The idea is to be able to create a temporary scope in + // order not to change the variables in the current scope. Such a scope is + // not entered in to the scope map. As a result it can only be used as a + // temporary set of variables. In particular, defining targets directly in + // such a scope will surely end up badly. Defining any nested scopes will be + // as if defining such a scope in the parent (since path() returns parent's + // path). + // + class temp_scope: public scope + { + public: + temp_scope (scope& p) + : scope (false) // Not global. + { + out_path_ = p.out_path_; + src_path_ = p.src_path_; + parent_ = &p; + root_ = p.root_; + // No need to copy strong_ since we are never root scope. + } + }; + + // Scope map. + // + // Protected by the phase mutex. Note that the scope map is only for paths + // from the out tree. + // + using scope_map_base = dir_path_map<scope>; + + class scope_map: public scope_map_base + { + public: + // Note that we assume the first insertion into the map is always the + // global scope with empty key. 
+ // + LIBBUILD2_SYMEXPORT iterator + insert (const dir_path&, bool root = false); + + // Find the most qualified scope that encompasses this path. + // + const scope& + find (const dir_path& d) const + { + return const_cast<scope_map*> (this)->find (d); + } + + const scope& + find (const path& p) const + { + // Natural thing to do here would be to call find (p.directory ()). + // However, there could be a situation where the passed path is a + // directory (i.e., the calling code does not know what it is dealing + // with), so let's use the whole path. + // + // In fact, ideally, we should have used path_map instead of + // dir_path_map to be able to search for both paths without any casting + // (and copies). But currently we have too much stuff pointing to the + // key. + // + return find (path_cast<dir_path> (p)); + } + + // RW access. + // + public: + scope_map& + rw () const + { + assert (phase == run_phase::load); + return const_cast<scope_map&> (*this); + } + + scope_map& + rw (scope&) const {return const_cast<scope_map&> (*this);} + + private: + LIBBUILD2_SYMEXPORT static scope_map instance; + + // Entities that can access bypassing the lock proof. + // + friend int main (int, char*[]); + friend LIBBUILD2_SYMEXPORT variable_overrides reset (const strings&); + + LIBBUILD2_SYMEXPORT scope& + find (const dir_path&); + + public: + // For var_pool initialization. + // + LIBBUILD2_SYMEXPORT static const scope_map& cinstance; + }; + + LIBBUILD2_SYMEXPORT extern const scope_map& scopes; + LIBBUILD2_SYMEXPORT extern const scope* global_scope; +} + +#include <libbuild2/scope.ixx> + +#endif // LIBBUILD2_SCOPE_HXX diff --git a/libbuild2/scope.ixx b/libbuild2/scope.ixx new file mode 100644 index 0000000..3498ae0 --- /dev/null +++ b/libbuild2/scope.ixx @@ -0,0 +1,54 @@ +// file : libbuild2/scope.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + // scope + // + inline scope* scope:: + strong_scope () + { + return root_ != nullptr + ? root_->strong_ != nullptr ? root_->strong_ : root_ + : nullptr; + } + + inline const scope* scope:: + strong_scope () const + { + return root_ != nullptr + ? root_->strong_ != nullptr ? root_->strong_ : root_ + : nullptr; + } + + inline scope* scope:: + weak_scope () + { + scope* r (root_); + if (r != nullptr) + for (; r->parent_->root_ != nullptr; r = r->parent_->root_) ; + return r; + } + + inline const scope* scope:: + weak_scope () const + { + const scope* r (root_); + if (r != nullptr) + for (; r->parent_->root_ != nullptr; r = r->parent_->root_) ; + return r; + } + + inline bool scope:: + sub_root (const scope& r) const + { + // Scan the parent root scope chain looking for this scope. 
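The strong_scope()/weak_scope()/sub_root() helpers above all walk the parent/root chain in slightly different ways. The following sketch reproduces the weak (outermost amalgamation) walk and the sub-root test on toy scopes with explicit parent/root pointers; unlike the real code it adds null checks instead of relying on the global-scope invariants:

#include <cassert>

struct toy_scope
{
  const toy_scope* parent = nullptr;
  const toy_scope* root   = nullptr; // Innermost enclosing project root.
};

// Outermost amalgamation root: keep stepping from a root to its parent's
// root until there is no enclosing project (cf. scope::weak_scope() above).
//
const toy_scope*
weak_scope (const toy_scope& s)
{
  const toy_scope* r (s.root);
  if (r != nullptr)
    for (; r->parent != nullptr && r->parent->root != nullptr;
         r = r->parent->root) ;
  return r;
}

// Is r a sub-root of outer? Scan r's chain of enclosing roots.
//
bool
sub_root (const toy_scope& outer, const toy_scope& r)
{
  for (const toy_scope* pr (&r);
       pr->parent != nullptr && (pr = pr->parent->root) != nullptr; )
    if (pr == &outer)
      return true;

  return false;
}

int
main ()
{
  toy_scope global;                          // No project.
  toy_scope amalg; amalg.parent = &global; amalg.root = &amalg;
  toy_scope proj;  proj.parent  = &amalg;  proj.root  = &proj;
  toy_scope base;  base.parent  = &proj;   base.root  = &proj;

  assert (weak_scope (base) == &amalg);
  assert (sub_root (amalg, proj));
  assert (!sub_root (proj, amalg));
}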
+ // + for (const scope* pr (&r); (pr = pr->parent_->root_) != nullptr; ) + if (pr == this) + return true; + + return false; + } +} diff --git a/libbuild2/search.cxx b/libbuild2/search.cxx new file mode 100644 index 0000000..1ff9c73 --- /dev/null +++ b/libbuild2/search.cxx @@ -0,0 +1,244 @@ +// file : libbuild2/search.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/search.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/filesystem.hxx> // mtime() +#include <libbuild2/prerequisite.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + const target* + search_existing_target (const prerequisite_key& pk) + { + tracer trace ("search_existing_target"); + + const target_key& tk (pk.tk); + + // Look for an existing target in the prerequisite's scope. + // + dir_path d; + if (tk.dir->absolute ()) + d = *tk.dir; // Already normalized. + else + { + d = tk.out->empty () ? pk.scope->out_path () : pk.scope->src_path (); + + if (!tk.dir->empty ()) + { + d /= *tk.dir; + d.normalize (); + } + } + + // Prerequisite's out directory can be one of the following: + // + // empty This means out is undetermined and we simply search for a + // target that is in the out tree which happens to be indicated + // by an empty value, so we can just pass this as is. + // + // absolute This is the "final" value that doesn't require any processing + // and we simply use it as is. + // + // relative The out directory was specified using @-syntax as relative (to + // the prerequisite's scope) and we need to complete it similar + // to how we complete the relative dir above. + // + dir_path o; + if (!tk.out->empty ()) + { + if (tk.out->absolute ()) + o = *tk.out; // Already normalized. + else + { + o = pk.scope->out_path (); + o /= *tk.out; + o.normalize (); + } + + // Drop out if it is the same as src (in-src build). + // + if (o == d) + o.clear (); + } + + const target* t (targets.find (*tk.type, d, o, *tk.name, tk.ext, trace)); + + if (t != nullptr) + l5 ([&]{trace << "existing target " << *t + << " for prerequisite " << pk;}); + + return t; + } + + const target* + search_existing_file (const prerequisite_key& cpk) + { + tracer trace ("search_existing_file"); + + const target_key& ctk (cpk.tk); + const scope* s (cpk.scope); + + path f; + + if (ctk.dir->absolute ()) + f = *ctk.dir; // Already normalized. + else + { + f = s->src_path (); + + if (!ctk.dir->empty ()) + { + f /= *ctk.dir; + f.normalize (); + } + } + + // Bail out if not inside project's src_root. + // + if (s == nullptr || !f.sub (s->root_scope ()->src_path ())) + return nullptr; + + // Figure out the extension. Pretty similar logic to file::derive_path(). + // + optional<string> ext (ctk.ext); + + if (!ext) + { + if (auto f = ctk.type->fixed_extension) + ext = f (ctk, s->root_scope ()); + else if (auto f = ctk.type->default_extension) + ext = f (ctk, *s, nullptr, true); + + if (!ext) + { + // What should we do here, fail or say we didn't find anything? + // Current think is that if the target type couldn't find the default + // extension, then we simply shouldn't search for any existing files + // (of course, if the user specified the extension explicitly, we will + // still do so). + // + l4 ([&]{trace << "no default extension for prerequisite " << cpk;}); + return nullptr; + } + } + + // Make a copy with the updated extension. 
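The check performed just below composes dir/name[.ext] and treats a nonexistent-file timestamp as "not found". A rough standalone equivalent, using std::filesystem (an assumption made for this sketch; the real code uses butl's mtime() and a timestamp_nonexistent sentinel):

#include <filesystem>
#include <iostream>
#include <optional>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Compose dir/name[.ext] and return its last write time, or nullopt if the
// file does not exist (or cannot be stat'ed).
//
std::optional<fs::file_time_type>
find_existing_file (const fs::path& dir,
                    const std::string& name,
                    const std::optional<std::string>& ext)
{
  fs::path f (dir / name);

  if (ext && !ext->empty ())
    f += '.' + *ext;

  std::error_code ec;
  fs::file_time_type mt (fs::last_write_time (f, ec));

  if (ec) // Most likely does not exist.
    return std::nullopt;

  return mt;
}

int
main ()
{
  if (find_existing_file (".", "buildfile", std::nullopt))
    std::cout << "found existing file\n";
  else
    std::cout << "no existing file\n";
}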
+ // + const prerequisite_key pk { + cpk.proj, {ctk.type, ctk.dir, ctk.out, ctk.name, ext}, cpk.scope}; + const target_key& tk (pk.tk); + + // Check if there is a file. + // + f /= *tk.name; + + if (!ext->empty ()) + { + f += '.'; + f += *ext; + } + + timestamp mt (mtime (f)); + + if (mt == timestamp_nonexistent) + { + l4 ([&]{trace << "no existing file for prerequisite " << cpk;}); + return nullptr; + } + + l5 ([&]{trace << "found existing file " << f << " for prerequisite " + << cpk;}); + + dir_path d (f.directory ()); + + // Calculate the corresponding out. We have the same three options for the + // prerequisite's out directory as in search_existing_target(). If it is + // empty (undetermined), then we need to calculate it since this target + // will be from the src tree. + // + // In the other two cases we use the prerequisite's out (in case it is + // relative, we need to complete it, which is @@ OUT TODO). Note that we + // blindly trust the user's value which can be used for some interesting + // tricks, for example: + // + // ../cxx{foo}@./ + // + dir_path out; + + if (tk.out->empty ()) + { + if (s->out_path () != s->src_path ()) + out = out_src (d, *s->root_scope ()); + } + else + out = *tk.out; + + // Find or insert. Note that we are using our updated extension. + // + auto r ( + targets.insert ( + *tk.type, move (d), move (out), *tk.name, ext, true, trace)); + + // Has to be a file_target. + // + const file& t (dynamic_cast<const file&> (r.first)); + + l5 ([&]{trace << (r.second ? "new" : "existing") << " target " << t + << " for prerequisite " << cpk;}); + + t.mtime (mt); + t.path (move (f)); + + return &t; + } + + const target& + create_new_target (const prerequisite_key& pk) + { + tracer trace ("create_new_target"); + + const target_key& tk (pk.tk); + + // We default to the target in this directory scope. + // + dir_path d; + if (tk.dir->absolute ()) + d = *tk.dir; // Already normalized. + else + { + d = pk.scope->out_path (); + + if (!tk.dir->empty ()) + { + d /= *tk.dir; + d.normalize (); + } + } + + // Find or insert. + // + // @@ OUT: same story as in search_existing_target() re out. + // + auto r (targets.insert (*tk.type, + move (d), + *tk.out, + *tk.name, + tk.ext, + true /* implied */, + trace)); + + const target& t (r.first); + l5 ([&]{trace << (r.second ? "new" : "existing") << " target " << t + << " for prerequisite " << pk;}); + return t; + } +} diff --git a/libbuild2/search.hxx b/libbuild2/search.hxx new file mode 100644 index 0000000..b281b12 --- /dev/null +++ b/libbuild2/search.hxx @@ -0,0 +1,41 @@ +// file : libbuild2/search.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SEARCH_HXX +#define LIBBUILD2_SEARCH_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class target; + class prerequisite_key; + + // Search for an existing target in this prerequisite's scope. + // + LIBBUILD2_SYMEXPORT const target* + search_existing_target (const prerequisite_key&); + + // Search for an existing file. If the prerequisite directory is relative, + // then look in the scope's src directory. Otherwise, if the absolute + // directory is inside the project's root scope, look there. In case of + // the absolute directory, if the scope is NULL, assume the file is not + // in src. + // + // Originally the plan was to have a target-type specific variable that + // contains the search paths. 
But there wasn't any need for this yet. + // + LIBBUILD2_SYMEXPORT const target* + search_existing_file (const prerequisite_key&); + + // Create a new target in this prerequisite's scope. + // + LIBBUILD2_SYMEXPORT const target& + create_new_target (const prerequisite_key&); +} + +#endif // LIBBUILD2_SEARCH_HXX diff --git a/libbuild2/spec.cxx b/libbuild2/spec.cxx new file mode 100644 index 0000000..3ad6b7d --- /dev/null +++ b/libbuild2/spec.cxx @@ -0,0 +1,111 @@ +// file : libbuild2/spec.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/spec.hxx> + +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + ostream& + operator<< (ostream& os, const targetspec& s) + { + if (!s.src_base.empty ()) + { + // Avoid printing './' in './@...', similar to what we do for the + // {target,prerequisite}_key. + // + if (stream_verb (os).path < 1) + { + const string& r (diag_relative (s.src_base, false)); + + if (!r.empty ()) + os << r << '@'; + } + else + os << s.src_base << '@'; + } + + os << s.name; + return os; + } + + ostream& + operator<< (ostream& os, const opspec& s) + { + bool hn (!s.name.empty ()); + bool ht (!s.empty ()); + + os << (hn ? "\"" : "") << s.name << (hn ? "\"" : ""); + + if (hn && ht) + os << '('; + + for (auto b (s.begin ()), i (b); i != s.end (); ++i) + os << (i != b ? " " : "") << *i; + + for (const value& v: s.params) + { + os << ", "; + + if (v) + { + names storage; + os << reverse (v, storage); + } + else + os << "[null]"; + } + + if (hn && ht) + os << ')'; + + return os; + } + + ostream& + operator<< (ostream& os, const metaopspec& s) + { + bool hn (!s.name.empty ()); + bool ho (!s.empty ()); + + os << (hn ? "\'" : "") << s.name << (hn ? "\'" : ""); + + if (hn && ho) + os << '('; + + for (auto b (s.begin ()), i (b); i != s.end (); ++i) + os << (i != b ? " " : "") << *i; + + for (const value& v: s.params) + { + os << ", "; + + if (v) + { + names storage; + os << reverse (v, storage); + } + else + os << "[null]"; + } + + if (hn && ho) + os << ')'; + + return os; + } + + ostream& + operator<< (ostream& os, const buildspec& s) + { + for (auto b (s.begin ()), i (b); i != s.end (); ++i) + os << (i != b ? " " : "") << *i; + + return os; + } +} diff --git a/libbuild2/spec.hxx b/libbuild2/spec.hxx new file mode 100644 index 0000000..b18f665 --- /dev/null +++ b/libbuild2/spec.hxx @@ -0,0 +1,72 @@ +// file : libbuild2/spec.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_SPEC_HXX +#define LIBBUILD2_SPEC_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/variable.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + + struct targetspec + { + typedef build2::name name_type; + + explicit + targetspec (name_type n): name (move (n)) {} + targetspec (dir_path sb, name_type n) + : src_base (move (sb)), name (move (n)) {} + + dir_path src_base; + name_type name; + + // The rest is calculated and cached. + // + scope* root_scope = nullptr; + dir_path out_base; + path buildfile; // Empty if implied. 
+ bool forwarded = false; + }; + + struct opspec: vector<targetspec> + { + opspec () = default; + opspec (string n): name (move (n)) {} + + string name; + values params; + }; + + struct metaopspec: vector<opspec> + { + metaopspec () = default; + metaopspec (string n): name (move (n)) {} + + string name; + values params; + }; + + typedef vector<metaopspec> buildspec; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const targetspec&); + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const opspec&); + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const metaopspec&); + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const buildspec&); +} + +#endif // LIBBUILD2_SPEC_HXX diff --git a/libbuild2/target-key.hxx b/libbuild2/target-key.hxx new file mode 100644 index 0000000..e23991d --- /dev/null +++ b/libbuild2/target-key.hxx @@ -0,0 +1,106 @@ +// file : libbuild2/target-key.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TARGET_KEY_HXX +#define LIBBUILD2_TARGET_KEY_HXX + +#include <map> +#include <cstring> // strcmp() + +#include <libbutl/utility.mxx> // compare_c_string + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target-type.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Light-weight (by being shallow-pointing) target key. + // + class target_key + { + public: + const target_type* const type; + const dir_path* const dir; // Can be relative if part of prerequisite_key. + const dir_path* const out; // Can be relative if part of prerequisite_key. + const string* const name; + mutable optional<string> ext; // Absent - unspecified, empty - none. + + template <typename T> + bool is_a () const {return type->is_a<T> ();} + bool is_a (const target_type& tt) const {return type->is_a (tt);} + }; + + inline bool + operator== (const target_key& x, const target_key& y) + { + if (x.type != y.type || + *x.dir != *y.dir || + *x.out != *y.out || + *x.name != *y.name) + return false; + + // Unless fixed, unspecified and specified extensions are assumed equal. + // + const target_type& tt (*x.type); + + if (tt.fixed_extension == nullptr) + return !x.ext || !y.ext || *x.ext == *y.ext; + else + { + // Note that for performance reasons here we use the specified extension + // without calling fixed_extension(). + // + const char* xe (x.ext + ? x.ext->c_str () + : tt.fixed_extension (x, nullptr /* root scope */)); + + const char* ye (y.ext + ? y.ext->c_str () + : tt.fixed_extension (y, nullptr /* root scope */)); + + return strcmp (xe, ye) == 0; + } + } + + inline bool + operator!= (const target_key& x, const target_key& y) {return !(x == y);} + + // If the target type has a custom print function, call that. Otherwise, + // call to_stream(). Both are defined in target.cxx. + // + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const target_key&); + + LIBBUILD2_SYMEXPORT ostream& + to_stream (ostream&, const target_key&, optional<stream_verbosity> = nullopt); +} + +namespace std +{ + // Note that we ignore the extension when calculating the hash because of + // its special "unspecified" logic (see operator== above). 
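The reason the hash specialization below must ignore the extension is the usual hash/equality consistency requirement: since an absent ("unspecified") extension compares equal to any specified one, two keys that compare equal could otherwise hash differently and never find each other in an unordered container. A toy demonstration (toy_key and toy_key_hash are made-up names):

#include <cassert>
#include <cstddef>
#include <functional>
#include <optional>
#include <string>
#include <unordered_set>

// Toy key with target_key-like equality: an absent extension matches any
// extension. Because of that, the hash deliberately does not mix in the
// extension; otherwise equal keys could land in different buckets.
//
struct toy_key
{
  std::string name;
  std::optional<std::string> ext;
};

inline bool
operator== (const toy_key& x, const toy_key& y)
{
  return x.name == y.name && (!x.ext || !y.ext || *x.ext == *y.ext);
}

struct toy_key_hash
{
  std::size_t
  operator() (const toy_key& k) const noexcept
  {
    return std::hash<std::string> () (k.name); // Note: ext not hashed.
  }
};

int
main ()
{
  std::unordered_set<toy_key, toy_key_hash> s;
  s.insert (toy_key {"foo", std::string ("cxx")});

  // An "extension unspecified" query still finds the entry above.
  //
  assert (s.find (toy_key {"foo", std::nullopt}) != s.end ());
}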
+ // + template <> + struct hash<build2::target_key> + { + using argument_type = build2::target_key; + using result_type = size_t; + + size_t + operator() (const build2::target_key& k) const noexcept + { + return build2::combine_hash ( + hash<const build2::target_type*> () (k.type), + hash<build2::dir_path> () (*k.dir), + hash<build2::dir_path> () (*k.out), + hash<string> () (*k.name)); + } + }; +} + +#endif // LIBBUILD2_TARGET_KEY_HXX diff --git a/libbuild2/target-state.hxx b/libbuild2/target-state.hxx new file mode 100644 index 0000000..5bc6895 --- /dev/null +++ b/libbuild2/target-state.hxx @@ -0,0 +1,46 @@ +// file : libbuild2/target-state.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TARGET_STATE_HXX +#define LIBBUILD2_TARGET_STATE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // The order of the enumerators is arranged so that their integral values + // indicate whether one "overrides" the other in the "merge" operator| + // (see below). + // + // Note that postponed is "greater" than unchanged since it may result in + // the changed state. + // + enum class target_state: uint8_t + { + unknown, + unchanged, + postponed, + busy, + changed, + failed, + group // Target's state is the group's state. + }; + + inline target_state& + operator |= (target_state& l, target_state r) + { + if (static_cast<uint8_t> (r) > static_cast<uint8_t> (l)) + l = r; + + return l; + } + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, target_state); // target.cxx +} + +#endif // LIBBUILD2_TARGET_STATE_HXX diff --git a/libbuild2/target-type.hxx b/libbuild2/target-type.hxx new file mode 100644 index 0000000..3537c90 --- /dev/null +++ b/libbuild2/target-type.hxx @@ -0,0 +1,208 @@ +// file : libbuild2/target-type.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TARGET_TYPE_HXX +#define LIBBUILD2_TARGET_TYPE_HXX + +#include <map> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class scope; + class target; + class target_key; + class prerequisite_key; + + // Target type. + // + // Note that we assume there is always a single instance of this class for + // any target type. As a result, we can use address comparison to determine + // if two target types are the same. + // + // If the extension derivation functions are NULL, then it means this target + // type does not use extensions. Note that this is relied upon when deciding + // whether to print the extension. + // + // The fixed extension function should return the fixed extension (which can + // point to the key's ext member; note that for performance reasons we + // currently only verify the explicitly specified extension on target + // insersion -- see target_key comparison for details). + // + // The root scope argument to the fixed extension function may be NULL which + // means the root scope is not known. A target type that relies on this must + // be prepared to resolve the root scope itself and handle the cases where + // the target is not (yet) in any project (this is currently only used to + // handle the alternative build file/directory naming scheme and hopefully + // it will stay that way). 
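As a small usage illustration of the target_state merge operator defined in target-state.hxx above: the "stronger" (numerically greater) state wins, which is what allows, for example, a pre/post operation callback result to be folded into the recipe's own state. The enum below is a copy of the same idea under a different name, not the real type:

#include <cassert>
#include <cstdint>

enum class toy_state: std::uint8_t
{
  unknown, unchanged, postponed, busy, changed, failed, group
};

// Same merge rule as target_state's operator|= above: the stronger state wins.
//
inline toy_state&
operator|= (toy_state& l, toy_state r)
{
  if (static_cast<std::uint8_t> (r) > static_cast<std::uint8_t> (l))
    l = r;
  return l;
}

int
main ()
{
  toy_state ts (toy_state::unchanged);

  ts |= toy_state::unchanged; assert (ts == toy_state::unchanged);
  ts |= toy_state::postponed; assert (ts == toy_state::postponed); // May yet change.
  ts |= toy_state::changed;   assert (ts == toy_state::changed);
  ts |= toy_state::unchanged; assert (ts == toy_state::changed);   // Changed sticks.
}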
+ // + // The default extension is used in two key (there are others) places: + // search_existing_file() (called for a prerequisite with the last argument + // true) and in target::derive_extension() (called for a target with the + // last argument false); see their respective implementations for details. + // The third argument is the default extension that is supplied (e.g., by a + // rule) to derive_extension(), if any. The implementation can decide which + // takes precedence, etc (see the exe{} target type for some interesting + // logic). If the default extension function returns NULL, then it means the + // default extension for this target could not be derived. + // + // If the pattern function is not NULL, then it is used to amend a pattern + // or match (reverse is false) and then, if the amendment call returned + // true, to reverse it in the resulting matches. The pattern function for a + // non-directory target must first call target::split_name() if reverse is + // false. + // + struct LIBBUILD2_SYMEXPORT target_type + { + const char* name; + const target_type* base; + + target* (*factory) (const target_type&, dir_path, dir_path, string); + + const char* (*fixed_extension) (const target_key&, + const scope* root); + optional<string> (*default_extension) (const target_key&, + const scope& base, + const char*, + bool search); + + bool (*pattern) (const target_type&, + const scope& base, + string& name, + optional<string>& extension, + const location&, + bool reverse); + + void (*print) (ostream&, const target_key&); + + const target* (*search) (const target&, const prerequisite_key&); + + bool see_through; // A group with the default "see through" semantics. + + template <typename T> + bool + is_a () const {return is_a (T::static_type);} + + bool + is_a (const target_type& tt) const + { + return this == &tt || (base != nullptr && is_a_base (tt)); + } + + bool + is_a_base (const target_type&) const; // Defined in target.cxx + }; + + inline bool + operator< (const target_type& x, const target_type& y) {return &x < &y;} + + inline bool + operator== (const target_type& x, const target_type& y) {return &x == &y;} + + inline bool + operator!= (const target_type& x, const target_type& y) {return &x != &y;} + + inline ostream& + operator<< (ostream& os, const target_type& tt) {return os << tt.name;} + + // Target type map. + // + class target_type_map + { + public: + // Target type name to target type mapping. + // + const target_type* + find (const string& n) const + { + auto i (type_map_.find (n)); + return i != type_map_.end () ? &i->second.get () : nullptr; + } + + bool + empty () const + { + return type_map_.empty (); + } + + const target_type& + insert (const target_type& tt) + { + type_map_.emplace (tt.name, target_type_ref (tt)); + return tt; + } + + template <typename T> + const target_type& + insert () + { + return insert (T::static_type); + } + + pair<reference_wrapper<const target_type>, bool> + insert (const string& n, unique_ptr<target_type>&& tt) + { + target_type& rtt (*tt); // Save a non-const reference to the object. + + auto p (type_map_.emplace (n, target_type_ref (move (tt)))); + + // Patch the alias name to use the map's key storage. + // + if (p.second) + rtt.name = p.first->first.c_str (); + + return pair<reference_wrapper<const target_type>, bool> ( + p.first->second.get (), p.second); + } + + // File name to target type mapping. + // + const target_type* + find_file (const string& n) const + { + auto i (file_map_.find (n)); + return i != file_map_.end () ? 
&i->second.get () : nullptr; + } + + void + insert_file (const string& n, const target_type& tt) + { + file_map_.emplace (n, tt); + } + + private: + struct target_type_ref + { + // Like reference_wrapper except it sometimes deletes the target type. + // + explicit + target_type_ref (const target_type& r): p_ (&r), d_ (false) {} + + explicit + target_type_ref (unique_ptr<target_type>&& p) + : p_ (p.release ()), d_ (true) {} + + target_type_ref (target_type_ref&& r) + : p_ (r.p_), d_ (r.d_) {r.p_ = nullptr;} + + ~target_type_ref () {if (p_ != nullptr && d_) delete p_;} + + explicit operator const target_type& () const {return *p_;} + const target_type& get () const {return *p_;} + + private: + const target_type* p_; + bool d_; + }; + + std::map<string, target_type_ref> type_map_; + std::map<string, reference_wrapper<const target_type>> file_map_; + }; +} + +#endif // LIBBUILD2_TARGET_TYPE_HXX diff --git a/libbuild2/target.cxx b/libbuild2/target.cxx new file mode 100644 index 0000000..c823e85 --- /dev/null +++ b/libbuild2/target.cxx @@ -0,0 +1,1260 @@ +// file : libbuild2/target.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/target.hxx> + +#include <libbuild2/file.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/search.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + // target_type + // + bool target_type:: + is_a_base (const target_type& tt) const + { + for (const target_type* b (base); b != nullptr; b = b->base) + if (*b == tt) + return true; + + return false; + } + + // target_state + // + static const char* const target_state_[] = + { + "unknown", + "unchanged", + "postponed", + "busy", + "changed", + "failed", + "group" + }; + + ostream& + operator<< (ostream& os, target_state ts) + { + return os << target_state_[static_cast<uint8_t> (ts)]; + } + + // recipe + // + const recipe empty_recipe; + const recipe noop_recipe (&noop_action); + const recipe default_recipe (&default_action); + const recipe group_recipe (&group_action); + + // target + // + const target::prerequisites_type target::empty_prerequisites_; + + target:: + ~target () + { + clear_data (); + } + + const string& target:: + ext (string v) + { + ulock l (targets.mutex_); + + // Once the extension is set, it is immutable. However, it is possible + // that someone has already "branded" this target with a different + // extension. + // + optional<string>& e (*ext_); + + if (!e) + e = move (v); + else if (*e != v) + { + string o (*e); + l.unlock (); + + fail << "conflicting extensions '" << o << "' and '" << v << "' " + << "for target " << *this; + } + + return *e; + } + + group_view target:: + group_members (action) const + { + assert (false); // Not a group or doesn't expose its members. + return group_view {nullptr, 0}; + } + + const scope& target:: + base_scope () const + { + // If this target is from the src tree, use its out directory to find + // the scope. + // + return scopes.find (out_dir ()); + } + + const scope& target:: + root_scope () const + { + // This is tricky to cache so we do the lookup for now. 
+ // + const scope* r (base_scope ().root_scope ()); + assert (r != nullptr); + return *r; + } + + pair<lookup, size_t> target:: + find_original (const variable& var, bool target_only) const + { + pair<lookup, size_t> r (lookup (), 0); + + ++r.second; + { + auto p (vars.find (var)); + if (p.first != nullptr) + r.first = lookup (*p.first, p.second, vars); + } + + const target* g (nullptr); + + if (!r.first) + { + ++r.second; + + // Skip looking up in the ad hoc group, which is semantically the + // first/primary member. + // + if ((g = group == nullptr + ? nullptr + : group->adhoc_group () ? group->group : group)) + { + auto p (g->vars.find (var)); + if (p.first != nullptr) + r.first = lookup (*p.first, p.second, g->vars); + } + } + + // Delegate to scope's find_original(). + // + if (!r.first) + { + if (!target_only) + { + auto p (base_scope ().find_original ( + var, + &type (), + &name, + g != nullptr ? &g->type () : nullptr, + g != nullptr ? &g->name : nullptr)); + + r.first = move (p.first); + r.second = r.first ? r.second + p.second : p.second; + } + else + r.second = size_t (~0); + } + + return r; + } + + value& target:: + append (const variable& var) + { + // Note: see also prerequisite::append() if changing anything here. + + // Note that here we want the original value without any overrides + // applied. + // + lookup l (find_original (var).first); + + if (l.defined () && l.belongs (*this)) // Existing var in this target. + return vars.modify (l); // Ok since this is original. + + value& r (assign (var)); // NULL. + + if (l.defined ()) + r = *l; // Copy value (and type) from the outer scope. + + return r; + } + + pair<lookup, size_t> target::opstate:: + find_original (const variable& var, bool target_only) const + { + pair<lookup, size_t> r (lookup (), 0); + + ++r.second; + { + auto p (vars.find (var)); + if (p.first != nullptr) + r.first = lookup (*p.first, p.second, vars); + } + + // Delegate to target's find_original(). + // + if (!r.first) + { + auto p (target_->find_original (var, target_only)); + + r.first = move (p.first); + r.second = r.first ? r.second + p.second : p.second; + } + + return r; + } + + optional<string> target:: + split_name (string& v, const location& loc) + { + assert (!v.empty ()); + + // We treat a single trailing dot as "specified no extension", double dots + // as a single trailing dot (that is, an escape sequence which can be + // repeated any number of times; in such cases we naturally assume there + // is no default extension) and triple dots as "unspecified (default) + // extension" (used when the extension in the name is not "ours", for + // example, cxx{foo.test...} for foo.test.cxx). An odd number of dots + // other than one or three is invalid. + // + optional<string> r; + + size_t p; + if (v.back () != '.') + { + if ((p = path::traits_type::find_extension (v)) != string::npos) + r = string (v.c_str () + p + 1); + } + else + { + if ((p = v.find_last_not_of ('.')) == string::npos) + fail (loc) << "invalid target name '" << v << "'"; + + p++; // Position of the first trailing dot. + size_t n (v.size () - p); // Number of the trailing dots. + + if (n == 1) + r = string (); + else if (n == 3) + ; + else if (n % 2 == 0) + { + p += n / 2; // Keep half of the dots. 
+ r = string (); + } + else + fail (loc) << "invalid trailing dot sequence in target name '" + << v << "'"; + } + + if (p != string::npos) + v.resize (p); + + return r; + } + + void target:: + combine_name (string& v, const optional<string>& e, bool de) + { + if (v.back () == '.') + { + assert (e && e->empty ()); + + size_t p (v.find_last_not_of ('.')); + assert (p != string::npos); + + p++; // Position of the first trailing dot. + size_t n (v.size () - p); // Number of the trailing dots. + v.append (n, '.'); // Double them. + } + else if (e) + { + v += '.'; + v += *e; // Empty or not. + } + else if (de) + { + if (path::traits_type::find_extension (v) != string::npos) + v += "..."; + } + } + + // target_set + // + target_set targets; + + const target* target_set:: + find (const target_key& k, tracer& trace) const + { + slock sl (mutex_); + map_type::const_iterator i (map_.find (k)); + + if (i == map_.end ()) + return nullptr; + + const target& t (*i->second); + optional<string>& ext (i->first.ext); + + if (ext != k.ext) + { + ulock ul; // Keep locked for trace. + + if (k.ext) + { + // To update the extension we have to re-lock for exclusive access. + // Between us releasing the shared lock and acquiring unique the + // extension could change and possibly a new target that matches the + // key could be inserted. In this case we simply re-run find (). + // + sl.unlock (); + ul = ulock (mutex_); + + if (ext) // Someone set the extension. + { + ul.unlock (); + return find (k, trace); + } + } + + l5 ([&]{ + diag_record r (trace); + r << "assuming target "; + to_stream (r.os, + target_key {&t.type (), &t.dir, &t.out, &t.name, ext}, + stream_verb_max); // Always print the extension. + r << " is the same as the one with "; + + if (!k.ext) + r << "unspecified extension"; + else if (k.ext->empty ()) + r << "no extension"; + else + r << "extension " << *k.ext; + }); + + if (k.ext) + ext = k.ext; + } + + return &t; + } + + pair<target&, ulock> target_set:: + insert_locked (const target_type& tt, + dir_path dir, + dir_path out, + string name, + optional<string> ext, + bool implied, + tracer& trace) + { + target_key tk {&tt, &dir, &out, &name, move (ext)}; + target* t (const_cast<target*> (find (tk, trace))); + + if (t == nullptr) + { + // We sometimes call insert() even if we expect to find an existing + // target in order to keep the same code (see cc/search_library()). + // + assert (phase != run_phase::execute); + + optional<string> e ( + tt.fixed_extension != nullptr + ? string (tt.fixed_extension (tk, nullptr /* root scope */)) + : move (tk.ext)); + + t = tt.factory (tt, move (dir), move (out), move (name)); + + // Re-lock for exclusive access. In the meantime, someone could have + // inserted this target so emplace() below could return false, in which + // case we proceed pretty much like find() except already under the + // exclusive lock. + // + ulock ul (mutex_); + + auto p (map_.emplace (target_key {&tt, &t->dir, &t->out, &t->name, e}, + unique_ptr<target> (t))); + + map_type::iterator i (p.first); + + if (p.second) + { + t->ext_ = &i->first.ext; + t->implied = implied; + t->state.data[0].target_ = t; + t->state.data[1].target_ = t; + return pair<target&, ulock> (*t, move (ul)); + } + + // The "tail" of find(). 
+ // + t = i->second.get (); + optional<string>& ext (i->first.ext); + + if (ext != e) + { + l5 ([&]{ + diag_record r (trace); + r << "assuming target "; + to_stream ( + r.os, + target_key {&t->type (), &t->dir, &t->out, &t->name, ext}, + stream_verb_max); // Always print the extension. + r << " is the same as the one with "; + + if (!e) + r << "unspecified extension"; + else if (e->empty ()) + r << "no extension"; + else + r << "extension " << *e; + }); + + if (e) + ext = e; + } + + // Fall through (continue as if the first find() returned this target). + } + + if (!implied) + { + // The implied flag can only be cleared during the load phase. + // + assert (phase == run_phase::load); + + // Clear the implied flag. + // + if (t->implied) + t->implied = false; + } + + return pair<target&, ulock> (*t, ulock ()); + } + + ostream& + to_stream (ostream& os, const target_key& k, optional<stream_verbosity> osv) + { + stream_verbosity sv (osv ? *osv : stream_verb (os)); + uint16_t dv (sv.path); + uint16_t ev (sv.extension); + + // If the name is empty, then we want to print the last component of the + // directory inside {}, e.g., dir{bar/}, not bar/dir{}. + // + bool n (!k.name->empty ()); + + // Note: relative() returns empty for './'. + // + const dir_path& rd (dv < 1 ? relative (*k.dir) : *k.dir); // Relative. + const dir_path& pd (n ? rd : rd.directory ()); // Parent. + + if (!pd.empty ()) + { + if (dv < 1) + os << diag_relative (pd); + else + os << pd.representation (); + } + + const target_type& tt (*k.type); + + os << tt.name << '{'; + + if (n) + { + os << *k.name; + + // If the extension derivation functions are NULL, then it means this + // target type doesn't use extensions. + // + if (tt.fixed_extension != nullptr || tt.default_extension != nullptr) + { + // For verbosity level 0 we don't print the extension. For 1 we print + // it if there is one. For 2 we print 'foo.?' if it hasn't yet been + // assigned and 'foo.' if it is assigned as "no extension" (empty). + // + if (ev > 0 && (ev > 1 || (k.ext && !k.ext->empty ()))) + { + os << '.' << (k.ext ? *k.ext : "?"); + } + } + else + assert (!k.ext); + } + else + os << (rd.empty () ? dir_path (".") : rd.leaf ()).representation (); + + os << '}'; + + // If this target is from src, print its out. + // + if (!k.out->empty ()) + { + if (dv < 1) + { + // Don't print '@./'. + // + const string& o (diag_relative (*k.out, false)); + + if (!o.empty ()) + os << '@' << o; + } + else + os << '@' << *k.out; + } + + return os; + } + + ostream& + operator<< (ostream& os, const target_key& k) + { + if (auto p = k.type->print) + p (os, k); + else + to_stream (os, k, stream_verb (os)); + + return os; + } + + // mtime_target + // + timestamp mtime_target:: + mtime () const + { + // Figure out from which target we should get the value. + // + const mtime_target* t (this); + + switch (phase) + { + case run_phase::load: break; + case run_phase::match: + { + // Similar logic to matched_state_impl(). + // + const opstate& s (state[action () /* inner */]); + size_t o (s.task_count.load (memory_order_relaxed) - // Synchronized. + target::count_base ()); + + if (o != target::offset_applied && o != target::offset_executed) + break; + } + // Fall through. 
+ case run_phase::execute: + { + if (group_state (action () /* inner */)) + t = &group->as<mtime_target> (); + + break; + } + } + + return timestamp (duration (t->mtime_.load (memory_order_consume))); + } + + // path_target + // + const string* path_target:: + derive_extension (bool search, const char* de) + { + // See also search_existing_file() if updating anything here. + + // Should be no default extension if searching. + // + assert (!search || de == nullptr); + + // The target should use extensions and they should not be fixed. + // + assert (de == nullptr || type ().default_extension != nullptr); + + if (const string* p = ext ()) + // Note that returning by reference is now MT-safe since once the + // extension is specified, it is immutable. + // + return p; + else + { + optional<string> e; + + // If the target type has the default extension function then try that + // first. The reason for preferring it over what's been provided by the + // caller is that this function will often use the 'extension' variable + // which the user can use to override extensions. But since we pass the + // provided default extension, the target type can override this logic + // (see the exe{} target type for a use case). + // + if (auto f = type ().default_extension) + e = f (key (), base_scope (), de, search); + + if (!e) + { + if (de != nullptr) + e = de; + else + { + if (search) + return nullptr; + + fail << "no default extension for target " << *this << endf; + } + } + + return &ext (move (*e)); + } + } + + const path& path_target:: + derive_path (const char* de, const char* np, const char* ns) + { + path_type p (dir); + + if (np == nullptr || np[0] == '\0') + p /= name; + else + { + p /= np; + p += name; + } + + if (ns != nullptr) + p += ns; + + return derive_path (move (p), de); + } + + const path& path_target:: + derive_path (path_type p, const char* de) + { + // Derive and add the extension if any. + // + { + const string& e (derive_extension (de)); + + if (!e.empty ()) + { + p += '.'; + p += e; + } + } + + path (move (p)); + return path_; + } + + // Search functions. + // + + const target* + target_search (const target&, const prerequisite_key& pk) + { + // The default behavior is to look for an existing target in the + // prerequisite's directory scope. + // + return search_existing_target (pk); + } + + const target* + file_search (const target&, const prerequisite_key& pk) + { + // First see if there is an existing target. + // + if (const target* t = search_existing_target (pk)) + return t; + + // Then look for an existing file in the src tree. + // + return search_existing_file (pk); + } + + void + target_print_0_ext_verb (ostream& os, const target_key& k) + { + stream_verbosity sv (stream_verb (os)); + if (sv.extension == 1) sv.extension = 0; // Remap 1 to 0. + to_stream (os, k, sv); + } + + void + target_print_1_ext_verb (ostream& os, const target_key& k) + { + stream_verbosity sv (stream_verb (os)); + if (sv.extension == 0) sv.extension = 1; // Remap 0 to 1. 
+ to_stream (os, k, sv); + } + + // type info + // + + const target_type target::static_type + { + "target", + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &target_search, + false + }; + + const target_type mtime_target::static_type + { + "mtime_target", + &target::static_type, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &target_search, + false + }; + + const target_type path_target::static_type + { + "path_target", + &mtime_target::static_type, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &target_search, + false + }; + + extern const char file_ext_def[] = ""; + + const target_type file::static_type + { + "file", + &path_target::static_type, + &target_factory<file>, + &target_extension_fix<file_ext_def>, + nullptr, /* default_extension */ + nullptr, /* pattern */ + &target_print_1_ext_verb, // Print extension even at verbosity level 0. + &file_search, + false + }; + + static const target* + alias_search (const target&, const prerequisite_key& pk) + { + // For an alias we don't want to silently create a target since it will do + // nothing and it most likely not what the user intended. + // + const target* t (search_existing_target (pk)); + + if (t == nullptr || t->implied) + fail << "no explicit target for " << pk; + + return t; + } + + const target_type alias::static_type + { + "alias", + &target::static_type, + &target_factory<alias>, + nullptr, // Extension not used. + nullptr, + nullptr, + nullptr, + &alias_search, + false + }; + + // dir + // + bool dir:: + check_implied (const scope& rs, const dir_path& d) + { + try + { + for (const dir_entry& e: dir_iterator (d, true /* ignore_dangling */)) + { + switch (e.type ()) + { + case entry_type::directory: + { + if (check_implied (rs, d / path_cast<dir_path> (e.path ()))) + return true; + + break; + } + case entry_type::regular: + { + if (e.path () == rs.root_extra->buildfile_file) + return true; + + break; + } + default: + break; + } + } + } + catch (const system_error& e) + { + fail << "unable to iterate over " << d << ": " << e << endf; + } + + return false; + } + + prerequisites dir:: + collect_implied (const scope& bs) + { + prerequisites_type r; + const dir_path& d (bs.src_path ()); + + try + { + for (const dir_entry& e: dir_iterator (d, true /* ignore_dangling */)) + { + if (e.type () == entry_type::directory) + r.push_back ( + prerequisite (nullopt, + dir::static_type, + dir_path (e.path ().representation ()), // Relative. + dir_path (), // In the out tree. + string (), + nullopt, + bs)); + } + } + catch (const system_error& e) + { + fail << "unable to iterate over " << d << ": " << e; + } + + return r; + } + + static const target* + dir_search (const target&, const prerequisite_key& pk) + { + tracer trace ("dir_search"); + + // The first step is like in search_alias(): looks for an existing target. + // + const target* t (search_existing_target (pk)); + + if (t != nullptr && !t->implied) + return t; + + // If not found (or is implied), then try to load the corresponding + // buildfile (which would normally define this target). Failed that, see + // if we can assume an implied buildfile which would be equivalent to: + // + // ./: */ + // + const dir_path& d (*pk.tk.dir); + + // We only do this for relative paths. + // + if (d.relative ()) + { + // Note: this code is a custom version of parser::parse_include(). + + const scope& s (*pk.scope); + + // Calculate the new out_base. 
+ // + dir_path out_base (s.out_path () / d); + out_base.normalize (); + + // In our world modifications to the scope structure during search & + // match should be "pure append" in the sense that they should not + // affect any existing targets that have already been searched & + // matched. + // + // A straightforward way to enforce this is to not allow any existing + // targets to be inside any newly created scopes (except, perhaps for + // the directory target itself which we know hasn't been searched yet). + // This, however, is not that straightforward to implement: we would + // need to keep a directory prefix map for all the targets (e.g., in + // target_set). Also, a buildfile could load from a directory that is + // not a subdirectory of out_base. So for now we just assume that this + // is so. And so it is. + // + bool retest (false); + + assert (phase == run_phase::match); + { + // Switch the phase to load. + // + phase_switch ps (run_phase::load); + + // This is subtle: while we were fussing around another thread may + // have loaded the buildfile. So re-test now that we are in exclusive + // phase. + // + if (t == nullptr) + t = search_existing_target (pk); + + if (t != nullptr && !t->implied) + retest = true; + else + { + // Ok, no luck, switch the scope. + // + pair<scope&, scope*> sp ( + switch_scope (*s.rw ().root_scope (), out_base)); + + if (sp.second != nullptr) // Ignore scopes out of any project. + { + scope& base (sp.first); + scope& root (*sp.second); + + const dir_path& src_base (base.src_path ()); + + path bf (src_base / root.root_extra->buildfile_file); + + if (exists (bf)) + { + l5 ([&]{trace << "loading buildfile " << bf << " for " << pk;}); + retest = source_once (root, base, bf, root); + } + else if (exists (src_base)) + { + t = dir::search_implied (base, pk, trace); + retest = (t != nullptr); + } + } + } + } + assert (phase == run_phase::match); + + // If we loaded/implied the buildfile, examine the target again. + // + if (retest) + { + if (t == nullptr) + t = search_existing_target (pk); + + if (t != nullptr && !t->implied) + return t; + } + } + + fail << "no explicit target for " << pk << endf; + } + + static bool + dir_pattern (const target_type&, + const scope&, + string& v, + optional<string>&, + const location&, + bool r) + { + // Add/strip trailing directory separator unless already there. + // + bool d (path::traits_type::is_separator (v.back ())); + + if (r) + { + assert (d); + v.resize (v.size () - 1); + } + else if (!d) + { + v += path::traits_type::directory_separator; + return true; + } + + return false; + } + + const target_type dir::static_type + { + "dir", + &alias::static_type, + &target_factory<dir>, + nullptr, // Extension not used. + nullptr, + &dir_pattern, + nullptr, + &dir_search, + false + }; + + const target_type fsdir::static_type + { + "fsdir", + &target::static_type, + &target_factory<fsdir>, + nullptr, // Extension not used. + nullptr, + &dir_pattern, + nullptr, + &target_search, + false + }; + + static optional<string> + exe_target_extension (const target_key&, + const scope&, + const char* e, + bool search) + { + // If we are searching for an executable that is not a target, then use + // the build machine executable extension. Otherwise, if this is a target, + // then we expect the rule to supply the target machine extension. But if + // it doesn't, then fallback to no extension (e.g., a script). + // + return string (!search + ? (e != nullptr ? 
e : "") + : +#ifdef _WIN32 + "exe" +#else + "" +#endif + ); + } + +#ifdef _WIN32 + static bool + exe_target_pattern (const target_type&, + const scope&, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + if (!e) + { + e = "exe"; + return true; + } + } + + return false; + } +#endif + + const target_type exe::static_type + { + "exe", + &file::static_type, + &target_factory<exe>, + nullptr, /* fixed_extension */ + &exe_target_extension, +#ifdef _WIN32 + &exe_target_pattern, +#else + nullptr, +#endif + nullptr, + &file_search, + false + }; + + static const char* + buildfile_target_extension (const target_key& tk, const scope* root) + { + // If the name is the special 'buildfile', then there is no extension, + // otherwise it is 'build' (or 'build2file' and 'build2' in the + // alternative naming scheme). + + // Let's try hard not to need the root scope by trusting the extensions + // we were given. + // + // BTW, one way to get rid of all this root scope complication is to + // always require explicit extension specification for buildfiles. Since + // they are hardly ever mentioned explicitly, this should probably be ok. + // + if (tk.ext) + return tk.ext->c_str (); + + if (root == nullptr) + { + // The same login as in target::root_scope(). + // + // Note: we are guaranteed the scope is never NULL for prerequisites + // (where out/dir could be relative and none of this will work). + // + root = scopes.find (tk.out->empty () ? *tk.dir : *tk.out).root_scope (); + + if (root == nullptr || root->root_extra == nullptr) + fail << "unable to determine extension for buildfile target " << tk; + } + + return *tk.name == root->root_extra->buildfile_file.string () + ? "" + : root->root_extra->build_ext.c_str (); + } + + static bool + buildfile_target_pattern (const target_type&, + const scope& base, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + if (!e) + { + const scope* root (base.root_scope ()); + + if (root == nullptr || root->root_extra == nullptr) + fail (l) << "unable to determine extension for buildfile pattern"; + + if (v != root->root_extra->buildfile_file.string ()) + { + e = root->root_extra->build_ext; + return true; + } + } + } + + return false; + } + + const target_type buildfile::static_type + { + "build", + &file::static_type, + &target_factory<buildfile>, + &buildfile_target_extension, + nullptr, /* default_extension */ + &buildfile_target_pattern, + nullptr, + &file_search, + false + }; + + const target_type doc::static_type + { + "doc", + &file::static_type, + &target_factory<doc>, + &target_extension_fix<file_ext_def>, // Same as file (no extension). + nullptr, /* default_extension */ + nullptr, /* pattern */ // Same as file. + &target_print_1_ext_verb, // Same as file. + &file_search, + false + }; + + static const char* + man_extension (const target_key& tk, const scope*) + { + if (!tk.ext) + fail << "man target " << tk << " must include extension (man section)"; + + return tk.ext->c_str (); + } + + const target_type man::static_type + { + "man", + &doc::static_type, + &target_factory<man>, + &man_extension, // Should be specified explicitly. + nullptr, /* default_extension */ + nullptr, + &target_print_1_ext_verb, // Print extension even at verbosity level 0. + &file_search, + false + }; + + extern const char man1_ext[] = "1"; // VC14 rejects constexpr. 
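// As a usage sketch of the target_type_map declared in target-type.hxx
// above (the types variable and these particular registrations are
// illustrative only):
//
//   target_type_map types;
//
//   types.insert<file> ();                       // Keyed as "file".
//   types.insert<exe> ();
//   types.insert_file ("buildfile", buildfile::static_type);
//
//   const target_type* tt (types.find ("exe"));  // &exe::static_type.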
+ + const target_type man1::static_type + { + "man1", + &man::static_type, + &target_factory<man1>, + &target_extension_fix<man1_ext>, + nullptr, /* default_extension */ + &target_pattern_fix<man1_ext>, + &target_print_0_ext_verb, // Fixed extension, no use printing. + &file_search, + false + }; + + static const char* + manifest_target_extension (const target_key& tk, const scope*) + { + // If the name is special 'manifest', then there is no extension, + // otherwise it is .manifest. + // + return *tk.name == "manifest" ? "" : "manifest"; + } + + static bool + manifest_target_pattern (const target_type&, + const scope&, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + if (!e && v != "manifest") + { + e = "manifest"; + return true; + } + } + + return false; + } + + const target_type manifest::static_type + { + "manifest", + &doc::static_type, + &target_factory<manifest>, + &manifest_target_extension, + nullptr, /* default_extension */ + &manifest_target_pattern, + nullptr, + &file_search, + false + }; +} diff --git a/libbuild2/target.hxx b/libbuild2/target.hxx new file mode 100644 index 0000000..cfbd9bc --- /dev/null +++ b/libbuild2/target.hxx @@ -0,0 +1,1817 @@ +// file : libbuild2/target.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TARGET_HXX +#define LIBBUILD2_TARGET_HXX + +#include <iterator> // tags, etc. +#include <type_traits> // aligned_storage +#include <unordered_map> + +#include <libbutl/multi-index.mxx> // map_iterator_adapter + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/action.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/target-key.hxx> +#include <libbuild2/target-type.hxx> +#include <libbuild2/target-state.hxx> +#include <libbuild2/prerequisite.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + class rule; + class scope; + class target; + + // From <libbuild2/context.hxx>. + // + LIBBUILD2_SYMEXPORT extern size_t current_on; + + // From <libbuild2/algorithm.hxx>. + // + const target& search (const target&, const prerequisite&); + const target* search_existing (const prerequisite&); + + // Recipe. + // + // The returned target state is normally changed or unchanged. If there is + // an error, then the recipe should throw failed rather than returning (this + // is the only exception that a recipe can throw). + // + // The return value of the recipe is used to update the target state. If it + // is target_state::group then the target's state is the group's state. + // + // The recipe may also return postponed in which case the target state is + // assumed to be unchanged (normally this means a prerequisite was postponed + // and while the prerequisite will be re-examined via another dependency, + // this target is done). + // + // Note that max size for the "small capture optimization" in std::function + // ranges (in pointer sizes) from 0 (GCC prior to 5) to 2 (GCC 5) to 6 (VC + // 14.2). With the size ranging (in bytes for 64-bit target) from 32 (GCC) + // to 64 (VC). + // + using recipe_function = target_state (action, const target&); + using recipe = function<recipe_function>; + + // Commonly-used recipes. The default recipe executes the action on + // all the prerequisites in a loop, skipping ignored. 
Specifically, + // for actions with the "first" execution mode, it calls + // execute_prerequisites() while for those with the "last" mode -- + // reverse_execute_prerequisites(); see <libbuild2/operation.hxx>, + // <libbuild2/algorithm.hxx> for details. The group recipe call's the + // group's recipe. + // + LIBBUILD2_SYMEXPORT extern const recipe empty_recipe; + LIBBUILD2_SYMEXPORT extern const recipe noop_recipe; + LIBBUILD2_SYMEXPORT extern const recipe default_recipe; + LIBBUILD2_SYMEXPORT extern const recipe group_recipe; + + // Defined in <libbuild2/algorithm.hxx>. + // + LIBBUILD2_SYMEXPORT target_state + noop_action (action, const target&); + + // Defined in <libbuild2/algorithm.hxx>. + // + LIBBUILD2_SYMEXPORT target_state + group_action (action, const target&); + + // A view of target group members. + // + struct group_view + { + const target* const* members; // NULL means not yet known. + size_t count; + }; + + // List of prerequisites resolved to targets. Unless additional storage is + // needed, it can be used as just vector<const target*> (which is what we + // used to have initially). + // + struct prerequisite_target + { + using target_type = build2::target; + + prerequisite_target (const target_type* t, bool a = false, uintptr_t d = 0) + : target (t), adhoc (a), data (d) {} + + prerequisite_target (const target_type* t, include_type a, uintptr_t d = 0) + : prerequisite_target (t, a == include_type::adhoc, d) {} + + operator const target_type*& () {return target;} + operator const target_type* () const {return target;} + const target_type* operator-> () const {return target;} + + const target_type* target; + bool adhoc; // True if include=adhoc. + uintptr_t data; + }; + using prerequisite_targets = vector<prerequisite_target>; + + // A rule match is an element of hint_rule_map. + // + using rule_match = pair<const string, reference_wrapper<const rule>>; + + // Target. + // + class LIBBUILD2_SYMEXPORT target + { + optional<string>* ext_; // Reference to value in target_key. + + public: + // For targets that are in the src tree of a project we also keep the + // corresponding out directory. As a result we may end up with multiple + // targets for the same file if we are building multiple configurations of + // the same project at once. We do it this way because, in a sense, a + // target's out directory is its "configuration" (in terms of variables). + // As an example, consider installing the same README file (src) but for + // two different project configurations at once. Which installation + // directory should we use? The answer depends on which configuration you + // ask. + // + // Empty out directory indicates this target is in the out tree (including + // when src == out). We also treat out of project targets as being in the + // out tree. + // + const dir_path dir; // Absolute and normalized. + const dir_path out; // Empty or absolute and normalized. + const string name; + + const string* ext () const; // Return NULL if not specified. + const string& ext (string); + + const dir_path& + out_dir () const {return out.empty () ? dir : out;} + + // A target that is not (yet) entered as part of a real dependency + // declaration (for example, that is entered as part of a target-specific + // variable assignment, dependency extraction, etc) is called implied. + // + // The implied flag should only be cleared during the load phase via the + // MT-safe target_set::insert(). + // + bool implied; + + // Target group to which this target belongs, if any. 
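// To illustrate the recipe type introduced above, here is a minimal sketch
// of a recipe that performs no work and reports the target as unchanged
// (illustrative only; real recipes are normally assigned by rules as part
// of apply()):
//
//   recipe r ([] (action, const target&) -> target_state
//   {
//     return target_state::unchanged;
//   });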
Note that we assume + // that the group and all its members are in the same scope (for example, + // in variable lookup). We also don't support nested groups (with an + // exception for ad hoc groups; see below). + // + // The semantics of the interaction between the group and its members and + // what it means to, say, update the group, is unspecified and is + // determined by the group's type. In particular, a group can be created + // out of member types that have no idea they are part of this group + // (e.g., cli.cxx{}). + // + // Normally, however, there are two kinds of groups: "all" and "choice". + // In a choice-group, normally one of the members is selected when the + // group is mentioned as a prerequisite with, perhaps, an exception for + // special rules, like aliases, where it makes more sense to treat such + // group prerequisites as a whole. In this case we say that the rule + // "semantically recognizes" the group and picks some of its members. + // + // Updating a choice-group as a whole can mean updating some subset of its + // members (e.g., lib{}). Or the group may not support this at all (e.g., + // obj{}). + // + // In an all-group, when a group is updated, normally all its members are + // updates (and usually with a single command), though there could be some + // members that are omitted, depending on the configuration (e.g., an + // inline file not/being generated). When an all-group is mentioned as a + // prerequisite, the rule is usually interested in the individual members + // rather than the whole group. For example, a C++ compile rule would like + // to "see" the ?xx{} members when it gets a cli.cxx{} group. + // + // Which brings us to the group iteration mode. The target type contains a + // member called see_through that indicates whether the default iteration + // mode for the group should be "see through"; that is, whether we see the + // members or the group itself. For the iteration support itself, see the + // *_prerequisite_members() machinery below. + // + // In an all-group we usually want the state (and timestamp; see mtime()) + // for members to come from the group. This is achieved with the special + // target_state::group state. You would normally also use the group_recipe + // for group members. + // + // Note that the group-member link-up can happen anywhere between the + // member creation and rule matching so reading the group before the + // member has been matched can be racy. + // + const target* group = nullptr; + + // What has been described above is a "explicit" group. That is, there is + // a dedicated target type that explicitly serves as a group and there is + // an explicit mechanism for discovering the group's members. + // + // However, sometimes, we may want to create a group on the fly out of a + // normal target type. For example, we have the libs{} target type. But + // on Windows a shared library consist of (at least) two files: the import + // library and the DLL itself. So we somehow need to be able to capture + // that. One approach would be to imply the presence of the second file. + // However, that means that a lot of generic rules (e.g., clean, install, + // etc) will need to know about this special semantics on Windows. Also, + // there would be no convenient way to customize things like extensions, + // etc (for which we use target-specific variables). In other words, it + // would be much easier and more consistent to make these extra files + // proper targets. 
+ // + // So to support this requirement we have "ad hoc" groups. The idea is + // that any target can be turned either by a user's declaration in a + // buildfile or by the rule that matches it into an ad hoc group by + // chaining several targets together. + // + // Ad hoc groups have a more restricted semantics compared to the normal + // groups. In particular: + // + // - The ad hoc group itself is in a sense its first/primary target. + // + // - Group member's recipes, if set, should be group_recipe. Normally, a + // rule-managed member isn't matched by the rule since all that's + // usually needed is to derive its path. + // + // - Unless declared, members are discovered lazily, they are only known + // after the group's rule's apply() call. + // + // - Only declared members can be used as prerequisites but all can be + // used as targets (e.g., to set variables, etc). + // + // - Members don't have prerequisites. + // + // - Ad hoc group cannot have sub-groups (of any kind) though an ad hoc + // group can be a sub-group of an explicit group. + // + // - Member variable lookup skips the ad hoc group (since the group is the + // first member, this is normally what we want). + // + // Note that ad hoc groups can be part of explicit groups. In a sense, we + // have a two-level grouping: an explicit group with its members each of + // which can be an ad hoc group. For example, lib{} contains libs{} which + // may have an import stub as its ad hoc member. + // + // Use add_adhoc_member(), find_adhoc_member() from algorithms to manage + // ad hoc members. + // + const_ptr<target> member = nullptr; + + bool + adhoc_group () const + { + // An ad hoc group can be a member of a normal group. + // + return member != nullptr && + (group == nullptr || group->member == nullptr); + } + + bool + adhoc_member () const + { + return group != nullptr && group->member != nullptr; + } + + public: + // Normally you should not call this function directly and rather use + // resolve_members() from algorithm.hxx. + // + virtual group_view + group_members (action) const; + + // Note that the returned key "tracks" the target (except for the + // extension). + // + target_key + key () const; + + // Scoping. + // + public: + // Most qualified scope that contains this target. + // + const scope& + base_scope () const; + + // Root scope of a project that contains this target. Note that + // a target can be out of any (known) project root in which case + // this function asserts. If you need to detect this situation, + // then use base_scope().root_scope() expression instead. + // + const scope& + root_scope () const; + + // Root scope of a strong amalgamation that contains this target. + // The same notes as to root_scope() apply. + // + const scope& + strong_scope () const {return *root_scope ().strong_scope ();} + + // Root scope of the outermost amalgamation that contains this target. + // The same notes as to root_scope() apply. + // + const scope& + weak_scope () const {return *root_scope ().weak_scope ();} + + bool + in (const scope& s) const + { + return out_dir ().sub (s.out_path ()); + } + + // Prerequisites. + // + // We use an atomic-empty semantics that allows one to "swap in" a set of + // prerequisites if none were specified. This is used to implement + // "synthesized" dependencies. + // + public: + using prerequisites_type = build2::prerequisites; + + const prerequisites_type& + prerequisites () const; + + // Swap-in a list of prerequisites. 
Return false if unsuccessful (i.e., + // someone beat us to it). Note that it can be called on const target. + // + bool + prerequisites (prerequisites_type&&) const; + + // Check if there are any prerequisites. Note that the group version may + // be racy (see target::group). + // + bool + has_prerequisites () const; + + bool + has_group_prerequisites () const; + + private: + friend class parser; + + // Note that the state is also used to synchronize the prerequisites + // value so we use the release-acquire ordering. + // + // 0 - absent + // 1 - being set + // 2 - present + // + atomic<uint8_t> prerequisites_state_ {0}; + prerequisites_type prerequisites_; + + static const prerequisites_type empty_prerequisites_; + + // Target-specific variables. + // + // See also rule-specific variables below. + // + public: + variable_map vars; + + // Lookup, including in groups to which this target belongs and then in + // outer scopes (including target type/pattern-specific variables). If you + // only want to lookup in this target, do it on the variable map directly + // (and note that there will be no overrides). + // + lookup + operator[] (const variable& var) const + { + return find (var).first; + } + + lookup + operator[] (const variable* var) const // For cached variables. + { + assert (var != nullptr); + return operator[] (*var); + } + + lookup + operator[] (const string& name) const + { + const variable* var (var_pool.find (name)); + return var != nullptr ? operator[] (*var) : lookup (); + } + + // As above but also return the depth at which the value is found. The + // depth is calculated by adding 1 for each test performed. So a value + // that is from the target will have depth 1. That from the group -- 2. + // From the innermost scope's target type/patter-specific variables -- + // 3. From the innermost scope's variables -- 4. And so on. The idea is + // that given two lookups from the same target, we can say which one came + // earlier. If no value is found, then the depth is set to ~0. + // + pair<lookup, size_t> + find (const variable& var) const + { + auto p (find_original (var)); + return var.overrides == nullptr + ? p + : base_scope ().find_override (var, move (p), true); + } + + // If target_only is true, then only look in target and its target group + // without continuing in scopes. + // + pair<lookup, size_t> + find_original (const variable&, bool target_only = false) const; + + // Return a value suitable for assignment. See scope for details. + // + value& + assign (const variable& var) {return vars.assign (var);} + + value& + assign (const variable* var) {return vars.assign (var);} // For cached. + + // Return a value suitable for appending. See scope for details. + // + value& + append (const variable&); + + // Target operation state. + // + public: + // Atomic task count that is used during match and execution to track the + // target's "meta-state" as well as the number of its sub-tasks (e.g., + // busy+1, busy+2, and so on, for instance, number of prerequisites + // being matched or executed). + // + // For each operation in a meta-operation batch (current_on) we have a + // "band" of counts, [touched, executed], that represent the target + // meta-state. Once the next operation is started, this band "moves" thus + // automatically resetting the target to "not yet touched" state for this + // operation. 
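// For example, using the offsets and count_*() helpers defined below: for
// the first operation in a batch count_base() is 0, so the touched-to-
// executed counts are 1-5 with busy starting at 6. When the second
// operation starts, count_base() becomes 5 and the band shifts to 6-10, so
// a target left at 5 (executed) by the first operation reads as "not yet
// touched" for the second, without any explicit reset.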
+ // + // The target is said to be synchronized (in this thread) if we have + // either observed the task count to reach applied or executed or we have + // successfully changed it (via compare_exchange) to locked or busy. If + // the target is synchronized, then we can access and modify (second case) + // its state etc. + // + static const size_t offset_touched = 1; // Target has been locked. + static const size_t offset_tried = 2; // Rule match has been tried. + static const size_t offset_matched = 3; // Rule has been matched. + static const size_t offset_applied = 4; // Rule has been applied. + static const size_t offset_executed = 5; // Recipe has been executed. + static const size_t offset_busy = 6; // Match/execute in progress. + + static size_t count_base () {return 5 * (current_on - 1);} + + static size_t count_touched () {return offset_touched + count_base ();} + static size_t count_tried () {return offset_tried + count_base ();} + static size_t count_matched () {return offset_matched + count_base ();} + static size_t count_applied () {return offset_applied + count_base ();} + static size_t count_executed () {return offset_executed + count_base ();} + static size_t count_busy () {return offset_busy + count_base ();} + + // Inner/outer operation state. See operation.hxx for details. + // + class LIBBUILD2_SYMEXPORT opstate + { + public: + mutable atomic_count task_count {0}; // Start offset_touched - 1. + + // Number of direct targets that depend on this target in the current + // operation. It is incremented during match and then decremented during + // execution, before running the recipe. As a result, the recipe can + // detect the last chance (i.e., last dependent) to execute the command + // (see also the first/last execution modes in <operation.hxx>). + // + mutable atomic_count dependents {0}; + + // Matched rule (pointer to hint_rule_map element). Note that in case of + // a direct recipe assignment we may not have a rule (NULL). + // + const rule_match* rule; + + // Applied recipe. + // + build2::recipe recipe; + + // Target state for this operation. Note that it is undetermined until + // a rule is matched and recipe applied (see set_recipe()). + // + target_state state; + + // Rule-specific variables. + // + // The rule (for this action) has to be matched before these variables + // can be accessed and only the rule being matched can modify them (so + // no iffy modifications of the group's variables by member's rules). + // + // They are also automatically cleared before another rule is matched, + // similar to the data pad. In other words, rule-specific variables are + // only valid for this match-execute phase. + // + variable_map vars; + + // Lookup, continuing in the target-specific variables, etc. Note that + // the group's rule-specific variables are not included. If you only + // want to lookup in this target, do it on the variable map directly + // (and note that there will be no overrides). + // + lookup + operator[] (const variable& var) const + { + return find (var).first; + } + + lookup + operator[] (const variable* var) const // For cached variables. + { + assert (var != nullptr); + return operator[] (*var); + } + + lookup + operator[] (const string& name) const + { + const variable* var (var_pool.find (name)); + return var != nullptr ? operator[] (*var) : lookup (); + } + + // As above but also return the depth at which the value is found. The + // depth is calculated by adding 1 for each test performed. 
So a value + // that is from the rule will have depth 1. That from the target - 2, + // and so on, similar to target-specific variables. + // + pair<lookup, size_t> + find (const variable& var) const + { + auto p (find_original (var)); + return var.overrides == nullptr + ? p + : target_->base_scope ().find_override (var, move (p), true, true); + } + + // If target_only is true, then only look in target and its target group + // without continuing in scopes. + // + pair<lookup, size_t> + find_original (const variable&, bool target_only = false) const; + + // Return a value suitable for assignment. See target for details. + // + value& + assign (const variable& var) {return vars.assign (var);} + + value& + assign (const variable* var) {return vars.assign (var);} // For cached. + + public: + opstate (): vars (false /* global */) {} + + private: + friend class target_set; + + const target* target_ = nullptr; // Back-pointer, set by target_set. + }; + + action_state<opstate> state; + + opstate& operator[] (action a) {return state[a];} + const opstate& operator[] (action a) const {return state[a];} + + // This function should only be called during match if we have observed + // (synchronization-wise) that this target has been matched (i.e., the + // rule has been applied) for this action. + // + target_state + matched_state (action, bool fail = true) const; + + // See try_match(). + // + pair<bool, target_state> + try_matched_state (action, bool fail = true) const; + + // After the target has been matched and synchronized, check if the target + // is known to be unchanged. Used for optimizations during search & match. + // + bool + unchanged (action) const; + + // This function should only be called during execution if we have + // observed (synchronization-wise) that this target has been executed. + // + target_state + executed_state (action, bool fail = true) const; + + protected: + // Version that should be used during match after the target has been + // matched for this action. + // + // Indicate whether there is a rule match with the first half of the + // result (see try_match()). + // + pair<bool, target_state> + matched_state_impl (action) const; + + // Return fail-untranslated (but group-translated) state assuming the + // target is executed and synchronized. + // + target_state + executed_state_impl (action) const; + + // Return true if the state comes from the group. Target must be at least + // matched. + // + bool + group_state (action) const; + + public: + // Targets to which prerequisites resolve for this action. Note that + // unlike prerequisite::target, these can be resolved to group members. + // NULL means the target should be skipped (or the rule may simply not add + // such a target to the list). + // + // Note also that it is possible the target can vary from action to + // action, just like recipes. We don't need to keep track of the action + // here since the targets will be updated if the recipe is updated, + // normally as part of rule::apply(). + // + // Note that the recipe may modify this list. + // + mutable action_state<build2::prerequisite_targets> prerequisite_targets; + + // Auxilary data storage. + // + // A rule that matches (i.e., returns true from its match() function) may + // use this pad to pass data between its match and apply functions as well + // as the recipe. After the recipe is executed, the data is destroyed by + // calling data_dtor (if not NULL). The rule should static assert that the + // size of the pad is sufficient for its needs. 
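// A typical usage sketch (match_data and its members are hypothetical):
//
//   struct match_data { path src; bool generated; };
//
//   static_assert (sizeof (match_data) <= target::data_size,
//                  "insufficient space");
//
//   // In the rule's match() or apply(), stash the data:
//   //
//   t.data (match_data {move (src), false});
//
//   // Later (e.g., in the recipe), retrieve it:
//   //
//   match_data& md (t.data<match_data> ());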
+ // + // Note also that normally at least 2 extra pointers may be stored without + // a dynamic allocation in the returned recipe (small object optimization + // in std::function). So if you need to pass data only between apply() and + // the recipe, then this might be a more convenient way. + // + // Note also that a rule that delegates to another rule may not be able to + // use this mechanism fully since the delegated-to rule may also need the + // data pad. + // + // Currenly the data is not destroyed until the next match. + // + // Note that the recipe may modify the data. Currently reserved for the + // inner part of the action. + // + static constexpr size_t data_size = sizeof (string) * 16; + mutable std::aligned_storage<data_size>::type data_pad; + + mutable void (*data_dtor) (void*) = nullptr; + + template <typename R, + typename T = typename std::remove_cv< + typename std::remove_reference<R>::type>::type> + typename std::enable_if<std::is_trivially_destructible<T>::value,T&>::type + data (R&& d) const + { + assert (sizeof (T) <= data_size && data_dtor == nullptr); + return *new (&data_pad) T (forward<R> (d)); + } + + template <typename R, + typename T = typename std::remove_cv< + typename std::remove_reference<R>::type>::type> + typename std::enable_if<!std::is_trivially_destructible<T>::value,T&>::type + data (R&& d) const + { + assert (sizeof (T) <= data_size && data_dtor == nullptr); + T& r (*new (&data_pad) T (forward<R> (d))); + data_dtor = [] (void* p) {static_cast<T*> (p)->~T ();}; + return r; + } + + template <typename T> + T& + data () const {return *reinterpret_cast<T*> (&data_pad);} + + void + clear_data () const + { + if (data_dtor != nullptr) + { + data_dtor (&data_pad); + data_dtor = nullptr; + } + } + + // Target type info and casting. + // + public: + const target* + is_a (const target_type& tt) const { + return type ().is_a (tt) ? this : nullptr;} + + template <typename T> + T* + is_a () {return dynamic_cast<T*> (this);} + + template <typename T> + const T* + is_a () const {return dynamic_cast<const T*> (this);} + + // Unchecked cast. + // + template <typename T> + T& + as () {return static_cast<T&> (*this);} + + template <typename T> + const T& + as () const {return static_cast<const T&> (*this);} + + // Dynamic derivation to support define. + // + const target_type* derived_type = nullptr; + + const target_type& + type () const + { + return derived_type != nullptr ? *derived_type : dynamic_type (); + } + + virtual const target_type& dynamic_type () const = 0; + static const target_type static_type; + + public: + // Split the name leaf into target name (in place) and extension + // (returned). + // + static optional<string> + split_name (string&, const location&); + + // Combine the target name and extension into the name leaf. + // + // If the target type has the default extension, then "escape" the + // existing extension if any. + // + static void + combine_name (string&, const optional<string>&, bool default_extension); + + // Targets should be created via the targets set below. + // + public: + target (dir_path d, dir_path o, string n) + : dir (move (d)), out (move (o)), name (move (n)), + vars (false /* global */) {} + + target (target&&) = delete; + target& operator= (target&&) = delete; + + target (const target&) = delete; + target& operator= (const target&) = delete; + + virtual + ~target (); + + friend class target_set; + }; + + // All targets are from the targets set below. 
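// To illustrate the name splitting rules declared above: "foo.test" splits
// into name "foo" and extension "test"; "foo." into name "foo" with no
// extension; "foo.." into name "foo." (escaped dot) with no extension; and
// "foo.test..." into name "foo.test" with the extension left unspecified so
// that the target type's default applies (e.g., cxx{foo.test...} for
// foo.test.cxx). combine_name() performs the reverse transformation.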
+ // + inline bool + operator== (const target& x, const target& y) {return &x == &y;} + + inline bool + operator!= (const target& x, const target& y) {return !(x == y);} + + ostream& + operator<< (ostream&, const target&); + + // Sometimes it is handy to "mark" a pointer to a target (for example, in + // prerequisite_targets). We use the last 2 bits in a pointer for that (aka + // the "bit stealing" technique). Note that the pointer needs to be unmarked + // before it can be usable so care must be taken in the face of exceptions, + // etc. + // + void + mark (const target*&, uint8_t = 1); + + uint8_t + marked (const target*); // Can be used as a predicate or to get the mark. + + uint8_t + unmark (const target*&); + + // A "range" that presents the prerequisites of a group and one of + // its members as one continuous sequence, or, in other words, as + // if they were in a single container. The group's prerequisites + // come first followed by the member's. If you need to see them + // in the other direction, iterate in reverse, for example: + // + // for (prerequisite& p: group_prerequisites (t)) + // + // for (prerequisite& p: reverse_iterate (group_prerequisites (t)) + // + // Note that in this case the individual elements of each list will + // also be traversed in reverse, but that's what you usually want, + // anyway. + // + // Note that you either should be iterating over a locked target (e.g., in + // rule's match() or apply()) or you should call resolve_group(). + // + class group_prerequisites + { + public: + explicit + group_prerequisites (const target& t); + + group_prerequisites (const target& t, const target* g); + + using prerequisites_type = target::prerequisites_type; + using base_iterator = prerequisites_type::const_iterator; + + struct iterator + { + using value_type = base_iterator::value_type; + using pointer = base_iterator::pointer; + using reference = base_iterator::reference; + using difference_type = base_iterator::difference_type; + using iterator_category = std::bidirectional_iterator_tag; + + iterator () {} + iterator (const target* t, + const target* g, + const prerequisites_type* c, + base_iterator i): t_ (t), g_ (g), c_ (c), i_ (i) {} + + iterator& + operator++ (); + + iterator + operator++ (int) {iterator r (*this); operator++ (); return r;} + + iterator& + operator-- (); + + iterator + operator-- (int) {iterator r (*this); operator-- (); return r;} + + reference operator* () const {return *i_;} + pointer operator-> () const {return i_.operator -> ();} + + friend bool + operator== (const iterator& x, const iterator& y) + { + return x.t_ == y.t_ && x.g_ == y.g_ && x.c_ == y.c_ && x.i_ == y.i_; + } + + friend bool + operator!= (const iterator& x, const iterator& y) {return !(x == y);} + + private: + const target* t_ = nullptr; + const target* g_ = nullptr; + const prerequisites_type* c_ = nullptr; + base_iterator i_; + }; + + using reverse_iterator = std::reverse_iterator<iterator>; + + iterator + begin () const; + + iterator + end () const; + + reverse_iterator + rbegin () const {return reverse_iterator (end ());} + + reverse_iterator + rend () const {return reverse_iterator (begin ());} + + size_t + size () const; + + private: + const target& t_; + const target* g_; + }; + + // A member of a prerequisite. If 'member' is NULL, then this is the + // prerequisite itself. Otherwise, it is its member. In this case + // 'prerequisite' still refers to the prerequisite. 
+ // + struct prerequisite_member + { + using scope_type = build2::scope; + using target_type = build2::target; + using prerequisite_type = build2::prerequisite; + using target_type_type = build2::target_type; + + const prerequisite_type& prerequisite; + const target_type* member; + + template <typename T> + bool + is_a () const + { + return member != nullptr + ? member->is_a<T> () != nullptr + : prerequisite.is_a<T> (); + } + + bool + is_a (const target_type_type& tt) const + { + return member != nullptr + ? member->is_a (tt) != nullptr + : prerequisite.is_a (tt); + } + + prerequisite_key + key () const; + + const target_type_type& + type () const + { + return member != nullptr ? member->type () : prerequisite.type; + } + + const string& + name () const + { + return member != nullptr ? member->name : prerequisite.name; + } + + const dir_path& + dir () const + { + return member != nullptr ? member->dir : prerequisite.dir; + } + + const optional<project_name>& + proj () const + { + // Member cannot be project-qualified. + // + return member != nullptr ? nullopt_project_name : prerequisite.proj; + } + + const scope_type& + scope () const + { + return member != nullptr ? member->base_scope () : prerequisite.scope; + } + + const target_type& + search (const target_type& t) const + { + return member != nullptr ? *member : build2::search (t, prerequisite); + } + + const target_type* + search_existing () const + { + return member != nullptr + ? member + : build2::search_existing (prerequisite); + } + + const target_type* + load (memory_order mo = memory_order_consume) + { + return member != nullptr ? member : prerequisite.target.load (mo); + } + + // Return as a new prerequisite instance. + // + prerequisite_type + as_prerequisite () const; + }; + + // It is often stored as the target's auxiliary data so make sure there is + // no destructor overhead. + // + static_assert (std::is_trivially_destructible<prerequisite_member>::value, + "prerequisite_member is not trivially destructible"); + + inline ostream& + operator<< (ostream& os, const prerequisite_member& pm) + { + return os << pm.key (); + } + + inline include_type + include (action a, const target& t, const prerequisite_member& pm) + { + return include (a, t, pm.prerequisite, pm.member); + } + + // A "range" that presents a sequence of prerequisites (e.g., from + // group_prerequisites()) as a sequence of prerequisite_member's. For each + // group prerequisite you will "see" either the prerequisite itself or all + // its members, depending on the default iteration mode of the target group + // type (ad hoc groups are never implicitly see through since one can only + // safely access members after a synchronous match). You can skip the + // rest of the group members with leave_group() and you can force iteration + // over the members with enter_group(). Usage: + // + // for (prerequisite_member pm: prerequisite_members (a, ...)) + // + // Where ... can be: + // + // t.prerequisites + // reverse_iterate(t.prerequisites) + // group_prerequisites (t) + // reverse_iterate (group_prerequisites (t)) + // + // But use shortcuts instead: + // + // prerequisite_members (a, t) + // reverse_prerequisite_members (a, t) + // group_prerequisite_members (a, t) + // reverse_group_prerequisite_members (a, t) + // + template <typename R> + class prerequisite_members_range; + + // See-through group members iteration mode. Ad hoc members must always + // be entered explicitly. 
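+  //
+  // For example (illustrative), a rule that wants to see group members when
+  // they can already be resolved but is otherwise happy to deal with the
+  // group itself could iterate like this:
+  //
+  //   for (prerequisite_member pm:
+  //          prerequisite_members (a, t, members_mode::maybe))
+  //     ...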
+ // + enum class members_mode + { + always, // Iterate over members, assert if not resolvable. + maybe, // Iterate over members if resolvable, group otherwise. + never // Iterate over group (can still use enter_group()). + }; + + template <typename R> + inline prerequisite_members_range<R> + prerequisite_members (action a, const target& t, + R&& r, + members_mode m = members_mode::always) + { + return prerequisite_members_range<R> (a, t, forward<R> (r), m); + } + + template <typename R> + class prerequisite_members_range + { + public: + prerequisite_members_range (action a, const target& t, + R&& r, + members_mode m) + : a_ (a), t_ (t), mode_ (m), r_ (forward<R> (r)), e_ (r_.end ()) {} + + using base_iterator = decltype (declval<R> ().begin ()); + + struct iterator + { + using value_type = prerequisite_member; + using pointer = const value_type*; + using reference = const value_type&; + using difference_type = typename base_iterator::difference_type; + using iterator_category = std::forward_iterator_tag; + + iterator (): r_ (nullptr) {} + iterator (const prerequisite_members_range* r, const base_iterator& i) + : r_ (r), i_ (i), g_ {nullptr, 0}, k_ (nullptr) + { + if (r_->mode_ != members_mode::never && + i_ != r_->e_ && + i_->type.see_through) + switch_mode (); + } + + iterator& operator++ (); + iterator operator++ (int) {iterator r (*this); operator++ (); return r;} + + // Skip iterating over the rest of this group's members, if any. Note + // that the only valid operation after this call is to increment the + // iterator. + // + void + leave_group (); + + // Iterate over this group's members. Return false if the member + // information is not available. Similar to leave_group(), you should + // increment the iterator after calling this function (provided it + // returned true). + // + bool + enter_group (); + + // Return true if the next element is this group's members. Normally + // used to iterate over group members only, for example: + // + // for (...; ++i) + // { + // if (i->prerequisite.type.see_through) + // { + // for (i.enter_group (); i.group (); ) + // { + // ++i; + // ... + // } + // } + // } + // + bool + group () const; + + value_type operator* () const + { + const target* t (k_ != nullptr ? k_: + g_.count != 0 ? g_.members[j_ - 1] : nullptr); + + return value_type {*i_, t}; + } + + pointer operator-> () const + { + static_assert ( + std::is_trivially_destructible<value_type>::value, + "prerequisite_member is not trivially destructible"); + + const target* t (k_ != nullptr ? k_: + g_.count != 0 ? g_.members[j_ - 1] : nullptr); + + return new (&m_) value_type {*i_, t}; + } + + friend bool + operator== (const iterator& x, const iterator& y) + { + return x.i_ == y.i_ && + x.g_.count == y.g_.count && + (x.g_.count == 0 || x.j_ == y.j_) && + x.k_ == y.k_; + } + + friend bool + operator!= (const iterator& x, const iterator& y) {return !(x == y);} + + // What we have here is a state for three nested iteration modes (and + // no, I am not proud of it). The innermost mode is iteration over an ad + // hoc group (k_). Then we have iteration over a normal group (g_ and + // j_). Finally, at the outer level, we have the range itself (i_). + // + // Also, the enter/leave group support is full of ugly, special cases. + // + private: + void + switch_mode (); + + private: + const prerequisite_members_range* r_; + base_iterator i_; + group_view g_; + size_t j_; // 1-based index, to support enter_group(). + const target* k_; // Current member of ad hoc group or NULL. 
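+
+      // Storage for the temporary value returned by operator-> (which
+      // placement-news a prerequisite_member here on each call; hence the
+      // trivially-destructible requirement asserted in operator-> above).
+      //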
+ mutable typename std::aligned_storage<sizeof (value_type), + alignof (value_type)>::type m_; + }; + + iterator + begin () const {return iterator (this, r_.begin ());} + + iterator + end () const {return iterator (this, e_);} + + private: + action a_; + const target& t_; + members_mode mode_; + R r_; + base_iterator e_; + }; + + // prerequisite_members(t.prerequisites ()) + // + auto + prerequisite_members (action a, const target& t, + members_mode m = members_mode::always); + + // prerequisite_members(reverse_iterate(t.prerequisites ())) + // + auto + reverse_prerequisite_members (action a, const target& t, + members_mode m = members_mode::always); + + // prerequisite_members(group_prerequisites (t)) + // + inline auto + group_prerequisite_members (action a, target& t, + members_mode m = members_mode::always) + { + return prerequisite_members (a, t, group_prerequisites (t), m); + } + + inline auto + group_prerequisite_members (action a, const target& t, + members_mode m = members_mode::always) + { + return prerequisite_members (a, t, group_prerequisites (t), m); + } + + // prerequisite_members(reverse_iterate (group_prerequisites (t))) + // + inline auto + reverse_group_prerequisite_members (action a, target& t, + members_mode m = members_mode::always) + { + return prerequisite_members ( + a, t, reverse_iterate (group_prerequisites (t)), m); + } + + inline auto + reverse_group_prerequisite_members (action a, const target& t, + members_mode m = members_mode::always) + { + return prerequisite_members ( + a, t, reverse_iterate (group_prerequisites (t)), m); + } + + // A target with an unspecified extension is considered equal to the one + // with the specified one. And when we find a target with an unspecified + // extension via a key with the specified one, we update the extension, + // essentially modifying the map's key. To make this work we use a hash + // map. The key's hash ignores the extension, so the hash will stay stable + // across extension updates. + // + // Note also that once the extension is specified, it becomes immutable. + // + class LIBBUILD2_SYMEXPORT target_set + { + public: + using map_type = std::unordered_map<target_key, unique_ptr<target>>; + + // Return existing target or NULL. + // + const target* + find (const target_key& k, tracer& trace) const; + + const target* + find (const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const optional<string>& ext, + tracer& trace) const + { + return find (target_key {&type, &dir, &out, &name, ext}, trace); + } + + template <typename T> + const T* + find (const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name, + const optional<string>& ext, + tracer& trace) const + { + return static_cast<const T*> (find (type, dir, out, name, ext, trace)); + } + + // As above but ignore the extension. + // + const target* + find (const target_type& type, + const dir_path& dir, + const dir_path& out, + const string& name) const + { + slock l (mutex_); + auto i (map_.find (target_key {&type, &dir, &out, &name, nullopt})); + return i != map_.end () ? i->second.get () : nullptr; + } + + template <typename T> + const T* + find (const dir_path& dir, const dir_path& out, const string& name) const + { + return static_cast<const T*> (find (T::static_type, dir, out, name)); + } + + // If the target was inserted, keep the map exclusive-locked and return + // the lock. 
In this case, the target is effectively still being created + // since nobody can see it until the lock is released. + // + pair<target&, ulock> + insert_locked (const target_type&, + dir_path dir, + dir_path out, + string name, + optional<string> ext, + bool implied, + tracer&); + + pair<target&, bool> + insert (const target_type& tt, + dir_path dir, + dir_path out, + string name, + optional<string> ext, + bool implied, + tracer& t) + { + auto p (insert_locked (tt, + move (dir), + move (out), + move (name), + move (ext), + implied, + t)); + + return pair<target&, bool> (p.first, p.second.owns_lock ()); + } + + // Note that the following versions always enter implied targets. + // + template <typename T> + T& + insert (const target_type& tt, + dir_path dir, + dir_path out, + string name, + optional<string> ext, + tracer& t) + { + return insert (tt, + move (dir), + move (out), + move (name), + move (ext), + true, + t).first.template as<T> (); + } + + template <typename T> + T& + insert (const dir_path& dir, + const dir_path& out, + const string& name, + const optional<string>& ext, + tracer& t) + { + return insert<T> (T::static_type, dir, out, name, ext, t); + } + + template <typename T> + T& + insert (const dir_path& dir, + const dir_path& out, + const string& name, + tracer& t) + { + return insert<T> (dir, out, name, nullopt, t); + } + + // Note: not MT-safe so can only be used during serial execution. + // + public: + using iterator = butl::map_iterator_adapter<map_type::const_iterator>; + + iterator begin () const {return map_.begin ();} + iterator end () const {return map_.end ();} + + void + clear () {map_.clear ();} + + private: + friend class target; // Access to mutex. + + mutable shared_mutex mutex_; + map_type map_; + }; + + LIBBUILD2_SYMEXPORT extern target_set targets; + + // Modification time-based target. + // + class LIBBUILD2_SYMEXPORT mtime_target: public target + { + public: + using target::target; + + // Modification time is an "atomic cash". That is, it can be set at any + // time (including on a const instance) and we assume everything will be + // ok regardless of the order in which racing updates happen because we do + // not modify the external state (which is the source of timestemps) while + // updating the internal. + // + // The modification time is reserved for the inner operation thus there is + // no action argument. + // + // The rule for groups that utilize target_state::group is as follows: if + // it has any members that are mtime_targets, then the group should be + // mtime_target and the members get the mtime from it. During match and + // execute the target should be synchronized. + // + // Note that this function can be called before the target is matched in + // which case the value always comes from the target itself. In other + // words, that group logic only kicks in once the target is matched. + // + timestamp + mtime () const; + + // Note also that while we can cache the mtime, it may be ignored if the + // target state is set to group (see above). + // + void + mtime (timestamp) const; + + // If the mtime is unknown, then load it from the filesystem also caching + // the result. + // + // Note: can only be called during executing and must not be used if the + // target state is group. + // + timestamp + load_mtime (const path&) const; + + // Return true if this target is newer than the specified timestamp. + // + // Note: can only be called during execute on a synchronized target. 
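+    //
+    // For example (illustrative), a perform_update recipe could use it to
+    // detect that the output is already up to date with respect to a
+    // path-based prerequisite pt:
+    //
+    //   if (t.newer (pt.load_mtime ()))
+    //     ... // Nothing to do on account of this prerequisite.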
+ // + bool + newer (timestamp) const; + + public: + static const target_type static_type; + + protected: + + // Complain if timestamp is not lock-free unless we were told non-lock- + // free is ok. + // +#ifndef LIBBUILD2_ATOMIC_NON_LOCK_FREE + // C++17: + // + // static_assert (atomic<timestamp::rep>::is_always_lock_free, + // "timestamp is not lock-free on this architecture"); + // +#if !defined(ATOMIC_LLONG_LOCK_FREE) || ATOMIC_LLONG_LOCK_FREE != 2 +# error timestamp is not lock-free on this architecture +#endif +#endif + + // Note that the value is not used to synchronize any other state so we + // use the release-consume ordering (i.e., we are only interested in the + // mtime value being synchronized). + // + // Store it as an underlying representation (normally int64_t) since + // timestamp is not usable with atomic (non-noexcept default ctor). + // + mutable atomic<timestamp::rep> mtime_ {timestamp_unknown_rep}; + }; + + // Filesystem path-based target. + // + class LIBBUILD2_SYMEXPORT path_target: public mtime_target + { + public: + using mtime_target::mtime_target; + + typedef build2::path path_type; + + // Target path is an "atomic consistent cash". That is, it can be set at + // any time (including on a const instance) but any subsequent updates + // must set the same path. Or, in other words, once the path is set, it + // never changes. + // + // An empty path may signify special unknown/undetermined/unreal location + // (for example, a binless library or an installed import library -- we + // know the DLL is there, just not exactly where). In this case you would + // also normally set its mtime. + // + // We used to return a pointer to properly distinguish between not set and + // empty but that proved too tedious to work with. So now we return empty + // path both when not set (which will be empty_path so you can distinguish + // the two case if you really want to) and when set to empty. Note that + // this means there could be a race between path and mtime (unless you + // lock the target in some other way; see file_rule) so in this case it + // makes sense to set the timestamp first. + // + const path_type& + path () const; + + const path_type& + path (path_type) const; + + timestamp + load_mtime () const; + + // Derive a path from target's dir, name, and, if set, ext. If ext is not + // set, try to derive it using the target type extension function and + // fallback to default_ext, if specified. In both cases also update the + // target's extension (this becomes important if later we need to reliably + // determine whether this file has an extension; think hxx{foo.bar.} and + // hxx{*}:extension is empty). + // + // If name_prefix is not NULL, add it before the name part and after the + // directory. Similarly, if name_suffix is not NULL, add it after the name + // part and before the extension. + // + // Finally, if the path was already assigned to this target, then this + // function verifies that the two are the same. + // + const path_type& + derive_path (const char* default_ext = nullptr, + const char* name_prefix = nullptr, + const char* name_suffix = nullptr); + + // This version can be used to derive the path from another target's path + // by adding another extension. + // + const path_type& + derive_path (path_type base, const char* default_ext = nullptr); + + // As above but only derives (and returns) the extension (empty means no + // extension used). 
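+    //
+    // For example (illustrative), a rule's apply() on a file-based target
+    // will typically derive the path once the target is matched, with or
+    // without a default extension (the "txt" value is made up):
+    //
+    //   t.derive_path ();
+    //   t.derive_path ("txt" /* default_ext */);
+    //
+    // After that t.path () and t.load_mtime () become usable.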
+ // + const string& + derive_extension (const char* default_ext = nullptr) + { + return *derive_extension (false, default_ext); + } + + // As above but if search is true then look for the extension as if it was + // a prerequisite, not a target. In this case, if no extension can be + // derived, return NULL instead of failing (like search_existing_file()). + // + const string* + derive_extension (bool search, const char* default_ext = nullptr); + + // Const versions of the above that can be used on unlocked targets. Note + // that here we don't allow providing any defaults since you probably + // should only use this version if everything comes from the target itself + // (and is therefore atomic). + // + const path_type& + derive_path () const + { + return const_cast<path_target*> (this)->derive_path (); // MT-aware. + } + + const string& + derive_extension () const + { + return const_cast<path_target*> (this)->derive_extension (); // MT-aware. + } + + public: + static const target_type static_type; + + private: + // Note that the state is also used to synchronize the path value so + // we use the release-acquire ordering. + // + // 0 - absent + // 1 - being set + // 2 - present + // + mutable atomic<uint8_t> path_state_ {0}; + mutable path_type path_; + }; + + // File target. + // + class LIBBUILD2_SYMEXPORT file: public path_target + { + public: + using path_target::path_target; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // Alias target. It represents a list of targets (its prerequisites) + // as a single "name". + // + class LIBBUILD2_SYMEXPORT alias: public target + { + public: + using target::target; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // Directory target. Note that this is not a filesystem directory + // but rather an alias target with the directory name. For actual + // filesystem directory (creation), see fsdir. + // + class LIBBUILD2_SYMEXPORT dir: public alias + { + public: + using alias::alias; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + + public: + template <typename K> + static const target* + search_implied (const scope&, const K&, tracer&); + + // Return true if the implied buildfile is plausible for the specified + // subdirectory of a project with the specified root scope. That is, there + // is a buildfile in at least one of its subdirectories. Note that the + // directory must exist. + // + static bool + check_implied (const scope& root, const dir_path&); + + private: + static prerequisites_type + collect_implied (const scope&); + }; + + // While a filesystem directory is mtime-based, the semantics is not very + // useful in our case. In particular, if another target depends on fsdir{}, + // then all that's desired is the creation of the directory if it doesn't + // already exist. In particular, we don't want to update the target just + // because some unrelated entry was created in that directory. + // + class LIBBUILD2_SYMEXPORT fsdir: public target + { + public: + using target::target; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // Executable file. 
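+  //
+  // In buildfiles the target types below appear as, for example
+  // (illustrative; cxx{} comes from a separate module):
+  //
+  //   exe{hello}: cxx{hello}
+  //   ./: exe{hello} doc{README}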
+ // + class LIBBUILD2_SYMEXPORT exe: public file + { + public: + using file::file; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + class LIBBUILD2_SYMEXPORT buildfile: public file + { + public: + using file::file; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // Common documentation file targets. + // + class LIBBUILD2_SYMEXPORT doc: public file + { + public: + using file::file; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // The problem with man pages is this: different platforms have + // different sets of sections. What seems to be the "sane" set + // is 1-9 (Linux and BSDs). SysV (e.g., Solaris) instead maps + // 8 to 1M (system administration). The section determines two + // things: the directory where the page is installed (e.g., + // /usr/share/man/man1) as well as the extension of the file + // (e.g., test.1). Note also that there could be sub-sections, + // e.g., 1p (for POSIX). Such a page would still go into man1 + // but will have the .1p extension (at least that's what happens + // on Linux). The challenge is to somehow handle this in a + // portable manner. So here is the plan: + // + // First of all, we have the man{} target type which can be used + // for a custom man page. That is, you can have any extension and + // install it anywhere you please: + // + // man{foo.X}: install = man/manX + // + // Then we have man1..9{} target types which model the "sane" + // section set and that would be automatically installed into + // correct locations on other platforms. In other words, the + // idea is that you should be able to have the foo.8 file, + // write man8{foo} and have it installed as man1m/foo.1m on + // some SysV host. + // + // Re-mapping the installation directory is easy: to help with + // that we have assigned install.man1..9 directory names. The + // messy part is to change the extension. It seems the only + // way to do that would be to have special logic for man pages + // in the generic install rule. @@ This is still a TODO. + // + // Note that handling subsections with man1..9{} is easy, we + // simply specify the extension explicitly, e.g., man{foo.1p}. + // + class LIBBUILD2_SYMEXPORT man: public doc + { + public: + using doc::doc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + class LIBBUILD2_SYMEXPORT man1: public man + { + public: + using man::man; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // We derive manifest from doc rather than file so that it get automatically + // installed into the same place where the rest of the documentation goes. + // If you think about it, it's kind of a documentation, similar to (but + // better than) the version file that many projects come with. + // + class LIBBUILD2_SYMEXPORT manifest: public doc + { + public: + using doc::doc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // Common implementation of the target factory, extension, and search + // functions. 
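+  //
+  // For example (illustrative; the actual wiring lives in the corresponding
+  // .cxx files), a target type with a fixed extension would instantiate the
+  // *_fix templates below with an external character array:
+  //
+  //   extern const char man1_ext[] = "1";
+  //
+  //   ... &target_factory<man1>,
+  //       &target_extension_fix<man1_ext>,
+  //       &target_pattern_fix<man1_ext> ...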
+ // + template <typename T> + target* + target_factory (const target_type&, dir_path d, dir_path o, string n) + { + return new T (move (d), move (o), move (n)); + } + + // Return fixed target extension unless one was specified. + // + template <const char* ext> + const char* + target_extension_fix (const target_key&, const scope*); + + template <const char* ext> + bool + target_pattern_fix (const target_type&, const scope&, + string&, optional<string>&, const location&, + bool); + + // Get the extension from the variable or use the default if none set. If + // the default is NULL, then return NULL. + // + template <const char* var, const char* def> + optional<string> + target_extension_var (const target_key&, const scope&, const char*, bool); + + template <const char* var, const char* def> + bool + target_pattern_var (const target_type&, const scope&, + string&, optional<string>&, const location&, + bool); + + // Target print functions. + // + + // Target type uses the extension but it is fixed and there is no use + // printing it (e.g., man1{}). + // + LIBBUILD2_SYMEXPORT void + target_print_0_ext_verb (ostream&, const target_key&); + + // Target type uses the extension and there is normally no default so it + // should be printed (e.g., file{}). + // + LIBBUILD2_SYMEXPORT void + target_print_1_ext_verb (ostream&, const target_key&); + + // The default behavior, that is, look for an existing target in the + // prerequisite's directory scope. + // + LIBBUILD2_SYMEXPORT const target* + target_search (const target&, const prerequisite_key&); + + // First look for an existing target as above. If not found, then look + // for an existing file in the target-type-specific list of paths. + // + LIBBUILD2_SYMEXPORT const target* + file_search (const target&, const prerequisite_key&); +} + +#include <libbuild2/target.ixx> +#include <libbuild2/target.txx> + +#endif // LIBBUILD2_TARGET_HXX diff --git a/libbuild2/target.ixx b/libbuild2/target.ixx new file mode 100644 index 0000000..4570558 --- /dev/null +++ b/libbuild2/target.ixx @@ -0,0 +1,496 @@ +// file : libbuild2/target.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cstring> // memcpy() + +#include <libbuild2/filesystem.hxx> // mtime() + +namespace build2 +{ + // target + // + inline const string* target:: + ext () const + { + slock l (targets.mutex_); + return *ext_ ? &**ext_ : nullptr; + } + + inline target_key target:: + key () const + { + const string* e (ext ()); + return target_key { + &type (), + &dir, + &out, + &name, + e != nullptr ? optional<string> (*e) : nullopt}; + } + + inline auto target:: + prerequisites () const -> const prerequisites_type& + { + return prerequisites_state_.load (memory_order_acquire) == 2 + ? prerequisites_ + : empty_prerequisites_; + } + + inline bool target:: + prerequisites (prerequisites_type&& p) const + { + target& x (const_cast<target&> (*this)); // MT-aware. + + uint8_t e (0); + if (x.prerequisites_state_.compare_exchange_strong ( + e, + 1, + memory_order_acq_rel, + memory_order_acquire)) + { + x.prerequisites_ = move (p); + x.prerequisites_state_.fetch_add (1, memory_order_release); + return true; + } + else + { + // Spin the transition out so that prerequisites() doesn't return empty. 
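+      //
+      // (The state goes 0 (absent) -> 1 (being set) -> 2 (present); having
+      // lost the race above, wait for the winning thread to complete the
+      // transition so that the prerequisites are fully visible on return.)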
+ // + for (; e == 1; e = prerequisites_state_.load (memory_order_acquire)) + /*this_thread::yield ()*/ ; + + return false; + } + } + + inline bool target:: + group_state (action a) const + { + // We go an extra step and short-circuit to the target state even if the + // raw state is not group provided the recipe is group_recipe and the + // state is unknown (see mtime() for a discussion on why we do it). + // + const opstate& s (state[a]); + + if (s.state == target_state::group) + return true; + + if (s.state == target_state::unknown && group != nullptr) + { + if (recipe_function* const* f = s.recipe.target<recipe_function*> ()) + return *f == &group_action; + } + + return false; + } + + inline pair<bool, target_state> target:: + matched_state_impl (action a) const + { + assert (phase == run_phase::match); + + // Note that the "tried" state is "final". + // + const opstate& s (state[a]); + size_t o (s.task_count.load (memory_order_relaxed) - // Synchronized. + target::count_base ()); + + if (o == target::offset_tried) + return make_pair (false, target_state::unknown); + else + { + // Normally applied but can also be already executed. + // + assert (o == target::offset_applied || o == target::offset_executed); + return make_pair (true, (group_state (a) ? group->state[a] : s).state); + } + } + + inline target_state target:: + executed_state_impl (action a) const + { + assert (phase == run_phase::execute); + return (group_state (a) ? group->state : state)[a].state; + } + + inline target_state target:: + matched_state (action a, bool fail) const + { + // Note that the target could be being asynchronously re-matched. + // + pair<bool, target_state> r (matched_state_impl (a)); + + if (fail && (!r.first || r.second == target_state::failed)) + throw failed (); + + return r.second; + } + + inline pair<bool, target_state> target:: + try_matched_state (action a, bool fail) const + { + pair<bool, target_state> r (matched_state_impl (a)); + + if (fail && r.first && r.second == target_state::failed) + throw failed (); + + return r; + } + + inline target_state target:: + executed_state (action a, bool fail) const + { + target_state r (executed_state_impl (a)); + + if (fail && r == target_state::failed) + throw failed (); + + return r; + } + + inline bool target:: + has_prerequisites () const + { + return !prerequisites ().empty (); + } + + inline bool target:: + has_group_prerequisites () const + { + return has_prerequisites () || + (group != nullptr && !group->has_prerequisites ()); + } + + inline bool target:: + unchanged (action a) const + { + return matched_state_impl (a).second == target_state::unchanged; + } + + inline ostream& + operator<< (ostream& os, const target& t) + { + return os << t.key (); + } + + // mark()/unmark() + // + + // VC15 doesn't like if we use (abstract) target here. 
+ // + static_assert (alignof (file) % 4 == 0, "unexpected target alignment"); + + inline void + mark (const target*& p, uint8_t m) + { + uintptr_t i (reinterpret_cast<uintptr_t> (p)); + i |= m & 0x03; + p = reinterpret_cast<const target*> (i); + } + + inline uint8_t + marked (const target* p) + { + uintptr_t i (reinterpret_cast<uintptr_t> (p)); + return uint8_t (i & 0x03); + } + + inline uint8_t + unmark (const target*& p) + { + uintptr_t i (reinterpret_cast<uintptr_t> (p)); + uint8_t m (i & 0x03); + + if (m != 0) + { + i &= ~uintptr_t (0x03); + p = reinterpret_cast<const target*> (i); + } + + return m; + } + + // group_prerequisites + // + inline group_prerequisites:: + group_prerequisites (const target& t) + : t_ (t), + g_ (t_.group == nullptr || + t_.group->member != nullptr || // Ad hoc group member. + t_.group->prerequisites ().empty () + ? nullptr : t_.group) + { + } + + inline group_prerequisites:: + group_prerequisites (const target& t, const target* g) + : t_ (t), + g_ (g == nullptr || + g->prerequisites ().empty () + ? nullptr : g) + { + } + + inline auto group_prerequisites:: + begin () const -> iterator + { + auto& c ((g_ != nullptr ? *g_ : t_).prerequisites ()); + return iterator (&t_, g_, &c, c.begin ()); + } + + inline auto group_prerequisites:: + end () const -> iterator + { + auto& c (t_.prerequisites ()); + return iterator (&t_, g_, &c, c.end ()); + } + + inline size_t group_prerequisites:: + size () const + { + return t_.prerequisites ().size () + + (g_ != nullptr ? g_->prerequisites ().size () : 0); + } + + // group_prerequisites::iterator + // + inline auto group_prerequisites::iterator:: + operator++ () -> iterator& + { + if (++i_ == c_->end () && c_ != &t_->prerequisites ()) + { + c_ = &t_->prerequisites (); + i_ = c_->begin (); + } + return *this; + } + + + inline auto group_prerequisites::iterator:: + operator-- () -> iterator& + { + if (i_ == c_->begin () && c_ == &t_->prerequisites ()) + { + c_ = &g_->prerequisites (); + i_ = c_->end (); + } + + --i_; + return *this; + } + + // prerequisite_member + // + inline prerequisite prerequisite_member:: + as_prerequisite () const + { + if (member == nullptr) + return prerequisite; + + // An ad hoc group member cannot be used as a prerequisite (use the whole + // group instead). + // + assert (!member->adhoc_member ()); + + return prerequisite_type (*member); + } + + inline prerequisite_key prerequisite_member:: + key () const + { + return member != nullptr + ? prerequisite_key {prerequisite.proj, member->key (), nullptr} + : prerequisite.key (); + } + + // prerequisite_members + // + group_view + resolve_members (action, const target&); // algorithm.hxx + + template <typename T> + inline auto prerequisite_members_range<T>::iterator:: + operator++ () -> iterator& + { + if (k_ != nullptr) // Iterating over an ad hoc group. + k_ = k_->member; + + if (k_ == nullptr && g_.count != 0) // Iterating over a normal group. + { + if (g_.members == nullptr || // Special case, see leave_group(). + ++j_ > g_.count) + g_.count = 0; + } + + if (k_ == nullptr && g_.count == 0) // Iterating over the range. + { + ++i_; + + if (r_->mode_ != members_mode::never && + i_ != r_->e_ && + i_->type.see_through) + switch_mode (); + } + + return *this; + } + + template <typename T> + inline bool prerequisite_members_range<T>::iterator:: + enter_group () + { + assert (k_ == nullptr); // No nested ad hoc group entering. + + // First see if we are about to enter an ad hoc group. + // + const target* t (g_.count != 0 + ? j_ != 0 ? 
g_.members[j_ - 1] : nullptr + : i_->target.load (memory_order_consume)); + + if (t != nullptr && t->member != nullptr) + k_ = t; // Increment that follows will make it t->member. + else + { + // Otherwise assume it is a normal group. + // + g_ = resolve_members (r_->a_, search (r_->t_, *i_)); + + if (g_.members == nullptr) // Members are not know. + { + g_.count = 0; + return false; + } + + if (g_.count != 0) // Group is not empty. + j_ = 0; // Account for the increment that will follow. + } + + return true; + } + + template <typename T> + inline void prerequisite_members_range<T>::iterator:: + leave_group () + { + if (k_ != nullptr) + { + // Skip until the last element (next increment will reach the end). + // + for (; k_->member != nullptr; k_ = k_->member) ; + } + else + { + // Pretend we are on the last member of a normal group. + // + j_ = 0; + g_.count = 1; + g_.members = nullptr; // Ugly "special case signal" for operator++. + } + } + + template <typename T> + inline bool prerequisite_members_range<T>::iterator:: + group () const + { + return + k_ != nullptr ? k_->member != nullptr : /* ad hoc */ + g_.count != 0 ? g_.members != nullptr && j_ < g_.count : /* explicit */ + false; + } + + inline auto + prerequisite_members (action a, const target& t, members_mode m) + { + return prerequisite_members (a, t, t.prerequisites (), m); + } + + inline auto + reverse_prerequisite_members (action a, const target& t, members_mode m) + { + return prerequisite_members (a, t, reverse_iterate (t.prerequisites ()), m); + } + + // mtime_target + // + inline void mtime_target:: + mtime (timestamp mt) const + { + mtime_.store (mt.time_since_epoch ().count (), memory_order_release); + } + + inline timestamp mtime_target:: + load_mtime (const path& p) const + { + assert (phase == run_phase::execute && + !group_state (action () /* inner */)); + + duration::rep r (mtime_.load (memory_order_consume)); + if (r == timestamp_unknown_rep) + { + assert (!p.empty ()); + + r = build2::mtime (p).time_since_epoch ().count (); + mtime_.store (r, memory_order_release); + } + + return timestamp (duration (r)); + } + + inline bool mtime_target:: + newer (timestamp mt) const + { + assert (phase == run_phase::execute); + + timestamp mp (mtime ()); + + // What do we do if timestamps are equal? This can happen, for example, + // on filesystems that don't have subsecond resolution. There is not + // much we can do here except detect the case where the target was + // changed on this run. + // + return mt < mp || (mt == mp && + executed_state_impl (action () /* inner */) == + target_state::changed); + } + + // path_target + // + inline const path& path_target:: + path () const + { + return path_state_.load (memory_order_acquire) == 2 ? path_ : empty_path; + } + + inline const path& path_target:: + path (path_type p) const + { + uint8_t e (0); + if (path_state_.compare_exchange_strong ( + e, + 1, + memory_order_acq_rel, + memory_order_acquire)) + { + path_ = move (p); + path_state_.fetch_add (1, memory_order_release); + } + else + { + // Spin the transition out. 
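+      //
+      // (Having lost the race, wait for the winning thread to finish setting
+      // the path; the assert below then enforces the set-once invariant
+      // described in the class interface.)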
+ // + for (; e == 1; e = path_state_.load (memory_order_acquire)) + /*this_thread::yield ()*/ ; + + assert (path_ == p); + } + + return path_; + } + + inline timestamp path_target:: + load_mtime () const + { + return mtime_target::load_mtime (path ()); + } +} diff --git a/libbuild2/target.txx b/libbuild2/target.txx new file mode 100644 index 0000000..b93a403 --- /dev/null +++ b/libbuild2/target.txx @@ -0,0 +1,185 @@ +// file : libbuild2/target.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbutl/filesystem.mxx> // dir_iterator + +#include <libbuild2/scope.hxx> +#include <libbuild2/diagnostics.hxx> +#include <libbuild2/prerequisite.hxx> + +namespace build2 +{ + // prerequisite_members_range + // + template <typename T> + void prerequisite_members_range<T>::iterator:: + switch_mode () + { + // A group could be empty, so we may have to iterate. + // + do + { + g_ = resolve_members (r_->a_, search (r_->t_, *i_)); + + // Group could not be resolved. + // + if (g_.members == nullptr) + { + assert (r_->mode_ != members_mode::always); + return; + } + + if (g_.count != 0) // Skip empty see through groups. + { + j_ = 1; // Start from the first group member. + break; + } + } + while (++i_ != r_->e_ && i_->type.see_through); + } + + // + // + template <const char* ext> + const char* + target_extension_fix (const target_key& tk, const scope*) + { + // A generic file target type doesn't imply any extension while a very + // specific one (say man1) may have a fixed extension. So if one wasn't + // specified set it to fixed ext rather than unspecified. For file{} + // itself we make it empty which means we treat file{foo} as file{foo.}. + // + return tk.ext ? tk.ext->c_str () : ext; + } + + template <const char* ext> + bool + target_pattern_fix (const target_type&, + const scope&, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + // If we get called to reverse then it means we've added the extension + // in the first place. + // + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + // We only add our extension if there isn't one already. + // + if (!e) + { + e = ext; + return true; + } + } + + return false; + } + + inline optional<string> + target_extension_var_impl (const target_type& tt, + const string& tn, + const scope& s, + const char* var, + const char* def) + { + // Include target type/pattern-specific variables. + // + if (auto l = s.find (var_pool[var], tt, tn)) + { + // Help the user here and strip leading '.' from the extension. + // + const string& e (cast<string> (l)); + return !e.empty () && e.front () == '.' ? string (e, 1) : e; + } + + return def != nullptr ? optional<string> (def) : nullopt; + } + + template <const char* var, const char* def> + optional<string> + target_extension_var (const target_key& tk, + const scope& s, + const char*, + bool) + { + return target_extension_var_impl (*tk.type, *tk.name, s, var, def); + } + + template <const char* var, const char* def> + bool + target_pattern_var (const target_type& tt, + const scope& s, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + // If we get called to reverse then it means we've added the extension + // in the first place. + // + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + // We only add our extension if there isn't one already. 
+ // + if (!e) + { + // Use empty name as a target since we only want target type/pattern- + // specific variables that match any target ('*' but not '*.txt'). + // + if ((e = target_extension_var_impl (tt, string (), s, var, def))) + return true; + } + } + + return false; + } + + // dir + // + template <typename K> + const target* dir:: + search_implied (const scope& bs, const K& k, tracer& trace) + { + using namespace butl; + + // See if we have any prerequisites. + // + prerequisites_type ps (collect_implied (bs)); + + if (ps.empty ()) + return nullptr; + + l5 ([&]{trace << "implying buildfile for " << k;}); + + // We behave as if this target was explicitly mentioned in the (implied) + // buildfile. Thus not implied. + // + target& t (targets.insert (dir::static_type, + bs.out_path (), + dir_path (), + string (), + nullopt, + false, + trace).first); + t.prerequisites (move (ps)); + return &t; + } +} diff --git a/libbuild2/token.cxx b/libbuild2/token.cxx new file mode 100644 index 0000000..7455e26 --- /dev/null +++ b/libbuild2/token.cxx @@ -0,0 +1,60 @@ +// file : libbuild2/token.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/token.hxx> + +using namespace std; + +namespace build2 +{ + void + token_printer (ostream& os, const token& t, bool d) + { + // Only quote non-name tokens for diagnostics. + // + const char* q (d ? "'" : ""); + + switch (t.type) + { + case token_type::eos: os << "<end of file>"; break; + case token_type::newline: os << "<newline>"; break; + case token_type::pair_separator: os << "<pair separator " << t.value[0] << ">"; break; + case token_type::word: os << '\'' << t.value << '\''; break; + + case token_type::colon: os << q << ':' << q; break; + case token_type::dollar: os << q << '$' << q; break; + case token_type::question: os << q << '?' << q; break; + case token_type::comma: os << q << ',' << q; break; + + case token_type::lparen: os << q << '(' << q; break; + case token_type::rparen: os << q << ')' << q; break; + + case token_type::lcbrace: os << q << '{' << q; break; + case token_type::rcbrace: os << q << '}' << q; break; + + case token_type::lsbrace: os << q << '[' << q; break; + case token_type::rsbrace: os << q << ']' << q; break; + + case token_type::labrace: os << q << '<' << q; break; + case token_type::rabrace: os << q << '>' << q; break; + + case token_type::assign: os << q << '=' << q; break; + case token_type::prepend: os << q << "=+" << q; break; + case token_type::append: os << q << "+=" << q; break; + + case token_type::equal: os << q << "==" << q; break; + case token_type::not_equal: os << q << "!=" << q; break; + case token_type::less: os << q << '<' << q; break; + case token_type::greater: os << q << '>' << q; break; + case token_type::less_equal: os << q << "<=" << q; break; + case token_type::greater_equal: os << q << ">=" << q; break; + + case token_type::log_or: os << q << "||" << q; break; + case token_type::log_and: os << q << "&&" << q; break; + case token_type::log_not: os << q << '!' << q; break; + + default: assert (false); // Unhandled extended token. 
+ } + } +} diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx new file mode 100644 index 0000000..a9b9a11 --- /dev/null +++ b/libbuild2/token.hxx @@ -0,0 +1,191 @@ +// file : libbuild2/token.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TOKEN_HXX +#define LIBBUILD2_TOKEN_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Extendable/inheritable enum-like class. + // + // A line consists of a sequence of words separated by separators and + // terminated with the newline. If whitespace is a separator, then it is + // ignored. + // + struct token_type + { + enum + { + // NOTE: remember to update token_printer()! + + eos, + newline, + word, + pair_separator, // token::value[0] is the pair separator char. + + colon, // : + dollar, // $ + question, // ? + comma, // , + + lparen, // ( + rparen, // ) + + lcbrace, // { + rcbrace, // } + + lsbrace, // [ + rsbrace, // ] + + labrace, // < + rabrace, // > + + assign, // = + prepend, // =+ + append, // += + + equal, // == + not_equal, // != + less, // < + greater, // > + less_equal, // <= + greater_equal, // >= + + log_or, // || + log_and, // && + log_not, // ! + + value_next + }; + + using value_type = uint16_t; + + token_type (value_type v = eos): v_ (v) {} + operator value_type () const {return v_;} + value_type v_; + }; + + // Token can be unquoted, single-quoted ('') or double-quoted (""). It can + // also be mixed. + // + enum class quote_type {unquoted, single, double_, mixed}; + + class token; + + LIBBUILD2_SYMEXPORT void + token_printer (ostream&, const token&, bool); + + class token + { + public: + using printer_type = void (ostream&, const token&, bool diag); + + token_type type; + bool separated; // Whitespace-separated from the previous token. + + // Quoting can be complete, where the token starts and ends with the quote + // characters and quoting is contiguous or partial where only some part(s) + // of the token are quoted or quoting continus to the next token. + // + quote_type qtype; + bool qcomp; + + // Normally only used for word, but can also be used to store "modifiers" + // or some such for other tokens. + // + string value; + + uint64_t line; + uint64_t column; + + printer_type* printer; + + public: + token () + : token (token_type::eos, false, 0, 0, token_printer) {} + + token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p) + : token (t, string (), s, quote_type::unquoted, false, l, c, p) {} + + token (token_type t, bool s, + quote_type qt, + uint64_t l, uint64_t c, + printer_type* p) + : token (t, string (), s, qt, qt != quote_type::unquoted, l, c, p) {} + + token (string v, bool s, + quote_type qt, bool qc, + uint64_t l, uint64_t c) + : token (token_type::word, move (v), s, qt, qc, l, c, &token_printer){} + + token (token_type t, + string v, bool s, + quote_type qt, bool qc, + uint64_t l, uint64_t c, + printer_type* p) + : type (t), separated (s), + qtype (qt), qcomp (qc), + value (move (v)), + line (l), column (c), + printer (p) {} + }; + + // Output the token value in a format suitable for diagnostics. + // + inline ostream& + operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;} + + // Extendable/inheritable enum-like class. 
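+  //
+  // For example (illustrative; my_lexer_mode and its values are made up), a
+  // derived component can add its own values starting from value_next:
+  //
+  //   struct my_lexer_mode: lexer_mode_base
+  //   {
+  //     enum {eval = value_next, double_quoted};
+  //
+  //     my_lexer_mode (value_type v): lexer_mode_base (v) {}
+  //   };
+  //
+  // token_type above follows the same pattern.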
+ // + struct lexer_mode_base + { + enum { value_next }; + + using value_type = uint16_t; + + lexer_mode_base (value_type v = value_next): v_ (v) {} + operator value_type () const {return v_;} + value_type v_; + }; + + struct replay_token + { + build2::token token; + const path* file; + lexer_mode_base mode; + + using location_type = build2::location; + + location_type + location () const {return location_type (file, token.line, token.column);} + }; + + using replay_tokens = vector<replay_token>; + + // Diagnostics plumbing. We assume that any diag stream for which we can use + // token as location has its aux data pointing to pointer to path. + // + inline location + get_location (const token& t, const path& p) + { + return location (&p, t.line, t.column); + } + + inline location + get_location (const token& t, const void* data) + { + assert (data != nullptr); // E.g., must be &parser::path_. + const path* p (*static_cast<const path* const*> (data)); + return get_location (t, *p); + } +} + +#endif // LIBBUILD2_TOKEN_HXX diff --git a/libbuild2/types.hxx b/libbuild2/types.hxx new file mode 100644 index 0000000..cbaf89a --- /dev/null +++ b/libbuild2/types.hxx @@ -0,0 +1,360 @@ +// file : libbuild2/types.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TYPES_HXX +#define LIBBUILD2_TYPES_HXX + +// Include unprocessed file during bootstrap. See config.hxx.in for details. +// +#ifdef BUILD2_BOOTSTRAP +# include <libbuild2/config.hxx.in> +#else +# include <libbuild2/config.hxx> +#endif + +#include <array> +#include <tuple> +#include <vector> +#include <string> +#include <memory> // unique_ptr, shared_ptr +#include <utility> // pair, move() +#include <cstddef> // size_t, nullptr_t +#include <cstdint> // uint{8,16,32,64}_t, *_MIN, *_MAX +#include <istream> +#include <ostream> +#include <functional> // hash, function, reference_wrapper +#include <initializer_list> + +#include <mutex> +#include <atomic> +#include <thread> +#include <condition_variable> + +#include <libbutl/ft/shared_mutex.hxx> +#if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex) +# include <shared_mutex> +#endif + +#include <ios> // ios_base::failure +#include <exception> // exception +#include <stdexcept> // logic_error, invalid_argument, runtime_error +#include <system_error> + +#include <libbutl/path.mxx> +#include <libbutl/path-map.mxx> +#include <libbutl/sha256.mxx> +#include <libbutl/process.mxx> +#include <libbutl/fdstream.mxx> +#include <libbutl/optional.mxx> +#include <libbutl/const-ptr.mxx> +#include <libbutl/timestamp.mxx> +#include <libbutl/vector-view.mxx> +#include <libbutl/small-vector.mxx> +#include <libbutl/project-name.mxx> +#include <libbutl/target-triplet.mxx> +#include <libbutl/semantic-version.mxx> +#include <libbutl/standard-version.mxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Commonly-used types. 
+ // + using std::uint8_t; + using std::uint16_t; + using std::uint32_t; + using std::uint64_t; + using std::uintptr_t; + + using uint64s = std::vector<uint64_t>; + + using std::size_t; + using std::nullptr_t; + + using std::pair; + using std::tuple; + using std::string; + using std::function; + using std::reference_wrapper; + + using strings = std::vector<string>; + using cstrings = std::vector<const char*>; + + using std::hash; + + using std::initializer_list; + + using std::unique_ptr; + using std::shared_ptr; + using std::weak_ptr; + + using std::array; + using std::vector; + using butl::vector_view; // <libbutl/vector-view.mxx> + using butl::small_vector; // <libbutl/small-vector.mxx> + + using std::istream; + using std::ostream; + using std::endl; + using std::streamsize; // C++'s ssize_t. + + // Concurrency. + // + using std::atomic; + using std::memory_order; + using std::memory_order_relaxed; + using std::memory_order_consume; + using std::memory_order_acquire; + using std::memory_order_release; + using std::memory_order_acq_rel; + using std::memory_order_seq_cst; + + using atomic_count = atomic<size_t>; // Matches scheduler::atomic_count. + + // Like std::atomic except implicit conversion and assignment use relaxed + // memory ordering. + // + template <typename T> + struct relaxed_atomic: atomic<T> + { + using atomic<T>::atomic; // Delegate. + relaxed_atomic (const relaxed_atomic& a) noexcept + : atomic<T> (a.load (memory_order_relaxed)) {} + + operator T () const noexcept {return this->load (memory_order_relaxed);} + + T operator= (T v) noexcept { + this->store (v, memory_order_relaxed); return v;} + T operator= (const relaxed_atomic& a) noexcept { + return *this = a.load (memory_order_relaxed);} + }; + + template <typename T> + struct relaxed_atomic<T*>: atomic<T*> + { + using atomic<T*>::atomic; // Delegate. + relaxed_atomic (const relaxed_atomic& a) noexcept + : atomic<T*> (a.load (memory_order_relaxed)) {} + + operator T* () const noexcept {return this->load (memory_order_relaxed);} + T& operator* () const noexcept {return *this->load (memory_order_relaxed);} + T* operator-> () const noexcept {return this->load (memory_order_relaxed);} + + T* operator= (T* v) noexcept { + this->store (v, memory_order_relaxed); return v;} + T* operator= (const relaxed_atomic& a) noexcept { + return *this = a.load (memory_order_relaxed);} + }; + + // VC 14 has issues. + // +#if defined(_MSC_VER) && _MSC_VER <= 1900 + template <typename T, typename P> + inline bool + operator== (const relaxed_atomic<T*>& x, const P& y) + { + return static_cast<T*> (x) == y; + } + + template <typename T, typename P> + inline bool + operator!= (const relaxed_atomic<T*>& x, const P& y) + { + return static_cast<T*> (x) != y; + } +#endif + + using std::mutex; + using mlock = std::unique_lock<mutex>; + + using std::condition_variable; + +#if defined(__cpp_lib_shared_mutex) + using shared_mutex = std::shared_mutex; + using ulock = std::unique_lock<shared_mutex>; + using slock = std::shared_lock<shared_mutex>; +#elif defined(__cpp_lib_shared_timed_mutex) + using shared_mutex = std::shared_timed_mutex; + using ulock = std::unique_lock<shared_mutex>; + using slock = std::shared_lock<shared_mutex>; +#else + // Because we have this fallback, we need to be careful not to create + // multiple shared locks in the same thread. 
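+  //
+  // (In the fallback below lock_shared() simply takes the exclusive lock, so
+  // acquiring a second slock in the same thread, or an slock while holding a
+  // ulock, would deadlock.)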
+ // + struct shared_mutex: mutex + { + using mutex::mutex; + + void lock_shared () { lock (); } + void try_lock_shared () { try_lock (); } + void unlock_shared () { unlock (); } + }; + + using ulock = std::unique_lock<shared_mutex>; + using slock = ulock; +#endif + + using std::defer_lock; + using std::adopt_lock; + + using std::thread; + namespace this_thread = std::this_thread; + + // Exceptions. + // + // While <exception> is included, there is no using for std::exception -- + // use qualified. + // + using std::logic_error; + using std::invalid_argument; + using std::runtime_error; + using std::system_error; + using io_error = std::ios_base::failure; + + // <libbutl/optional.mxx> + // + using butl::optional; + using butl::nullopt; + + // <libbutl/const-ptr.mxx> + // + using butl::const_ptr; + + // <libbutl/path.mxx> + // <libbutl/path-map.mxx> + // + using butl::path; + using butl::dir_path; + using butl::path_cast; + using butl::basic_path; + using butl::invalid_path; + using butl::path_abnormality; + + using butl::path_map; + using butl::dir_path_map; + + // Absolute directory path. Note that for now we don't do any checking that + // the path is in fact absolute. + // + // The idea is to have a different type that we automatically complete when + // a (variable) value of this type gets initialized from untyped names. See + // value_type<abs_dir_path> for details. + // + // Note that currently we also normalize and actualize the path. And we + // leave empty path as is. + // + struct abs_dir_path: dir_path + { + using dir_path::dir_path; + + explicit + abs_dir_path (dir_path d): dir_path (std::move (d)) {} + abs_dir_path () = default; + }; + + using paths = std::vector<path>; + using dir_paths = std::vector<dir_path>; + + // <libbutl/timestamp.mxx> + // + using butl::system_clock; + using butl::timestamp; + using butl::duration; + using butl::timestamp_unknown; + using butl::timestamp_unknown_rep; + using butl::timestamp_nonexistent; + using butl::to_string; + using butl::operator<<; + + // <libbutl/sha256.mxx> + // + using butl::sha256; + + // <libbutl/process.mxx> + // <libbutl/fdstream.mxx> + // + using butl::process; + using butl::process_env; + using butl::process_path; + using butl::process_error; + + using butl::auto_fd; + using butl::ifdstream; + using butl::ofdstream; + using butl::fdopen_mode; + using butl::fdstream_mode; + using butl::fdselect_state; + using butl::fdselect_set; + + // <libbutl/target-triplet.mxx> + // + using butl::target_triplet; + + // <libbutl/semantic-version.mxx> + // + using butl::semantic_version; + using butl::parse_semantic_version; + + // <libbutl/standard-version.mxx> + // + using butl::standard_version; + using butl::standard_version_constraint; + + // <libbutl/project-name.mxx> + // + using butl::project_name; + + // Diagnostics location. + // + class location + { + public: + // Note that location maintains a shallow reference to path. Zero lines + // or columns are not printed. + // + explicit + location (const path* f = nullptr, uint64_t l = 0, uint64_t c = 0) + : file (f), line (l), column (c) {} + + bool + empty () const {return file == nullptr;} + + const path* file; + uint64_t line; + uint64_t column; + }; + + // See context. + // + enum class run_phase {load, match, execute}; + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, run_phase); // utility.cxx + + LIBBUILD2_SYMEXPORT extern run_phase phase; +} + +// In order to be found (via ADL) these have to be either in std:: or in +// butl::. 
The latter is a bad idea since libbutl includes the default +// implementation. They are defined in utility.cxx. +// +namespace std +{ + // Path printing with trailing slash for directories. + // + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const ::butl::path&); + + // Print as recall[@effect]. + // + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, const ::butl::process_path&); +} + +// <libbuild2/name.hxx> +// +#include <libbuild2/name.hxx> + +#endif // LIBBUILD2_TYPES_HXX diff --git a/libbuild2/utility.cxx b/libbuild2/utility.cxx new file mode 100644 index 0000000..396ce82 --- /dev/null +++ b/libbuild2/utility.cxx @@ -0,0 +1,517 @@ +// file : libbuild2/utility.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/utility.hxx> + +#include <time.h> // tzset() (POSIX), _tzset() (Windows) + +#include <cstring> // strlen(), str[n]cmp() +#include <iostream> // cerr + +#include <libbuild2/target.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; +using namespace butl; + +// +// <libbuild2/types.hxx> +// +namespace build2 +{ + static const char* const run_phase_[] = {"load", "match", "execute"}; + + ostream& + operator<< (ostream& os, run_phase p) + { + return os << run_phase_[static_cast<uint8_t> (p)]; + } +} + +namespace std +{ + ostream& + operator<< (ostream& os, const ::butl::path& p) + { + using namespace build2; + + return os << (stream_verb (os).path < 1 + ? diag_relative (p) + : p.representation ()); + } + + ostream& + operator<< (ostream& os, const ::butl::process_path& p) + { + using namespace build2; + + if (p.empty ()) + os << "<empty>"; + else + { + // @@ Is there a reason not to print as a relative path as it is done + // for path (see above)? + // + os << p.recall_string (); + + if (!p.effect.empty ()) + os << '@' << p.effect.string (); // Suppress relative(). + } + + return os; + } +} + +namespace build2 +{ + // + // <libbuild2/utility.hxx> + // + process_path argv0; + + const standard_version build_version (LIBBUILD2_VERSION_STR); + + bool dry_run_option; + optional<bool> mtime_check_option; + + optional<path> config_sub; + optional<path> config_guess; + + void + check_build_version (const standard_version_constraint& c, const location& l) + { + if (!c.satisfies (build_version)) + fail (l) << "incompatible build2 version" << + info << "running " << build_version.string () << + info << "required " << c.string (); + } + + dir_path work; + dir_path home; + const dir_path* relative_base = &work; + + path + relative (const path_target& t) + { + const path& p (t.path ()); + assert (!p.empty ()); + return relative (p); + } + + string + diag_relative (const path& p, bool cur) + { + if (p.string () == "-") + return "<stdin>"; + + const path& b (*relative_base); + + if (p.absolute ()) + { + if (p == b) + return cur ? "." + p.separator_string () : string (); + +#ifndef _WIN32 + if (!home.empty ()) + { + if (p == home) + return "~" + p.separator_string (); + } +#endif + + path rb (relative (p)); + +#ifndef _WIN32 + if (!home.empty ()) + { + if (rb.relative ()) + { + // See if the original path with the ~/ shortcut is better that the + // relative to base. 
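+          //
+          // (For example, with home being /home/user, p=/home/user/x/y could
+          // relativize to something like ../../../home/user/x/y while ~/x/y
+          // is shorter; the 2 below accounts for the "~/" prefix. Paths are
+          // illustrative.)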
+ // + if (p.sub (home)) + { + path rh (p.leaf (home)); + if (rb.size () > rh.size () + 2) // 2 for '~/' + return "~/" + move (rh).representation (); + } + } + else if (rb.sub (home)) + return "~/" + rb.leaf (home).representation (); + } + +#endif + + return move (rb).representation (); + } + + return p.representation (); + } + + process_path + run_search (const char*& args0, bool path_only, const location& l) + try + { + return process::path_search (args0, dir_path () /* fallback */, path_only); + } + catch (const process_error& e) + { + fail (l) << "unable to execute " << args0 << ": " << e << endf; + } + + process_path + run_search (const path& f, + bool init, + const dir_path& fallback, + bool path_only, + const location& l) + try + { + return process::path_search (f, init, fallback, path_only); + } + catch (const process_error& e) + { + fail (l) << "unable to execute " << f << ": " << e << endf; + } + + process_path + try_run_search (const path& f, + bool init, + const dir_path& fallback, + bool path_only) + { + return process::try_path_search (f, init, fallback, path_only); + } + + process + run_start (uint16_t verbosity, + const process_env& pe, + const char* args[], + int in, + int out, + bool err, + const dir_path& cwd, + const location& l) + try + { + assert (args[0] == pe.path->recall_string ()); + + if (verb >= verbosity) + print_process (args, 0); + + return process ( + *pe.path, + args, + in, + out, + (err ? 2 : 1), + (!cwd.empty () + ? cwd.string ().c_str () + : pe.cwd != nullptr ? pe.cwd->string ().c_str () : nullptr), + pe.vars); + } + catch (const process_error& e) + { + if (e.child) + { + // Note: run_finish() expects this exact message. + // + cerr << "unable to execute " << args[0] << ": " << e << endl; + + // In a multi-threaded program that fork()'ed but did not exec(), it is + // unwise to try to do any kind of cleanup (like unwinding the stack and + // running destructors). + // + exit (1); + } + else + fail (l) << "unable to execute " << args[0] << ": " << e << endf; + } + + bool + run_finish (const char* args[], + process& pr, + bool err, + const string& l, + const location& loc) + try + { + tracer trace ("run_finish"); + + if (pr.wait ()) + return true; + + const process_exit& e (*pr.exit); + + if (!e.normal ()) + fail (loc) << "process " << args[0] << " " << e; + + // Normall but non-zero exit status. + // + if (err) + { + // While we assuming diagnostics has already been issued (to STDERR), if + // that's not the case, it's a real pain to debug. So trace it. + // + l4 ([&]{trace << "process " << args[0] << " " << e;}); + + throw failed (); + } + + // Even if the user asked to suppress diagnostiscs, one error that we + // want to let through is the inability to execute the program itself. + // We cannot reserve a special exit status to signal this so we will + // just have to compare the output. This particular situation will + // result in a single error line printed by run_start() above. 
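+ // (Explanatory note, not in the original: the child branch of run_start()
+ // above writes exactly "unable to execute <prog>: <error>" to STDERR, so
+ // the 18-character prefix comparison below recognizes that line when it is
+ // captured and passed in as l.)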
+ // + if (l.compare (0, 18, "unable to execute ") == 0) + fail (loc) << l; + + return false; + } + catch (const process_error& e) + { + fail (loc) << "unable to execute " << args[0] << ": " << e << endf; + } + + const string empty_string; + const path empty_path; + const dir_path empty_dir_path; + const project_name empty_project_name; + + const optional<string> nullopt_string; + const optional<path> nullopt_path; + const optional<dir_path> nullopt_dir_path; + const optional<project_name> nullopt_project_name; + + void + append_options (cstrings& args, const lookup& l, const char* e) + { + if (l) + append_options (args, cast<strings> (l), e); + } + + void + append_options (strings& args, const lookup& l, const char* e) + { + if (l) + append_options (args, cast<strings> (l), e); + } + + void + hash_options (sha256& csum, const lookup& l) + { + if (l) + hash_options (csum, cast<strings> (l)); + } + + void + append_options (cstrings& args, const strings& sv, size_t n, const char* e) + { + if (n != 0) + { + args.reserve (args.size () + n); + + for (size_t i (0); i != n; ++i) + { + if (e == nullptr || e != sv[i]) + args.push_back (sv[i].c_str ()); + } + } + } + + void + append_options (strings& args, const strings& sv, size_t n, const char* e) + { + if (n != 0) + { + args.reserve (args.size () + n); + + for (size_t i (0); i != n; ++i) + { + if (e == nullptr || e != sv[i]) + args.push_back (sv[i]); + } + } + } + + void + hash_options (sha256& csum, const strings& sv, size_t n) + { + for (size_t i (0); i != n; ++i) + csum.append (sv[i]); + } + + bool + find_option (const char* o, const lookup& l, bool ic) + { + return l && find_option (o, cast<strings> (l), ic); + } + + bool + find_option (const char* o, const strings& strs, bool ic) + { + for (const string& s: strs) + if (ic ? casecmp (s, o) == 0 : s == o) + return true; + + return false; + } + + bool + find_option (const char* o, const cstrings& cstrs, bool ic) + { + for (const char* s: cstrs) + if (s != nullptr && (ic ? casecmp (s, o) : strcmp (s, o)) == 0) + return true; + + return false; + } + + bool + find_options (initializer_list<const char*> os, const lookup& l, bool ic) + { + return l && find_options (os, cast<strings> (l), ic); + } + + bool + find_options (initializer_list<const char*> os, const strings& strs, bool ic) + { + for (const string& s: strs) + for (const char* o: os) + if (ic ? casecmp (s, o) == 0 : s == o) + return true; + + return false; + } + + bool + find_options (initializer_list<const char*> os, + const cstrings& cstrs, + bool ic) + { + for (const char* s: cstrs) + if (s != nullptr) + for (const char* o: os) + if ((ic ? casecmp (s, o) : strcmp (s, o)) == 0) + return true; + + return false; + } + + const string* + find_option_prefix (const char* p, const lookup& l, bool ic) + { + return l ? find_option_prefix (p, cast<strings> (l), ic) : nullptr; + } + + const string* + find_option_prefix (const char* p, const strings& strs, bool ic) + { + size_t n (strlen (p)); + + for (const string& s: reverse_iterate (strs)) + if ((ic ? casecmp (s, p, n) : s.compare (0, n, p)) == 0) + return &s; + + return nullptr; + } + + const char* + find_option_prefix (const char* p, const cstrings& cstrs, bool ic) + { + size_t n (strlen (p)); + + for (const char* s: reverse_iterate (cstrs)) + if (s != nullptr && (ic ? casecmp (s, p, n) : strncmp (s, p, n)) == 0) + return s; + + return nullptr; + } + + const string* + find_option_prefixes (initializer_list<const char*> ps, + const lookup& l, + bool ic) + { + return l ? 
find_option_prefixes (ps, cast<strings> (l), ic) : nullptr; + } + + const string* + find_option_prefixes (initializer_list<const char*> ps, + const strings& strs, + bool ic) + { + for (const string& s: reverse_iterate (strs)) + for (const char* p: ps) + if ((ic + ? casecmp (s, p, strlen (p)) + : s.compare (0, strlen (p), p)) == 0) + return &s; + + return nullptr; + } + + const char* + find_option_prefixes (initializer_list<const char*> ps, + const cstrings& cstrs, + bool ic) + { + for (const char* s: reverse_iterate (cstrs)) + if (s != nullptr) + for (const char* p: ps) + if ((ic + ? casecmp (s, p, strlen (p)) + : strncmp (s, p, strlen (p))) == 0) + return s; + + return nullptr; + } + + string + apply_pattern (const char* s, const string* p) + { + if (p == nullptr || p->empty ()) + return s; + + size_t i (p->find ('*')); + assert (i != string::npos); + + string r (*p, 0, i++); + r.append (s); + r.append (*p, i, p->size () - i); + return r; + } + + void + init (const char* a0, + bool kg, bool dr, optional<bool> mc, + optional<path> cs, optional<path> cg) + { + // Build system driver process path. + // + argv0 = process::path_search (a0, true); + + keep_going = kg; + dry_run_option = dr; + mtime_check_option = mc; + + config_sub = move (cs); + config_guess = move (cg); + + // Figure out work and home directories. + // + try + { + work = dir_path::current_directory (); + } + catch (const system_error& e) + { + fail << "invalid current working directory: " << e; + } + + home = dir_path::home_directory (); + } +} diff --git a/libbuild2/utility.hxx b/libbuild2/utility.hxx new file mode 100644 index 0000000..af72c58 --- /dev/null +++ b/libbuild2/utility.hxx @@ -0,0 +1,671 @@ +// file : libbuild2/utility.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_UTILITY_HXX +#define LIBBUILD2_UTILITY_HXX + +#include <tuple> // make_tuple() +#include <memory> // make_shared() +#include <string> // to_string() +#include <utility> // move(), forward(), declval(), make_pair(), swap() +#include <cassert> // assert() +#include <iterator> // make_move_iterator() +#include <algorithm> // * +#include <functional> // ref(), cref() + +#include <libbutl/ft/lang.hxx> + +#include <libbutl/utility.mxx> // combine_hash(), reverse_iterate(), etc + +#include <unordered_set> + +#include <libbuild2/types.hxx> + +// "Fake" version values used during bootstrap. 
+// +#ifdef BUILD2_BOOTSTRAP +# define LIBBUILD2_VERSION 9999999999999990000ULL +# define LIBBUILD2_VERSION_STR "99999.99999.99999" +# define LIBBUILD2_VERSION_ID "99999.99999.99999" +# define LIBBUTL_VERSION_STR "99999.99999.99999" +# define LIBBUTL_VERSION_ID "99999.99999.99999" +#else +# include <libbuild2/version.hxx> +#endif + +#include <libbuild2/export.hxx> + +namespace build2 +{ + using std::move; + using std::swap; + using std::forward; + using std::declval; + + using std::ref; + using std::cref; + + using std::make_pair; + using std::make_tuple; + using std::make_shared; + using std::make_move_iterator; + using std::to_string; + using std::stoul; + using std::stoull; + + // <libbutl/utility.mxx> + // + using butl::reverse_iterate; + using butl::compare_c_string; + using butl::compare_pointer_target; + //using butl::hash_pointer_target; + using butl::combine_hash; + using butl::casecmp; + using butl::case_compare_string; + using butl::case_compare_c_string; + using butl::lcase; + using butl::alpha; + using butl::alnum; + using butl::digit; + + using butl::trim; + using butl::next_word; + + using butl::make_guard; + using butl::make_exception_guard; + + using butl::getenv; + using butl::setenv; + using butl::unsetenv; + + using butl::throw_generic_error; + using butl::throw_system_error; + + using butl::eof; + + // Diagnostics state (verbosity level, etc; see diagnostics.hxx). + // + // Note on naming of values (here and in the global state below) that come + // from the command line options: if a value is not meant to be used + // directly, then it has the _option suffix and a function or another + // variable as its public interface. + + // Initialize the diagnostics state. Should be called once early in main(). + // Default values are for unit tests. + // + LIBBUILD2_SYMEXPORT void + init_diag (uint16_t verbosity, + optional<bool> progress = nullopt, + bool no_lines = false, + bool no_columns = false, + bool stderr_term = false); + + LIBBUILD2_SYMEXPORT extern uint16_t verb; + const uint16_t verb_never = 7; + + // --[no-]progress + // + LIBBUILD2_SYMEXPORT extern optional<bool> diag_progress_option; + + LIBBUILD2_SYMEXPORT extern bool diag_no_line; // --no-line + LIBBUILD2_SYMEXPORT extern bool diag_no_column; // --no-column + + LIBBUILD2_SYMEXPORT extern bool stderr_term; // True if stderr is a terminal. + + // Global state (verbosity, home/work directories, etc). + + // Initialize the global state. Should be called once early in main(). + // Default values are for unit tests. + // + LIBBUILD2_SYMEXPORT void + init (const char* argv0, + bool keep_going = false, + bool dry_run = false, + optional<bool> mtime_check = nullopt, + optional<path> config_sub = nullopt, + optional<path> config_guess = nullopt); + + // Build system driver process path (argv0.initial is argv[0]). + // + LIBBUILD2_SYMEXPORT extern process_path argv0; + + // Build system driver version and check. + // + LIBBUILD2_SYMEXPORT extern const standard_version build_version; + + LIBBUILD2_SYMEXPORT extern bool dry_run_option; // --dry-run + + // --[no-]mtime-check + // + LIBBUILD2_SYMEXPORT extern optional<bool> mtime_check_option; + + LIBBUILD2_SYMEXPORT extern optional<path> config_sub; // --config-sub + LIBBUILD2_SYMEXPORT extern optional<path> config_guess; // --config-guess + + class location; + + LIBBUILD2_SYMEXPORT void + check_build_version (const standard_version_constraint&, const location&); + + // Work/home directories (must be initialized in main()) and relative path + // calculation. 
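+ // (Illustrative example: with work=/tmp/proj as the relative base,
+ // relative() below maps /tmp/proj/build/foo.o to build/foo.o.)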
+ // + LIBBUILD2_SYMEXPORT extern dir_path work; + LIBBUILD2_SYMEXPORT extern dir_path home; + + // By default this points to work. Setting this to something else should + // only be done in tightly controlled, non-concurrent situations (e.g., + // state dump). If it is empty, then relative() below returns the original + // path. + // + LIBBUILD2_SYMEXPORT extern const dir_path* relative_base; + + // If possible and beneficial, translate an absolute, normalized path into + // relative to the relative_base directory, which is normally work. Note + // that if the passed path is the same as relative_base, then this function + // returns empty path. + // + template <typename K> + basic_path<char, K> + relative (const basic_path<char, K>&); + + class path_target; + + LIBBUILD2_SYMEXPORT path + relative (const path_target&); + + // In addition to calling relative(), this function also uses shorter + // notations such as '~/'. For directories the result includes the trailing + // slash. If the path is the same as base, returns "./" if current is true + // and empty string otherwise. + // + LIBBUILD2_SYMEXPORT string + diag_relative (const path&, bool current = true); + + // Basic process utilities. + // + // The run*() functions with process_path assume that you are printing + // the process command line yourself. + + // Search for a process executable. Issue diagnostics and throw failed in + // case of an error. + // + LIBBUILD2_SYMEXPORT process_path + run_search (const char*& args0, + bool path_only, + const location& = location ()); + + inline process_path + run_search (const char*& args0, const location& l = location ()) + { + return run_search (args0, false, l); + } + + LIBBUILD2_SYMEXPORT process_path + run_search (const path&, + bool init = false, + const dir_path& fallback = dir_path (), + bool path_only = false, + const location& = location ()); + + LIBBUILD2_SYMEXPORT process_path + try_run_search (const path&, + bool init = false, + const dir_path& fallback = dir_path (), + bool path_only = false); + + // Wait for process termination. Issue diagnostics and throw failed in case + // of abnormal termination. If the process has terminated normally but with + // a non-zero exit status, then, if error is true, assume the diagnostics + // has already been issued and throw failed as well. Otherwise (error is + // false), return false. The last argument is used in cooperation with + // run_start() in case STDERR is redirected to STDOUT. + // + LIBBUILD2_SYMEXPORT bool + run_finish (const char* args[], + process&, + bool error = true, + const string& = string (), + const location& = location ()); + + inline void + run_finish (cstrings& args, process& pr, const location& l = location ()) + { + run_finish (args.data (), pr, true, string (), l); + } + + // Start a process with the specified arguments. If in is -1, then redirect + // STDIN to a pipe (can also be -2 to redirect to /dev/null or equivalent). + // If out is -1, redirect STDOUT to a pipe. If error is false, then + // redirecting STDERR to STDOUT (this can be used to suppress diagnostics + // from the child process). Issue diagnostics and throw failed in case of an + // error. + // + LIBBUILD2_SYMEXPORT process + run_start (uint16_t verbosity, + const process_env&, // Implicit-constructible from process_path. 
+ const char* args[], + int in, + int out, + bool error = true, + const dir_path& cwd = dir_path (), + const location& = location ()); + + inline process + run_start (const process_env& pe, // Implicit-constructible from process_path. + const char* args[], + int in, + int out, + bool error = true, + const dir_path& cwd = dir_path (), + const location& l = location ()) + { + return run_start (verb_never, pe, args, in, out, error, cwd, l); + } + + inline void + run (const process_path& p, + const char* args[], + const dir_path& cwd = dir_path ()) + { + process pr (run_start (p, args, 0 /* stdin */, 1 /* stdout */, true, cwd)); + run_finish (args, pr); + } + + inline void + run (const process_path& p, + cstrings& args, + const dir_path& cwd = dir_path ()) + { + run (p, args.data (), cwd); + } + + // As above, but search for the process (including updating args[0]) and + // print the process commands line at the specified verbosity level. + // + inline process + run_start (uint16_t verbosity, + const char* args[], + int in, + int out, + bool error = true, + const dir_path& cwd = dir_path (), + const location& l = location ()) + { + process_path pp (run_search (args[0], l)); + return run_start (verbosity, pp, args, in, out, error, cwd, l); + } + + inline process + run_start (uint16_t verbosity, + cstrings& args, + int in, + int out, + bool error = true, + const dir_path& cwd = dir_path (), + const location& l = location ()) + { + return run_start (verbosity, args.data (), in, out, error, cwd, l); + } + + inline void + run (uint16_t verbosity, + const char* args[], + const dir_path& cwd = dir_path ()) + { + process pr (run_start (verbosity, + args, + 0 /* stdin */, + 1 /* stdout */, + true, + cwd)); + run_finish (args, pr); + } + + inline void + run (uint16_t verbosity, + cstrings& args, + const dir_path& cwd = dir_path ()) + { + run (verbosity, args.data (), cwd); + } + + // Start the process as above and then call the specified function on each + // trimmed line of the output until it returns a non-empty object T (tested + // with T::empty()) which is then returned to the caller. + // + // The predicate can move the value out of the passed string but, if error + // is false, only in case of a "content match" (so that any diagnostics + // lines are left intact). The function signature should be: + // + // T (string& line, bool last) + // + // If ignore_exit is true, then the program's exit status is ignored (if it + // is false and the program exits with the non-zero status, then an empty T + // instance is returned). + // + // If checksum is not NULL, then feed it the content of each trimmed line + // (including those that come after the callback returns non-empty object). + // + template <typename T, typename F> + T + run (uint16_t verbosity, + const process_env&, // Implicit-constructible from process_path. + const char* args[], + F&&, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr); + + template <typename T, typename F> + inline T + run (const process_env& pe, // Implicit-constructible from process_path. 
+ const char* args[], + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + return run<T> ( + verb_never, pe, args, forward<F> (f), error, ignore_exit, checksum); + } + + template <typename T, typename F> + inline T + run (uint16_t verbosity, + const char* args[], + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + process_path pp (run_search (args[0])); + return run<T> ( + verbosity, pp, args, forward<F> (f), error, ignore_exit, checksum); + } + + // run <prog> + // + template <typename T, typename F> + inline T + run (uint16_t verbosity, + const path& prog, + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + const char* args[] = {prog.string ().c_str (), nullptr}; + return run<T> ( + verbosity, args, forward<F> (f), error, ignore_exit, checksum); + } + + template <typename T, typename F> + inline T + run (uint16_t verbosity, + const process_env& pe, // Implicit-constructible from process_path. + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + const char* args[] = {pe.path->recall_string (), nullptr}; + return run<T> ( + verbosity, pe, args, forward<F> (f), error, ignore_exit, checksum); + } + + // run <prog> <arg> + // + template <typename T, typename F> + inline T + run (uint16_t verbosity, + const path& prog, + const char* arg, + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + const char* args[] = {prog.string ().c_str (), arg, nullptr}; + return run<T> ( + verbosity, args, forward<F> (f), error, ignore_exit, checksum); + } + + template <typename T, typename F> + inline T + run (uint16_t verbosity, + const process_env& pe, // Implicit-constructible from process_path. + const char* arg, + F&& f, + bool error = true, + bool ignore_exit = false, + sha256* checksum = nullptr) + { + const char* args[] = {pe.path->recall_string (), arg, nullptr}; + return run<T> ( + verbosity, pe, args, forward<F> (f), error, ignore_exit, checksum); + } + + // Empty/nullopt string, path, and project name. + // + LIBBUILD2_SYMEXPORT extern const string empty_string; + LIBBUILD2_SYMEXPORT extern const path empty_path; + LIBBUILD2_SYMEXPORT extern const dir_path empty_dir_path; + LIBBUILD2_SYMEXPORT extern const project_name empty_project_name; + + LIBBUILD2_SYMEXPORT extern const optional<string> nullopt_string; + LIBBUILD2_SYMEXPORT extern const optional<path> nullopt_path; + LIBBUILD2_SYMEXPORT extern const optional<dir_path> nullopt_dir_path; + LIBBUILD2_SYMEXPORT extern const optional<project_name> nullopt_project_name; + + // Hash a path potentially without the specific directory prefix. + // + // If prefix is not empty and is a super-path of the path to hash, then only + // hash the suffix. Note that both paths are assumed to be normalized. + // + // This functionality is normally used to strip out_root from target paths + // being hashed in order to avoid updates in case out_root was moved. Note + // that this should only be done if the result of the update does not + // include the out_root path in any form (as could be the case, for example, + // for debug information, __FILE__ macro expansion, rpath, etc). + // + void + hash_path (sha256&, const path&, const dir_path& prefix = dir_path ()); + + // Append all the values from a variable to the C-string list. T is either + // target or scope. The variable is expected to be of type strings. 
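+ // A usage sketch (illustrative; t is a target and "cxx.coptions" is just
+ // an example of a strings-typed variable):
+ //
+ //   cstrings args;
+ //   append_options (args, t, "cxx.coptions");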
+ // + // If excl is not NULL, then filter this option out (note: case sensitive). + // + struct variable; + + template <typename T> + void + append_options (cstrings&, T&, const variable&, const char* excl = nullptr); + + template <typename T> + void + append_options (cstrings&, T&, const char*, const char* excl = nullptr); + + template <typename T> + void + append_options (strings&, T&, const variable&, const char* excl = nullptr); + + template <typename T> + void + append_options (strings&, T&, const char*, const char* excl = nullptr); + + template <typename T> + void + hash_options (sha256&, T&, const variable&); + + template <typename T> + void + hash_options (sha256&, T&, const char*); + + // As above but from the strings value directly. + // + class value; + struct lookup; + + LIBBUILD2_SYMEXPORT void + append_options (cstrings&, const lookup&, const char* excl = nullptr); + + LIBBUILD2_SYMEXPORT void + append_options (strings&, const lookup&, const char* excl = nullptr); + + LIBBUILD2_SYMEXPORT void + hash_options (sha256&, const lookup&); + + void + append_options (cstrings&, const strings&, const char* excl = nullptr); + + void + append_options (strings&, const strings&, const char* excl = nullptr); + + void + hash_options (sha256&, const strings&); + + LIBBUILD2_SYMEXPORT void + append_options (cstrings&, + const strings&, size_t, + const char* excl = nullptr); + + LIBBUILD2_SYMEXPORT void + append_options (strings&, + const strings&, size_t, + const char* excl = nullptr); + + LIBBUILD2_SYMEXPORT void + hash_options (sha256&, const strings&, size_t); + + // As above but append/hash option values for the specified option (e.g., + // -I, -L). + // + template <typename I, typename F> + void + append_option_values (cstrings&, + const char* opt, + I begin, I end, + F&& get = [] (const string& s) {return s.c_str ();}); + + template <typename I, typename F> + void + hash_option_values (sha256&, + const char* opt, + I begin, I end, + F&& get = [] (const string& s) {return s;}); + + // Check if a specified option is present in the variable or value. T is + // either target or scope. + // + template <typename T> + bool + find_option (const char* option, + T&, + const variable&, + bool ignore_case = false); + + template <typename T> + bool + find_option (const char* option, + T&, + const char* variable, + bool ignore_case = false); + + LIBBUILD2_SYMEXPORT bool + find_option (const char* option, const lookup&, bool ignore_case = false); + + LIBBUILD2_SYMEXPORT bool + find_option (const char* option, const strings&, bool ignore_case = false); + + LIBBUILD2_SYMEXPORT bool + find_option (const char* option, const cstrings&, bool ignore_case = false); + + // As above but look for several options returning true if any is present. + // + template <typename T> + bool + find_options (initializer_list<const char*>, + T&, + const variable&, + bool = false); + + template <typename T> + bool + find_options (initializer_list<const char*>, T&, const char*, bool = false); + + LIBBUILD2_SYMEXPORT bool + find_options (initializer_list<const char*>, const lookup&, bool = false); + + LIBBUILD2_SYMEXPORT bool + find_options (initializer_list<const char*>, const strings&, bool = false); + + LIBBUILD2_SYMEXPORT bool + find_options (initializer_list<const char*>, const cstrings&, bool = false); + + // As above but look for an option that has the specified prefix. Return the + // pointer to option or NULL if not found (thus can be used as bool). 
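+ // For example (illustrative): find_option_prefix ("-std=", t, "cxx.coptions")
+ // returns a pointer to something like "-std=c++2a" if such an option is
+ // present.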
+ // Search backward (which is normall consistent with how options override + // each other). + // + template <typename T> + const string* + find_option_prefix (const char* prefix, T&, const variable&, bool = false); + + template <typename T> + const string* + find_option_prefix (const char* prefix, T&, const char*, bool = false); + + LIBBUILD2_SYMEXPORT const string* + find_option_prefix (const char* prefix, const lookup&, bool = false); + + LIBBUILD2_SYMEXPORT const string* + find_option_prefix (const char* prefix, const strings&, bool = false); + + LIBBUILD2_SYMEXPORT const char* + find_option_prefix (const char* prefix, const cstrings&, bool = false); + + // As above but look for several option prefixes. + // + template <typename T> + const string* + find_option_prefixes (initializer_list<const char*>, + T&, + const variable&, + bool = false); + + template <typename T> + const string* + find_option_prefixes (initializer_list<const char*>, + T&, + const char*, + bool = false); + + LIBBUILD2_SYMEXPORT const string* + find_option_prefixes (initializer_list<const char*>, + const lookup&, bool = false); + + LIBBUILD2_SYMEXPORT const string* + find_option_prefixes (initializer_list<const char*>, + const strings&, + bool = false); + + LIBBUILD2_SYMEXPORT const char* + find_option_prefixes (initializer_list<const char*>, + const cstrings&, + bool = false); + + // Apply the specified substitution (stem) to a '*'-pattern. If pattern is + // NULL or empty, then return the stem itself. Assume the pattern is valid, + // i.e., contains a single '*' character. + // + LIBBUILD2_SYMEXPORT string + apply_pattern (const char* stem, const string* pattern); +} + +#include <libbuild2/utility.ixx> +#include <libbuild2/utility.txx> + +#endif // LIBBUILD2_UTILITY_HXX diff --git a/libbuild2/utility.ixx b/libbuild2/utility.ixx new file mode 100644 index 0000000..8d3f6ba --- /dev/null +++ b/libbuild2/utility.ixx @@ -0,0 +1,155 @@ +// file : libbuild2/utility.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + inline void + hash_path (sha256& cs, const path& p, const dir_path& prefix) + { + // Note: for efficiency we don't use path::leaf() and "skip" the prefix + // without copying. + // + const char* s (p.string ().c_str ()); + + if (!prefix.empty () && p.sub (prefix)) + { + s += prefix.size (); // Does not include trailing slash except for root. 
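+ // Skip the separator that follows the prefix (the root directory is the
+ // exception since its size already counts the trailing slash).
+ //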
+ if (path::traits_type::is_separator (*s)) + ++s; + } + + cs.append (s); + } + + template <typename T> + inline void + append_options (cstrings& args, T& s, const variable& var, const char* e) + { + append_options (args, s[var], e); + } + + template <typename T> + inline void + append_options (strings& args, T& s, const variable& var, const char* e) + { + append_options (args, s[var], e); + } + + template <typename T> + inline void + hash_options (sha256& csum, T& s, const variable& var) + { + hash_options (csum, s[var]); + } + + template <typename T> + inline void + append_options (cstrings& args, T& s, const char* var, const char* e) + { + append_options (args, s[var], e); + } + + template <typename T> + inline void + append_options (strings& args, T& s, const char* var, const char* e) + { + append_options (args, s[var], e); + } + + template <typename T> + inline void + hash_options (sha256& csum, T& s, const char* var) + { + hash_options (csum, s[var]); + } + + inline void + append_options (cstrings& args, const strings& sv, const char* e) + { + if (size_t n = sv.size ()) + append_options (args, sv, n, e); + } + + inline void + append_options (strings& args, const strings& sv, const char* e) + { + if (size_t n = sv.size ()) + append_options (args, sv, n, e); + } + + inline void + hash_options (sha256& csum, const strings& sv) + { + if (size_t n = sv.size ()) + hash_options (csum, sv, n); + } + + template <typename T> + inline bool + find_option (const char* o, T& s, const variable& var, bool ic) + { + return find_option (o, s[var], ic); + } + + template <typename T> + inline bool + find_option (const char* o, T& s, const char* var, bool ic) + { + return find_option (o, s[var], ic); + } + + template <typename T> + inline bool + find_options (initializer_list<const char*> os, + T& s, + const variable& var, + bool ic) + { + return find_options (os, s[var], ic); + } + + template <typename T> + inline bool + find_options (initializer_list<const char*> os, + T& s, + const char* var, + bool ic) + { + return find_options (os, s[var], ic); + } + + template <typename T> + inline const string* + find_option_prefix (const char* p, T& s, const variable& var, bool ic) + { + return find_option_prefix (p, s[var], ic); + } + + template <typename T> + inline const string* + find_option_prefix (const char* p, T& s, const char* var, bool ic) + { + return find_option_prefix (p, s[var], ic); + } + + template <typename T> + inline const string* + find_option_prefixes (initializer_list<const char*> ps, + T& s, + const variable& var, + bool ic) + { + return find_option_prefixes (ps, s[var], ic); + } + + template <typename T> + inline const string* + find_option_prefixes (initializer_list<const char*> ps, + T& s, + const char* var, + bool ic) + { + return find_option_prefixes (ps, s[var], ic); + } +} diff --git a/libbuild2/utility.txx b/libbuild2/utility.txx new file mode 100644 index 0000000..a91cb15 --- /dev/null +++ b/libbuild2/utility.txx @@ -0,0 +1,115 @@ +// file : libbuild2/utility.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + template <typename I, typename F> + void + append_option_values (cstrings& args, const char* o, I b, I e, F&& get) + { + if (b != e) + { + args.reserve (args.size () + (e - b)); + + for (; b != e; ++b) + { + args.push_back (o); + args.push_back (get (*b)); + } + } + } + + template <typename I, typename F> + void + hash_option_values (sha256& cs, const char* o, I b, I e, F&& get) + 
{ + for (; b != e; ++b) + { + cs.append (o); + cs.append (get (*b)); + } + } + + template <typename K> + basic_path<char, K> + relative (const basic_path<char, K>& p) + { + typedef basic_path<char, K> path; + + const dir_path& b (*relative_base); + + if (p.simple () || b.empty ()) + return p; + + if (p.sub (b)) + return p.leaf (b); + + if (p.root_directory () == b.root_directory ()) + { + path r (p.relative (b)); + + if (r.string ().size () < p.string ().size ()) + return r; + } + + return p; + } + + template <typename T, typename F> + T + run (uint16_t verbosity, + const process_env& pe, + const char* args[], + F&& f, + bool err, + bool ignore_exit, + sha256* checksum) + { + process pr (run_start (verbosity, + pe, + args, + 0 /* stdin */, + -1 /* stdout */, + err)); + T r; + string l; // Last line of output. + + try + { + ifdstream is (move (pr.in_ofd), butl::fdstream_mode::skip); + + // Make sure we keep the last line. + // + for (bool last (is.peek () == ifdstream::traits_type::eof ()); + !last && getline (is, l); ) + { + last = (is.peek () == ifdstream::traits_type::eof ()); + + trim (l); + + if (checksum != nullptr) + checksum->append (l); + + if (r.empty ()) + { + r = f (l, last); + + if (!r.empty () && checksum == nullptr) + break; + } + } + + is.close (); + } + catch (const io_error&) + { + // Presumably the child process failed. Let run_finish() deal with that. + } + + if (!(run_finish (args, pr, err, l) || ignore_exit)) + r = T (); + + return r; + } +} diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx new file mode 100644 index 0000000..beb169e --- /dev/null +++ b/libbuild2/variable.cxx @@ -0,0 +1,1533 @@ +// file : libbuild2/variable.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/variable.hxx> + +#include <cstring> // memcmp() + +#include <libbutl/filesystem.mxx> // path_match() + +#include <libbuild2/context.hxx> +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + // variable_visibility + // + ostream& + operator<< (ostream& o, variable_visibility v) + { + const char* s (nullptr); + + switch (v) + { + case variable_visibility::normal: s = "normal"; break; + case variable_visibility::project: s = "project"; break; + case variable_visibility::scope: s = "scope"; break; + case variable_visibility::target: s = "target"; break; + case variable_visibility::prereq: s = "prerequisite"; break; + } + + return o << s; + } + + // value + // + void value:: + reset () + { + if (type == nullptr) + as<names> ().~names (); + else if (type->dtor != nullptr) + type->dtor (*this); + + null = true; + } + + value:: + value (value&& v) + : type (v.type), null (v.null), extra (v.extra) + { + if (!null) + { + if (type == nullptr) + new (&data_) names (move (v).as<names> ()); + else if (type->copy_ctor != nullptr) + type->copy_ctor (*this, v, true); + else + data_ = v.data_; // Copy as POD. + } + } + + value:: + value (const value& v) + : type (v.type), null (v.null), extra (v.extra) + { + if (!null) + { + if (type == nullptr) + new (&data_) names (v.as<names> ()); + else if (type->copy_ctor != nullptr) + type->copy_ctor (*this, v, false); + else + data_ = v.data_; // Copy as POD. + } + } + + value& value:: + operator= (value&& v) + { + if (this != &v) + { + // Prepare the receiving value. + // + if (type != v.type) + { + *this = nullptr; + type = v.type; + } + + // Now our types are the same. 
If the receiving value is NULL, then call + // copy_ctor() instead of copy_assign(). + // + if (v) + { + if (type == nullptr) + { + if (null) + new (&data_) names (move (v).as<names> ()); + else + as<names> () = move (v).as<names> (); + } + else if (auto f = null ? type->copy_ctor : type->copy_assign) + f (*this, v, true); + else + data_ = v.data_; // Assign as POD. + + null = v.null; + } + else + *this = nullptr; + } + + return *this; + } + + value& value:: + operator= (const value& v) + { + if (this != &v) + { + // Prepare the receiving value. + // + if (type != v.type) + { + *this = nullptr; + type = v.type; + } + + // Now our types are the same. If the receiving value is NULL, then call + // copy_ctor() instead of copy_assign(). + // + if (v) + { + if (type == nullptr) + { + if (null) + new (&data_) names (v.as<names> ()); + else + as<names> () = v.as<names> (); + } + else if (auto f = null ? type->copy_ctor : type->copy_assign) + f (*this, v, false); + else + data_ = v.data_; // Assign as POD. + + null = v.null; + } + else + *this = nullptr; + } + + return *this; + } + + void value:: + assign (names&& ns, const variable* var) + { + assert (type == nullptr || type->assign != nullptr); + + if (type == nullptr) + { + if (null) + new (&data_) names (move (ns)); + else + as<names> () = move (ns); + } + else + type->assign (*this, move (ns), var); + + null = false; + } + + void value:: + append (names&& ns, const variable* var) + { + if (type == nullptr) + { + if (null) + new (&data_) names (move (ns)); + else + { + names& p (as<names> ()); + + if (p.empty ()) + p = move (ns); + else if (!ns.empty ()) + { + p.insert (p.end (), + make_move_iterator (ns.begin ()), + make_move_iterator (ns.end ())); + } + } + } + else + { + if (type->append == nullptr) + { + diag_record dr (fail); + + dr << "cannot append to " << type->name << " value"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + type->append (*this, move (ns), var); + } + + null = false; + } + + void value:: + prepend (names&& ns, const variable* var) + { + if (type == nullptr) + { + if (null) + new (&data_) names (move (ns)); + else + { + names& p (as<names> ()); + + if (p.empty ()) + p = move (ns); + else if (!ns.empty ()) + { + ns.insert (ns.end (), + make_move_iterator (p.begin ()), + make_move_iterator (p.end ())); + p = move (ns); + } + } + } + else + { + if (type->prepend == nullptr) + { + diag_record dr (fail); + + dr << "cannot prepend to " << type->name << " value"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + type->prepend (*this, move (ns), var); + } + + null = false; + } + + bool + operator== (const value& x, const value& y) + { + bool xn (x.null); + bool yn (y.null); + + assert (x.type == y.type || + (xn && x.type == nullptr) || + (yn && y.type == nullptr)); + + if (xn || yn) + return xn == yn; + + if (x.type == nullptr) + return x.as<names> () == y.as<names> (); + + if (x.type->compare == nullptr) + return memcmp (&x.data_, &y.data_, x.type->size) == 0; + + return x.type->compare (x, y) == 0; + } + + bool + operator< (const value& x, const value& y) + { + bool xn (x.null); + bool yn (y.null); + + assert (x.type == y.type || + (xn && x.type == nullptr) || + (yn && y.type == nullptr)); + + // NULL value is always less than non-NULL. 
+ // + if (xn || yn) + return xn > yn; // !xn < !yn + + if (x.type == nullptr) + return x.as<names> () < y.as<names> (); + + if (x.type->compare == nullptr) + return memcmp (&x.data_, &y.data_, x.type->size) < 0; + + return x.type->compare (x, y) < 0; + } + + bool + operator> (const value& x, const value& y) + { + bool xn (x.null); + bool yn (y.null); + + assert (x.type == y.type || + (xn && x.type == nullptr) || + (yn && y.type == nullptr)); + + // NULL value is always less than non-NULL. + // + if (xn || yn) + return xn < yn; // !xn > !yn + + if (x.type == nullptr) + return x.as<names> () > y.as<names> (); + + if (x.type->compare == nullptr) + return memcmp (&x.data_, &y.data_, x.type->size) > 0; + + return x.type->compare (x, y) > 0; + } + + void + typify (value& v, const value_type& t, const variable* var, memory_order mo) + { + if (v.type == nullptr) + { + if (v) + { + // Note: the order in which we do things here is important. + // + names ns (move (v).as<names> ()); + v = nullptr; + + // Use value_type::assign directly to delay v.type change. + // + t.assign (v, move (ns), var); + v.null = false; + } + else + v.type = &t; + + v.type.store (&t, mo); + } + else if (v.type != &t) + { + diag_record dr (fail); + + dr << "type mismatch"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << info << "value type is " << v.type->name; + dr << info << (var != nullptr && &t == var->type ? "variable" : "new") + << " type is " << t.name; + } + } + + void + typify_atomic (value& v, const value_type& t, const variable* var) + { + // Typification is kind of like caching so we reuse that mutex shard. + // + shared_mutex& m ( + variable_cache_mutex_shard[ + hash<value*> () (&v) % variable_cache_mutex_shard_size]); + + // Note: v.type is rechecked by typify() under lock. + // + ulock l (m); + typify (v, t, var, memory_order_release); + } + + void + untypify (value& v) + { + if (v.type == nullptr) + return; + + if (v.null) + { + v.type = nullptr; + return; + } + + names ns; + names_view nv (v.type->reverse (v, ns)); + + if (nv.empty () || nv.data () == ns.data ()) + { + // If the data is in storage, then we are all set. + // + ns.resize (nv.size ()); // Just to be sure. + } + else + { + // If the data is somewhere in the value itself, then steal it. + // + auto b (const_cast<name*> (nv.data ())); + ns.assign (make_move_iterator (b), + make_move_iterator (b + nv.size ())); + } + + v = nullptr; // Free old data. + v.type = nullptr; // Change type. + v.assign (move (ns), nullptr); // Assign new data. + } + + // Throw invalid_argument for an invalid simple value. + // + [[noreturn]] static void + throw_invalid_argument (const name& n, const name* r, const char* type) + { + string m; + string t (type); + + if (r != nullptr) + m = "pair in " + t + " value"; + else + { + m = "invalid " + t + " value: "; + + if (n.simple ()) + m += "'" + n.value + "'"; + else if (n.directory ()) + m += "'" + n.dir.representation () + "'"; + else + m += "complex name"; + } + + throw invalid_argument (m); + } + + // names + // + const names& value_traits<names>::empty_instance = empty_names; + + // bool value + // + bool value_traits<bool>:: + convert (name&& n, name* r) + { + if (r == nullptr && n.simple ()) + { + const string& s (n.value); + + if (s == "true") + return true; + + if (s == "false") + return false; + + // Fall through. 
+ } + + throw_invalid_argument (n, r, "bool"); + } + + const char* const value_traits<bool>::type_name = "bool"; + + const value_type value_traits<bool>::value_type + { + type_name, + sizeof (bool), + nullptr, // No base. + nullptr, // No element. + nullptr, // No dtor (POD). + nullptr, // No copy_ctor (POD). + nullptr, // No copy_assign (POD). + &simple_assign<bool>, + &simple_append<bool>, + &simple_append<bool>, // Prepend same as append. + &simple_reverse<bool>, + nullptr, // No cast (cast data_ directly). + nullptr, // No compare (compare as POD). + nullptr // Never empty. + }; + + // uint64_t value + // + uint64_t value_traits<uint64_t>:: + convert (name&& n, name* r) + { + if (r == nullptr && n.simple ()) + { + try + { + // May throw invalid_argument or out_of_range. + // + return stoull (n.value); + } + catch (const std::exception&) + { + // Fall through. + } + } + + throw_invalid_argument (n, r, "uint64"); + } + + const char* const value_traits<uint64_t>::type_name = "uint64"; + + const value_type value_traits<uint64_t>::value_type + { + type_name, + sizeof (uint64_t), + nullptr, // No base. + nullptr, // No element. + nullptr, // No dtor (POD). + nullptr, // No copy_ctor (POD). + nullptr, // No copy_assign (POD). + &simple_assign<uint64_t>, + &simple_append<uint64_t>, + &simple_append<uint64_t>, // Prepend same as append. + &simple_reverse<uint64_t>, + nullptr, // No cast (cast data_ directly). + nullptr, // No compare (compare as POD). + nullptr // Never empty. + }; + + // string value + // + string value_traits<string>:: + convert (name&& n, name* r) + { + // The goal is to reverse the name into its original representation. The + // code is a bit convoluted because we try to avoid extra allocations for + // the common cases (unqualified, unpaired simple name or directory). + // + + // We can only convert project-qualified simple and directory names. + // + if (!(n.simple (true) || n.directory (true)) || + !(r == nullptr || r->simple (true) || r->directory (true))) + throw_invalid_argument (n, r, "string"); + + string s; + + if (n.directory (true)) + // Note that here we cannot assume what's in dir is really a + // path (think s/foo/bar/) so we have to reverse it exactly. + // + s = move (n.dir).representation (); // Move out of path. + else + s.swap (n.value); + + // Convert project qualification to its string representation. + // + if (n.qualified ()) + { + string p (move (*n.proj).string ()); + p += '%'; + p += s; + p.swap (s); + } + + // The same for the RHS of a pair, if we have one. + // + if (r != nullptr) + { + s += '@'; + + if (r->qualified ()) + { + s += r->proj->string (); + s += '%'; + } + + if (r->directory (true)) + s += move (r->dir).representation (); + else + s += r->value; + } + + return s; + } + + const string& value_traits<string>::empty_instance = empty_string; + + const char* const value_traits<string>::type_name = "string"; + + const value_type value_traits<string>::value_type + { + type_name, + sizeof (string), + nullptr, // No base. + nullptr, // No element. + &default_dtor<string>, + &default_copy_ctor<string>, + &default_copy_assign<string>, + &simple_assign<string>, + &simple_append<string>, + &simple_prepend<string>, + &simple_reverse<string>, + nullptr, // No cast (cast data_ directly). + &simple_compare<string>, + &default_empty<string> + }; + + // path value + // + path value_traits<path>:: + convert (name&& n, name* r) + { + if (r == nullptr) + { + // A directory path is a path. 
+ // + if (n.directory ()) + return move (n.dir); + + if (n.simple ()) + { + try + { + return path (move (n.value)); + } + catch (invalid_path& e) + { + n.value = move (e.path); // Restore the name object for diagnostics. + // Fall through. + } + } + + // Reassemble split dir/value. + // + if (n.untyped () && n.unqualified ()) + { + try + { + return n.dir / n.value; + } + catch (const invalid_path&) + { + // Fall through. + } + } + + // Fall through. + } + + throw_invalid_argument (n, r, "path"); + } + + const path& value_traits<path>::empty_instance = empty_path; + + const char* const value_traits<path>::type_name = "path"; + + const value_type value_traits<path>::value_type + { + type_name, + sizeof (path), + nullptr, // No base. + nullptr, // No element. + &default_dtor<path>, + &default_copy_ctor<path>, + &default_copy_assign<path>, + &simple_assign<path>, + &simple_append<path>, + &simple_prepend<path>, + &simple_reverse<path>, + nullptr, // No cast (cast data_ directly). + &simple_compare<path>, + &default_empty<path> + }; + + // dir_path value + // + dir_path value_traits<dir_path>:: + convert (name&& n, name* r) + { + if (r == nullptr) + { + if (n.directory ()) + return move (n.dir); + + if (n.simple ()) + { + try + { + return dir_path (move (n.value)); + } + catch (invalid_path& e) + { + n.value = move (e.path); // Restore the name object for diagnostics. + // Fall through. + } + } + + // Reassemble split dir/value. + // + if (n.untyped () && n.unqualified ()) + { + try + { + n.dir /= n.value; + return move (n.dir); + } + catch (const invalid_path&) + { + // Fall through. + } + } + + // Fall through. + } + + throw_invalid_argument (n, r, "dir_path"); + } + + const dir_path& value_traits<dir_path>::empty_instance = empty_dir_path; + + const char* const value_traits<dir_path>::type_name = "dir_path"; + + const value_type value_traits<dir_path>::value_type + { + type_name, + sizeof (dir_path), + &value_traits<path>::value_type, // Base (assuming direct cast works for + // both). + nullptr, // No element. + &default_dtor<dir_path>, + &default_copy_ctor<dir_path>, + &default_copy_assign<dir_path>, + &simple_assign<dir_path>, + &simple_append<dir_path>, + &simple_prepend<dir_path>, + &simple_reverse<dir_path>, + nullptr, // No cast (cast data_ directly). + &simple_compare<dir_path>, + &default_empty<dir_path> + }; + + // abs_dir_path value + // + abs_dir_path value_traits<abs_dir_path>:: + convert (name&& n, name* r) + { + if (r == nullptr && (n.simple () || n.directory ())) + { + try + { + dir_path d (n.simple () ? dir_path (move (n.value)) : move (n.dir)); + + if (!d.empty ()) + { + if (d.relative ()) + d.complete (); + + d.normalize (true); // Actualize. + } + + return abs_dir_path (move (d)); + } + catch (const invalid_path&) {} // Fall through. + } + + throw_invalid_argument (n, r, "abs_dir_path"); + } + + const char* const value_traits<abs_dir_path>::type_name = "abs_dir_path"; + + const value_type value_traits<abs_dir_path>::value_type + { + type_name, + sizeof (abs_dir_path), + &value_traits<dir_path>::value_type, // Base (assuming direct cast works + // for both). + nullptr, // No element. + &default_dtor<abs_dir_path>, + &default_copy_ctor<abs_dir_path>, + &default_copy_assign<abs_dir_path>, + &simple_assign<abs_dir_path>, + &simple_append<abs_dir_path>, + nullptr, // No prepend. + &simple_reverse<abs_dir_path>, + nullptr, // No cast (cast data_ directly). 
+ &simple_compare<abs_dir_path>, + &default_empty<abs_dir_path> + }; + + // name value + // + name value_traits<name>:: + convert (name&& n, name* r) + { + if (r == nullptr) + return move (n); + + throw_invalid_argument (n, r, "name"); + } + + static names_view + name_reverse (const value& v, names&) + { + const name& n (v.as<name> ()); + return n.empty () ? names_view (nullptr, 0) : names_view (&n, 1); + } + + const char* const value_traits<name>::type_name = "name"; + + const value_type value_traits<name>::value_type + { + type_name, + sizeof (name), + nullptr, // No base. + nullptr, // No element. + &default_dtor<name>, + &default_copy_ctor<name>, + &default_copy_assign<name>, + &simple_assign<name>, + nullptr, // Append not supported. + nullptr, // Prepend not supported. + &name_reverse, + nullptr, // No cast (cast data_ directly). + &simple_compare<name>, + &default_empty<name> + }; + + // name_pair + // + name_pair value_traits<name_pair>:: + convert (name&& n, name* r) + { + n.pair = '\0'; // Keep "unpaired" in case r is empty. + return name_pair (move (n), r != nullptr ? move (*r) : name ()); + } + + void + name_pair_assign (value& v, names&& ns, const variable* var) + { + using traits = value_traits<name_pair>; + + size_t n (ns.size ()); + + if (n <= 2) + { + try + { + traits::assign ( + v, + (n == 0 + ? name_pair () + : traits::convert (move (ns[0]), n == 2 ? &ns[1] : nullptr))); + return; + } + catch (const invalid_argument&) {} // Fall through. + } + + diag_record dr (fail); + dr << "invalid name_pair value '" << ns << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + static names_view + name_pair_reverse (const value& v, names& ns) + { + const name_pair& p (v.as<name_pair> ()); + const name& f (p.first); + const name& s (p.second); + + if (f.empty () && s.empty ()) + return names_view (nullptr, 0); + + if (f.empty ()) + return names_view (&s, 1); + + if (s.empty ()) + return names_view (&f, 1); + + ns.push_back (f); + ns.back ().pair = '@'; + ns.push_back (s); + return ns; + } + + const char* const value_traits<name_pair>::type_name = "name_pair"; + + const value_type value_traits<name_pair>::value_type + { + type_name, + sizeof (name_pair), + nullptr, // No base. + nullptr, // No element. + &default_dtor<name_pair>, + &default_copy_ctor<name_pair>, + &default_copy_assign<name_pair>, + &name_pair_assign, + nullptr, // Append not supported. + nullptr, // Prepend not supported. + &name_pair_reverse, + nullptr, // No cast (cast data_ directly). + &simple_compare<name_pair>, + &default_empty<name_pair> + }; + + // process_path value + // + process_path value_traits<process_path>:: + convert (name&& n, name* r) + { + if ( n.untyped () && n.unqualified () && !n.empty () && + (r == nullptr || (r->untyped () && r->unqualified () && !r->empty ()))) + { + path rp (move (n.dir)); + if (rp.empty ()) + rp = path (move (n.value)); + else + rp /= n.value; + + path ep; + if (r != nullptr) + { + ep = move (r->dir); + if (ep.empty ()) + ep = path (move (r->value)); + else + ep /= r->value; + } + + process_path pp (nullptr, move (rp), move (ep)); + pp.initial = pp.recall.string ().c_str (); + return pp; + } + + throw_invalid_argument (n, r, "process_path"); + } + + void + process_path_assign (value& v, names&& ns, const variable* var) + { + using traits = value_traits<process_path>; + + size_t n (ns.size ()); + + if (n <= 2) + { + try + { + traits::assign ( + v, + (n == 0 + ? process_path () + : traits::convert (move (ns[0]), n == 2 ? 
&ns[1] : nullptr))); + return; + } + catch (const invalid_argument&) {} // Fall through. + } + + diag_record dr (fail); + dr << "invalid process_path value '" << ns << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + void + process_path_copy_ctor (value& l, const value& r, bool m) + { + const auto& rhs (r.as<process_path> ()); + + if (m) + new (&l.data_) process_path (move (const_cast<process_path&> (rhs))); + else + { + auto& lhs ( + *new (&l.data_) process_path ( + nullptr, path (rhs.recall), path (rhs.effect))); + lhs.initial = lhs.recall.string ().c_str (); + } + } + + void + process_path_copy_assign (value& l, const value& r, bool m) + { + auto& lhs (l.as<process_path> ()); + const auto& rhs (r.as<process_path> ()); + + if (m) + lhs = move (const_cast<process_path&> (rhs)); + else + { + lhs.recall = rhs.recall; + lhs.effect = rhs.effect; + lhs.initial = lhs.recall.string ().c_str (); + } + } + + static names_view + process_path_reverse (const value& v, names& s) + { + const process_path& x (v.as<process_path> ()); + + if (!x.empty ()) + { + s.reserve (x.effect.empty () ? 1 : 2); + + s.push_back (name (x.recall.directory (), + string (), + x.recall.leaf ().string ())); + + if (!x.effect.empty ()) + { + s.back ().pair = '@'; + s.push_back (name (x.effect.directory (), + string (), + x.effect.leaf ().string ())); + } + } + + return s; + } + + const char* const value_traits<process_path>::type_name = "process_path"; + + const value_type value_traits<process_path>::value_type + { + type_name, + sizeof (process_path), + nullptr, // No base. + nullptr, // No element. + &default_dtor<process_path>, + &process_path_copy_ctor, + &process_path_copy_assign, + &process_path_assign, + nullptr, // Append not supported. + nullptr, // Prepend not supported. + &process_path_reverse, + nullptr, // No cast (cast data_ directly). + &simple_compare<process_path>, + &default_empty<process_path> + }; + + // target_triplet value + // + target_triplet value_traits<target_triplet>:: + convert (name&& n, name* r) + { + if (r == nullptr) + { + if (n.simple ()) + { + try + { + return n.empty () ? target_triplet () : target_triplet (n.value); + } + catch (const invalid_argument& e) + { + throw invalid_argument ( + string ("invalid target_triplet value: ") + e.what ()); + } + } + + // Fall through. + } + + throw_invalid_argument (n, r, "target_triplet"); + } + + const char* const value_traits<target_triplet>::type_name = "target_triplet"; + + const value_type value_traits<target_triplet>::value_type + { + type_name, + sizeof (target_triplet), + nullptr, // No base. + nullptr, // No element. + &default_dtor<target_triplet>, + &default_copy_ctor<target_triplet>, + &default_copy_assign<target_triplet>, + &simple_assign<target_triplet>, + nullptr, // Append not supported. + nullptr, // Prepend not supported. + &simple_reverse<target_triplet>, + nullptr, // No cast (cast data_ directly). + &simple_compare<target_triplet>, + &default_empty<target_triplet> + }; + + // project_name value + // + project_name value_traits<project_name>:: + convert (name&& n, name* r) + { + if (r == nullptr) + { + if (n.simple ()) + { + try + { + return n.empty () ? project_name () : project_name (move (n.value)); + } + catch (const invalid_argument& e) + { + throw invalid_argument ( + string ("invalid project_name value: ") + e.what ()); + } + } + + // Fall through. 
+ } + + throw_invalid_argument (n, r, "project_name"); + } + + const project_name& + value_traits<project_name>::empty_instance = empty_project_name; + + const char* const value_traits<project_name>::type_name = "project_name"; + + const value_type value_traits<project_name>::value_type + { + type_name, + sizeof (project_name), + nullptr, // No base. + nullptr, // No element. + &default_dtor<project_name>, + &default_copy_ctor<project_name>, + &default_copy_assign<project_name>, + &simple_assign<project_name>, + nullptr, // Append not supported. + nullptr, // Prepend not supported. + &simple_reverse<project_name>, + nullptr, // No cast (cast data_ directly). + &simple_compare<project_name>, + &default_empty<project_name> + }; + + // variable_pool + // + void variable_pool:: + update (variable& var, + const build2::value_type* t, + const variable_visibility* v, + const bool* o) const + { + // Check overridability (all overrides, if any, should already have + // been entered (see context.cxx:reset()). + // + if (var.overrides != nullptr && (o == nullptr || !*o)) + fail << "variable " << var.name << " cannot be overridden"; + + bool ut (t != nullptr && var.type != t); + bool uv (v != nullptr && var.visibility != *v); + + // Variable should not be updated post-aliasing. + // + assert (var.aliases == &var || (!ut && !uv)); + + // Update type? + // + if (ut) + { + assert (var.type == nullptr); + var.type = t; + } + + // Change visibility? While this might at first seem like a bad idea, + // it can happen that the variable lookup happens before any values + // were set, in which case the variable will be entered with the + // default visibility. + // + if (uv) + { + assert (var.visibility == variable_visibility::normal); // Default. + var.visibility = *v; + } + } + + static bool + match_pattern (const string& n, const string& p, const string& s, bool multi) + { + size_t nn (n.size ()), pn (p.size ()), sn (s.size ()); + + if (nn < pn + sn + 1) + return false; + + if (pn != 0) + { + if (n.compare (0, pn, p) != 0) + return false; + } + + if (sn != 0) + { + if (n.compare (nn - sn, sn, s) != 0) + return false; + } + + // Make sure the stem is a single name unless instructed otherwise. + // + return multi || string::traits_type::find (n.c_str () + pn, + nn - pn - sn, + '.') == nullptr; + } + + static inline void + merge_pattern (const variable_pool::pattern& p, + const build2::value_type*& t, + const variable_visibility*& v, + const bool*& o) + { + if (p.type) + { + if (t == nullptr) + t = *p.type; + else if (p.match) + assert (t == *p.type); + } + + if (p.visibility) + { + if (v == nullptr) + v = &*p.visibility; + else if (p.match) + assert (*v == *p.visibility); + } + + if (p.overridable) + { + if (o == nullptr) + o = &*p.overridable; + else if (p.match) + { + // Allow the pattern to restrict but not relax. + // + if (*o) + o = &*p.overridable; + else + assert (*o == *p.overridable); + } + } + } + + variable& variable_pool:: + insert (string n, + const build2::value_type* t, + const variable_visibility* v, + const bool* o, + bool pat) + { + assert (!global_ || phase == run_phase::load); + + // Apply pattern. + // + if (pat) + { + if (n.find ('.') != string::npos) + { + // Reverse means from the "largest" (most specific). + // + for (const pattern& p: reverse_iterate (patterns_)) + { + if (match_pattern (n, p.prefix, p.suffix, p.multi)) + { + merge_pattern (p, t, v, o); + break; + } + } + } + } + + auto p ( + insert ( + variable { + move (n), + nullptr, + t, + nullptr, + v != nullptr ? 
*v : variable_visibility::normal})); + + variable& r (p.first->second); + + if (p.second) + r.aliases = &r; + else // Note: overridden variable will always exist. + { + if (t != nullptr || v != nullptr || o != nullptr) + update (r, t, v, o); // Not changing the key. + else if (r.overrides != nullptr) + fail << "variable " << r.name << " cannot be overridden"; + } + + return r; + } + + const variable& variable_pool:: + insert_alias (const variable& var, string n) + { + assert (var.aliases != nullptr && var.overrides == nullptr); + + variable& a (insert (move (n), + var.type, + &var.visibility, + nullptr /* override */, + false /* pattern */)); + + if (a.aliases == &a) // Not aliased yet. + { + a.aliases = var.aliases; + const_cast<variable&> (var).aliases = &a; + } + else + assert (a.alias (var)); // Make sure it is already an alias of var. + + return a; + } + + void variable_pool:: + insert_pattern (const string& p, + optional<const value_type*> t, + optional<bool> o, + optional<variable_visibility> v, + bool retro, + bool match) + { + assert (!global_ || phase == run_phase::load); + + size_t pn (p.size ()); + + size_t w (p.find ('*')); + assert (w != string::npos); + + bool multi (w + 1 != pn && p[w + 1] == '*'); + + // Extract prefix and suffix. + // + string pfx, sfx; + + if (w != 0) + { + assert (p[w - 1] == '.' && w != 1); + pfx.assign (p, 0, w); + } + + w += multi ? 2 : 1; // First suffix character. + size_t sn (pn - w); // Suffix length. + + if (sn != 0) + { + assert (p[w] == '.' && sn != 1); + sfx.assign (p, w, sn); + } + + auto i ( + patterns_.insert ( + pattern {move (pfx), move (sfx), multi, match, t, v, o})); + + // Apply retrospectively to existing variables. + // + if (retro) + { + for (auto& p: map_) + { + variable& var (p.second); + + if (match_pattern (var.name, i->prefix, i->suffix, i->multi)) + { + // Make sure that none of the existing more specific patterns + // match. + // + auto j (i), e (patterns_.end ()); + for (++j; j != e; ++j) + { + if (match_pattern (var.name, j->prefix, j->suffix, j->multi)) + break; + } + + if (j == e) + update (var, + t ? *t : nullptr, + v ? &*v : nullptr, + o ? &*o : nullptr); // Not changing the key. + } + } + } + } + + variable_pool variable_pool::instance (true); + const variable_pool& variable_pool::cinstance = variable_pool::instance; + const variable_pool& var_pool = variable_pool::cinstance; + + // variable_map + // + auto variable_map:: + find (const variable& var, bool typed) const -> + pair<const value_data*, const variable&> + { + const variable* v (&var); + const value_data* r (nullptr); + do + { + // @@ Should we verify that there are no distinct values for aliases? + // This can happen if the values were entered before the variables + // were aliased. Possible but probably highly unlikely. + // + auto i (m_.find (*v)); + if (i != m_.end ()) + { + r = &i->second; + break; + } + + v = v->aliases; + + } while (v != &var && v != nullptr); + + // Check if this is the first access after being assigned a type. + // + if (r != nullptr && typed && v->type != nullptr) + typify (*r, *v); + + return pair<const value_data*, const variable&> ( + r, r != nullptr ? 
*v : var); + } + + auto variable_map:: + find_to_modify (const variable& var, bool typed) -> + pair<value_data*, const variable&> + { + auto p (find (var, typed)); + auto* r (const_cast<value_data*> (p.first)); + + if (r != nullptr) + r->version++; + + return pair<value_data*, const variable&> (r, p.second); + } + + pair<reference_wrapper<value>, bool> variable_map:: + insert (const variable& var, bool typed) + { + assert (!global_ || phase == run_phase::load); + + auto p (m_.emplace (var, value_data (typed ? var.type : nullptr))); + value_data& r (p.first->second); + + if (!p.second) + { + // Check if this is the first access after being assigned a type. + // + // Note: we still need atomic in case this is not a global state. + // + if (typed && var.type != nullptr) + typify (r, var); + } + + r.version++; + + return make_pair (reference_wrapper<value> (r), p.second); + } + + // variable_type_map + // + lookup variable_type_map:: + find (const target_type& type, + const string& name, + const variable& var) const + { + // Search across target type hierarchy. + // + for (auto tt (&type); tt != nullptr; tt = tt->base) + { + auto i (map_.find (*tt)); + + if (i == end ()) + continue; + + // Try to match the pattern, starting from the longest values + // so that the more "specific" patterns (i.e., those that cover + // fewer characters with the wildcard) take precedence. See + // tests/variable/type-pattern. + // + const variable_pattern_map& m (i->second); + + for (auto j (m.rbegin ()); j != m.rend (); ++j) + { + const string& pat (j->first); + + //@@ TODO: should we detect ambiguity? 'foo-*' '*-foo' and 'foo-foo'? + // Right now the last defined will be used. + // + if (pat != "*") + { + if (name.size () < pat.size () - 1 || // One for '*' or '?'. + !butl::path_match (pat, name)) + continue; + } + + // Ok, this pattern matches. But is there a variable? + // + // Since we store append/prepend values untyped, instruct find() not + // to automatically type it. And if it is assignment, then typify it + // ourselves. + // + const variable_map& vm (j->second); + { + auto p (vm.find (var, false)); + if (const variable_map::value_data* v = p.first) + { + // Check if this is the first access after being assigned a type. 
+ // + if (v->extra == 0 && var.type != nullptr) + vm.typify (*v, var); + + return lookup (*v, p.second, vm); + } + } + } + } + + return lookup (); + } + + size_t variable_cache_mutex_shard_size; + unique_ptr<shared_mutex[]> variable_cache_mutex_shard; + + template struct LIBBUILD2_DEFEXPORT value_traits<strings>; + template struct LIBBUILD2_DEFEXPORT value_traits<vector<name>>; + template struct LIBBUILD2_DEFEXPORT value_traits<paths>; + template struct LIBBUILD2_DEFEXPORT value_traits<dir_paths>; + template struct LIBBUILD2_DEFEXPORT value_traits<uint64s>; + + template struct LIBBUILD2_DEFEXPORT value_traits<std::map<string, string>>; + + template struct LIBBUILD2_DEFEXPORT + value_traits<std::map<project_name, dir_path>>; +} diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx new file mode 100644 index 0000000..9a106b5 --- /dev/null +++ b/libbuild2/variable.hxx @@ -0,0 +1,1596 @@ +// file : libbuild2/variable.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_VARIABLE_HXX +#define LIBBUILD2_VARIABLE_HXX + +#include <map> +#include <set> +#include <type_traits> // aligned_storage +#include <unordered_map> + +#include <libbutl/prefix-map.mxx> +#include <libbutl/multi-index.mxx> // map_key + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target-type.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // Some general variable infrastructure rules: + // + // 1. A variable can only be entered or typified during the load phase. + // + // 2. Any entity (module) that caches a variable value must make sure the + // variable has already been typified. + // + // 3. Any entity (module) that assigns a target-specific variable value + // during a phase other than load must make sure the variable has already + // been typified. + + class value; + struct variable; + struct lookup; + + struct value_type + { + const char* name; // Type name for diagnostics. + const size_t size; // Type size in value::data_ (only used for PODs). + + // Base type, if any. We have very limited support for inheritance: a + // value can be cast to the base type. In particular, a derived/base value + // cannot be assigned to base/derived. If not NULL, then the cast function + // below is expected to return the base pointer if its second argument + // points to the base's value_type. + // + const value_type* base_type; + + // Element type, if this is a vector. + // + const value_type* element_type; + + // Destroy the value. If it is NULL, then the type is assumed to be POD + // with a trivial destructor. + // + void (*const dtor) (value&); + + // Copy/move constructor and copy/move assignment for data_. If NULL, then + // assume the stored data is POD. If move is true then the second argument + // can be const_cast and moved from. copy_assign() is only called with + // non-NULL first argument. + // + void (*const copy_ctor) (value&, const value&, bool move); + void (*const copy_assign) (value&, const value&, bool move); + + // While assign cannot be NULL, if append or prepend is NULL, then this + // means this type doesn't support this operation. Variable is optional + // and is provided only for diagnostics. Return true if the resulting + // value is not empty. 
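The dtor/copy_ctor/copy_assign slots above, together with the fixed-size value::data_ buffer declared further down, form a hand-rolled type-erasure scheme: a per-type table of function pointers operating on an aligned in-place buffer. For orientation, a minimal standalone miniature of the same idea, independent of libbuild2 (mini_type, mini_value, and the helper names are made up for illustration):

#include <new>
#include <string>
#include <type_traits>

struct mini_type                  // Cut-down analog of value_type.
{
  void (*const dtor) (void*);
  void (*const copy) (void*, const void*);
};

template <typename T>
void dtor_impl (void* p) {static_cast<T*> (p)->~T ();}

template <typename T>
void copy_impl (void* d, const void* s) {new (d) T (*static_cast<const T*> (s));}

template <typename T>
const mini_type mini_type_for {&dtor_impl<T>, &copy_impl<T>};

struct mini_value                 // Cut-down analog of value.
{
  const mini_type* type = nullptr;
  std::aligned_storage<sizeof (std::string)>::type data;

  template <typename T>
  void
  assign (const T& x)
  {
    static_assert (sizeof (T) <= sizeof (data), "insufficient space");
    reset ();
    type = &mini_type_for<T>;
    type->copy (&data, &x);       // Construct in-place in the buffer.
  }

  void reset () {if (type != nullptr) {type->dtor (&data); type = nullptr;}}
  ~mini_value () {reset ();}
};

int main ()
{
  mini_value v;
  v.assign (std::string ("hello")); // The string lives in v.data.
  v.assign (42);                    // Destroyed and replaced with an int.
}
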
+ // + void (*const assign) (value&, names&&, const variable*); + void (*const append) (value&, names&&, const variable*); + void (*const prepend) (value&, names&&, const variable*); + + // Reverse the value back to a vector of names. Storage can be used by the + // implementation if necessary. Cannot be NULL. + // + names_view (*const reverse) (const value&, names& storage); + + // Cast value::data_ storage to value type so that the result can be + // static_cast to const T*. If it is NULL, then cast data_ directly. Note + // that this function is used for both const and non-const values. + // + const void* (*const cast) (const value&, const value_type*); + + // If NULL, then the types are compared as PODs using memcmp(). + // + int (*const compare) (const value&, const value&); + + // If NULL, then the value is never empty. + // + bool (*const empty) (const value&); + }; + + // The order of the enumerators is arranged so that their integral values + // indicate whether one is more restrictive than the other. + // + enum class variable_visibility: uint8_t + { + // Note that the search for target type/pattern-specific terminates at + // the project boundary. + // + normal, // All outer scopes. + project, // This project (no outer projects). + scope, // This scope (no outer scopes). + target, // Target and target type/pattern-specific. + prereq // Prerequisite-specific. + }; + + // VC14 reports ambiguity but seems to work if we don't provide any. + // +#if !defined(_MSC_VER) || _MSC_VER > 1900 + inline bool + operator> (variable_visibility l, variable_visibility r) + { + return static_cast<uint8_t> (l) > static_cast<uint8_t> (r); + } + + inline bool + operator>= (variable_visibility l, variable_visibility r) + { + return static_cast<uint8_t> (l) >= static_cast<uint8_t> (r); + } + + inline bool + operator< (variable_visibility l, variable_visibility r) + { + return r > l; + } + + inline bool + operator<= (variable_visibility l, variable_visibility r) + { + return r >= l; + } +#endif + + LIBBUILD2_SYMEXPORT ostream& + operator<< (ostream&, variable_visibility); + + // variable + // + // The two variables are considered the same if they have the same name. + // + // Variables can be aliases of each other in which case they form a circular + // linked list (the aliases pointer for variable without any aliases points + // to the variable itself). + // + // If the variable is overridden on the command line, then override is the + // linked list of the special override variables. Their names are derived + // from the main variable name as <name>.<N>.{__override,__prefix,__suffix} + // and they are not entered into the var_pool. The override variables only + // vary in their names and visibility. Their aliases pointer is re-purposed + // to make the list doubly-linked with the first override's aliases pointer + // pointing to the last element (or itself). + // + // Note also that we don't propagate the variable type to override variables + // and we keep override values as untyped names. They get "typed" when they + // are applied. + // + // The overrides list is in the reverse order of the overrides appearing on + // the command line, which is important when deciding whether and in what + // order they apply (see find_override() for details). + // + // The <N> part in the override variable name is its position on the command + // line, which effectively means we will have as many variable names as + // there are overrides. This strange arrangement is here to support multiple + // overrides. 
For example: + // + // b config.cc.coptions=-O2 config.cc.coptions+=-g config.cc.coptions+=-Wall + // + // We cannot yet apply them to form a single value since this requires + // knowing their type. And there is no way to store multiple values of the + // same variable in any given variable_map. As a result, the best option + // appears to be to store them as multiple variables. While not very + // efficient, this shouldn't be a big deal since we don't expect to have + // many overrides. + // + // We use the "modify original, override on query" model. Because of that, a + // modified value does not necessarily represent the actual value so care + // must be taken to re-query after (direct) modification. And because of + // that, variables set by the C++ code are by default non-overridable. + // + // Initial processing including entering of global overrides happens in + // reset() before any other variables. Project wide overrides are entered in + // main(). Overriding happens in scope::find_override(). + // + // NULL type and normal visibility are the defaults and can be overridden by + // "tighter" values. + // + struct variable + { + string name; + const variable* aliases; // Circular linked list. + const value_type* type; // If NULL, then not (yet) typed. + unique_ptr<const variable> overrides; + variable_visibility visibility; + + // Return true if this variable is an alias of the specified variable. + // + bool + alias (const variable& var) const + { + const variable* v (aliases); + for (; v != &var && v != this; v = v->aliases) ; + return v == &var; + } + + // Return the length of the original variable if this is an override, + // optionally of the specified kind (__override, __prefix, etc), and 0 + // otherwise (so this function can be used as a predicate). + // + // @@ It would be nicer to return the original variable but there is no + // natural place to store such a "back" pointer. The overrides pointer + // in the last element could work but it is owning. So let's not + // complicate things for now seeing that there are only a few places + // where we need this. + // + size_t + override (const char* k = nullptr) const + { + size_t p (name.rfind ('.')); + if (p != string::npos) + { + auto cmp = [this, p] (const char* k) + { + return name.compare (p + 1, string::npos, k) == 0; + }; + + if (k != nullptr + ? (cmp (k)) + : (cmp ("__override") || cmp ("__prefix") || cmp ("__suffix"))) + { + // Skip .<N>. + // + p = name.rfind ('.', p - 1); + assert (p != string::npos && p != 0); + return p; + } + } + + return 0; + } + }; + + inline bool + operator== (const variable& x, const variable& y) {return x.name == y.name;} + + inline ostream& + operator<< (ostream& os, const variable& v) {return os << v.name;} + + // + // + class LIBBUILD2_SYMEXPORT value + { + public: + // NULL means this value is not (yet) typed. + // + // Atomic access is used to implement on-first-access typification of + // values store in variable_map. Direct access as well as other functions + // that operate on values directly all use non-atomic access. + // + relaxed_atomic<const value_type*> type; + + // True if there is no value. + // + bool null; + + // Extra data that is associated with the value that can be used to store + // flags, etc. It is initialized to 0 and copied (but not assigned) from + // one value to another but is otherwise untouched (not even when the + // value is reset to NULL). + // + // Note: if deciding to use for something make sure it is not overlapping + // with an existing usage. 
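Returning to the override naming scheme documented above, here is a small standalone sketch of the name parsing that variable::override() performs. The override variable name config.cc.coptions.2.__suffix is hypothetical (the <N> part is whatever position the override occupies on the command line):

#include <cassert>
#include <cstddef>
#include <string>

// Mirror of the parsing in variable::override() above: return the length of
// the original variable name if this looks like an override variable
// (<name>.<N>.__{override,prefix,suffix}) and 0 otherwise.
//
static std::size_t
override_stem (const std::string& n)
{
  std::size_t p (n.rfind ('.'));
  if (p == std::string::npos || p == 0)
    return 0;

  std::string k (n, p + 1);
  if (k != "__override" && k != "__prefix" && k != "__suffix")
    return 0;

  p = n.rfind ('.', p - 1); // Skip .<N>.
  return p != std::string::npos ? p : 0;
}

int main ()
{
  std::string n ("config.cc.coptions.2.__suffix"); // Hypothetical override.

  assert (override_stem (n) == std::string ("config.cc.coptions").size ());
}
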
+ // + uint16_t extra; + + explicit operator bool () const {return !null;} + bool operator== (nullptr_t) const {return null;} + bool operator!= (nullptr_t) const {return !null;} + + // Check in a type-independent way if the value is empty. The value must + // not be NULL. + // + bool + empty () const; + + // Creation. A default-initialzied value is NULL and can be reset back to + // NULL by assigning nullptr. Values can be copied and copy-assigned. Note + // that for assignment, the values' types should be the same or LHS should + // be untyped. + // + // + public: + ~value () {*this = nullptr;} + + explicit + value (nullptr_t = nullptr): type (nullptr), null (true), extra (0) {} + + explicit + value (const value_type* t): type (t), null (true), extra (0) {} + + explicit + value (names); // Create untyped value. + + explicit + value (optional<names>); + + template <typename T> + explicit + value (T); // Create value of value_traits<T>::value_type type. + + template <typename T> + explicit + value (optional<T>); + + // Note: preserves type. + // + value& + operator= (nullptr_t) {if (!null) reset (); return *this;} + + value (value&&); + explicit value (const value&); + value& operator= (value&&); + value& operator= (const value&); + value& operator= (reference_wrapper<value>); + value& operator= (reference_wrapper<const value>); + + // Assign/Append/Prepend. + // + public: + // Assign/append a typed value. For assign, LHS should be either of the + // same type or untyped. For append, LHS should be either of the same type + // or untyped and NULL. + // + template <typename T> value& operator= (T); + template <typename T> value& operator+= (T); + + template <typename T> value& operator= (T* v) { + return v != nullptr ? *this = *v : *this = nullptr;} + + template <typename T> value& operator+= (T* v) { + return v != nullptr ? *this += *v : *this;} + + value& operator= (const char* v) {return *this = string (v);} + value& operator+= (const char* v) {return *this += string (v);} + + // Assign/append/prepend raw data. Variable is optional and is only used + // for diagnostics. + // + void assign (names&&, const variable*); + void assign (name&&, const variable*); // Shortcut for single name. + void append (names&&, const variable*); + void prepend (names&&, const variable*); + + + // Implementation details, don't use directly except in representation + // type implementations. + // + public: + // Fast, unchecked cast of data_ to T. + // + template <typename T> T& as () & {return reinterpret_cast<T&> (data_);} + template <typename T> T&& as () && {return move (as<T> ());} + template <typename T> const T& as () const& { + return reinterpret_cast<const T&> (data_);} + + public: + // The maximum size we can store directly is sufficient for the most + // commonly used types (string, vector, map) on all the platforms that we + // support (each type should static assert this in its value_traits + // specialization below). Types that don't fit will have to be handled + // with an extra dynamic allocation. + // + static constexpr size_t size_ = sizeof (name_pair); + std::aligned_storage<size_>::type data_; + + // Make sure we have sufficient storage for untyped values. + // + static_assert (sizeof (names) <= size_, "insufficient space"); + + private: + void + reset (); + }; + + // This is what we call a "value pack"; it can be created by the eval + // context and passed as arguments to functions. Usually we will have just + // one value. 
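A quick usage sketch of the value interface declared above, assuming the headers in this diff; the string content is arbitrary:

#include <cassert>

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
example ()
{
  value v;                      // Untyped and NULL.

  v = string ("hello");         // Typified as string on first assignment.
  v += string (", world");      // value_traits<string>::append().

  assert (v.type == &value_traits<string>::value_type && !v.null);

  const string& s (cast<string> (v));
  assert (s == "hello, world");
}
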
+ // + using values = small_vector<value, 1>; + + // The values should be of the same type (or both be untyped) except NULL + // values can also be untyped. NULL values compare equal and a NULL value + // is always less than a non-NULL. + // + LIBBUILD2_SYMEXPORT bool operator== (const value&, const value&); + bool operator!= (const value&, const value&); + LIBBUILD2_SYMEXPORT bool operator< (const value&, const value&); + bool operator<= (const value&, const value&); + LIBBUILD2_SYMEXPORT bool operator> (const value&, const value&); + bool operator>= (const value&, const value&); + + // Value cast. The first three expect the value to be not NULL. The cast + // from lookup expects the value to also be defined. + // + // Note that a cast to names expects the value to be untyped while a cast + // to vector<name> -- typed. + // + // Why are these non-members? The cast is easier on the eyes and is also + // consistent with the cast operators. The other two are for symmetry. + // + template <typename T> T& cast (value&); + template <typename T> T&& cast (value&&); + template <typename T> const T& cast (const value&); + template <typename T> const T& cast (const lookup&); + + // As above but returns NULL if the value is NULL (or not defined, in + // case of lookup). + // + template <typename T> T* cast_null (value&); + template <typename T> const T* cast_null (const value&); + template <typename T> const T* cast_null (const lookup&); + + // As above but returns empty value if the value is NULL (or not defined, in + // case of lookup). + // + template <typename T> const T& cast_empty (const value&); + template <typename T> const T& cast_empty (const lookup&); + + // As above but returns the specified default if the value is NULL (or not + // defined, in case of lookup). Note that the return is by value, not by + // reference. + // + template <typename T> T cast_default (const value&, const T&); + template <typename T> T cast_default (const lookup&, const T&); + + // As above but returns false/true if the value is NULL (or not defined, + // in case of lookup). Note that the template argument is only for + // documentation and should be bool (or semantically compatible). + // + template <typename T> T cast_false (const value&); + template <typename T> T cast_false (const lookup&); + + template <typename T> T cast_true (const value&); + template <typename T> T cast_true (const lookup&); + + + // Assign value type to the value. The variable is optional and is only used + // for diagnostics. + // + template <typename T> + void typify (value&, const variable*); + void typify (value&, const value_type&, const variable*); + + LIBBUILD2_SYMEXPORT void + typify_atomic (value&, const value_type&, const variable*); + + // Remove value type from the value reversing it to names. This is similar + // to reverse() below except that it modifies the value itself. + // + LIBBUILD2_SYMEXPORT void untypify (value&); + + // Reverse the value back to names. The value should not be NULL and storage + // should be empty. + // + vector_view<const name> + reverse (const value&, names& storage); + + vector_view<name> + reverse (value&, names& storage); + + // lookup + // + // A variable can be undefined, NULL, or contain a (potentially empty) + // value. + // + class variable_map; + + struct lookup + { + using value_type = build2::value; + + // If vars is not NULL, then value is variable_map::value_data. + // + const value_type* value; // NULL if undefined. + const variable* var; // Storage variable. 
+ const variable_map* vars; // Storage map. + + bool + defined () const {return value != nullptr;} + + // Note: returns true if defined and not NULL. + // + explicit operator bool () const {return defined () && !value->null;} + + const value_type& operator* () const {return *value;} + const value_type* operator-> () const {return value;} + + // Return true if this value belongs to the specified scope or target. + // Note that it can also be a target type/pattern-specific value in which + // case it won't belong to either unless we pass true as a second argument + // to consider it belonging to a scope (note that this test is expensive). + // + template <typename T> + bool + belongs (const T& x) const {return vars == &x.vars;} + + template <typename T> + bool + belongs (const T& x, bool target_type_pattern) const; + + lookup (): value (nullptr), var (nullptr), vars (nullptr) {} + + template <typename T> + lookup (const value_type& v, const variable& r, const T& x) + : lookup (&v, &r, &x.vars) {} + + lookup (const value_type& v, const variable& r, const variable_map& m) + : lookup (&v, &r, &m) {} + + lookup (const value_type* v, const variable* r, const variable_map* m) + : value (v), + var (v != nullptr ? r : nullptr), + vars (v != nullptr ? m : nullptr) {} + }; + + // Two lookups are equal if they point to the same variable. + // + inline bool + operator== (const lookup& x, const lookup& y) + { + bool r (x.value == y.value); + assert (!r || x.vars == y.vars); + return r; + } + + inline bool + operator!= (const lookup& x, const lookup& y) {return !(x == y);} + + + // Representation types. + // + // Potential optimizations: + // + // - Split value::operator=/+=() into const T and T&&, also overload + // value_traits functions that they call. + // + // - Specialization for vector<names> (if used and becomes critical). + // + template <typename T, typename E> + struct value_traits_specialization; // enable_if'able specialization support. + + template <typename T> + struct value_traits: value_traits_specialization <T, void> {}; + // { + // static_assert (sizeof (T) <= value::size_, "insufficient space"); + // + // // Convert name to T. If rhs is not NULL, then it is the second half + // // of a pair. Only needs to be provided by simple types. Throw + // // invalid_argument (with a message) if the name is not a valid + // // representation of value (in which case the name should remain + // // unchanged for diagnostics). + // // + // static T convert (name&&, name* rhs); + // + // // Assign/append/prepend T to value which is already of type T but can + // // be NULL. + // // + // static void assign (value&, T&&); + // static void append (value&, T&&); + // static void prepend (value&, T&&); + // + // // Reverse a value back to name. Only needs to be provided by simple + // // types. + // // + // static name reverse (const T&); + // + // // Compare two values. Only needs to be provided by simple types. + // // + // static int compare (const T&, const T&); + // + // // Return true if the value is empty. + // // + // static bool empty (const T&); + // + // // True if can be constructed from empty names as T(). 
+ // // + // static const bool empty_value = true; + // + // static const T empty_instance; + // + // // For simple types (those that can be used as elements of containers), + // // type_name must be constexpr in order to sidestep the static init + // // order issue (in fact, that's the only reason we have it both here + // // and in value_type.name -- value_type cannot be constexpr because + // // of pointers to function template instantiations). + // // + // static const char* const type_name; + // static const build2::value_type value_type; + // }; + + // Convert name to a simple value. Throw invalid_argument (with a message) + // if the name is not a valid representation of value (in which case the + // name remains unchanged for diagnostics). The second version is called for + // a pair. + // + template <typename T> T convert (name&&); + template <typename T> T convert (name&&, name&&); + + // As above but can also be called for container types. Note that in this + // case (container) if invalid_argument is thrown, the names are not + // guaranteed to be unchanged. + // + //template <typename T> T convert (names&&); (declaration causes ambiguity) + + // Convert value to T. If value is already of type T, then simply cast it. + // Otherwise call convert(names) above. + // + template <typename T> T convert (value&&); + + // Default implementations of the dtor/copy_ctor/copy_assing callbacks for + // types that are stored directly in value::data_ and the provide all the + // necessary functions (copy/move ctor and assignment operator). + // + template <typename T> + static void + default_dtor (value&); + + template <typename T> + static void + default_copy_ctor (value&, const value&, bool); + + template <typename T> + static void + default_copy_assign (value&, const value&, bool); + + // Default implementations of the empty callback that calls + // value_traits<T>::empty(). + // + template <typename T> + static bool + default_empty (const value&); + + // Default implementations of the assign/append/prepend callbacks for simple + // types. They call value_traits<T>::convert() and then pass the result to + // value_traits<T>::assign()/append()/prepend(). As a result, it may not be + // the most efficient way to do it. + // + template <typename T> + static void + simple_assign (value&, names&&, const variable*); + + template <typename T> + static void + simple_append (value&, names&&, const variable*); + + template <typename T> + static void + simple_prepend (value&, names&&, const variable*); + + // Default implementations of the reverse callback for simple types that + // calls value_traits<T>::reverse() and adds the result to the vector. As a + // result, it may not be the most efficient way to do it. + // + template <typename T> + static names_view + simple_reverse (const value&, names&); + + // Default implementations of the compare callback for simple types that + // calls value_traits<T>::compare(). + // + template <typename T> + static int + simple_compare (const value&, const value&); + + // names + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<names> + { + static const names& empty_instance; + }; + + // bool + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<bool> + { + static_assert (sizeof (bool) <= value::size_, "insufficient space"); + + static bool convert (name&&, name*); + static void assign (value&, bool); + static void append (value&, bool); // OR. + static name reverse (bool x) {return name (x ? 
"true" : "false");} + static int compare (bool, bool); + static bool empty (bool) {return false;} + + static const bool empty_value = false; + static const char* const type_name; + static const build2::value_type value_type; + }; + + template <> + struct LIBBUILD2_SYMEXPORT value_traits<uint64_t> + { + static_assert (sizeof (uint64_t) <= value::size_, "insufficient space"); + + static uint64_t convert (name&&, name*); + static void assign (value&, uint64_t); + static void append (value&, uint64_t); // ADD. + static name reverse (uint64_t x) {return name (to_string (x));} + static int compare (uint64_t, uint64_t); + static bool empty (bool) {return false;} + + static const bool empty_value = false; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // Treat unsigned integral types as uint64. Note that bool is handled + // differently at an earlier stage. + // + template <typename T> + struct value_traits_specialization<T, + typename std::enable_if< + std::is_integral<T>::value && + std::is_unsigned<T>::value>::type>: + value_traits<uint64_t> {}; + + // string + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<string> + { + static_assert (sizeof (string) <= value::size_, "insufficient space"); + + static string convert (name&&, name*); + static void assign (value&, string&&); + static void append (value&, string&&); + static void prepend (value&, string&&); + static name reverse (const string& x) {return name (x);} + static int compare (const string&, const string&); + static bool empty (const string& x) {return x.empty ();} + + static const bool empty_value = true; + static const string& empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // Treat const char* as string. + // + template <> + struct value_traits<const char*>: value_traits<string> {}; + + // path + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<path> + { + static_assert (sizeof (path) <= value::size_, "insufficient space"); + + static path convert (name&&, name*); + static void assign (value&, path&&); + static void append (value&, path&&); // operator/ + static void prepend (value&, path&&); // operator/ + static name reverse (const path& x) { + return x.to_directory () + ? 
name (path_cast<dir_path> (x)) + : name (x.string ()); + } + static int compare (const path&, const path&); + static bool empty (const path& x) {return x.empty ();} + + static const bool empty_value = true; + static const path& empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // dir_path + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<dir_path> + { + static_assert (sizeof (dir_path) <= value::size_, "insufficient space"); + + static dir_path convert (name&&, name*); + static void assign (value&, dir_path&&); + static void append (value&, dir_path&&); // operator/ + static void prepend (value&, dir_path&&); // operator/ + static name reverse (const dir_path& x) {return name (x);} + static int compare (const dir_path&, const dir_path&); + static bool empty (const dir_path& x) {return x.empty ();} + + static const bool empty_value = true; + static const dir_path& empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // abs_dir_path + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<abs_dir_path> + { + static_assert (sizeof (abs_dir_path) <= value::size_, + "insufficient space"); + + static abs_dir_path convert (name&&, name*); + static void assign (value&, abs_dir_path&&); + static void append (value&, abs_dir_path&&); // operator/ + static name reverse (const abs_dir_path& x) {return name (x);} + static int compare (const abs_dir_path&, const abs_dir_path&); + static bool empty (const abs_dir_path& x) {return x.empty ();} + + static const bool empty_value = true; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // name + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<name> + { + static_assert (sizeof (name) <= value::size_, "insufficient space"); + + static name convert (name&&, name*); + static void assign (value&, name&&); + static name reverse (const name& x) {return x;} + static int compare (const name& l, const name& r) {return l.compare (r);} + static bool empty (const name& x) {return x.empty ();} + + static const bool empty_value = true; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // name_pair + // + // An empty first or second half of a pair is treated as unspecified (this + // way it can be usage-specific whether a single value is first or second + // half of a pair). If both are empty then this is an empty value (and not a + // pair of two empties). + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<name_pair> + { + static_assert (sizeof (name_pair) <= value::size_, "insufficient space"); + + static name_pair convert (name&&, name*); + static void assign (value&, name_pair&&); + static int compare (const name_pair&, const name_pair&); + static bool empty (const name_pair& x) { + return x.first.empty () && x.second.empty ();} + + static const bool empty_value = true; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // process_path + // + // Note that instances that we store always have non-empty recall and + // initial is its shallow copy. + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<process_path> + { + static_assert (sizeof (process_path) <= value::size_, + "insufficient space"); + + // This one is represented as a @-pair of names. As a result it cannot + // be stored in a container. 
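To make the '@'-pair representation concrete, a hypothetical sketch based on process_path_reverse() in variable.cxx earlier in this diff; the g++ paths are made up and the stored-instance fix-up is assumed to follow the assign() declared here:

#include <cassert>

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
example ()
{
  // Hypothetical recall/effective paths.
  //
  value v (process_path (nullptr, path ("g++"), path ("/usr/bin/g++-9")));

  names storage;
  names_view ns (reverse (v, storage));

  // Per process_path_reverse() above, ns should be the '@' pair
  // g++@/usr/bin/g++-9, and convert() accepts the same form back.
  //
  assert (ns.size () == 2 && ns[0].pair == '@');
}
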
+ // + static process_path convert (name&&, name*); + static void assign (value&, process_path&&); + static int compare (const process_path&, const process_path&); + static bool empty (const process_path& x) {return x.empty ();} + + static const bool empty_value = true; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // target_triplet + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<target_triplet> + { + static_assert (sizeof (target_triplet) <= value::size_, + "insufficient space"); + + static target_triplet convert (name&&, name*); + static void assign (value&, target_triplet&&); + static name reverse (const target_triplet& x) {return name (x.string ());} + static int compare (const target_triplet& x, const target_triplet& y) { + return x.compare (y);} + static bool empty (const target_triplet& x) {return x.empty ();} + + static const bool empty_value = true; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // project_name + // + template <> + struct LIBBUILD2_SYMEXPORT value_traits<project_name> + { + static_assert (sizeof (project_name) <= value::size_, + "insufficient space"); + + static project_name convert (name&&, name*); + static void assign (value&, project_name&&); + static name reverse (const project_name&); + static int compare (const project_name& x, const project_name& y) { + return x.compare (y);} + static bool empty (const project_name& x) {return x.empty ();} + + static const bool empty_value = true; + static const project_name& empty_instance; + static const char* const type_name; + static const build2::value_type value_type; + }; + + // vector<T> + // + template <typename T> + struct value_traits<vector<T>> + { + static_assert (sizeof (vector<T>) <= value::size_, "insufficient space"); + + static vector<T> convert (names&&); + static void assign (value&, vector<T>&&); + static void append (value&, vector<T>&&); + static void prepend (value&, vector<T>&&); + static bool empty (const vector<T>& x) {return x.empty ();} + + static const vector<T> empty_instance; + + // Make sure these are static-initialized together. Failed that VC will + // make sure it's done in the wrong order. + // + struct value_type_ex: build2::value_type + { + string type_name; + value_type_ex (value_type&&); + }; + static const value_type_ex value_type; + }; + + // map<K, V> + // + template <typename K, typename V> + struct value_traits<std::map<K, V>> + { + template <typename K1, typename V1> using map = std::map<K1, V1>; + + static_assert (sizeof (map<K, V>) <= value::size_, "insufficient space"); + + static void assign (value&, map<K, V>&&); + static void append (value&, map<K, V>&&); + static void prepend (value& v, map<K, V>&& x) { + return append (v, move (x));} + static bool empty (const map<K, V>& x) {return x.empty ();} + + static const map<K, V> empty_instance; + + // Make sure these are static-initialized together. Failed that VC will + // make sure it's done in the wrong order. + // + struct value_type_ex: build2::value_type + { + string type_name; + value_type_ex (value_type&&); + }; + static const value_type_ex value_type; + }; + + // Explicitly pre-instantiate and export value_traits templates for + // vector/map value types used in the build2 project. Note that this is not + // merely an optimization since not doing so we may end up with multiple + // value type objects for the same traits type (and we use their addressed + // as identity; see cast(const value&) for an example). 
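A brief usage sketch for the container value types above, using strings (vector<string>) whose traits instantiation is exported just below; the compiler flags are arbitrary and element-wise append semantics are assumed from value_traits<vector<T>>::append():

#include <cassert>

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
example ()
{
  value v (strings {"-O2"});       // strings is vector<string>.

  v += strings {"-g", "-Wall"};    // Element-wise append.

  assert (cast<strings> (v).size () == 3);
}
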
+ // + extern template struct LIBBUILD2_DECEXPORT value_traits<strings>; + extern template struct LIBBUILD2_DECEXPORT value_traits<vector<name>>; + extern template struct LIBBUILD2_DECEXPORT value_traits<paths>; + extern template struct LIBBUILD2_DECEXPORT value_traits<dir_paths>; + extern template struct LIBBUILD2_DECEXPORT value_traits<uint64s>; + + extern template struct LIBBUILD2_DECEXPORT + value_traits<std::map<string, string>>; + + extern template struct LIBBUILD2_DECEXPORT + value_traits<std::map<project_name, dir_path>>; + + // Project-wide (as opposed to global) variable overrides. Returned by + // context.cxx:reset(). + // + struct variable_override + { + const variable& var; // Original variable. + const variable& ovr; // Override variable. + optional<dir_path> dir; // Scope directory relative to base. + value val; + }; + + using variable_overrides = vector<variable_override>; + + // Variable pool. + // + // The global version is protected by the phase mutex. + // + class variable_pool + { + public: + // Find existing (assert exists). + // + const variable& + operator[] (const string& name) const; + + // Return NULL if there is no variable with this name. + // + const variable* + find (const string& name) const; + + // Find existing or insert new (untyped, non-overridable, normal + // visibility; but may be overridden by a pattern). + // + const variable& + insert (string name) + { + return insert (move (name), nullptr, nullptr, nullptr); + } + + // Insert or override (type/visibility). Note that by default the + // variable is not overridable. + // + const variable& + insert (string name, variable_visibility v) + { + return insert (move (name), nullptr, &v, nullptr); + } + + const variable& + insert (string name, bool overridable) + { + return insert (move (name), nullptr, nullptr, &overridable); + } + + const variable& + insert (string name, bool overridable, variable_visibility v) + { + return insert (move (name), nullptr, &v, &overridable); + } + + template <typename T> + const variable& + insert (string name) + { + return insert (move (name), &value_traits<T>::value_type); + } + + template <typename T> + const variable& + insert (string name, variable_visibility v) + { + return insert (move (name), &value_traits<T>::value_type, &v); + } + + template <typename T> + const variable& + insert (string name, bool overridable) + { + return insert ( + move (name), &value_traits<T>::value_type, nullptr, &overridable); + } + + template <typename T> + const variable& + insert (string name, bool overridable, variable_visibility v) + { + return insert ( + move (name), &value_traits<T>::value_type, &v, &overridable); + } + + // Alias an existing variable with a new name. + // + // Aliasing is purely a lookup-level mechanism. That is, when variable_map + // looks for a value, it tries all the aliases (and returns the storage + // variable in lookup). + // + // The existing variable should already have final type and visibility + // values which are copied over to the alias. + // + // Overridable aliased variables are most likely a bad idea: without a + // significant effort, the overrides will only be applied along the alias + // names (i.e., there would be no cross-alias overriding). So for now we + // don't allow this (use the common variable mechanism instead). + // + LIBBUILD2_SYMEXPORT const variable& + insert_alias (const variable& var, string name); + + // Insert a variable pattern. 
Any variable that matches this pattern + // will have the specified type, visibility, and overridability. If + // match is true, then individual insertions of the matching variable + // must match the specified type/visibility/overridability. Otherwise, + // individual insertions can provide alternative values and the pattern + // values are a fallback (if you specify false you better be very clear + // about what you are trying to achieve). + // + // The pattern must be in the form [<prefix>.](*|**)[.<suffix>] where + // '*' matches single component stems (i.e., 'foo' but not 'foo.bar') + // and '**' matches single and multi-component stems. Note that only + // multi-component variables are considered for pattern matching (so + // just '*' won't match anything). + // + // The patterns are matched in the more-specific-first order where the + // pattern is considered more specific if it has a greater sum of its + // prefix and suffix lengths. If the prefix and suffix are equal, then the + // '*' pattern is considered more specific than '**'. If neither is more + // specific, then they are matched in the reverse order of insertion. + // + // If retro is true then a newly inserted pattern is also applied + // retrospectively to all the existing variables that match but only + // if no more specific pattern already exists (which is then assumed + // to have been applied). So if you use this functionality, watch out + // for the insertion order (you probably want more specific first). + // + public: + LIBBUILD2_SYMEXPORT void + insert_pattern (const string& pattern, + optional<const value_type*> type, + optional<bool> overridable, + optional<variable_visibility>, + bool retro = false, + bool match = true); + + template <typename T> + void + insert_pattern (const string& p, + optional<bool> overridable, + optional<variable_visibility> v, + bool retro = false, + bool match = true) + { + insert_pattern ( + p, &value_traits<T>::value_type, overridable, v, retro, match); + } + + public: + void + clear () {map_.clear ();} + + variable_pool (): variable_pool (false) {} + + // RW access. + // + variable_pool& + rw () const + { + assert (phase == run_phase::load); + return const_cast<variable_pool&> (*this); + } + + variable_pool& + rw (scope&) const {return const_cast<variable_pool&> (*this);} + + private: + LIBBUILD2_SYMEXPORT static variable_pool instance; + + LIBBUILD2_SYMEXPORT variable& + insert (string name, + const value_type*, + const variable_visibility* = nullptr, + const bool* overridable = nullptr, + bool pattern = true); + + LIBBUILD2_SYMEXPORT void + update (variable&, + const value_type*, + const variable_visibility* = nullptr, + const bool* = nullptr) const; + + // Entities that can access bypassing the lock proof. + // + friend class parser; + friend class scope; + friend LIBBUILD2_SYMEXPORT variable_overrides reset (const strings&); + + public: + // For var_pool initialization. + // + LIBBUILD2_SYMEXPORT static const variable_pool& cinstance; + + // Variable map. + // + private: + using key = butl::map_key<string>; + using map = std::unordered_map<key, variable>; + + pair<map::iterator, bool> + insert (variable&& var) + { + // Keeping a pointer to the key while moving things during insertion is + // tricky. We could use a C-string instead of C++ for a key but that + // gets hairy very quickly (there is no std::hash for C-strings). So + // let's rely on small object-optimized std::string for now. 
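Stepping back to the public interface, a usage sketch of the pattern machinery documented at the top of this class; the config.*.develop pattern and the config.hello.develop variable are hypothetical, and this must run during the load phase:

#include <cassert>

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
example ()
{
  variable_pool& vp (var_pool.rw ()); // Asserts we are in the load phase.

  // Make every config.<tool>.develop variable a bool and overridable
  // ('*' matches single-component stems only).
  //
  vp.insert_pattern<bool> ("config.*.develop",
                           true /* overridable */,
                           variable_visibility::normal);

  // Matches the pattern above, so it comes out typed.
  //
  const variable& var (vp.insert ("config.hello.develop"));
  assert (var.type == &value_traits<bool>::value_type);
}
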
+ // + string n (var.name); + auto r (map_.insert (map::value_type (&n, move (var)))); + + if (r.second) + r.first->first.p = &r.first->second.name; + + return r; + } + + map map_; + + // Patterns. + // + public: + struct pattern + { + string prefix; + string suffix; + bool multi; // Match multi-component stems. + bool match; // Must match individual variable insersions. + + optional<const value_type*> type; + optional<variable_visibility> visibility; + optional<bool> overridable; + + friend bool + operator< (const pattern& x, const pattern& y) + { + if (x.prefix.size () + x.suffix.size () < + y.prefix.size () + y.suffix.size ()) + return true; + + if (x.prefix == y.prefix && x.suffix == y.suffix) + return x.multi && !y.multi; + + return false; + } + }; + + private: + std::multiset<pattern> patterns_; + + // Global pool flag. + // + private: + explicit + variable_pool (bool global): global_ (global) {} + + bool global_; + }; + + LIBBUILD2_SYMEXPORT extern const variable_pool& var_pool; +} + +// variable_map +// +namespace butl +{ + template <> + struct compare_prefix<std::reference_wrapper<const build2::variable>>: + compare_prefix<std::string> + { + typedef compare_prefix<std::string> base; + + explicit + compare_prefix (char d): base (d) {} + + bool + operator() (const build2::variable& x, const build2::variable& y) const + { + return base::operator() (x.name, y.name); + } + + bool + prefix (const build2::variable& p, const build2::variable& k) const + { + return base::prefix (p.name, k.name); + } + }; +} + +namespace build2 +{ + class LIBBUILD2_SYMEXPORT variable_map + { + public: + struct value_data: value + { + using value::value; + using value::operator=; + + size_t version = 0; // Incremented on each modification (variable_cache). + }; + + // Note that we guarantee ascending iteration order (e.g., for predictable + // dump output in tests). + // + using map_type = butl::prefix_map<reference_wrapper<const variable>, + value_data, + '.'>; + using size_type = map_type::size_type; + + template <typename I> + class iterator_adapter: public I + { + public: + iterator_adapter () = default; + iterator_adapter (const I& i, const variable_map& m): I (i), m_ (&m) {} + + // Automatically type a newly typed value on access. + // + typename I::reference operator* () const; + typename I::pointer operator-> () const; + + // Untyped access. + // + uint16_t extra () const {return I::operator* ().second.extra;} + typename I::reference untyped () const {return I::operator* ();} + + private: + const variable_map* m_; + }; + + using const_iterator = iterator_adapter<map_type::const_iterator>; + + // Lookup. Note that variable overrides will not be applied, even if + // set in this map. + // + lookup + operator[] (const variable& var) const + { + auto p (find (var)); + return lookup (p.first, &p.second, this); + } + + lookup + operator[] (const variable* var) const // For cached variables. + { + assert (var != nullptr); + return operator[] (*var); + } + + lookup + operator[] (const string& name) const + { + const variable* var (var_pool.find (name)); + return var != nullptr ? operator[] (*var) : lookup (); + } + + // If typed is false, leave the value untyped even if the variable is. + // The second half of the pair is the storage variable. 
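A usage sketch of the lookup/assignment interface above; the variable is assumed to have been entered with the string type (e.g., via var_pool.insert<string>()) and the value is arbitrary:

#include <cassert>

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
example (const variable& var) // Assumed entered as a string-typed variable.
{
  variable_map vm;              // Not global, so no phase restrictions.

  vm.assign (var) = "hello";    // Value ends up typed since var is.

  if (lookup l = vm[var])
  {
    const string& s (cast<string> (l));
    assert (s == "hello");
  }
}
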
+ // + pair<const value_data*, const variable&> + find (const variable&, bool typed = true) const; + + pair<value_data*, const variable&> + find_to_modify (const variable&, bool typed = true); + + // Convert a lookup pointing to a value belonging to this variable map + // to its non-const version. Note that this is only safe on the original + // values (see find_original()). + // + value& + modify (const lookup& l) + { + assert (l.vars == this); + value& r (const_cast<value&> (*l.value)); + static_cast<value_data&> (r).version++; + return r; + } + + // Return a value suitable for assignment. See scope for details. + // + value& + assign (const variable& var) {return insert (var).first;} + + value& + assign (const variable* var) // For cached variables. + { + assert (var != nullptr); + return assign (*var); + } + + // Note that the variable is expected to have already been registered. + // + value& + assign (const string& name) {return insert (var_pool[name]).first;} + + // As above but also return an indication of whether the new value (which + // will be NULL) was actually inserted. Similar to find(), if typed is + // false, leave the value untyped even if the variable is. + // + pair<reference_wrapper<value>, bool> + insert (const variable&, bool typed = true); + + pair<const_iterator, const_iterator> + find_namespace (const variable& ns) const + { + auto r (m_.find_sub (ns)); + return make_pair (const_iterator (r.first, *this), + const_iterator (r.second, *this)); + } + + const_iterator + begin () const {return const_iterator (m_.begin (), *this);} + + const_iterator + end () const {return const_iterator (m_.end (), *this);} + + bool + empty () const {return m_.empty ();} + + size_type + size () const {return m_.size ();} + + public: + // Global should be true if this map is part of the global build state + // (e.g., scopes, etc). + // + explicit + variable_map (bool global = false): global_ (global) {} + + void + clear () {m_.clear ();} + + private: + friend class variable_type_map; + + void + typify (const value_data&, const variable&) const; + + private: + bool global_; + map_type m_; + }; + + // Value caching. Used for overrides as well as target type/pattern-specific + // append/prepend. + // + // In many places we assume that we can store a reference to the returned + // variable value (e.g., install::lookup_install()). As a result, in these + // cases where we calculate the value dynamically, we have to cache it + // (note, however, that if the value becomes stale, there is no guarantee + // the references remain valid). + // + // Note that since the cache can be modified on any lookup (including during + // the execute phase), it is protected by its own mutex shard (allocated in + // main()). This shard is also used for value typification (which is kind of + // like caching) during concurrent execution phases. + // + LIBBUILD2_SYMEXPORT extern size_t variable_cache_mutex_shard_size; + + LIBBUILD2_SYMEXPORT extern unique_ptr<shared_mutex[]> + variable_cache_mutex_shard; + + template <typename K> + class variable_cache + { + public: + // If the returned unique lock is locked, then the value has been + // invalidated. If the variable type does not match the value type, + // then typify the cached value. + // + pair<value&, ulock> + insert (K, const lookup& stem, size_t version, const variable&); + + private: + struct entry_type + { + // Note: we use value_data instead of value since the result is often + // returned as lookup. 
We also maintain the version in case one cached + // value (e.g., override) is based on another (e.g., target + // type/pattern-specific prepend/append). + // + variable_map::value_data value; + + size_t version = 0; // Version on which this value is based. + + // Location of the stem as well as the version on which this cache + // value is based. Used to track the location and value of the stem + // for cache invalidation. NULL/0 means there is no stem. + // + const variable_map* stem_vars = nullptr; + size_t stem_version = 0; + + // For GCC 4.9. + // + entry_type () = default; + entry_type (variable_map::value_data val, + size_t ver, + const variable_map* svars, + size_t sver) + : value (move (val)), + version (ver), + stem_vars (svars), + stem_version (sver) {} + }; + + using map_type = std::map<K, entry_type>; + + map_type m_; + }; + + // Target type/pattern-specific variables. + // + class variable_pattern_map + { + public: + using map_type = std::map<string, variable_map>; + using const_iterator = map_type::const_iterator; + using const_reverse_iterator = map_type::const_reverse_iterator; + + explicit + variable_pattern_map (bool global): global_ (global) {} + + variable_map& + operator[] (const string& v) + { + return map_.emplace (v, variable_map (global_)).first->second; + } + + const_iterator begin () const {return map_.begin ();} + const_iterator end () const {return map_.end ();} + const_reverse_iterator rbegin () const {return map_.rbegin ();} + const_reverse_iterator rend () const {return map_.rend ();} + bool empty () const {return map_.empty ();} + + private: + bool global_; + map_type map_; + }; + + class LIBBUILD2_SYMEXPORT variable_type_map + { + public: + using map_type = std::map<reference_wrapper<const target_type>, + variable_pattern_map>; + using const_iterator = map_type::const_iterator; + + explicit + variable_type_map (bool global): global_ (global) {} + + variable_pattern_map& + operator[] (const target_type& t) + { + return map_.emplace (t, variable_pattern_map (global_)).first->second; + } + + const_iterator begin () const {return map_.begin ();} + const_iterator end () const {return map_.end ();} + bool empty () const {return map_.empty ();} + + lookup + find (const target_type&, const string& tname, const variable&) const; + + // Prepend/append value cache. + // + // The key is the combination of the "original value identity" (as a + // pointer to the value in one of the variable_pattern_map's) and the + // "target identity" (as target type and target name). Note that while at + // first it may seem like we don't need the target identity, we actually + // do since the stem may itself be target-type/pattern-specific. See + // scope::find_original() for details. + // + mutable + variable_cache<tuple<const value*, const target_type*, string>> + cache; + + private: + bool global_; + map_type map_; + }; +} + +#include <libbuild2/variable.ixx> +#include <libbuild2/variable.txx> + +#endif // LIBBUILD2_VARIABLE_HXX diff --git a/libbuild2/variable.ixx b/libbuild2/variable.ixx new file mode 100644 index 0000000..f0bde09 --- /dev/null +++ b/libbuild2/variable.ixx @@ -0,0 +1,812 @@ +// file : libbuild2/variable.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <type_traits> // is_same + +#include <libbuild2/export.hxx> + +namespace build2 +{ + // value + // + inline bool value:: + empty () const + { + assert (!null); + return type == nullptr + ? 
as<names> ().empty () + : type->empty == nullptr ? false : type->empty (*this); + } + + inline value:: + value (names ns) + : type (nullptr), null (false), extra (0) + { + new (&data_) names (move (ns)); + } + + inline value:: + value (optional<names> ns) + : type (nullptr), null (!ns), extra (0) + { + if (!null) + new (&data_) names (move (*ns)); + } + + template <typename T> + inline value:: + value (T v) + : type (&value_traits<T>::value_type), null (true), extra (0) + { + value_traits<T>::assign (*this, move (v)); + null = false; + } + + template <typename T> + inline value:: + value (optional<T> v) + : type (&value_traits<T>::value_type), null (true), extra (0) + { + if (v) + { + value_traits<T>::assign (*this, move (*v)); + null = false; + } + } + + inline value& value:: + operator= (reference_wrapper<value> v) + { + return *this = v.get (); + } + + inline value& value:: + operator= (reference_wrapper<const value> v) + { + return *this = v.get (); + } + + template <typename T> + inline value& value:: + operator= (T v) + { + assert (type == &value_traits<T>::value_type || type == nullptr); + + // Prepare the receiving value. + // + if (type == nullptr) + { + *this = nullptr; + type = &value_traits<T>::value_type; + } + + value_traits<T>::assign (*this, move (v)); + null = false; + return *this; + } + + template <typename T> + inline value& value:: + operator+= (T v) + { + assert (type == &value_traits<T>::value_type || (type == nullptr && null)); + + // Prepare the receiving value. + // + if (type == nullptr) + type = &value_traits<T>::value_type; + + value_traits<T>::append (*this, move (v)); + null = false; + return *this; + } + + inline void value:: + assign (name&& n, const variable* var) + { + names ns; + ns.push_back (move (n)); + assign (move (ns), var); + } + + inline bool + operator!= (const value& x, const value& y) + { + return !(x == y); + } + + inline bool + operator<= (const value& x, const value& y) + { + return !(x > y); + } + + inline bool + operator>= (const value& x, const value& y) + { + return !(x < y); + } + + template <> + inline const names& + cast (const value& v) + { + assert (v && v.type == nullptr); + return v.as<names> (); + } + + template <> + inline names& + cast (value& v) + { + assert (v && v.type == nullptr); + return v.as<names> (); + } + + template <typename T> + inline const T& + cast (const value& v) + { + assert (v); + + // Find base if any. + // + // Note that here we use the value type address as type identity. + // + const value_type* b (v.type); + for (; + b != nullptr && b != &value_traits<T>::value_type; + b = b->base_type) ; + assert (b != nullptr); + + return *static_cast<const T*> (v.type->cast == nullptr + ? static_cast<const void*> (&v.data_) + : v.type->cast (v, b)); + } + + template <typename T> + inline T& + cast (value& v) + { + // Forward to const T&. + // + return const_cast<T&> (cast<T> (static_cast <const value&> (v))); + } + + template <typename T> + inline T&& + cast (value&& v) + { + return move (cast<T> (v)); // Forward to T&. + } + + template <typename T> + inline const T& + cast (const lookup& l) + { + return cast<T> (*l); + } + + template <typename T> + inline T* + cast_null (value& v) + { + return v ? &cast<T> (v) : nullptr; + } + + template <typename T> + inline const T* + cast_null (const value& v) + { + return v ? &cast<T> (v) : nullptr; + } + + template <typename T> + inline const T* + cast_null (const lookup& l) + { + return l ? 
&cast<T> (*l) : nullptr; + } + + template <typename T> + inline const T& + cast_empty (const value& v) + { + return v ? cast<T> (v) : value_traits<T>::empty_instance; + } + + template <typename T> + inline const T& + cast_empty (const lookup& l) + { + return l ? cast<T> (l) : value_traits<T>::empty_instance; + } + + template <typename T> + inline T + cast_default (const value& v, const T& d) + { + return v ? cast<T> (v) : d; + } + + template <typename T> + inline T + cast_default (const lookup& l, const T& d) + { + return l ? cast<T> (l) : d; + } + + template <typename T> + inline T + cast_false (const value& v) + { + return v && cast<T> (v); + } + + template <typename T> + inline T + cast_false (const lookup& l) + { + return l && cast<T> (l); + } + + template <typename T> + inline T + cast_true (const value& v) + { + return !v || cast<T> (v); + } + + template <typename T> + inline T + cast_true (const lookup& l) + { + return !l || cast<T> (l); + } + + template <typename T> + inline void + typify (value& v, const variable* var) + { + const value_type& t (value_traits<T>::value_type); + + if (v.type != &t) + typify (v, t, var); + } + + LIBBUILD2_SYMEXPORT void + typify (value&, const value_type&, const variable*, memory_order); + + inline void + typify (value& v, const value_type& t, const variable* var) + { + typify (v, t, var, memory_order_relaxed); + } + + inline vector_view<const name> + reverse (const value& v, names& storage) + { + assert (v && + storage.empty () && + (v.type == nullptr || v.type->reverse != nullptr)); + return v.type == nullptr ? v.as<names> () : v.type->reverse (v, storage); + } + + inline vector_view<name> + reverse (value& v, names& storage) + { + names_view cv (reverse (static_cast<const value&> (v), storage)); + return vector_view<name> (const_cast<name*> (cv.data ()), cv.size ()); + } + + // value_traits + // + template <typename T> + inline T + convert (name&& n) + { + return value_traits<T>::convert (move (n), nullptr); + } + + template <typename T> + inline T + convert (name&& l, name&& r) + { + return value_traits<T>::convert (move (l), &r); + } + + // This one will be SFINAE'd out unless T is a container. + // + template <typename T> + inline auto + convert (names&& ns) -> decltype (value_traits<T>::convert (move (ns))) + { + return value_traits<T>::convert (move (ns)); + } + + // bool value + // + inline void value_traits<bool>:: + assign (value& v, bool x) + { + if (v) + v.as<bool> () = x; + else + new (&v.data_) bool (x); + } + + inline void value_traits<bool>:: + append (value& v, bool x) + { + // Logical OR. + // + if (v) + v.as<bool> () = v.as<bool> () || x; + else + new (&v.data_) bool (x); + } + + inline int value_traits<bool>:: + compare (bool l, bool r) + { + return l < r ? -1 : (l > r ? 1 : 0); + } + + // uint64_t value + // + inline void value_traits<uint64_t>:: + assign (value& v, uint64_t x) + { + if (v) + v.as<uint64_t> () = x; + else + new (&v.data_) uint64_t (x); + } + + inline void value_traits<uint64_t>:: + append (value& v, uint64_t x) + { + // ADD. + // + if (v) + v.as<uint64_t> () += x; + else + new (&v.data_) uint64_t (x); + } + + inline int value_traits<uint64_t>:: + compare (uint64_t l, uint64_t r) + { + return l < r ? -1 : (l > r ? 
1 : 0); + } + + // string value + // + inline void value_traits<string>:: + assign (value& v, string&& x) + { + if (v) + v.as<string> () = move (x); + else + new (&v.data_) string (move (x)); + } + + inline void value_traits<string>:: + append (value& v, string&& x) + { + if (v) + { + string& s (v.as<string> ()); + + if (s.empty ()) + s.swap (x); + else + s += x; + } + else + new (&v.data_) string (move (x)); + } + + inline void value_traits<string>:: + prepend (value& v, string&& x) + { + if (v) + { + string& s (v.as<string> ()); + + if (!s.empty ()) + x += s; + + s.swap (x); + } + else + new (&v.data_) string (move (x)); + } + + inline int value_traits<string>:: + compare (const string& l, const string& r) + { + return l.compare (r); + } + + // path value + // + inline void value_traits<path>:: + assign (value& v, path&& x) + { + if (v) + v.as<path> () = move (x); + else + new (&v.data_) path (move (x)); + } + + inline void value_traits<path>:: + append (value& v, path&& x) + { + if (v) + { + path& p (v.as<path> ()); + + if (p.empty ()) + p.swap (x); + else + p /= x; + } + else + new (&v.data_) path (move (x)); + } + + inline void value_traits<path>:: + prepend (value& v, path&& x) + { + if (v) + { + path& p (v.as<path> ()); + + if (!p.empty ()) + x /= p; + + p.swap (x); + } + else + new (&v.data_) path (move (x)); + } + + inline int value_traits<path>:: + compare (const path& l, const path& r) + { + return l.compare (r); + } + + // dir_path value + // + inline void value_traits<dir_path>:: + assign (value& v, dir_path&& x) + { + if (v) + v.as<dir_path> () = move (x); + else + new (&v.data_) dir_path (move (x)); + } + + inline void value_traits<dir_path>:: + append (value& v, dir_path&& x) + { + if (v) + { + dir_path& p (v.as<dir_path> ()); + + if (p.empty ()) + p.swap (x); + else + p /= x; + } + else + new (&v.data_) dir_path (move (x)); + } + + inline void value_traits<dir_path>:: + prepend (value& v, dir_path&& x) + { + if (v) + { + dir_path& p (v.as<dir_path> ()); + + if (!p.empty ()) + x /= p; + + p.swap (x); + } + else + new (&v.data_) dir_path (move (x)); + } + + inline int value_traits<dir_path>:: + compare (const dir_path& l, const dir_path& r) + { + return l.compare (r); + } + + // abs_dir_path value + // + inline void value_traits<abs_dir_path>:: + assign (value& v, abs_dir_path&& x) + { + if (v) + v.as<abs_dir_path> () = move (x); + else + new (&v.data_) abs_dir_path (move (x)); + } + + inline void value_traits<abs_dir_path>:: + append (value& v, abs_dir_path&& x) + { + if (v) + { + abs_dir_path& p (v.as<abs_dir_path> ()); + + if (p.empty ()) + p.swap (x); + else + p /= x; + } + else + new (&v.data_) abs_dir_path (move (x)); + } + + inline int value_traits<abs_dir_path>:: + compare (const abs_dir_path& l, const abs_dir_path& r) + { + return l.compare (static_cast<const dir_path&> (r)); + } + + // name value + // + inline void value_traits<name>:: + assign (value& v, name&& x) + { + if (v) + v.as<name> () = move (x); + else + new (&v.data_) name (move (x)); + } + + // name_pair value + // + inline void value_traits<name_pair>:: + assign (value& v, name_pair&& x) + { + if (v) + v.as<name_pair> () = move (x); + else + new (&v.data_) name_pair (move (x)); + } + + inline int value_traits<name_pair>:: + compare (const name_pair& x, const name_pair& y) + { + int r (x.first.compare (y.first)); + + if (r == 0) + r = x.second.compare (y.second); + + return r; + } + + // process_path value + // + inline void value_traits<process_path>:: + assign (value& v, process_path&& x) + { + // 
Convert the value to its "self-sufficient" form. + // + if (x.recall.empty ()) + x.recall = path (x.initial); + + x.initial = x.recall.string ().c_str (); + + if (v) + v.as<process_path> () = move (x); + else + new (&v.data_) process_path (move (x)); + } + + inline int value_traits<process_path>:: + compare (const process_path& x, const process_path& y) + { + int r (x.recall.compare (y.recall)); + + if (r == 0) + r = x.effect.compare (y.effect); + + return r; + } + + // target_triplet value + // + inline void value_traits<target_triplet>:: + assign (value& v, target_triplet&& x) + { + if (v) + v.as<target_triplet> () = move (x); + else + new (&v.data_) target_triplet (move (x)); + } + + // project_name value + // + inline void value_traits<project_name>:: + assign (value& v, project_name&& x) + { + if (v) + v.as<project_name> () = move (x); + else + new (&v.data_) project_name (move (x)); + } + + inline name value_traits<project_name>:: + reverse (const project_name& x) + { + // Make work for the special unnamed subproject representation (see + // find_subprojects() in file.cxx for details). + // + const string& s (x.string ()); + return name (s.empty () || path::traits_type::is_separator (s.back ()) + ? empty_string + : s); + } + + // vector<T> value + // + template <typename T> + inline void value_traits<vector<T>>:: + assign (value& v, vector<T>&& x) + { + if (v) + v.as<vector<T>> () = move (x); + else + new (&v.data_) vector<T> (move (x)); + } + + template <typename T> + inline void value_traits<vector<T>>:: + append (value& v, vector<T>&& x) + { + if (v) + { + vector<T>& p (v.as<vector<T>> ()); + + if (p.empty ()) + p.swap (x); + else + p.insert (p.end (), + make_move_iterator (x.begin ()), + make_move_iterator (x.end ())); + } + else + new (&v.data_) vector<T> (move (x)); + } + + template <typename T> + inline void value_traits<vector<T>>:: + prepend (value& v, vector<T>&& x) + { + if (v) + { + vector<T>& p (v.as<vector<T>> ()); + + if (!p.empty ()) + x.insert (x.end (), + make_move_iterator (p.begin ()), + make_move_iterator (p.end ())); + + p.swap (x); + } + else + new (&v.data_) vector<T> (move (x)); + } + + // map<K, V> value + // + template <typename K, typename V> + inline void value_traits<std::map<K, V>>:: + assign (value& v, map<K, V>&& x) + { + if (v) + v.as<map<K, V>> () = move (x); + else + new (&v.data_) map<K, V> (move (x)); + } + + template <typename K, typename V> + inline void value_traits<std::map<K, V>>:: + append (value& v, map<K, V>&& x) + { + if (v) + { + map<K, V>& m (v.as<map<K, V>> ()); + + if (m.empty ()) + m.swap (x); + else + // Note that this will only move values. Keys (being const) are still + // copied. + // + m.insert (make_move_iterator (x.begin ()), + make_move_iterator (x.end ())); + } + else + new (&v.data_) map<K, V> (move (x)); + } + + // variable_pool + // + inline const variable& variable_pool:: + operator[] (const string& n) const + { + const variable* r (find (n)); + assert (r != nullptr); + return *r; + } + + inline const variable* variable_pool:: + find (const string& n) const + { + auto i (map_.find (&n)); + return i != map_.end () ? &i->second : nullptr; + } + + // variable_map + // + inline void variable_map:: + typify (const value_data& v, const variable& var) const + { + // We assume typification is not modification so no version increment. 
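+ // Note that during the (serial) load phase we can typify the value in place, + // while during the parallel match/execute phases the type member may be read + // concurrently by other threads and so we have to go through the atomic + // version below.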
+ // + if (phase == run_phase::load) + { + if (v.type != var.type) + build2::typify (const_cast<value_data&> (v), *var.type, &var); + } + else + { + if (v.type.load (memory_order_acquire) != var.type) + build2::typify_atomic (const_cast<value_data&> (v), *var.type, &var); + } + } + + // variable_map::iterator_adapter + // + template <typename I> + inline typename I::reference variable_map::iterator_adapter<I>:: + operator* () const + { + auto& r (I::operator* ()); + const variable& var (r.first); + const value_data& val (r.second); + + // Check if this is the first access after being assigned a type. + // + if (var.type != nullptr) + m_->typify (val, var); + + return r; + } + + template <typename I> + inline typename I::pointer variable_map::iterator_adapter<I>:: + operator-> () const + { + auto p (I::operator-> ()); + const variable& var (p->first); + const value_data& val (p->second); + + // Check if this is the first access after being assigned a type. + // + if (var.type != nullptr) + m_->typify (val, var); + + return p; + } +} diff --git a/libbuild2/variable.txx b/libbuild2/variable.txx new file mode 100644 index 0000000..9b7490a --- /dev/null +++ b/libbuild2/variable.txx @@ -0,0 +1,670 @@ +// file : libbuild2/variable.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/diagnostics.hxx> + +namespace build2 +{ + template <typename T> + bool lookup:: + belongs (const T& x, bool t) const + { + if (vars == &x.vars) + return true; + + if (t) + { + for (const auto& p1: x.target_vars) // variable_type_map + { + for (const auto& p2: p1.second) // variable_pattern_map + { + if (vars == &p2.second) + return true; + } + } + } + + return false; + } + + // This one will be SFINAE'd out unless T is a simple value. + // + template <typename T> + auto + convert (names&& ns) -> + decltype (value_traits<T>::convert (move (ns[0]), nullptr)) + { + size_t n (ns.size ()); + + if (n == 0) + { + if (value_traits<T>::empty_value) + return T (); + } + else if (n == 1) + { + return convert<T> (move (ns[0])); + } + else if (n == 2 && ns[0].pair != '\0') + { + return convert<T> (move (ns[0]), move (ns[1])); + } + + throw invalid_argument ( + string ("invalid ") + value_traits<T>::type_name + + (n == 0 ? " value: empty" : " value: multiple names")); + } + + template <typename T> + T + convert (value&& v) + { + if (v.type == nullptr) + return convert<T> (move (v).as<names> ()); + else if (v.type == &value_traits<T>::value_type) + return move (v).as<T> (); + + throw invalid_argument ( + string ("invalid ") + value_traits<T>::value_type.name + + " value: conversion from " + v.type->name); + } + + template <typename T> + void + default_dtor (value& v) + { + v.as<T> ().~T (); + } + + template <typename T> + void + default_copy_ctor (value& l, const value& r, bool m) + { + if (m) + new (&l.data_) T (move (const_cast<value&> (r).as<T> ())); + else + new (&l.data_) T (r.as<T> ()); + } + + template <typename T> + void + default_copy_assign (value& l, const value& r, bool m) + { + if (m) + l.as<T> () = move (const_cast<value&> (r).as<T> ()); + else + l.as<T> () = r.as<T> (); + } + + template <typename T> + bool + default_empty (const value& v) + { + return value_traits<T>::empty (v.as<T> ()); + } + + template <typename T> + void + simple_assign (value& v, names&& ns, const variable* var) + { + size_t n (ns.size ()); + + if (value_traits<T>::empty_value ? n <= 1 : n == 1) + { + try + { + value_traits<T>::assign ( + v, + (n == 0 + ? 
T () + : value_traits<T>::convert (move (ns.front ()), nullptr))); + + return; + } + catch (const invalid_argument&) {} // Fall through. + } + + diag_record dr (fail); + + dr << "invalid " << value_traits<T>::value_type.name + << " value '" << ns << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + template <typename T> + void + simple_append (value& v, names&& ns, const variable* var) + { + size_t n (ns.size ()); + + if (value_traits<T>::empty_value ? n <= 1 : n == 1) + { + try + { + value_traits<T>::append ( + v, + (n == 0 + ? T () + : value_traits<T>::convert (move (ns.front ()), nullptr))); + + return; + } + catch (const invalid_argument&) {} // Fall through. + } + + diag_record dr (fail); + + dr << "invalid " << value_traits<T>::value_type.name + << " value '" << ns << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + template <typename T> + void + simple_prepend (value& v, names&& ns, const variable* var) + { + size_t n (ns.size ()); + + if (value_traits<T>::empty_value ? n <= 1 : n == 1) + { + try + { + value_traits<T>::prepend ( + v, + (n == 0 + ? T () + : value_traits<T>::convert (move (ns.front ()), nullptr))); + + return; + } + catch (const invalid_argument&) {} // Fall through. + } + + diag_record dr (fail); + + dr << "invalid " << value_traits<T>::value_type.name + << " value '" << ns << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + template <typename T> + names_view + simple_reverse (const value& v, names& s) + { + const T& x (v.as<T> ()); + + // Represent an empty simple value as empty name sequence rather than + // a single empty name. This way, for example, during serialization we + // end up with a much saner looking: + // + // config.import.foo = + // + // Rather than: + // + // config.import.foo = {} + // + if (!value_traits<T>::empty (x)) + s.emplace_back (value_traits<T>::reverse (x)); + + return s; + } + + template <typename T> + int + simple_compare (const value& l, const value& r) + { + return value_traits<T>::compare (l.as<T> (), r.as<T> ()); + } + + // vector<T> value + // + + template <typename T> + vector<T> value_traits<vector<T>>:: + convert (names&& ns) + { + vector<T> v; + + // Similar to vector_append() below except we throw instead of issuing + // diagnostics. + // + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + name& n (*i); + name* r (nullptr); + + if (n.pair) + { + r = &*++i; + + if (n.pair != '@') + throw invalid_argument ( + string ("invalid pair character: '") + n.pair + "'"); + } + + v.push_back (value_traits<T>::convert (move (n), r)); + } + + return v; + } + + template <typename T> + void + vector_append (value& v, names&& ns, const variable* var) + { + vector<T>& p (v + ? v.as<vector<T>> () + : *new (&v.data_) vector<T> ()); + + // Convert each element to T while merging pairs. 
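+ // For example, the two names making up the pair 'k@v' are passed to + // value_traits<T>::convert() together and so produce a single element in + // the resulting vector.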
+ // + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + name& n (*i); + name* r (nullptr); + + if (n.pair) + { + r = &*++i; + + if (n.pair != '@') + { + diag_record dr (fail); + + dr << "unexpected pair style for " + << value_traits<T>::value_type.name << " value " + << "'" << n << "'" << n.pair << "'" << *r << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + } + + try + { + p.push_back (value_traits<T>::convert (move (n), r)); + } + catch (const invalid_argument&) + { + diag_record dr (fail); + + dr << "invalid " << value_traits<T>::value_type.name; + + if (n.pair) + dr << " element pair '" << n << "'@'" << *r << "'"; + else + dr << " element '" << n << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + } + } + + template <typename T> + void + vector_assign (value& v, names&& ns, const variable* var) + { + if (v) + v.as<vector<T>> ().clear (); + + vector_append<T> (v, move (ns), var); + } + + template <typename T> + void + vector_prepend (value& v, names&& ns, const variable* var) + { + // Reduce to append. + // + vector<T> t; + vector<T>* p; + + if (v) + { + p = &v.as<vector<T>> (); + p->swap (t); + } + else + p = new (&v.data_) vector<T> (); + + vector_append<T> (v, move (ns), var); + + p->insert (p->end (), + make_move_iterator (t.begin ()), + make_move_iterator (t.end ())); + } + + template <typename T> + static names_view + vector_reverse (const value& v, names& s) + { + auto& vv (v.as<vector<T>> ()); + s.reserve (vv.size ()); + + for (const T& x: vv) + s.push_back (value_traits<T>::reverse (x)); + + return s; + } + + template <typename T> + static int + vector_compare (const value& l, const value& r) + { + auto& lv (l.as<vector<T>> ()); + auto& rv (r.as<vector<T>> ()); + + auto li (lv.begin ()), le (lv.end ()); + auto ri (rv.begin ()), re (rv.end ()); + + for (; li != le && ri != re; ++li, ++ri) + if (int r = value_traits<T>::compare (*li, *ri)) + return r; + + if (li == le && ri != re) // l shorter than r. + return -1; + + if (ri == re && li != le) // r shorter than l. + return 1; + + return 0; + } + + template <typename T> + value_traits<vector<T>>::value_type_ex:: + value_type_ex (value_type&& v) + : value_type (move (v)) + { + type_name = value_traits<T>::type_name; + type_name += 's'; + name = type_name.c_str (); + } + + template <typename T> + const vector<T> value_traits<vector<T>>::empty_instance; + + template <typename T> + const typename value_traits<vector<T>>::value_type_ex + value_traits<vector<T>>::value_type = build2::value_type // VC14 wants =. + { + nullptr, // Patched above. + sizeof (vector<T>), + nullptr, // No base. + &value_traits<T>::value_type, + &default_dtor<vector<T>>, + &default_copy_ctor<vector<T>>, + &default_copy_assign<vector<T>>, + &vector_assign<T>, + &vector_append<T>, + &vector_prepend<T>, + &vector_reverse<T>, + nullptr, // No cast (cast data_ directly). + &vector_compare<T>, + &default_empty<vector<T>> + }; + + // map<K, V> value + // + template <typename K, typename V> + void + map_append (value& v, names&& ns, const variable* var) + { + using std::map; + + map<K, V>& p (v + ? v.as<map<K, V>> () + : *new (&v.data_) map<K, V> ()); + + // Verify we have a sequence of pairs and convert each lhs/rhs to K/V. 
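+ // For example, for a string-to-string map a value like 'a@1 b@2' produces + // the entries {a -> 1, b -> 2}, while an element that is not a pair is + // diagnosed below.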
+ // + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + name& l (*i); + + if (!l.pair) + { + diag_record dr (fail); + + dr << value_traits<map<K, V>>::value_type.name << " key-value " + << "pair expected instead of '" << l << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + name& r (*++i); // Got to have the second half of the pair. + + if (l.pair != '@') + { + diag_record dr (fail); + + dr << "unexpected pair style for " + << value_traits<map<K, V>>::value_type.name << " key-value " + << "'" << l << "'" << l.pair << "'" << r << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + + try + { + K k (value_traits<K>::convert (move (l), nullptr)); + + try + { + V v (value_traits<V>::convert (move (r), nullptr)); + + p.emplace (move (k), move (v)); + } + catch (const invalid_argument&) + { + diag_record dr (fail); + + dr << "invalid " << value_traits<V>::value_type.name + << " element value '" << r << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + } + catch (const invalid_argument&) + { + diag_record dr (fail); + + dr << "invalid " << value_traits<K>::value_type.name + << " element key '" << l << "'"; + + if (var != nullptr) + dr << " in variable " << var->name; + } + } + } + + template <typename K, typename V> + void + map_assign (value& v, names&& ns, const variable* var) + { + using std::map; + + if (v) + v.as<map<K, V>> ().clear (); + + map_append<K, V> (v, move (ns), var); + } + + template <typename K, typename V> + static names_view + map_reverse (const value& v, names& s) + { + using std::map; + + auto& vm (v.as<map<K, V>> ()); + s.reserve (2 * vm.size ()); + + for (const auto& p: vm) + { + s.push_back (value_traits<K>::reverse (p.first)); + s.back ().pair = '@'; + s.push_back (value_traits<V>::reverse (p.second)); + } + + return s; + } + + template <typename K, typename V> + static int + map_compare (const value& l, const value& r) + { + using std::map; + + auto& lm (l.as<map<K, V>> ()); + auto& rm (r.as<map<K, V>> ()); + + auto li (lm.begin ()), le (lm.end ()); + auto ri (rm.begin ()), re (rm.end ()); + + for (; li != le && ri != re; ++li, ++ri) + { + int r; + if ((r = value_traits<K>::compare (li->first, ri->first)) != 0 || + (r = value_traits<V>::compare (li->second, ri->second)) != 0) + return r; + } + + if (li == le && ri != re) // l shorter than r. + return -1; + + if (ri == re && li != le) // r shorter than l. + return 1; + + return 0; + } + + template <typename K, typename V> + value_traits<std::map<K, V>>::value_type_ex:: + value_type_ex (value_type&& v) + : value_type (move (v)) + { + type_name = value_traits<K>::type_name; + type_name += '_'; + type_name += value_traits<V>::type_name; + type_name += "_map"; + name = type_name.c_str (); + } + + template <typename K, typename V> + const std::map<K,V> value_traits<std::map<K, V>>::empty_instance; + + template <typename K, typename V> + const typename value_traits<std::map<K, V>>::value_type_ex + value_traits<std::map<K, V>>::value_type = build2::value_type // VC14 wants = + { + nullptr, // Patched above. + sizeof (map<K, V>), + nullptr, // No base. + nullptr, // No element. + &default_dtor<map<K, V>>, + &default_copy_ctor<map<K, V>>, + &default_copy_assign<map<K, V>>, + &map_assign<K, V>, + &map_append<K, V>, + &map_append<K, V>, // Prepend is the same as append. + &map_reverse<K, V>, + nullptr, // No cast (cast data_ directly). 
+ &map_compare<K, V>, + &default_empty<map<K, V>> + }; + + // variable_cache + // + template <typename K> + pair<value&, ulock> variable_cache<K>:: + insert (K k, const lookup& stem, size_t ver, const variable& var) + { + using value_data = variable_map::value_data; + + const variable_map* svars (stem.vars); // NULL if undefined. + size_t sver (stem.defined () + ? static_cast<const value_data*> (stem.value)->version + : 0); + + shared_mutex& m ( + variable_cache_mutex_shard[ + hash<variable_cache*> () (this) % variable_cache_mutex_shard_size]); + + slock sl (m); + ulock ul (m, defer_lock); + + auto i (m_.find (k)); + + // Cache hit. + // + if (i != m_.end () && + i->second.version == ver && + i->second.stem_vars == svars && + i->second.stem_version == sver && + (var.type == nullptr || i->second.value.type == var.type)) + return pair<value&, ulock> (i->second.value, move (ul)); + + // Relock for exclusive access. Note that it is entirely possible + // that between unlock and lock someone else has updated the entry. + // + sl.unlock (); + ul.lock (); + + // Note that the cache entries are never removed so we can reuse the + // iterator. + // + pair<typename map_type::iterator, bool> p (i, i == m_.end ()); + + if (p.second) + p = m_.emplace (move (k), + entry_type {value_data (nullptr), ver, svars, sver}); + + entry_type& e (p.first->second); + + if (p.second) + { + // Cache miss. + // + e.value.version++; // New value. + } + else if (e.version != ver || + e.stem_vars != svars || + e.stem_version != sver) + { + // Cache invalidation. + // + assert (e.version <= ver); + e.version = ver; + + if (e.stem_vars != svars) + e.stem_vars = svars; + else + assert (e.stem_version <= sver); + + e.stem_version = sver; + + e.value.version++; // Value changed. + } + else + { + // Cache hit. + // + if (var.type != nullptr && e.value.type != var.type) + typify (e.value, *var.type, &var); + + ul.unlock (); + } + + return pair<value&, ulock> (e.value, move (ul)); + } +} diff --git a/libbuild2/version.hxx b/libbuild2/version.hxx new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/libbuild2/version.hxx diff --git a/libbuild2/version.hxx.in b/libbuild2/version.hxx.in new file mode 100644 index 0000000..1e448d8 --- /dev/null +++ b/libbuild2/version.hxx.in @@ -0,0 +1,46 @@ +// file : libbuild2/version.hxx.in -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_VERSION // Note: using the version macro itself. + +// The numeric version format is AAAAABBBBBCCCCCDDDE where: +// +// AAAAA - major version number +// BBBBB - minor version number +// CCCCC - bugfix version number +// DDD - alpha / beta (DDD + 500) version number +// E - final (0) / snapshot (1) +// +// When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example: +// +// Version AAAAABBBBBCCCCCDDDE +// +// 0.1.0 0000000001000000000 +// 0.1.2 0000000001000020000 +// 1.2.3 0000100002000030000 +// 2.2.0-a.1 0000200001999990010 +// 3.0.0-b.2 0000299999999995020 +// 2.2.0-a.1.z 0000200001999990011 + +// NOTE: remember to also update "fake" bootstrap values in utility.hxx if +// changing anything here. 
+ +#define LIBBUILD2_VERSION $build2.version.project_number$ULL +#define LIBBUILD2_VERSION_STR "$build2.version.project$" +#define LIBBUILD2_VERSION_ID "$build2.version.project_id$" + +#define LIBBUILD2_VERSION_MAJOR $build2.version.major$ +#define LIBBUILD2_VERSION_MINOR $build2.version.minor$ +#define LIBBUILD2_VERSION_PATCH $build2.version.patch$ + +#define LIBBUILD2_PRE_RELEASE $build2.version.pre_release$ + +#define LIBBUILD2_SNAPSHOT $build2.version.snapshot_sn$ULL +#define LIBBUILD2_SNAPSHOT_ID "$build2.version.snapshot_id$" + +#include <libbutl/version.hxx> + +$libbutl.check(LIBBUTL_VERSION, LIBBUTL_SNAPSHOT)$ + +#endif // LIBBUILD2_VERSION
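
The following is a minimal usage sketch of the typed value interface declared in variable.hxx/variable.ixx above. It is not part of the commit: the function name is made up, an initialized build2 context is assumed, and it only exercises the constructors, typify() and cast() shown in this diff.

#include <libbuild2/types.hxx>
#include <libbuild2/variable.hxx>

using namespace build2;

static void
value_example ()
{
  // An untyped value holds a vector of names.
  //
  names ns;
  ns.push_back (name ("hello"));

  value v (move (ns));

  // Give it the string type and read it back via cast<>() (assuming a serial
  // phase so the non-atomic typify() overload is sufficient).
  //
  typify (v, value_traits<string>::value_type, nullptr /* variable */);
  const string& s (cast<string> (v));

  // Typed construction and append go through value_traits<T>; for uint64_t
  // append is addition.
  //
  value n (uint64_t (1));
  n += uint64_t (2); // n now holds 3.

  (void) s;
}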