diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2019-07-04 19:12:15 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2019-07-05 14:24:43 +0300 |
commit | 57b10c06925d0bdf6ffb38488ee908f085109e95 (patch) | |
tree | f2103684d319650c3302aef9d7a70dd64ff2a347 /libbuild2/test | |
parent | 30b4eda196e090aa820d312e6a9435a4ae84c303 (diff) |
Move config, dist, test, and install modules into library
Diffstat (limited to 'libbuild2/test')
54 files changed, 17448 insertions, 0 deletions
diff --git a/libbuild2/test/common.cxx b/libbuild2/test/common.cxx new file mode 100644 index 0000000..11c5d90 --- /dev/null +++ b/libbuild2/test/common.cxx @@ -0,0 +1,220 @@ +// file : libbuild2/test/common.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/common.hxx> + +#include <libbuild2/target.hxx> +#include <libbuild2/algorithm.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + // Determine if we have the target (first), id path (second), or both (in + // which case we also advance the iterator). + // + static pair<const name*, const name*> + sense (names::const_iterator& i) + { + const name* tn (nullptr); + const name* pn (nullptr); + + if (i->pair) + { + tn = &*i++; + pn = &*i; + } + else + { + // If it has a type (exe{hello}) or a directory (basics/), then + // we assume it is a target. + // + (i->typed () || !i->dir.empty () ? tn : pn) = &*i; + } + + // Validate the target. + // + if (tn != nullptr) + { + if (tn->qualified ()) + fail << "project-qualified target '" << *tn << " in config.test"; + } + + // Validate the id path. + // + if (pn != nullptr) + { + if (!pn->simple () || pn->empty ()) + fail << "invalid id path '" << *pn << " in config.test"; + } + + return make_pair (tn, pn); + } + + bool common:: + pass (const target& a) const + { + if (test_ == nullptr) + return true; + + // We need to "enable" aliases that "lead up" to the targets we are + // interested in. So see if any target is in a subdirectory of this + // alias. + // + // If we don't see any targets (e.g., only id paths), then we assume all + // targets match and therefore we always pass. + // + bool r (true); + + // Directory part from root to this alias (the same in src and out). 
+ // + const dir_path d (a.out_dir ().leaf (root_->out_path ())); + + for (auto i (test_->begin ()); i != test_->end (); ++i) + { + if (const name* n = sense (i).first) + { + // Reset result to false if no match (but we have seen a target). + // + r = n->dir.sub (d); + + // See test() below for details on this special case. + // + if (!r && !n->typed ()) + r = d.sub (n->dir); + + if (r) + break; + } + } + + return r; + } + + bool common:: + test (const target& t) const + { + if (test_ == nullptr) + return true; + + // If we don't see any targets (e.g., only id paths), then we assume + // all of them match. + // + bool r (true); + + // Directory part from root to this alias (the same in src and out). + // + const dir_path d (t.out_dir ().leaf (root_->out_path ())); + const target_type& tt (t.type ()); + + for (auto i (test_->begin ()); i != test_->end (); ++i) + { + if (const name* n = sense (i).first) + { + // Reset result to false if no match (but we have seen a target). + // + + // When specifying a directory, for example, config.test=tests/, + // one would intuitively expect that all the tests under it will + // run. But that's not what will happen with the below test: while + // the dir{tests/} itself will match, any target underneath won't. + // So we are going to handle this type of target specially by + // making it match any target in or under it. + // + // Note that we only do this for tests/, not dir{tests/} since it is + // not always the semantics that one wants. Sometimes one may want + // to run tests (scripts) just for the tests/ target but not for any + // of its prerequisites. So dir{tests/} is a way to disable this + // special logic. + // + // Note: the same code as in test() below. + // + if (!n->typed ()) + r = d.sub (n->dir); + else + // First quickly and cheaply weed out names that cannot possibly + // match. Only then search for a target (as if it was a + // prerequisite), which can be expensive. 
+ // + // We cannot specify an src target in config.test since we used + // the pair separator for ids. As a result, we search for both + // out and src targets. + // + r = + t.name == n->value && // Name matches. + tt.name == n->type && // Target type matches. + d == n->dir && // Directory matches. + (search_existing (*n, *root_) == &t || + search_existing (*n, *root_, d) == &t); + + if (r) + break; + } + } + + return r; + } + + bool common:: + test (const target& t, const path& id) const + { + if (test_ == nullptr) + return true; + + // If we don't see any id paths (e.g., only targets), then we assume + // all of them match. + // + bool r (true); + + // Directory part from root to this alias (the same in src and out). + // + const dir_path d (t.out_dir ().leaf (root_->out_path ())); + const target_type& tt (t.type ()); + + for (auto i (test_->begin ()); i != test_->end (); ++i) + { + auto p (sense (i)); + + if (const name* n = p.second) + { + // If there is a target, check that it matches ours. + // + if (const name* n = p.first) + { + // Note: the same code as in test() above. + // + bool r; + + if (!n->typed ()) + r = d.sub (n->dir); + else + r = + t.name == n->value && + tt.name == n->type && + d == n->dir && + (search_existing (*n, *root_) == &t || + search_existing (*n, *root_, d) == &t); + + if (!r) + continue; // Not our target. + } + + // If the id (group) "leads up" to what we want to run or we + // (group) lead up to the id, then match. + // + const path p (n->value); + + // Reset result to false if no match (but we have seen an id path). 
+ // + if ((r = p.sub (id) || id.sub (p))) + break; + } + } + + return r; + } + } +} diff --git a/libbuild2/test/common.hxx b/libbuild2/test/common.hxx new file mode 100644 index 0000000..5bb78ee --- /dev/null +++ b/libbuild2/test/common.hxx @@ -0,0 +1,72 @@ +// file : libbuild2/test/common.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_COMMON_HXX +#define LIBBUILD2_TEST_COMMON_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> + +namespace build2 +{ + namespace test + { + enum class output_before {fail, warn, clean}; + enum class output_after {clean, keep}; + + struct common_data + { + const variable& config_test; + const variable& config_test_output; + + const variable& var_test; + const variable& test_options; + const variable& test_arguments; + + const variable& test_stdin; + const variable& test_stdout; + const variable& test_roundtrip; + const variable& test_input; + + const variable& test_target; + }; + + struct common: common_data + { + // The config.test.output values. + // + output_before before = output_before::warn; + output_after after = output_after::clean; + + // The config.test query interface. + // + const names* test_ = nullptr; // The config.test value if any. + scope* root_ = nullptr; // The root scope for target resolution. + + // Return true if the specified alias target should pass-through to its + // prerequisites. + // + bool + pass (const target& alias_target) const; + + // Return true if the specified target should be tested. + // + bool + test (const target& test_target) const; + + // Return true if the specified target should be tested with the + // specified testscript test (or group). 
+ // + bool + test (const target& test_target, const path& id_path) const; + + explicit + common (common_data&& d): common_data (move (d)) {} + }; + } +} + +#endif // LIBBUILD2_TEST_COMMON_HXX diff --git a/libbuild2/test/init.cxx b/libbuild2/test/init.cxx new file mode 100644 index 0000000..3d13acc --- /dev/null +++ b/libbuild2/test/init.cxx @@ -0,0 +1,231 @@ +// file : libbuild2/test/init.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/init.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/rule.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/config/utility.hxx> + +#include <libbuild2/test/module.hxx> +#include <libbuild2/test/target.hxx> +#include <libbuild2/test/operation.hxx> + +#include <libbuild2/test/script/regex.hxx> // script::regex::init() + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + bool + boot (scope& rs, const location&, unique_ptr<module_base>& mod) + { + tracer trace ("test::boot"); + + l5 ([&]{trace << "for " << rs;}); + + // Register our operations. + // + rs.insert_operation (test_id, op_test); + rs.insert_operation (update_for_test_id, op_update_for_test); + + // Enter module variables. Do it during boot in case they get assigned + // in bootstrap.build. + // + auto& vp (var_pool.rw (rs)); + + common_data d { + + // Tests to execute. + // + // Specified as <target>@<path-id> pairs with both sides being + // optional. The variable is untyped (we want a list of name-pairs), + // overridable, and inheritable. The target is relative (in essence a + // prerequisite) which is resolved from the (root) scope where the + // config.test value is defined. + // + vp.insert ("config.test", true), + + // Test working directory before/after cleanup (see Testscript spec + // for semantics). 
+ // + vp.insert<name_pair> ("config.test.output", true), + + // The test variable is a name which can be a path (with the + // true/false special values) or a target name. + // + // Note: none are overridable. + // + vp.insert<name> ("test", variable_visibility::target), + vp.insert<strings> ("test.options", variable_visibility::project), + vp.insert<strings> ("test.arguments", variable_visibility::project), + + // Prerequisite-specific. + // + // test.stdin and test.stdout can be used to mark a prerequisite as a + // file to redirect stdin from and to compare stdout to, respectively. + // test.roundtrip is a shortcut to mark a prerequisite as both stdin + // and stdout. + // + // Prerequisites marked with test.input are treated as additional test + // inputs: they are made sure to be up to date and their paths are + // passed as additional command line arguments (after test.options and + // test.arguments). Their primary use is to pass inputs that may have + // varying file names/paths, for example: + // + // exe{parent}: exe{child}: test.input = true + // + // Note that currently this mechanism is only available to simple + // tests though we could also support it for testscript (e.g., by + // appending the input paths to test.arguments or by passing them in a + // separate test.inputs variable). + // + vp.insert<bool> ("test.stdin", variable_visibility::prereq), + vp.insert<bool> ("test.stdout", variable_visibility::prereq), + vp.insert<bool> ("test.roundtrip", variable_visibility::prereq), + vp.insert<bool> ("test.input", variable_visibility::prereq), + + // Test target platform. + // + vp.insert<target_triplet> ("test.target", variable_visibility::project) + }; + + // These are only used in testscript. + // + vp.insert<strings> ("test.redirects", variable_visibility::project); + vp.insert<strings> ("test.cleanups", variable_visibility::project); + + // Unless already set, default test.target to build.host. 
Note that it + // can still be overridden by the user, e.g., in root.build. + // + { + value& v (rs.assign (d.test_target)); + + if (!v || v.empty ()) + v = cast<target_triplet> ((*global_scope)["build.host"]); + } + + mod.reset (new module (move (d))); + return false; + } + + bool + init (scope& rs, + scope&, + const location& l, + unique_ptr<module_base>& mod, + bool first, + bool, + const variable_map& config_hints) + { + tracer trace ("test::init"); + + if (!first) + { + warn (l) << "multiple test module initializations"; + return true; + } + + const dir_path& out_root (rs.out_path ()); + l5 ([&]{trace << "for " << out_root;}); + + assert (mod != nullptr); + module& m (static_cast<module&> (*mod)); + + // Configure. + // + assert (config_hints.empty ()); // We don't know any hints. + + // Adjust module priority so that the config.test.* values are saved at + // the end of config.build. + // + config::save_module (rs, "test", INT32_MAX); + + // config.test + // + if (lookup l = config::omitted (rs, m.config_test).first) + { + // Figure out which root scope it came from. + // + scope* s (&rs); + for (; + s != nullptr && !l.belongs (*s); + s = s->parent_scope ()->root_scope ()) + assert (s != nullptr); + + m.test_ = &cast<names> (l); + m.root_ = s; + } + + // config.test.output + // + if (lookup l = config::omitted (rs, m.config_test_output).first) + { + const name_pair& p (cast<name_pair> (l)); + + // If second half is empty, then first is the after value. + // + const name& a (p.second.empty () ? p.first : p.second); // after + const name& b (p.second.empty () ? p.second : p.first); // before + + // Parse and validate. 
+ // + if (!b.simple ()) + fail << "invalid config.test.output before value '" << b << "'"; + + if (!a.simple ()) + fail << "invalid config.test.output after value '" << a << "'"; + + if (a.value == "clean") m.after = output_after::clean; + else if (a.value == "keep") m.after = output_after::keep; + else fail << "invalid config.test.output after value '" << a << "'"; + + if (b.value == "fail") m.before = output_before::fail; + else if (b.value == "warn") m.before = output_before::warn; + else if (b.value == "clean") m.before = output_before::clean; + else if (b.value == "") m.before = output_before::clean; + else fail << "invalid config.test.output before value '" << b << "'"; + } + + //@@ TODO: Need ability to specify extra diff options (e.g., + // --strip-trailing-cr, now hardcoded). + // + //@@ TODO: Print report. + + // Register target types. + // + { + auto& t (rs.target_types); + + auto& tt (t.insert<testscript> ()); + t.insert_file ("testscript", tt); + } + + // Register our test running rule. 
+ // + { + default_rule& dr (m); + + rs.rules.insert<target> (perform_test_id, "test", dr); + rs.rules.insert<alias> (perform_test_id, "test", dr); + } + + return true; + } + + module_functions + build2_test_load () + { + script::regex::init (); + + return module_functions {&boot, &init}; + } + } +} diff --git a/libbuild2/test/init.hxx b/libbuild2/test/init.hxx new file mode 100644 index 0000000..a76b720 --- /dev/null +++ b/libbuild2/test/init.hxx @@ -0,0 +1,36 @@ +// file : libbuild2/test/init.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_INIT_HXX +#define LIBBUILD2_TEST_INIT_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/module.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + namespace test + { + bool + boot (scope&, const location&, unique_ptr<module_base>&); + + bool + init (scope&, + scope&, + const location&, + unique_ptr<module_base>&, + bool, + bool, + const variable_map&); + + extern "C" LIBBUILD2_SYMEXPORT module_functions + build2_test_load (); + } +} + +#endif // LIBBUILD2_TEST_INIT_HXX diff --git a/libbuild2/test/module.hxx b/libbuild2/test/module.hxx new file mode 100644 index 0000000..584cb84 --- /dev/null +++ b/libbuild2/test/module.hxx @@ -0,0 +1,37 @@ +// file : libbuild2/test/module.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_MODULE_HXX +#define LIBBUILD2_TEST_MODULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/module.hxx> + +#include <libbuild2/test/rule.hxx> +#include <libbuild2/test/common.hxx> + +namespace build2 +{ + namespace test + { + struct module: module_base, virtual common, default_rule, group_rule + { + const test::group_rule& + group_rule () const + { + return *this; + } + + explicit + module (common_data&& d) + : 
common (move (d)), + test::default_rule (move (d)), + test::group_rule (move (d)) {} + }; + } +} + +#endif // LIBBUILD2_TEST_MODULE_HXX diff --git a/libbuild2/test/operation.cxx b/libbuild2/test/operation.cxx new file mode 100644 index 0000000..3ff7702 --- /dev/null +++ b/libbuild2/test/operation.cxx @@ -0,0 +1,55 @@ +// file : libbuild2/test/operation.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/operation.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + static operation_id + test_pre (const values& params, meta_operation_id mo, const location& l) + { + if (!params.empty ()) + fail (l) << "unexpected parameters for operation test"; + + // Run update as a pre-operation, unless we are disfiguring. + // + return mo != disfigure_id ? update_id : 0; + } + + const operation_info op_test { + test_id, + 0, + "test", + "test", + "testing", + "tested", + "has nothing to test", // We cannot "be tested". + execution_mode::first, + 1, + &test_pre, + nullptr + }; + + // Also the explicit update-for-test operation alias. + // + const operation_info op_update_for_test { + update_id, // Note: not update_for_test_id. 
+ test_id, + op_update.name, + op_update.name_do, + op_update.name_doing, + op_update.name_did, + op_update.name_done, + op_update.mode, + op_update.concurrency, + op_update.pre, + op_update.post + }; + } +} diff --git a/libbuild2/test/operation.hxx b/libbuild2/test/operation.hxx new file mode 100644 index 0000000..8a9aed7 --- /dev/null +++ b/libbuild2/test/operation.hxx @@ -0,0 +1,22 @@ +// file : libbuild2/test/operation.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_OPERATION_HXX +#define LIBBUILD2_TEST_OPERATION_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/operation.hxx> + +namespace build2 +{ + namespace test + { + extern const operation_info op_test; + extern const operation_info op_update_for_test; + } +} + +#endif // LIBBUILD2_TEST_OPERATION_HXX diff --git a/libbuild2/test/rule.cxx b/libbuild2/test/rule.cxx new file mode 100644 index 0000000..a6796b4 --- /dev/null +++ b/libbuild2/test/rule.cxx @@ -0,0 +1,882 @@ +// file : libbuild2/test/rule.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/rule.hxx> + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/test/target.hxx> + +#include <libbuild2/test/script/parser.hxx> +#include <libbuild2/test/script/runner.hxx> +#include <libbuild2/test/script/script.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + bool rule:: + match (action, target&, const string&) const + { + // We always match, even if this target is not testable (so that we can + // ignore it; see apply()). 
+ // + return true; + } + + recipe rule:: + apply (action a, target& t) const + { + // Note that we are called both as the outer part during the update-for- + // test pre-operation and as the inner part during the test operation + // itself. + // + // In both cases we first determine if the target is testable and return + // noop if it's not. Otherwise, in the first case (update for test) we + // delegate to the normal update and in the second (test) -- perform the + // test. + // + // And to add a bit more complexity, we want to handle aliases slightly + // differently: we may not want to ignore their prerequisites if the + // alias is not testable since their prerequisites could be. + // + // Here is the state matrix: + // + // test'able | pass'able | neither + // | | + // update-for-test delegate (& pass) | pass | noop + // ---------------------------------------+-------------+--------- + // test test (& pass) | pass | noop + // + auto& pts (t.prerequisite_targets[a]); + + // Resolve group members. + // + if (!see_through || t.type ().see_through) + { + // Remember that we are called twice: first during update for test + // (pre-operation) and then during test. During the former, we rely on + // the normal update rule to resolve the group members. During the + // latter, there will be no rule to do this but the group will already + // have been resolved by the pre-operation. + // + // If the rule could not resolve the group, then we ignore it. + // + group_view gv (a.outer () + ? resolve_members (a, t) + : t.group_members (a)); + + if (gv.members != nullptr) + { + for (size_t i (0); i != gv.count; ++i) + { + if (const target* m = gv.members[i]) + pts.push_back (m); + } + + match_members (a, t, pts); + } + } + + // If we are passing-through, then match our prerequisites. 
+ // + if (t.is_a<alias> () && pass (t)) + { + // For the test operation we have to implement our own search and + // match because we need to ignore prerequisites that are outside of + // our project. They can be from projects that don't use the test + // module (and thus won't have a suitable rule). Or they can be from + // no project at all (e.g., installed). Also, generally, not testing + // stuff that's not ours seems right. + // + match_prerequisites (a, t, t.root_scope ()); + } + + size_t pass_n (pts.size ()); // Number of pass-through prerequisites. + + // See if it's testable and if so, what kind. + // + bool test (false); + bool script (false); + + if (this->test (t)) + { + // We have two very different cases: testscript and simple test (plus + // it may not be a testable target at all). So as the first step + // determine which case this is. + // + // If we have any prerequisites of the testscript{} type, then this is + // the testscript case. + // + // If we can, go inside see-through groups. Normally groups won't be + // resolvable for this action but then normally they won't contain any + // testscripts either. In other words, if there is a group that + // contains testscripts as members then it will need to arrange for + // the members to be resolvable (e.g., by registering an appropriate + // rule for the test operation). + // + for (prerequisite_member p: + group_prerequisite_members (a, t, members_mode::maybe)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (p.is_a<testscript> ()) + { + if (!script) + { + script = true; + + // We treat this target as testable unless the test variable is + // explicitly set to false. + // + const name* n (cast_null<name> (t[var_test])); + test = (n == nullptr || !n->simple () || n->value != "false"); + + if (!test) + break; + } + + // Collect testscripts after the pass-through prerequisites. 
+ // + const target& pt (p.search (t)); + + // Note that for the test operation itself we don't match nor + // execute them relying on update to assign their paths. + // + // Causing update for test inputs/scripts is tricky: we cannot + // match for update-for-test because this same rule will match + // and since the target is not testable, it will return the noop + // recipe. + // + // So what we are going to do is directly match (and also execute; + // see below) a recipe for the inner update (who thought we could + // do that... but it seems we can). While at first it might feel + // iffy, it does make sense: the outer rule we would have matched + // would have simply delegated to the inner so we might as well + // take a shortcut. The only potential drawback of this approach + // is that we won't be able to provide any for-test customizations + // when updating test inputs/scripts. But such a need seems rather + // far-fetched. + // + if (a.operation () == update_id) + match_inner (a, pt); + + pts.push_back (&pt); + } + } + + // If this is not a script, then determine if it is a simple test. + // Ignore testscript files themselves at the outset. + // + if (!script && !t.is_a<testscript> ()) + { + // For the simple case whether this is a test is controlled by the + // test variable. Also, it feels redundant to specify, say, "test = + // true" and "test.stdout = test.out" -- the latter already says this + // is a test. + // + const name* n (cast_null<name> (t[var_test])); + + // If the test variable is explicitly set to false then we treat + // it as not testable regardless of what other test.* variables + // or prerequisites we might have. + // + // Note that the test variable can be set to an "override" target + // (which means 'true' for our purposes). + // + if (n != nullptr && n->simple () && n->value == "false") + test = false; + else + { + // Look for test input/stdin/stdout prerequisites. The same group + // reasoning as in the testscript case above. 
+ // + for (prerequisite_member p: + group_prerequisite_members (a, t, members_mode::maybe)) + { + const auto& vars (p.prerequisite.vars); + + if (vars.empty ()) // Common case. + continue; + + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + bool rt ( cast_false<bool> (vars[test_roundtrip])); + bool si (rt || cast_false<bool> (vars[test_stdin])); + bool so (rt || cast_false<bool> (vars[test_stdout])); + bool in ( cast_false<bool> (vars[test_input])); + + if (si || so || in) + { + // Verify it is file-based. + // + if (!p.is_a<file> ()) + { + fail << "test." << (si ? "stdin" : so ? "stdout" : "input") + << " prerequisite " << p << " of target " << t + << " is not a file"; + } + + if (!test) + { + test = true; + + // First matching prerequisite. Establish the structure in + // pts: the first element (after pass_n) is stdin (can be + // NULL), the second is stdout (can be NULL), and everything + // after that (if any) is inputs. + // + pts.push_back (nullptr); // stdin + pts.push_back (nullptr); // stdout + } + + // Collect them after the pass-through prerequisites. + // + // Note that for the test operation itself we don't match nor + // execute them relying on update to assign their paths. + // + auto match = [a, &p, &t] () -> const target* + { + const target& pt (p.search (t)); + + // The same match_inner() rationale as for the testscript + // prerequisites above. + // + if (a.operation () == update_id) + match_inner (a, pt); + + return &pt; + }; + + if (si) + { + if (pts[pass_n] != nullptr) + fail << "multiple test.stdin prerequisites for target " + << t; + + pts[pass_n] = match (); + } + + if (so) + { + if (pts[pass_n + 1] != nullptr) + fail << "multiple test.stdout prerequisites for target " + << t; + + pts[pass_n + 1] = match (); + } + + if (in) + pts.push_back (match ()); + } + } + + if (!test) + test = (n != nullptr); // We have the test variable. 
+ + if (!test) + test = t[test_options] || t[test_arguments]; + } + } + } + + // Neither testing nor passing-through. + // + if (!test && pass_n == 0) + return noop_recipe; + + // If we are only passing-through, then use the default recipe (which + // will execute all the matched prerequisites). + // + if (!test) + return default_recipe; + + // Being here means we are definitely testing and maybe passing-through. + // + if (a.operation () == update_id) + { + // For the update pre-operation match the inner rule (actual update). + // + match_inner (a, t); + + return [pass_n] (action a, const target& t) + { + return perform_update (a, t, pass_n); + }; + } + else + { + if (script) + { + return [pass_n, this] (action a, const target& t) + { + return perform_script (a, t, pass_n); + }; + } + else + { + return [pass_n, this] (action a, const target& t) + { + return perform_test (a, t, pass_n); + }; + } + } + } + + target_state rule:: + perform_update (action a, const target& t, size_t pass_n) + { + // First execute the inner recipe then execute prerequisites. + // + target_state ts (execute_inner (a, t)); + + if (pass_n != 0) + ts |= straight_execute_prerequisites (a, t, pass_n); + + ts |= straight_execute_prerequisites_inner (a, t, 0, pass_n); + + return ts; + } + + static script::scope_state + perform_script_impl (const target& t, + const testscript& ts, + const dir_path& wd, + const common& c) + { + using namespace script; + + scope_state r; + + try + { + build2::test::script::script s (t, ts, wd); + + { + parser p; + p.pre_parse (s); + + default_runner r (c); + p.execute (s, r); + } + + r = s.state; + } + catch (const failed&) + { + r = scope_state::failed; + } + + return r; + } + + target_state rule:: + perform_script (action a, const target& t, size_t pass_n) const + { + // First pass through. 
+ // + if (pass_n != 0) + straight_execute_prerequisites (a, t, pass_n); + + // Figure out whether the testscript file is called 'testscript', in + // which case it should be the only one. + // + auto& pts (t.prerequisite_targets[a]); + size_t pts_n (pts.size ()); + + bool one; + { + optional<bool> o; + for (size_t i (pass_n); i != pts_n; ++i) + { + const testscript& ts (*pts[i]->is_a<testscript> ()); + + bool r (ts.name == "testscript"); + + if ((r && o) || (!r && o && *o)) + fail << "both 'testscript' and other names specified for " << t; + + o = r; + } + + assert (o); // We should have a testscript or we wouldn't be here. + one = *o; + } + + // Calculate root working directory. It is in the out_base of the target + // and is called just test for dir{} targets and test-<target-name> for + // other targets. + // + dir_path wd (t.out_dir ()); + + if (t.is_a<dir> ()) + wd /= "test"; + else + wd /= "test-" + t.name; + + // Are we backlinking the test working directory to src? (See + // backlink_*() in algorithm.cxx for details.) + // + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + const path& buildignore_file (rs.root_extra->buildignore_file); + + dir_path bl; + if (cast_false<bool> (rs.vars[var_forwarded])) + { + bl = bs.src_path () / wd.leaf (bs.out_path ()); + clean_backlink (bl, verb_never); + } + + // If this is a (potentially) multi-testscript test, then create (and + // later cleanup) the root directory. If this is just 'testscript', then + // the root directory is used directly as test's working directory and + // it's the runner's responsibility to create and clean it up. + // + // Note that we create the root directory containing the .buildignore + // file to make sure that it is ignored by name patterns (see the + // buildignore description for details). + // + // What should we do if the directory already exists? We used to fail + // which meant the user had to go and clean things up manually every + // time a test failed. 
This turned out to be really annoying. So now we + // issue a warning and clean it up automatically. The drawbacks of this + // approach are the potential loss of data from the previous failed test + // run and the possibility of deleting user-created files. + // + if (exists (static_cast<const path&> (wd), false)) + fail << "working directory " << wd << " is a file/symlink"; + + if (exists (wd)) + { + if (before != output_before::clean) + { + bool fail (before == output_before::fail); + + (fail ? error : warn) << "working directory " << wd << " exists " + << (empty_buildignore (wd, buildignore_file) + ? "" + : "and is not empty ") + << "at the beginning of the test"; + + if (fail) + throw failed (); + } + + // Remove the directory itself not to confuse the runner which tries + // to detect when tests stomp on each other's feet. + // + build2::rmdir_r (wd, true, 2); + } + + // Delay actually creating the directory in case all the tests are + // ignored (via config.test). + // + bool mk (!one); + + // Start asynchronous execution of the testscripts. + // + wait_guard wg; + + if (!dry_run) + wg = wait_guard (target::count_busy (), t[a].task_count); + + // Result vector. + // + using script::scope_state; + + vector<scope_state> res; + res.reserve (pts_n - pass_n); // Make sure there are no reallocations. + + for (size_t i (pass_n); i != pts_n; ++i) + { + const testscript& ts (*pts[i]->is_a<testscript> ()); + + // If this is just the testscript, then its id path is empty (and it + // can only be ignored by ignoring the test target, which makes sense + // since it's the only testscript file). + // + if (one || test (t, path (ts.name))) + { + // Because the creation of the output directory is shared between us + // and the script implementation (plus the fact that we actually + // don't clean the existing one), we are going to ignore it for + // dry-run. 
+ // + if (!dry_run) + { + if (mk) + { + mkdir_buildignore (wd, buildignore_file, 2); + mk = false; + } + } + + if (verb) + { + diag_record dr (text); + dr << "test " << ts; + + if (!t.is_a<alias> ()) + dr << ' ' << t; + } + + res.push_back (dry_run ? scope_state::passed : scope_state::unknown); + + if (!dry_run) + { + scope_state& r (res.back ()); + + if (!sched.async (target::count_busy (), + t[a].task_count, + [this] (const diag_frame* ds, + scope_state& r, + const target& t, + const testscript& ts, + const dir_path& wd) + { + diag_frame::stack_guard dsg (ds); + r = perform_script_impl (t, ts, wd, *this); + }, + diag_frame::stack (), + ref (r), + cref (t), + cref (ts), + cref (wd))) + { + // Executed synchronously. If failed and we were not asked to + // keep going, bail out. + // + if (r == scope_state::failed && !keep_going) + break; + } + } + } + } + + if (!dry_run) + wg.wait (); + + // Re-examine. + // + bool bad (false); + for (scope_state r: res) + { + switch (r) + { + case scope_state::passed: break; + case scope_state::failed: bad = true; break; + case scope_state::unknown: assert (false); + } + + if (bad) + break; + } + + // Cleanup. + // + if (!dry_run) + { + if (!bad && !one && !mk && after == output_after::clean) + { + if (!empty_buildignore (wd, buildignore_file)) + fail << "working directory " << wd << " is not empty at the " + << "end of the test"; + + rmdir_buildignore (wd, buildignore_file, 2); + } + } + + // Backlink if the working directory exists. + // + // If we dry-run then presumably all tests passed and we shouldn't + // have anything left unless we are keeping the output. + // + if (!bl.empty () && (dry_run ? after == output_after::keep : exists (wd))) + update_backlink (wd, bl, true /* changed */); + + if (bad) + throw failed (); + + return target_state::changed; + } + + // The format of args shall be: + // + // name1 arg arg ... nullptr + // name2 arg arg ... nullptr + // ... + // nameN arg arg ... 
nullptr nullptr + // + static bool + run_test (const target& t, + diag_record& dr, + char const** args, + process* prev = nullptr) + { + // Find the next process, if any. + // + char const** next (args); + for (next++; *next != nullptr; next++) ; + next++; + + // Redirect stdout to a pipe unless we are last. + // + int out (*next != nullptr ? -1 : 1); + bool pr; + process_exit pe; + + try + { + process p (prev == nullptr + ? process (args, 0, out) // First process. + : process (args, *prev, out)); // Next process. + + pr = *next == nullptr || run_test (t, dr, next, &p); + p.wait (); + + assert (p.exit); + pe = *p.exit; + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + + bool wr (pe.normal () && pe.code () == 0); + + if (!wr) + { + if (pr) // First failure? + dr << fail << "test " << t << " failed"; // Multi test: test 1. + + dr << error; + print_process (dr, args); + dr << " " << pe; + } + + return pr && wr; + } + + target_state rule:: + perform_test (action a, const target& tt, size_t pass_n) const + { + // First pass through. + // + if (pass_n != 0) + straight_execute_prerequisites (a, tt, pass_n); + + // See if we have the test executable override. + // + path p; + { + // Note that the test variable's visibility is target. + // + lookup l (tt[var_test]); + + // Note that we have similar code for scripted tests. + // + const target* t (nullptr); + + if (l.defined ()) + { + const name* n (cast_null<name> (l)); + + if (n == nullptr) + fail << "invalid test executable override: null value"; + else if (n->empty ()) + fail << "invalid test executable override: empty value"; + else if (n->simple ()) + { + // Ignore the special 'true' value. + // + if (n->value != "true") + p = path (n->value); + else + t = &tt; + } + else if (n->directory ()) + fail << "invalid test executable override: '" << *n << "'"; + else + { + // Must be a target name. 
+ // + // @@ OUT: what if this is a @-qualified pair of names? + // + t = search_existing (*n, tt.base_scope ()); + + if (t == nullptr) + fail << "invalid test executable override: unknown target: '" + << *n << "'"; + } + } + else + // By default we set it to the test target's path. + // + t = &tt; + + if (t != nullptr) + { + if (auto* pt = t->is_a<path_target> ()) + { + // Do some sanity checks: the target better be up-to-date with + // an assigned path. + // + p = pt->path (); + + if (p.empty ()) + fail << "target " << *pt << " specified in the test variable " + << "is out of date" << + info << "consider specifying it as a prerequisite of " << tt; + } + else + fail << "target " << *t << (t != &tt + ? " specified in the test variable " + : " requested to be tested ") + << "is not path-based"; + } + } + + // See apply() for the structure of prerequisite_targets in the presence + // of test.{input,stdin,stdout}. + // + auto& pts (tt.prerequisite_targets[a]); + size_t pts_n (pts.size ()); + + cstrings args; + + // Do we have stdin? + // + // We simulate stdin redirect (<file) with a fake (already terminated) + // cat pipe (cat file |). + // + bool sin (pass_n != pts_n && pts[pass_n] != nullptr); + + process cat; + if (sin) + { + const file& it (pts[pass_n]->as<file> ()); + const path& ip (it.path ()); + assert (!ip.empty ()); // Should have been assigned by update. + + cat = process (process_exit (0)); // Successfully exited. + + if (!dry_run) + { + try + { + cat.in_ofd = fdopen (ip, fdopen_mode::in); + } + catch (const io_error& e) + { + fail << "unable to open " << ip << ": " << e; + } + } + + // Purely for diagnostics. + // + args.push_back ("cat"); + args.push_back (ip.string ().c_str ()); + args.push_back (nullptr); + } + + // If dry-run, the target may not exist. + // + process_path pp (!dry_run + ? run_search (p, true /* init */) + : try_run_search (p, true)); + args.push_back (pp.empty () ? 
p.string ().c_str () : pp.recall_string ()); + + // Do we have options and/or arguments? + // + if (auto l = tt[test_options]) + append_options (args, cast<strings> (l)); + + if (auto l = tt[test_arguments]) + append_options (args, cast<strings> (l)); + + // Do we have inputs? + // + for (size_t i (pass_n + 2); i < pts_n; ++i) + { + const file& it (pts[i]->as<file> ()); + const path& ip (it.path ()); + assert (!ip.empty ()); // Should have been assigned by update. + args.push_back (ip.string ().c_str ()); + } + + args.push_back (nullptr); + + // Do we have stdout? + // + path dp ("diff"); + process_path dpp; + if (pass_n != pts_n && pts[pass_n + 1] != nullptr) + { + const file& ot (pts[pass_n + 1]->as<file> ()); + const path& op (ot.path ()); + assert (!op.empty ()); // Should have been assigned by update. + + dpp = run_search (dp, true); + + args.push_back (dpp.recall_string ()); + args.push_back ("-u"); + + // Note that MinGW-built diff utility (as of 3.3) fails trying to + // detect if stdin contains text or binary data. We will help it a bit + // to workaround the issue. + // +#ifdef _WIN32 + args.push_back ("--text"); +#endif + + // Ignore Windows newline fluff if that's what we are running on. + // + if (cast<target_triplet> (tt[test_target]).class_ == "windows") + args.push_back ("--strip-trailing-cr"); + + args.push_back (op.string ().c_str ()); + args.push_back ("-"); + args.push_back (nullptr); + } + + args.push_back (nullptr); // Second. + + if (verb >= 2) + print_process (args); + else if (verb) + text << "test " << tt; + + if (!dry_run) + { + diag_record dr; + if (!run_test (tt, + dr, + args.data () + (sin ? 3 : 0), // Skip cat. + sin ? 
&cat : nullptr)) + { + dr << info << "test command line: "; + print_process (dr, args); + dr << endf; // return + } + } + + return target_state::changed; + } + } +} diff --git a/libbuild2/test/rule.hxx b/libbuild2/test/rule.hxx new file mode 100644 index 0000000..7837074 --- /dev/null +++ b/libbuild2/test/rule.hxx @@ -0,0 +1,67 @@ +// file : libbuild2/test/rule.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_RULE_HXX +#define LIBBUILD2_TEST_RULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/rule.hxx> +#include <libbuild2/action.hxx> + +#include <libbuild2/test/common.hxx> + +namespace build2 +{ + namespace test + { + class rule: public build2::rule, protected virtual common + { + public: + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + static target_state + perform_update (action, const target&, size_t); + + target_state + perform_test (action, const target&, size_t) const; + + target_state + perform_script (action, const target&, size_t) const; + + rule (common_data&& d, bool see_through_only) + : common (move (d)), see_through (see_through_only) {} + + bool see_through; + }; + + class default_rule: public rule + { + public: + explicit + default_rule (common_data&& d) + : common (move (d)), + rule (move (d), true /* see_through_only */) {} + }; + + // To be used for non-see-through groups that should exhibit the see- + // through behavior for install (see lib{} in the bin module for an + // example). 
+ // + class group_rule: public rule + { + public: + explicit + group_rule (common_data&& d) + : common (move (d)), rule (move (d), false /* see_through_only */) {} + }; + } +} + +#endif // LIBBUILD2_TEST_RULE_HXX diff --git a/libbuild2/test/script/builtin.cxx b/libbuild2/test/script/builtin.cxx new file mode 100644 index 0000000..ab57d4f --- /dev/null +++ b/libbuild2/test/script/builtin.cxx @@ -0,0 +1,1979 @@ +// file : libbuild2/test/script/builtin.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/script/builtin.hxx> + +#include <chrono> +#include <locale> +#include <ostream> +#include <sstream> +#include <cstdlib> // strtoull() + +#include <libbutl/regex.mxx> +#include <libbutl/path-io.mxx> // use default operator<< implementation +#include <libbutl/fdstream.mxx> // fdopen_mode, fdstream_mode +#include <libbutl/filesystem.mxx> + +#include <libbuild2/context.hxx> // sched + +#include <libbuild2/test/script/script.hxx> + +// Strictly speaking a builtin which reads/writes from/to standard streams +// must be asynchronous so that the caller can communicate with it through +// pipes without being blocked on I/O operations. However, as an optimization, +// we allow builtins that only print diagnostics to STDERR to be synchronous +// assuming that their output will always fit the pipe buffer. Synchronous +// builtins must not read from STDIN and write to STDOUT. Later we may relax +// this rule to allow a "short" output for such builtins. +// +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + namespace script + { + using builtin_impl = uint8_t (scope&, + const strings& args, + auto_fd in, auto_fd out, auto_fd err); + + // Operation failed, diagnostics has already been issued. 
+ // + struct failed {}; + + // Accumulate an error message, print it atomically in dtor to the + // provided stream and throw failed afterwards if requested. Prefixes + // the message with the builtin name. + // + // Move constructible-only, not assignable (based on diag_record). + // + class error_record + { + public: + template <typename T> + friend const error_record& + operator<< (const error_record& r, const T& x) + { + r.ss_ << x; + return r; + } + + error_record (ostream& o, bool fail, const char* name) + : os_ (o), fail_ (fail), empty_ (false) + { + ss_ << name << ": "; + } + + // Older versions of libstdc++ don't have the ostringstream move + // support. Luckily, GCC doesn't seem to be actually needing move due + // to copy/move elision. + // +#ifdef __GLIBCXX__ + error_record (error_record&&); +#else + error_record (error_record&& r) + : os_ (r.os_), + ss_ (move (r.ss_)), + fail_ (r.fail_), + empty_ (r.empty_) + { + r.empty_ = true; + } +#endif + + ~error_record () noexcept (false) + { + if (!empty_) + { + // The output stream can be in a bad state (for example as a + // result of unsuccessful attempt to report a previous error), so + // we check it. + // + if (os_.good ()) + { + ss_.put ('\n'); + os_ << ss_.str (); + os_.flush (); + } + + if (fail_) + throw failed (); + } + } + + private: + ostream& os_; + mutable ostringstream ss_; + + bool fail_; + bool empty_; + }; + + // Parse and normalize a path. Also, unless it is already absolute, make + // the path absolute using the specified directory. Throw invalid_path + // if the path is empty, and on parsing and normalization failures. + // + static path + parse_path (string s, const dir_path& d) + { + path p (move (s)); + + if (p.empty ()) + throw invalid_path (""); + + if (p.relative ()) + p = d / move (p); + + p.normalize (); + return p; + } + + // Builtin commands functions. + // + + // cat <file>... 
+ // + // Note that POSIX doesn't specify if after I/O operation failure the + // command should proceed with the rest of the arguments. The current + // implementation exits immediately in such a case. + // + // @@ Shouldn't we check that we don't print a nonempty regular file to + // itself, as that would merely exhaust the output device? POSIX + // allows (but not requires) such a check and some implementations do + // this. That would require to fstat() file descriptors and complicate + // the code a bit. Was able to reproduce on a big file (should be + // bigger than the stream buffer size) with the test + // 'cat file >+file'. + // + // Note: must be executed asynchronously. + // + static uint8_t + cat (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "cat"); + }; + + try + { + ifdstream cin (move (in), fdstream_mode::binary); + ofdstream cout (move (out), fdstream_mode::binary); + + // Copy input stream to STDOUT. + // + auto copy = [&cout] (istream& is) + { + if (is.peek () != ifdstream::traits_type::eof ()) + cout << is.rdbuf (); + + is.clear (istream::eofbit); // Sets eofbit. + }; + + // Path of a file being printed to STDOUT. An empty path represents + // STDIN. Used in diagnostics. + // + path p; + + try + { + // Print STDIN. + // + if (args.empty ()) + copy (cin); + + // Print files. 
+ // + for (auto i (args.begin ()); i != args.end (); ++i) + { + if (*i == "-") + { + if (!cin.eof ()) + { + p.clear (); + copy (cin); + } + + continue; + } + + p = parse_path (*i, sp.wd_path); + + ifdstream is (p, ifdstream::binary); + copy (is); + is.close (); + } + } + catch (const io_error& e) + { + error_record d (error ()); + d << "unable to print "; + + if (p.empty ()) + d << "stdin"; + else + d << "'" << p << "'"; + + d << ": " << e; + } + + cin.close (); + cout.close (); + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while creating/closing cin, cout or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // Make a copy of a file at the specified path, preserving permissions, + // and registering a cleanup for a newly created file. The file paths + // must be absolute. Fail if an exception is thrown by the underlying + // copy operation. + // + static void + cpfile (scope& sp, + const path& from, const path& to, + bool overwrite, + bool attrs, + bool cleanup, + const function<error_record()>& fail) + { + try + { + bool exists (file_exists (to)); + + cpflags f ( + overwrite + ? cpflags::overwrite_permissions | cpflags::overwrite_content + : cpflags::none); + + if (attrs) + f |= cpflags::overwrite_permissions | cpflags::copy_timestamps; + + cpfile (from, to, f); + + if (!exists && cleanup) + sp.clean ({cleanup_type::always, to}, true); + } + catch (const system_error& e) + { + fail () << "unable to copy file '" << from << "' to '" << to + << "': " << e; + } + } + + // Make a copy of a directory at the specified path, registering a + // cleanup for the created directory. The directory paths must be + // absolute. 
Fail if the destination directory already exists or + // an exception is thrown by the underlying copy operation. + // + static void + cpdir (scope& sp, + const dir_path& from, const dir_path& to, + bool attrs, + bool cleanup, + const function<error_record()>& fail) + { + try + { + if (try_mkdir (to) == mkdir_status::already_exists) + throw_generic_error (EEXIST); + + if (cleanup) + sp.clean ({cleanup_type::always, to}, true); + + for (const auto& de: dir_iterator (from, + false /* ignore_dangling */)) + { + path f (from / de.path ()); + path t (to / de.path ()); + + if (de.type () == entry_type::directory) + cpdir (sp, + path_cast<dir_path> (move (f)), + path_cast<dir_path> (move (t)), + attrs, + cleanup, + fail); + else + cpfile (sp, f, t, false /* overwrite */, attrs, cleanup, fail); + } + + // Note that it is essential to copy timestamps and permissions after + // the directory content is copied. + // + if (attrs) + { + path_permissions (to, path_permissions (from)); + dir_time (to, dir_time (from)); + } + } + catch (const system_error& e) + { + fail () << "unable to copy directory '" << from << "' to '" << to + << "': " << e; + } + } + + // cp [-p] [--no-cleanup] <src-file> <dst-file> + // cp [-p] [--no-cleanup] -R|-r <src-dir> <dst-dir> + // cp [-p] [--no-cleanup] <src-file>... <dst-dir>/ + // cp [-p] [--no-cleanup] -R|-r <src-path>... <dst-dir>/ + // + // Note: can be executed synchronously. + // + static uint8_t + cp (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "cp"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. 
+ // + bool recursive (false); + bool attrs (false); + bool cleanup (true); + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "-R" || o == "-r") + recursive = true; + else if (o == "-p") + attrs = true; + else if (o == "--no-cleanup") + cleanup = false; + else + { + if (o == "--") + ++i; + + break; + } + } + + // Copy files or directories. + // + if (i == e) + error () << "missing arguments"; + + const dir_path& wd (sp.wd_path); + + auto j (args.rbegin ()); + path dst (parse_path (*j++, wd)); + e = j.base (); + + if (i == e) + error () << "missing source path"; + + auto fail = [&error] () {return error (true);}; + + // If destination is not a directory path (no trailing separator) + // then make a copy of the filesystem entry at the specified path + // (the only source path is allowed in such a case). Otherwise copy + // the source filesystem entries into the destination directory. + // + if (!dst.to_directory ()) + { + path src (parse_path (*i++, wd)); + + // If there are multiple sources but no trailing separator for the + // destination, then, most likely, it is missing. + // + if (i != e) + error () << "multiple source paths without trailing separator " + << "for destination directory"; + + if (!recursive) + // Synopsis 1: make a file copy at the specified path. + // + cpfile (sp, + src, + dst, + true /* overwrite */, + attrs, + cleanup, + fail); + else + // Synopsis 2: make a directory copy at the specified path. + // + cpdir (sp, + path_cast<dir_path> (src), path_cast<dir_path> (dst), + attrs, + cleanup, + fail); + } + else + { + for (; i != e; ++i) + { + path src (parse_path (*i, wd)); + + if (recursive && dir_exists (src)) + // Synopsis 4: copy a filesystem entry into the specified + // directory. Note that we handle only source directories here. + // Source files are handled below. 
+ // + cpdir (sp, + path_cast<dir_path> (src), + path_cast<dir_path> (dst / src.leaf ()), + attrs, + cleanup, + fail); + else + // Synopsis 3: copy a file into the specified directory. Also, + // here we cover synopsis 4 for the source path being a file. + // + cpfile (sp, + src, + dst / src.leaf (), + true /* overwrite */, + attrs, + cleanup, + fail); + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // echo <string>... + // + // Note: must be executed asynchronously. + // + static uint8_t + echo (scope&, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + try + { + in.close (); + ofdstream cout (move (out)); + + for (auto b (args.begin ()), i (b), e (args.end ()); i != e; ++i) + cout << (i != b ? " " : "") << *i; + + cout << '\n'; + cout.close (); + r = 0; + } + catch (const std::exception& e) + { + cerr << "echo: " << e << endl; + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // false + // + // Failure to close the file descriptors is silently ignored. + // + // Note: can be executed synchronously. + // + static builtin + false_ (scope&, uint8_t& r, const strings&, auto_fd, auto_fd, auto_fd) + { + return builtin (r = 1); + } + + // true + // + // Failure to close the file descriptors is silently ignored. + // + // Note: can be executed synchronously. + // + static builtin + true_ (scope&, uint8_t& r, const strings&, auto_fd, auto_fd, auto_fd) + { + return builtin (r = 0); + } + + // Create a symlink to a file or directory at the specified path. The + // paths must be absolute. 
Fall back to creating a hardlink, if symlink + // creation is not supported for the link path. If hardlink creation is + // not supported either, then fall back to copies. If requested, created + // filesystem entries are registered for cleanup. Fail if the target + // filesystem entry doesn't exist or an exception is thrown by the + // underlying filesystem operation (specifically for an already existing + // filesystem entry at the link path). + // + // Note that supporting optional removal of an existing filesystem entry + // at the link path (the -f option) tends to get hairy. As soon as an + // existing and the resulting filesystem entries could be of different + // types, we would end up with canceling an old cleanup and registering + // the new one. Also removing non-empty directories doesn't look very + // natural, but would be required if we want the behavior on POSIX and + // Windows to be consistent. + // + static void + mksymlink (scope& sp, + const path& target, const path& link, + bool cleanup, + const function<error_record()>& fail) + { + // Determine the target type, fail if the target doesn't exist. + // + bool dir (false); + + try + { + pair<bool, entry_stat> pe (path_entry (target)); + + if (!pe.first) + fail () << "unable to create symlink to '" << target << "': " + << "no such file or directory"; + + dir = pe.second.type == entry_type::directory; + } + catch (const system_error& e) + { + fail () << "unable to stat '" << target << "': " << e; + } + + // First we try to create a symlink. If that fails (e.g., "Windows + // happens"), then we resort to hard links. If that doesn't work out + // either (e.g., not on the same filesystem), then we fall back to + // copies. So things are going to get a bit nested. 
+ // + try + { + mksymlink (target, link, dir); + + if (cleanup) + sp.clean ({cleanup_type::always, link}, true); + } + catch (const system_error& e) + { + // Note that we are not guaranteed (here and below) that the + // system_error exception is of the generic category. + // + int c (e.code ().value ()); + if (!(e.code ().category () == generic_category () && + (c == ENOSYS || // Not implemented. + c == EPERM))) // Not supported by the filesystem(s). + fail () << "unable to create symlink '" << link << "' to '" + << target << "': " << e; + + try + { + mkhardlink (target, link, dir); + + if (cleanup) + sp.clean ({cleanup_type::always, link}, true); + } + catch (const system_error& e) + { + c = e.code ().value (); + if (!(e.code ().category () == generic_category () && + (c == ENOSYS || // Not implemented. + c == EPERM || // Not supported by the filesystem(s). + c == EXDEV))) // On different filesystems. + fail () << "unable to create hardlink '" << link << "' to '" + << target << "': " << e; + + if (dir) + cpdir (sp, + path_cast<dir_path> (target), path_cast<dir_path> (link), + false, + cleanup, + fail); + else + cpfile (sp, + target, + link, + false /* overwrite */, + true /* attrs */, + cleanup, + fail); + } + } + } + + // ln [--no-cleanup] -s <target-path> <link-path> + // ln [--no-cleanup] -s <target-path>... <link-dir>/ + // + // Note: can be executed synchronously. + // + static uint8_t + ln (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "ln"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. 
+ // + bool cleanup (true); + bool symlink (false); + + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "--no-cleanup") + cleanup = false; + else if (o == "-s") + symlink = true; + else + { + if (o == "--") + ++i; + + break; + } + } + + if (!symlink) + error () << "missing -s option"; + + // Create file or directory symlinks. + // + if (i == e) + error () << "missing arguments"; + + const dir_path& wd (sp.wd_path); + + auto j (args.rbegin ()); + path link (parse_path (*j++, wd)); + e = j.base (); + + if (i == e) + error () << "missing target path"; + + auto fail = [&error] () {return error (true);}; + + // If link is not a directory path (no trailing separator), then + // create a symlink to the target path at the specified link path + // (the only target path is allowed in such a case). Otherwise create + // links to the target paths inside the specified directory. + // + if (!link.to_directory ()) + { + path target (parse_path (*i++, wd)); + + // If there are multiple targets but no trailing separator for the + // link, then, most likely, it is missing. + // + if (i != e) + error () << "multiple target paths with non-directory link path"; + + // Synopsis 1: create a target path symlink at the specified path. + // + mksymlink (sp, target, link, cleanup, fail); + } + else + { + for (; i != e; ++i) + { + path target (parse_path (*i, wd)); + + // Synopsis 2: create a target path symlink in the specified + // directory. + // + mksymlink (sp, target, link / target.leaf (), cleanup, fail); + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. 
+ } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // Create a directory if not exist and its parent directories if + // necessary. Throw system_error on failure. Register created + // directories for cleanup. The directory path must be absolute. + // + static void + mkdir_p (scope& sp, const dir_path& p, bool cleanup) + { + if (!dir_exists (p)) + { + if (!p.root ()) + mkdir_p (sp, p.directory (), cleanup); + + try_mkdir (p); // Returns success or throws. + + if (cleanup) + sp.clean ({cleanup_type::always, p}, true); + } + } + + // mkdir [--no-cleanup] [-p] <dir>... + // + // Note that POSIX doesn't specify if after a directory creation failure + // the command should proceed with the rest of the arguments. The current + // implementation exits immediately in such a case. + // + // Note: can be executed synchronously. + // + static uint8_t + mkdir (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "mkdir"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. + // + bool parent (false); + bool cleanup (true); + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "-p") + parent = true; + else if (o == "--no-cleanup") + cleanup = false; + else + { + if (*i == "--") + ++i; + + break; + } + } + + // Create directories. 
+ // + if (i == e) + error () << "missing directory"; + + for (; i != e; ++i) + { + dir_path p (path_cast<dir_path> (parse_path (*i, sp.wd_path))); + + try + { + if (parent) + mkdir_p (sp, p, cleanup); + else if (try_mkdir (p) == mkdir_status::success) + { + if (cleanup) + sp.clean ({cleanup_type::always, p}, true); + } + else // == mkdir_status::already_exists + throw_generic_error (EEXIST); + } + catch (const system_error& e) + { + error () << "unable to create directory '" << p << "': " << e; + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // mv [--no-cleanup] [-f] <src-path> <dst-path> + // mv [--no-cleanup] [-f] <src-path>... <dst-dir>/ + // + // Note: can be executed synchronously. + // + static uint8_t + mv (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "mv"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. + // + bool no_cleanup (false); + bool force (false); + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "--no-cleanup") + no_cleanup = true; + else if (*i == "-f") + force = true; + else + { + if (o == "--") + ++i; + + break; + } + } + + // Move filesystem entries. 
+ // + if (i == e) + error () << "missing arguments"; + + const dir_path& wd (sp.wd_path); + + auto j (args.rbegin ()); + path dst (parse_path (*j++, wd)); + e = j.base (); + + if (i == e) + error () << "missing source path"; + + auto mv = [no_cleanup, force, &wd, &sp, &error] (const path& from, + const path& to) + { + const dir_path& rwd (sp.root->wd_path); + + if (!from.sub (rwd) && !force) + error () << "'" << from << "' is out of working directory '" + << rwd << "'"; + + try + { + auto check_wd = [&wd, &error] (const path& p) + { + if (wd.sub (path_cast<dir_path> (p))) + error () << "'" << p << "' contains test working directory '" + << wd << "'"; + }; + + check_wd (from); + check_wd (to); + + bool exists (butl::entry_exists (to)); + + // Fail if the source and destination paths are the same. + // + // Note that for mventry() function (that is based on the POSIX + // rename() function) this is a noop. + // + if (exists && to == from) + error () << "unable to move entity '" << from << "' to itself"; + + // Rename/move the filesystem entry, replacing an existing one. + // + mventry (from, + to, + cpflags::overwrite_permissions | + cpflags::overwrite_content); + + // Unless suppressed, adjust the cleanups that are sub-paths of + // the source path. + // + if (!no_cleanup) + { + // "Move" the matching cleanup if the destination path doesn't + // exist and is a sub-path of the working directory. Otherwise + // just remove it. + // + // Note that it's not enough to just change the cleanup paths. + // We also need to make sure that these cleanups happen before + // the destination directory (or any of its parents) cleanup, + // that is potentially registered. To achieve that we can just + // relocate these cleanup entries to the end of the list, + // preserving their mutual order. Remember that cleanups in + // the list are executed in the reversed order. 
+ // + bool mv_cleanups (!exists && to.sub (rwd)); + cleanups cs; + + // Remove the source path sub-path cleanups from the list, + // adjusting/caching them if required (see above). + // + for (auto i (sp.cleanups.begin ()); i != sp.cleanups.end (); ) + { + cleanup& c (*i); + path& p (c.path); + + if (p.sub (from)) + { + if (mv_cleanups) + { + // Note that we need to preserve the cleanup path + // trailing separator which indicates the removal + // method. Also note that leaf(), in particular, does + // that. + // + p = p != from + ? to / p.leaf (path_cast<dir_path> (from)) + : p.to_directory () + ? path_cast<dir_path> (to) + : to; + + cs.push_back (move (c)); + } + + i = sp.cleanups.erase (i); + } + else + ++i; + } + + // Re-insert the adjusted cleanups at the end of the list. + // + sp.cleanups.insert (sp.cleanups.end (), + make_move_iterator (cs.begin ()), + make_move_iterator (cs.end ())); + } + } + catch (const system_error& e) + { + error () << "unable to move entity '" << from << "' to '" << to + << "': " << e; + } + }; + + // If destination is not a directory path (no trailing separator) + // then move the filesystem entry to the specified path (only a single + // source path is allowed in such a case). Otherwise move the source + // filesystem entries into the destination directory. + // + if (!dst.to_directory ()) + { + path src (parse_path (*i++, wd)); + + // If there are multiple sources but no trailing separator for the + // destination, then, most likely, it is missing. + // + if (i != e) + error () << "multiple source paths without trailing separator " + << "for destination directory"; + + // Synopsis 1: move an entity to the specified path. + // + mv (src, dst); + } + else + { + // Synopsis 2: move entities into the specified directory. 
+ // + for (; i != e; ++i) + { + path src (parse_path (*i, wd)); + mv (src, dst / src.leaf ()); + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // rm [-r] [-f] <path>... + // + // The implementation deviates from POSIX in a number of ways. It doesn't + // interact with a user and fails immediately if unable to process an + // argument. It doesn't check for dots containment in the path, and + // doesn't consider files and directory permissions in any way just + // trying to remove a filesystem entry. Always fails if empty path is + // specified. + // + // Note: can be executed synchronously. + // + static uint8_t + rm (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "rm"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options: -r permits removing directories while -f suppresses + // the missing argument, non-existent entry, and out of working + // directory errors. + // + bool dir (false); + bool force (false); + for (; i != e; ++i) + { + if (*i == "-r") + dir = true; + else if (*i == "-f") + force = true; + else + { + if (*i == "--") + ++i; + + break; + } + } + + // Remove entries. 
+ // + if (i == e && !force) + error () << "missing file"; + + const dir_path& wd (sp.wd_path); + const dir_path& rwd (sp.root->wd_path); + + for (; i != e; ++i) + { + path p (parse_path (*i, wd)); + + if (!p.sub (rwd) && !force) + error () << "'" << p << "' is out of working directory '" << rwd + << "'"; + + try + { + dir_path d (path_cast<dir_path> (p)); + + if (dir_exists (d)) + { + if (!dir) + error () << "'" << p << "' is a directory"; + + if (wd.sub (d)) + error () << "'" << p << "' contains test working directory '" + << wd << "'"; + + // The call can result in rmdir_status::not_exist. That's not + // very likely but there is also nothing bad about it. + // + try_rmdir_r (d); + } + else if (try_rmfile (p) == rmfile_status::not_exist && !force) + throw_generic_error (ENOENT); + } + catch (const system_error& e) + { + error () << "unable to remove '" << p << "': " << e; + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // rmdir [-f] <path>... + // + // Note: can be executed synchronously. + // + static uint8_t + rmdir (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "rmdir"); + }; + + try + { + in.close (); + out.close (); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. + // + bool force (false); + for (; i != e; ++i) + { + if (*i == "-f") + force = true; + else + { + if (*i == "--") + ++i; + + break; + } + } + + // Remove directories. 
+ // + if (i == e && !force) + error () << "missing directory"; + + const dir_path& wd (sp.wd_path); + const dir_path& rwd (sp.root->wd_path); + + for (; i != e; ++i) + { + dir_path p (path_cast<dir_path> (parse_path (*i, wd))); + + if (wd.sub (p)) + error () << "'" << p << "' contains test working directory '" + << wd << "'"; + + if (!p.sub (rwd) && !force) + error () << "'" << p << "' is out of working directory '" + << rwd << "'"; + + try + { + rmdir_status s (try_rmdir (p)); + + if (s == rmdir_status::not_empty) + throw_generic_error (ENOTEMPTY); + else if (s == rmdir_status::not_exist && !force) + throw_generic_error (ENOENT); + } + catch (const system_error& e) + { + error () << "unable to remove '" << p << "': " << e; + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // sed [-n] [-i] -e <script> [<file>] + // + // Note: must be executed asynchronously. + // + static uint8_t + sed (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "sed"); + }; + + try + { + // Automatically remove a temporary file (used for in place editing) + // on failure. + // + auto_rmfile rm; + + // Do not throw when failbit is set (getline() failed to extract any + // character). + // + ifdstream cin (move (in), ifdstream::badbit); + ofdstream cout (move (out)); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. 
+ // + bool auto_prn (true); + bool in_place (false); + + struct substitute + { + string regex; + string replacement; + bool icase = false; + bool global = false; + bool print = false; + }; + optional<substitute> subst; + + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "-n") + auto_prn = false; + else if (o == "-i") + in_place = true; + else if (o == "-e") + { + // Only a single script is supported. + // + if (subst) + error () << "multiple scripts"; + + // If option has no value then bail out and report. + // + if (++i == e) + break; + + const string& v (*i); + if (v.empty ()) + error () << "empty script"; + + if (v[0] != 's') + error () << "only 's' command supported"; + + // Parse the substitute command. + // + if (v.size () < 2) + error () << "no delimiter for 's' command"; + + char delim (v[1]); + if (delim == '\\' || delim == '\n') + error () << "invalid delimiter for 's' command"; + + size_t p (v.find (delim, 2)); + if (p == string::npos) + error () << "unterminated 's' command regex"; + + subst = substitute (); + subst->regex.assign (v, 2, p - 2); + + // Empty regex matches nothing, so not of much use. + // + if (subst->regex.empty ()) + error () << "empty regex in 's' command"; + + size_t b (p + 1); + p = v.find (delim, b); + if (p == string::npos) + error () << "unterminated 's' command replacement"; + + subst->replacement.assign (v, b, p - b); + + // Parse the substitute command flags. + // + char c; + for (++p; (c = v[p]) != '\0'; ++p) + { + switch (c) + { + case 'i': subst->icase = true; break; + case 'g': subst->global = true; break; + case 'p': subst->print = true; break; + default: + { + error () << "invalid 's' command flag '" << c << "'"; + } + } + } + } + else + { + if (o == "--") + ++i; + + break; + } + } + + if (!subst) + error () << "missing script"; + + // Path of a file to edit. An empty path represents stdin. 
+ // + path p; + if (i != e) + { + if (*i != "-") + p = parse_path (*i, sp.wd_path); + + ++i; + } + + if (i != e) + error () << "unexpected argument '" << *i << "'"; + + // If we edit file in place make sure that the file path is specified + // and obtain a temporary file path. We will be writing to the + // temporary file (rather than to stdout) and will move it to the + // original file path afterwards. + // + path tp; + if (in_place) + { + if (p.empty ()) + error () << "-i option specified while reading from stdin"; + + try + { + tp = path::temp_path ("build2-sed"); + + cout.close (); // Flush and close. + + cout.open ( + fdopen (tp, + fdopen_mode::out | fdopen_mode::truncate | + fdopen_mode::create, + path_permissions (p))); + } + catch (const io_error& e) + { + error_record d (error ()); + d << "unable to open '" << tp << "': " << e; + } + catch (const system_error& e) + { + error_record d (error ()); + d << "unable to obtain temporary file: " << e; + } + + rm = auto_rmfile (tp); + } + + // Note that ECMAScript is implied if no grammar flag is specified. + // + regex re (subst->regex, + subst->icase ? regex::icase : regex::ECMAScript); + + // Edit a file or STDIN. + // + try + { + // Open a file if specified. + // + if (!p.empty ()) + { + cin.close (); // Flush and close. + cin.open (p); + } + + // Read until failbit is set (throw on badbit). + // + string s; + while (getline (cin, s)) + { + auto r (regex_replace_search ( + s, + re, + subst->replacement, + subst->global + ? regex_constants::format_default + : regex_constants::format_first_only)); + + // Add newline regardless whether the source line is newline- + // terminated or not (in accordance with POSIX). 
+ // + if (auto_prn || (r.second && subst->print)) + cout << r.first << '\n'; + } + + cin.close (); + cout.close (); + + if (in_place) + { + mvfile ( + tp, p, + cpflags::overwrite_content | cpflags::overwrite_permissions); + + rm.cancel (); + } + + r = 0; + } + catch (const io_error& e) + { + error_record d (error ()); + d << "unable to edit "; + + if (p.empty ()) + d << "stdin"; + else + d << "'" << p << "'"; + + d << ": " << e; + } + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + error (false) << "invalid regex" << e; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while creating cin, cout or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const system_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // sleep <seconds> + // + // Note: can be executed synchronously. + // + static uint8_t + sleep (scope&, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "sleep"); + }; + + try + { + in.close (); + out.close (); + + if (args.empty ()) + error () << "missing time interval"; + + if (args.size () > 1) + error () << "unexpected argument '" << args[1] << "'"; + + uint64_t n; + + for (;;) // Breakout loop. + { + const string& a (args[0]); + + // Note: strtoull() allows these. + // + if (!a.empty () && a[0] != '-' && a[0] != '+') + { + char* e (nullptr); + n = strtoull (a.c_str (), &e, 10); // Can't throw. 
+ + if (errno != ERANGE && e == a.c_str () + a.size ()) + break; + } + + error () << "invalid time interval '" << a << "'"; + } + + // If/when required we could probably support the precise sleep mode + // (e.g., via an option). + // + sched.sleep (chrono::seconds (n)); + + r = 0; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // test -f|-d <path> + // + // Note: can be executed synchronously. + // + static uint8_t + test (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (2); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "test"); + }; + + try + { + in.close (); + out.close (); + + if (args.size () < 2) + error () << "missing path"; + + bool file (args[0] == "-f"); + + if (!file && args[0] != "-d") + error () << "invalid option"; + + if (args.size () > 2) + error () << "unexpected argument '" << args[2] << "'"; + + path p (parse_path (args[1], sp.wd_path)); + + try + { + r = (file ? file_exists (p) : dir_exists (p)) ? 0 : 1; + } + catch (const system_error& e) + { + error () << "cannot test '" << p << "': " << e; + } + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 2; + } + + // touch [--no-cleanup] [--after <ref-file>] <file>... + // + // Note that POSIX doesn't specify the behavior for touching an entry + // other than file. 
+ // + // Also note that POSIX doesn't specify if after a file touch failure the + // command should proceed with the rest of the arguments. The current + // implementation exits immediately in such a case. + // + // Note: can be executed synchronously. + // + static uint8_t + touch (scope& sp, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) noexcept + try + { + uint8_t r (1); + ofdstream cerr (move (err)); + + auto error = [&cerr] (bool fail = true) + { + return error_record (cerr, fail, "touch"); + }; + + try + { + in.close (); + out.close (); + + // Return the file modification time, issuing diagnostics via error() + // if the file does not exist or its time cannot be obtained. + // + auto mtime = [&error] (const path& p) -> timestamp + { + try + { + timestamp t (file_mtime (p)); + + if (t == timestamp_nonexistent) + throw_generic_error (ENOENT); + + return t; + } + catch (const system_error& e) + { + error () << "cannot obtain file '" << p + << "' modification time: " << e; + } + assert (false); // Can't be here. + return timestamp (); + }; + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. + // + bool cleanup (true); + optional<timestamp> after; + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "--no-cleanup") + cleanup = false; + else if (o == "--after") + { + if (++i == e) + error () << "missing --after option value"; + + after = mtime (parse_path (*i, sp.wd_path)); + } + else + { + if (o == "--") + ++i; + + break; + } + } + + if (i == e) + error () << "missing file"; + + // Create files. + // + for (; i != e; ++i) + { + path p (parse_path (*i, sp.wd_path)); + + try + { + // Note that we don't register (implicit) cleanup for an + // existing path. 
+ // + if (touch_file (p) && cleanup) + sp.clean ({cleanup_type::always, p}, true); + + if (after) + { + while (mtime (p) <= *after) + touch_file (p, false /* create */); + } + } + catch (const system_error& e) + { + error () << "cannot create/update '" << p << "': " << e; + } + } + + r = 0; + } + catch (const invalid_path& e) + { + error (false) << "invalid path '" << e.path << "'"; + } + // Can be thrown while closing in, out or writing to cerr. + // + catch (const io_error& e) + { + error (false) << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + + cerr.close (); + return r; + } + catch (const std::exception&) + { + return 1; + } + + // Run builtin implementation asynchronously. + // + static builtin + async_impl (builtin_impl* fn, + scope& sp, + uint8_t& r, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) + { + return builtin ( + r, + thread ([fn, &sp, &r, &args, + in = move (in), + out = move (out), + err = move (err)] () mutable noexcept + { + r = fn (sp, args, move (in), move (out), move (err)); + })); + } + + template <builtin_impl fn> + static builtin + async_impl (scope& sp, + uint8_t& r, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) + { + return async_impl (fn, sp, r, args, move (in), move (out), move (err)); + } + + // Run builtin implementation synchronously. 
+ // + template <builtin_impl fn> + static builtin + sync_impl (scope& sp, + uint8_t& r, + const strings& args, + auto_fd in, auto_fd out, auto_fd err) + { + r = fn (sp, args, move (in), move (out), move (err)); + return builtin (r, thread ()); + } + + const builtin_map builtins + { + {"cat", &async_impl<&cat>}, + {"cp", &sync_impl<&cp>}, + {"echo", &async_impl<&echo>}, + {"false", &false_}, + {"ln", &sync_impl<&ln>}, + {"mkdir", &sync_impl<&mkdir>}, + {"mv", &sync_impl<&mv>}, + {"rm", &sync_impl<&rm>}, + {"rmdir", &sync_impl<&rmdir>}, + {"sed", &async_impl<&sed>}, + {"sleep", &sync_impl<&sleep>}, + {"test", &sync_impl<&test>}, + {"touch", &sync_impl<&touch>}, + {"true", &true_} + }; + } + } +} diff --git a/libbuild2/test/script/builtin.hxx b/libbuild2/test/script/builtin.hxx new file mode 100644 index 0000000..b340335 --- /dev/null +++ b/libbuild2/test/script/builtin.hxx @@ -0,0 +1,74 @@ +// file : libbuild2/test/script/builtin.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX +#define LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX + +#include <map> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +namespace build2 +{ + namespace test + { + namespace script + { + class scope; + + // A process/thread-like object representing a running builtin. + // + // For now, instead of allocating the result storage dynamically, we + // expect it to be provided by the caller. + // + class builtin + { + public: + uint8_t + wait () {if (t_.joinable ()) t_.join (); return r_;} + + ~builtin () {wait ();} + + public: + builtin (uint8_t& r, thread&& t = thread ()): r_ (r), t_ (move (t)) {} + + builtin (builtin&&) = default; + + private: + uint8_t& r_; + thread t_; + }; + + // Start builtin command. Throw system_error on failure. + // + // Note that unlike argc/argv, our args don't include the program name. 
+ // + using builtin_func = builtin (scope&, + uint8_t& result, + const strings& args, + auto_fd in, auto_fd out, auto_fd err); + + class builtin_map: public std::map<string, builtin_func*> + { + public: + using base = std::map<string, builtin_func*>; + using base::base; + + // Return NULL if not a builtin. + // + builtin_func* + find (const string& n) const + { + auto i (base::find (n)); + return i != end () ? i->second : nullptr; + } + }; + + extern const builtin_map builtins; + } + } +} + +#endif // LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX diff --git a/libbuild2/test/script/lexer+command-expansion.test.testscript b/libbuild2/test/script/lexer+command-expansion.test.testscript new file mode 100644 index 0000000..1ddc246 --- /dev/null +++ b/libbuild2/test/script/lexer+command-expansion.test.testscript @@ -0,0 +1,248 @@ +# file : libbuild2/test/script/lexer+command-expansion.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = command-expansion + +: pass-redirect +: +{ + : in + : + $* <:"0<|" >>EOO + '0' + <| + EOO + + : arg-in + : + $* <:"0 <|" >>EOO + '0 ' + <| + EOO + + : out + : + $* <:"1>|" >>EOO + '1' + >| + EOO + + : arg-out + : + $* <:"1 >|" >>EOO + '1 ' + >| + EOO +} + +: null-redirect +: +{ + : in + : + $* <:"0<-" >>EOO + '0' + <- + EOO + + : arg-in + : + $* <:"0 <-" >>EOO + '0 ' + <- + EOO + + : out + : + $* <:"1>-" >>EOO + '1' + >- + EOO + + : arg-out + : + $* <:"1 >-" >>EOO + '1 ' + >- + EOO +} + +: trace-redirect +: +{ + : out + : + $* <:"1>!" >>EOO + '1' + >! + EOO + + : arg-out + : + $* <:"1 >!" >>EOO + '1 ' + >! 
+ EOO +} + +: merge-redirect +: +{ + : out + : + $* <:"1>&2" >>EOO + '1' + >& + '2' + EOO + + : arg-out + : + $* <:"1 >&2" >>EOO + '1 ' + >& + '2' + EOO +} + +: str-redirect +: +{ + : in + : + { + : newline + : + $* <:"0<a b" >>EOO + '0' + < + 'a b' + EOO + + : no-newline + : + $* <:"0<:a b" >>EOO + '0' + <: + 'a b' + EOO + } + + : out + : + { + : newline + : + $* <:"1>a b" >>EOO + '1' + > + 'a b' + EOO + + : no-newline + : + $* <:"1>:a b" >>EOO + '1' + >: + 'a b' + EOO + } +} + +: doc-redirect +: +{ + : in + : + { + : newline + : + $* <:"0<<E O I" >>EOO + '0' + << + 'E O I' + EOO + + : no-newline + : + $* <:"0<<:E O I" >>EOO + '0' + <<: + 'E O I' + EOO + } + + : out + : + { + : newline + : + $* <:"1>>E O O" >>EOO + '1' + >> + 'E O O' + EOO + + : no-newline + : + $* <:"1>>:E O O" >>EOO + '1' + >>: + 'E O O' + EOO + } +} + +: file-redirect +: +{ + : in + : + $* <:"0<<<a b" >>EOO + '0' + <<< + 'a b' + EOO + + : out + : + $* <:"1>=a b" >>EOO + '1' + >= + 'a b' + EOO + + : out-app + : + $* <:"1>+a b" >>EOO + '1' + >+ + 'a b' + EOO +} + +: cleanup +: +{ + : always + : + $* <:"&file" >>EOO + & + 'file' + EOO + + : maybe + : + $* <:"&?file" >>EOO + &? + 'file' + EOO + + : never + : + $* <:"&!file" >>EOO + &! 
+ 'file' + EOO +} diff --git a/libbuild2/test/script/lexer+command-line.test.testscript b/libbuild2/test/script/lexer+command-line.test.testscript new file mode 100644 index 0000000..eedb46f --- /dev/null +++ b/libbuild2/test/script/lexer+command-line.test.testscript @@ -0,0 +1,208 @@ +# file : libbuild2/test/script/lexer+command-line.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = command-line + +: semi +{ + : immediate + : + $* <"cmd;" >>EOO + 'cmd' + ; + <newline> + EOO + + : separated + : + $* <"cmd ;" >>EOO + 'cmd' + ; + <newline> + EOO + + : only + : + $* <";" >>EOO + ; + <newline> + EOO +} + +: colon +: +{ + : immediate + : + $* <"cmd: dsc" >>EOO + 'cmd' + : + 'dsc' + <newline> + EOO + + : separated + : + $* <"cmd :dsc" >>EOO + 'cmd' + : + 'dsc' + <newline> + EOO + + : only + : + $* <":" >>EOO + : + <newline> + EOO +} + +: redirect +: +{ + : pass + : + $* <"cmd <| 1>|" >>EOO + 'cmd' + <| + '1' + >| + <newline> + EOO + + : null + : + $* <"cmd <- 1>-" >>EOO + 'cmd' + <- + '1' + >- + <newline> + EOO + + : trace + : + $* <"cmd 1>!" >>EOO + 'cmd' + '1' + >! 
+ <newline> + EOO + + : merge + : + $* <"cmd 1>&2" >>EOO + 'cmd' + '1' + >& + '2' + <newline> + EOO + + : str + : + $* <"cmd <a 1>b" >>EOO + 'cmd' + < + 'a' + '1' + > + 'b' + <newline> + EOO + + : str-nn + : + $* <"cmd <:a 1>:b" >>EOO + 'cmd' + <: + 'a' + '1' + >: + 'b' + <newline> + EOO + + : doc + : + $* <"cmd <<EOI 1>>EOO" >>EOO + 'cmd' + << + 'EOI' + '1' + >> + 'EOO' + <newline> + EOO + + : doc-nn + : + $* <"cmd <<:EOI 1>>:EOO" >>EOO + 'cmd' + <<: + 'EOI' + '1' + >>: + 'EOO' + <newline> + EOO + + : file-cmp + : + $* <"cmd <<<in >>>out 2>>>err" >>EOO + 'cmd' + <<< + 'in' + >>> + 'out' + '2' + >>> + 'err' + <newline> + EOO + + : file-write + : + $* <"cmd >=out 2>+err" >>EOO + 'cmd' + >= + 'out' + '2' + >+ + 'err' + <newline> + EOO +} + +: cleanup +: +{ + : always + : + $* <"cmd &file" >>EOO + 'cmd' + & + 'file' + <newline> + EOO + + : maybe + : + $* <"cmd &?file" >>EOO + 'cmd' + &? + 'file' + <newline> + EOO + + : never + : + $* <"cmd &!file" >>EOO + 'cmd' + &! + 'file' + <newline> + EOO +} diff --git a/libbuild2/test/script/lexer+description-line.test.testscript b/libbuild2/test/script/lexer+description-line.test.testscript new file mode 100644 index 0000000..bb5948a --- /dev/null +++ b/libbuild2/test/script/lexer+description-line.test.testscript @@ -0,0 +1,33 @@ +# file : libbuild2/test/script/lexer+description-line.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = description-line + +: full +: +$* <" foo bar " >>EOO +' foo bar ' +<newline> +EOO + +: space +: +$* <" " >>EOO +' ' +<newline> +EOO + +: empty +: +$* <"" >>EOO +<newline> +EOO + +: eof +: +$* <:"foo" >>EOO 2>>EOE != 0 +'foo' +EOO +stdin:1:4: error: expected newline at the end of description line +EOE diff --git a/libbuild2/test/script/lexer+first-token.test.testscript b/libbuild2/test/script/lexer+first-token.test.testscript new file mode 100644 index 0000000..3eaf976 --- /dev/null +++ 
b/libbuild2/test/script/lexer+first-token.test.testscript @@ -0,0 +1,97 @@ +# file : libbuild2/test/script/lexer+first-token.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Note: this mode auto-expires after each token. +# +test.arguments = first-token + +: dot +: +$* <"." >>EOO +. +<newline> +EOO + +: semi +: +$* <";" >>EOO +; +<newline> +EOO + +: colon +: +$* <":" >>EOO +: +<newline> +EOO + +: lcbrace +: +$* <"{" >>EOO +{ +<newline> +EOO + +: rcbrace +: +$* <"}" >>EOO +} +<newline> +EOO + +: setup +: +$* <"+foo" >>EOO ++ +'foo' +<newline> +EOO + +: tdown +: +$* <"- foo" >>EOO +- +'foo' +<newline> +EOO + +: plus-leading +: +$* <"foo+bar" >>EOO +'foo+bar' +<newline> +EOO + +: minus-leading +: +$* <"foo- x" >>EOO +'foo-' +'x' +<newline> +EOO + +: assign +: +$* <"foo=" >>EOO +'foo' +'=' +<newline> +EOO + +: append +: +$* <"foo+=" >>EOO +'foo' +'+=' +<newline> +EOO + +: prepend +: +$* <"foo=+" >>EOO +'foo' +'=+' +<newline> +EOO diff --git a/libbuild2/test/script/lexer+second-token.test.testscript b/libbuild2/test/script/lexer+second-token.test.testscript new file mode 100644 index 0000000..c494796 --- /dev/null +++ b/libbuild2/test/script/lexer+second-token.test.testscript @@ -0,0 +1,68 @@ +# file : libbuild2/test/script/lexer+second-token.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Note: this mode auto-expires after each token. 
+# +test.arguments = second-token + +: semi +: +$* <";" >>EOO +; +<newline> +EOO + +: colon +: +$* <":" >>EOO +: +<newline> +EOO + +: assign +: +$* <"=foo" >>EOO += +'foo' +<newline> +EOO + +: append +: +$* <"+= foo" >>EOO ++= +'foo' +<newline> +EOO + +: prepend +: +$* <" =+ foo" >>EOO +=+ +'foo' +<newline> +EOO + +: assign-leading +: +$* <"foo=bar" >>EOO +'foo=bar' +<newline> +EOO + +: append-leading +: +$* <"foo+= bar" >>EOO +'foo+=' +'bar' +<newline> +EOO + +: prepend-leading +: +$* <"foo =+bar" >>EOO +'foo' +'=+bar' +<newline> +EOO diff --git a/libbuild2/test/script/lexer+variable-line.test.testscript b/libbuild2/test/script/lexer+variable-line.test.testscript new file mode 100644 index 0000000..bac4f16 --- /dev/null +++ b/libbuild2/test/script/lexer+variable-line.test.testscript @@ -0,0 +1,28 @@ +# file : libbuild2/test/script/lexer+variable-line.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +test.arguments = variable-line + +: semi +: +$* <"cmd;" >>EOO +'cmd' +; +<newline> +EOO + +: semi-separated +: +$* <"cmd ;" >>EOO +'cmd' +; +<newline> +EOO + +: semi-only +: +$* <";" >>EOO +; +<newline> +EOO diff --git a/libbuild2/test/script/lexer+variable.test.testscript b/libbuild2/test/script/lexer+variable.test.testscript new file mode 100644 index 0000000..64b2bee --- /dev/null +++ b/libbuild2/test/script/lexer+variable.test.testscript @@ -0,0 +1,70 @@ +# file : libbuild2/test/script/lexer+variable.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test handling custom variable names ($*, $~, $NN). 
+# +test.arguments = variable + +: command +: +{ + : only + : + $* <"*" >>EOO + '*' + <newline> + EOO + + : followed + : + $* <"*abc" >>EOO + '*' + 'abc' + <newline> + EOO +} + +: working-dir +: +{ + : only + : + $* <"~" >>EOO + '~' + <newline> + EOO + + : followed + : + $* <"~123" >>EOO + '~' + '123' + <newline> + EOO +} + +: arg +: +{ + : only + : + $* <"0" >>EOO + '0' + <newline> + EOO + + : followed + : + $* <"1abc" >>EOO + '1' + 'abc' + <newline> + EOO + + : multi-digit + : + $* <"10" 2>>EOE != 0 + stdin:1:1: error: multi-digit special variable name + EOE +} diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx new file mode 100644 index 0000000..75c04c8 --- /dev/null +++ b/libbuild2/test/script/lexer.cxx @@ -0,0 +1,551 @@ +// file : libbuild2/test/script/lexer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/script/lexer.hxx> + +#include <cstring> // strchr() + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + using type = token_type; + + void lexer:: + mode (base_mode m, char ps, optional<const char*> esc) + { + const char* s1 (nullptr); + const char* s2 (nullptr); + bool s (true); + bool n (true); + bool q (true); + + if (!esc) + { + assert (!state_.empty ()); + esc = state_.top ().escapes; + } + + switch (m) + { + case lexer_mode::command_line: + { + s1 = ":;=!|&<> $(#\t\n"; + s2 = " == "; + break; + } + case lexer_mode::first_token: + { + // First token on the script line. Like command_line but + // recognizes leading '.+-{}' as tokens as well as variable + // assignments as separators. + // + // Note that to recognize only leading '.+-{}' we shouldn't add + // them to the separator strings. + // + s1 = ":;=+!|&<> $(#\t\n"; + s2 = " == "; + break; + } + case lexer_mode::second_token: + { + // Second token on the script line. 
Like command_line but + // recognizes leading variable assignments. + // + // Note that to recognize only leading assignments we shouldn't + // add them to the separator strings (so this is identical to + // command_line). + // + s1 = ":;=!|&<> $(#\t\n"; + s2 = " == "; + break; + } + case lexer_mode::variable_line: + { + // Like value except we recognize ';' and don't recognize '{'. + // Note that we don't recognize ':' since having a trailing + // variable assignment is illegal. + // + s1 = "; $([]#\t\n"; + s2 = " "; + break; + } + + case lexer_mode::command_expansion: + { + // Note that whitespaces are not word separators in this mode. + // + s1 = "|&<>"; + s2 = " "; + s = false; + break; + } + case lexer_mode::here_line_single: + { + // This one is like a single-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + // Note that it might be tempting to enable line continuation + // escapes. However, we will then have to also enable escaping of + // the backslash, which makes it a lot less tempting. + // + s1 = "\n"; + s2 = " "; + esc = ""; // Disable escape sequences. + s = false; + q = false; + break; + } + case lexer_mode::here_line_double: + { + // This one is like a double-quoted string except it treats + // newlines as a separator. We also treat quotes as literals. + // + s1 = "$(\n"; + s2 = " "; + s = false; + q = false; + break; + } + case lexer_mode::description_line: + { + // This one is like a single-quoted string and has an ad hoc + // implementation. + // + break; + } + default: + { + // Make sure pair separators are only enabled where we expect + // them. + // + // @@ Should we disable pair separators in the eval mode? 
+ // + assert (ps == '\0' || + m == lexer_mode::eval || + m == lexer_mode::attribute); + + base_lexer::mode (m, ps, esc); + return; + } + } + + assert (ps == '\0'); + state_.push (state {m, ps, s, n, q, *esc, s1, s2}); + } + + // Return the next token: the script-specific modes are handled by + // next_line() and next_description(), all others by the base lexer. + // Tokens that are not unquoted are counted in quoted_. + // + token lexer:: + next () + { + token r; + + switch (state_.top ().mode) + { + case lexer_mode::command_line: + case lexer_mode::first_token: + case lexer_mode::second_token: + case lexer_mode::variable_line: + case lexer_mode::command_expansion: + case lexer_mode::here_line_single: + case lexer_mode::here_line_double: + r = next_line (); + break; + case lexer_mode::description_line: + r = next_description (); + break; + default: + r = base_lexer::next (); + break; + } + + if (r.qtype != quote_type::unquoted) + ++quoted_; + + return r; + } + + token lexer:: + next_line () + { + bool sep (skip_spaces ()); + + xchar c (get ()); + uint64_t ln (c.line), cn (c.column); + + if (eos (c)) + return token (type::eos, sep, ln, cn, token_printer); + + state st (state_.top ()); // Make copy (see first/second_token). + lexer_mode m (st.mode); + + auto make_token = [&sep, &m, ln, cn] (type t, string v = string ()) + { + bool q (m == lexer_mode::here_line_double); + + return token (t, move (v), sep, + (q ? quote_type::double_ : quote_type::unquoted), q, + ln, cn, + token_printer); + }; + + auto make_token_with_modifiers = + [&make_token, this] (type t, + const char* mods, // To recognize. + const char* stop = nullptr) // To stop after. + { + string v; + if (mods != nullptr) + { + for (xchar p (peek ()); + (strchr (mods, p) != nullptr && // Modifier. + strchr (v.c_str (), p) == nullptr); // Not already seen. + p = peek ()) + { + get (); + v += p; + + if (stop != nullptr && strchr (stop, p) != nullptr) + break; + } + } + + return make_token (t, move (v)); + }; + + // Expire certain modes at the end of the token. Do it early in case + // we push any new mode (e.g., double quote). 
+ // + if (m == lexer_mode::first_token || m == lexer_mode::second_token) + state_.pop (); + + // NOTE: remember to update mode() if adding new special characters. + + if (m != lexer_mode::command_expansion) + { + switch (c) + { + case '\n': + { + // Expire variable value mode at the end of the line. + // + if (m == lexer_mode::variable_line) + state_.pop (); + + sep = true; // Treat newline as always separated. + return make_token (type::newline); + } + } + } + + if (m != lexer_mode::here_line_single) + { + switch (c) + { + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + } + } + + + if (m == lexer_mode::variable_line) + { + switch (c) + { + // Attributes. + // + case '[': return make_token (type::lsbrace); + case ']': return make_token (type::rsbrace); + } + } + + // Line separators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token || + m == lexer_mode::variable_line) + { + switch (c) + { + case ';': return make_token (type::semi); + } + } + + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + switch (c) + { + case ':': return make_token (type::colon); + } + } + + // Command line operator/separators. + // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token) + { + switch (c) + { + // Comparison (==, !=). + // + case '=': + case '!': + { + if (peek () == '=') + { + get (); + return make_token (c == '=' ? type::equal : type::not_equal); + } + } + } + } + + // Command operators/separators. 
+ // + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || + m == lexer_mode::second_token || + m == lexer_mode::command_expansion) + { + switch (c) + { + // |, || + // + case '|': + { + if (peek () == '|') + { + get (); + return make_token (type::log_or); + } + else + return make_token (type::pipe); + } + // &, && + // + case '&': + { + xchar p (peek ()); + + if (p == '&') + { + get (); + return make_token (type::log_and); + } + + // These modifiers are mutually exclusive so stop after seeing + // either one. + // + return make_token_with_modifiers (type::clean, "!?", "!?"); + } + // < + // + case '<': + { + type r (type::in_str); + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '<') + { + get (); + + switch (p) + { + case '|': return make_token (type::in_pass); + case '-': return make_token (type::in_null); + case '<': + { + r = type::in_doc; + p = peek (); + + if (p == '<') + { + get (); + r = type::in_file; + } + break; + } + } + } + + // Handle modifiers. + // + const char* mods (nullptr); + switch (r) + { + case type::in_str: + case type::in_doc: mods = ":/"; break; + } + + return make_token_with_modifiers (r, mods); + } + // > + // + case '>': + { + type r (type::out_str); + xchar p (peek ()); + + if (p == '|' || p == '-' || p == '!' || p == '&' || + p == '=' || p == '+' || p == '>') + { + get (); + + switch (p) + { + case '|': return make_token (type::out_pass); + case '-': return make_token (type::out_null); + case '!': return make_token (type::out_trace); + case '&': return make_token (type::out_merge); + case '=': return make_token (type::out_file_ovr); + case '+': return make_token (type::out_file_app); + case '>': + { + r = type::out_doc; + p = peek (); + + if (p == '>') + { + get (); + r = type::out_file_cmp; + } + break; + } + } + } + + // Handle modifiers. 
+ // + const char* mods (nullptr); + const char* stop (nullptr); + switch (r) + { + case type::out_str: + case type::out_doc: mods = ":/~"; stop = "~"; break; + } + + return make_token_with_modifiers (r, mods, stop); + } + } + } + + // Dot, plus/minus, and left/right curly braces. + // + if (m == lexer_mode::first_token) + { + switch (c) + { + case '.': return make_token (type::dot); + case '+': return make_token (type::plus); + case '-': return make_token (type::minus); + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + } + } + + // Variable assignment (=, +=, =+). + // + if (m == lexer_mode::second_token) + { + switch (c) + { + case '=': + { + if (peek () == '+') + { + get (); + return make_token (type::prepend); + } + else + return make_token (type::assign); + } + case '+': + { + if (peek () == '=') + { + get (); + return make_token (type::append); + } + } + } + } + + // Otherwise it is a word. + // + unget (c); + return word (st, sep); + } + + // Lex a description line: everything up to (but not including) the + // newline is returned as a single unquoted word. When positioned at the + // newline, return it and expire the description mode. Fail on end of + // stream. + // + token lexer:: + next_description () + { + xchar c (peek ()); + + if (eos (c)) + fail (c) << "expected newline at the end of description line"; + + uint64_t ln (c.line), cn (c.column); + + if (c == '\n') + { + get (); + state_.pop (); // Expire the description mode. + return token (type::newline, true, ln, cn, token_printer); + } + + string lexeme; + + // For now no line continuations though we could support them. + // + for (; !eos (c) && c != '\n'; c = peek ()) + { + get (); + lexeme += c; + } + + return token (move (lexeme), + false, + quote_type::unquoted, false, + ln, cn); + } + + token lexer:: + word (state st, bool sep) + { + lexer_mode m (st.mode); + + // Customized implementation that handles special variable names ($*, + // $N, $~, $@). 
+ // + if (m != lexer_mode::variable) + return base_lexer::word (st, sep); + + xchar c (peek ()); + + if (c != '*' && c != '~' && c != '@' && !digit (c)) + return base_lexer::word (st, sep); + + get (); + + if (digit (c) && digit (peek ())) + fail (c) << "multi-digit special variable name"; + + state_.pop (); // Expire the variable mode. + return token (string (1, c), + sep, + quote_type::unquoted, false, + c.line, c.column); + } + } + } +} diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx new file mode 100644 index 0000000..d96e91b --- /dev/null +++ b/libbuild2/test/script/lexer.hxx @@ -0,0 +1,94 @@ +// file : libbuild2/test/script/lexer.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_LEXER_HXX +#define LIBBUILD2_TEST_SCRIPT_LEXER_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/lexer.hxx> + +#include <libbuild2/test/script/token.hxx> + +namespace build2 +{ + namespace test + { + namespace script + { + struct lexer_mode: build2::lexer_mode + { + using base_type = build2::lexer_mode; + + enum + { + command_line = base_type::value_next, + first_token, // Expires at the end of the token. + second_token, // Expires at the end of the token. + variable_line, // Expires at the end of the line. + command_expansion, + here_line_single, + here_line_double, + description_line // Expires at the end of the line. 
+ }; + + lexer_mode () = default; + lexer_mode (value_type v): base_type (v) {} + lexer_mode (base_type v): base_type (v) {} + }; + + class lexer: public build2::lexer + { + public: + using base_lexer = build2::lexer; + using base_mode = build2::lexer_mode; + + lexer (istream& is, + const path& name, + lexer_mode m, + const char* escapes = nullptr) + : base_lexer (is, + name, + 1 /* line */, + nullptr /* escapes */, + false /* set_mode */) + { + mode (m, '\0', escapes); + } + + virtual void + mode (base_mode, + char = '\0', + optional<const char*> = nullopt) override; + + // Number of quoted (double or single) tokens since last reset. + // + size_t + quoted () const {return quoted_;} + + void + reset_quoted (size_t q) {quoted_ = q;} + + virtual token + next () override; + + protected: + token + next_line (); + + token + next_description (); + + virtual token + word (state, bool) override; + + protected: + size_t quoted_ = 0; // Zero-initialized: next() increments it even if reset_quoted() was never called. + }; + } + } +} + +#endif // LIBBUILD2_TEST_SCRIPT_LEXER_HXX diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx new file mode 100644 index 0000000..5a421b8 --- /dev/null +++ b/libbuild2/test/script/lexer.test.cxx @@ -0,0 +1,85 @@ +// file : libbuild2/test/script/lexer.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/test/script/token.hxx> +#include <libbuild2/test/script/lexer.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + // Usage: argv[0] <lexer-mode> + // + int + main (int argc, char* argv[]) + { + lexer_mode m; + { + assert (argc == 2); + string s (argv[1]); + + if (s == "command-line") m = lexer_mode::command_line; + else if (s == "first-token") m = lexer_mode::first_token; + else if (s == "second-token") m = lexer_mode::second_token; + else if 
(s == "variable-line") m = lexer_mode::variable_line; + else if (s == "command-expansion") m = lexer_mode::command_expansion; + else if (s == "here-line-single") m = lexer_mode::here_line_single; + else if (s == "here-line-double") m = lexer_mode::here_line_double; + else if (s == "description-line") m = lexer_mode::description_line; + else if (s == "variable") m = lexer_mode::variable; + else assert (false); + } + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + // Some modes auto-expire so we need something underneath. + // + bool u (m == lexer_mode::first_token || + m == lexer_mode::second_token || + m == lexer_mode::variable_line || + m == lexer_mode::description_line || + m == lexer_mode::variable); + + lexer l (cin, path ("stdin"), u ? lexer_mode::command_line : m); + if (u) + l.mode (m); + + // No use printing eos since we will either get it or loop forever. + // + for (token t (l.next ()); t.type != token_type::eos; t = l.next ()) + { + // Print each token on a separate line without quoting operators. 
+ // + t.printer (cout, t, false); + cout << endl; + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } + } +} + +// Program entry point: forward to the driver in build2::test::script. +// +int +main (int argc, char* argv[]) +{ + return build2::test::script::main (argc, argv); +} diff --git a/libbuild2/test/script/parser+cleanup.test.testscript b/libbuild2/test/script/parser+cleanup.test.testscript new file mode 100644 index 0000000..321664c --- /dev/null +++ b/libbuild2/test/script/parser+cleanup.test.testscript @@ -0,0 +1,58 @@ +# file : libbuild2/test/script/parser+cleanup.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: always +: +$* <<EOI >>EOO +cmd &file +EOI +cmd &file +EOO + +: maybe +: +$* <<EOI >>EOO +cmd &?file +EOI +cmd &?file +EOO + +: never +: +$* <<EOI >>EOO +cmd &!file +EOI +cmd &!file +EOO + +: empty +: +$* <<EOI 2>>EOE != 0 +cmd &"" +EOI +testscript:1:6: error: empty cleanup path +EOE + +: missed-before +: +{ + : token + : + : Path missed before command next token + : + $* <<EOI 2>>EOE != 0 + cmd & >file + EOI + testscript:1:7: error: missing cleanup path + EOE + + : end + : Test path missed before end of command + : + $* <<EOI 2>>EOE != 0 + cmd & + EOI + testscript:1:6: error: missing cleanup path + EOE +} diff --git a/libbuild2/test/script/parser+command-if.test.testscript b/libbuild2/test/script/parser+command-if.test.testscript new file mode 100644 index 0000000..7425da2 --- /dev/null +++ b/libbuild2/test/script/parser+command-if.test.testscript @@ -0,0 +1,548 @@ +# file : libbuild2/test/script/parser+command-if.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: if +: +{ + : true + : + $* <<EOI >>EOO + if true foo + cmd1 + cmd2 + end + EOI + ? true foo + cmd1 + cmd2 + EOO + + : false + : + $* <<EOI >>EOO + if false foo + cmd1 + cmd2 + end + EOI + ? false foo + EOO + + : not-true + : + $* <<EOI >>EOO + if! true foo + cmd1 + cmd2 + end + EOI + ? 
true foo + EOO + + : not-false + : + $* <<EOI >>EOO + if! false foo + cmd1 + cmd2 + end + EOI + ? false foo + cmd1 + cmd2 + EOO + + : without-command + : + $* <<EOI 2>>EOE != 0 + if + cmd + end + EOI + testscript:1:3: error: missing program + EOE + + : after-semi + : + $* -s <<EOI >>EOO + cmd1; + if true + cmd2 + end + EOI + { + { + cmd1 + ? true + cmd2 + } + } + EOO + + : setup + : + $* -s <<EOI >>EOO + +if true + cmd + end + EOI + { + ? true + +cmd + } + EOO + + : tdown + : + $* -s <<EOI >>EOO + -if true + cmd + end + EOI + { + ? true + -cmd + } + EOO +} + +: elif +: +{ + : true + : + $* <<EOI >>EOO + if false + cmd1 + cmd2 + elif true + cmd3 + cmd4 + end + EOI + ? false + ? true + cmd3 + cmd4 + EOO + + : false + : + $* <<EOI >>EOO + if false + cmd1 + cmd2 + elif false + cmd3 + cmd4 + end + EOI + ? false + ? false + EOO + + : not-true + : + $* <<EOI >>EOO + if false + cmd1 + cmd2 + elif! true + cmd3 + cmd4 + end + EOI + ? false + ? true + EOO + + : not-false + : + $* <<EOI >>EOO + if false + cmd1 + cmd2 + elif! false + cmd3 + cmd4 + end + EOI + ? false + ? false + cmd3 + cmd4 + EOO + + : without-if + : + $* <<EOI 2>>EOE != 0 + cmd + elif true + cmd + end + EOI + testscript:2:1: error: 'elif' without preceding 'if' + EOE + + : not-without-if + : + $* <<EOI 2>>EOE != 0 + cmd + elif! true + cmd + end + EOI + testscript:2:1: error: 'elif!' without preceding 'if' + EOE + + : after-else + : + $* <<EOI 2>>EOE != 0 + if false + cmd + else + cmd + elif true + cmd + end + EOI + testscript:5:1: error: 'elif' after 'else' + EOE +} + +: else +: +{ + : true + : + $* <<EOI >>EOO + if false + cmd1 + cmd2 + else + cmd3 + cmd4 + end + EOI + ? false + cmd3 + cmd4 + EOO + + : false + : + $* <<EOI >>EOO + if true + cmd1 + cmd2 + else + cmd3 + cmd4 + end + EOI + ? true + cmd1 + cmd2 + EOO + + : chain + : + $* <<EOI >>EOO + if false + cmd + cmd + elif false + cmd + cmd + elif false + cmd + cmd + elif true + cmd1 + cmd2 + elif false + cmd + cmd + else + cmd + cmd + end + EOI + ? 
false + ? false + ? false + ? true + cmd1 + cmd2 + EOO + + : command-after + : + $* <<EOI 2>>EOE != 0 + if true + cmd + else cmd + cmd + end + EOI + testscript:3:6: error: expected newline instead of 'cmd' + EOE + + : without-if + : + $* <<EOI 2>>EOE != 0 + cmd + else + cmd + end + EOI + testscript:2:1: error: 'else' without preceding 'if' + EOE + + : after-else + : + $* <<EOI 2>>EOE != 0 + if false + cmd + else + cmd + else + cmd + end + EOI + testscript:5:1: error: 'else' after 'else' + EOE +} + +: end +{ + : without-if + : + $* <<EOI 2>>EOE != 0 + cmd + end + EOI + testscript:2:1: error: 'end' without preceding 'if' + EOE + + : before + { + : semi + : + $* -s <<EOI >>EOO + if true + cmd1 + end; + cmd2 + EOI + { + { + ? true + cmd1 + cmd2 + } + } + EOO + + : command + : + $* <<EOI 2>>EOE != 0 + if true + cmd + end cmd + EOI + testscript:3:5: error: expected newline instead of 'cmd' + EOE + + : colon + : + $* -s <<EOI >>EOO + if true + cmd1 + cmd2 + end : test + EOI + { + : id:test + { + ? true + cmd1 + cmd2 + } + } + EOO + } +} + +: nested +: +{ + : take + : + $* <<EOI >>EOO + if true + cmd1 + if false + cmd + elif false + if true + cmd + end + else + cmd2 + end + cmd3 + end + EOI + ? true + cmd1 + ? false + ? false + cmd2 + cmd3 + EOO + + : skip + : + $* <<EOI >>EOO + if false + cmd1 + if false + cmd + elif false + if true + cmd + end + else + cmd2 + end + cmd3 + else + cmd + end + EOI + ? 
false + cmd + EOO +} + +: contained +{ + : semi + : + $* <<EOI 2>>EOE != 0 + if + cmd; + cmd + end + EOI + testscript:2:3: error: ';' inside 'if' + EOE + + : colon-leading + : + $* <<EOI 2>>EOE != 0 + if + : foo + cmd + end + EOI + testscript:2:3: error: description inside 'if' + EOE + + : colon-trailing + : + $* <<EOI 2>>EOE != 0 + if + cmd : foo + end + EOI + testscript:2:3: error: description inside 'if' + EOE + + : eos + : + $* <<EOI 2>>EOE != 0 + if + EOI + testscript:2:1: error: expected closing 'end' + EOE + + : scope + : + $* <<EOI 2>>EOE != 0 + if + cmd + { + } + end + EOI + testscript:3:3: error: expected closing 'end' + EOE + + : setup + : + $* <<EOI 2>>EOE != 0 + if + +cmd + end + EOI + testscript:2:3: error: setup command inside 'if' + EOE + + : tdown + : + $* <<EOI 2>>EOE != 0 + if + -cmd + end + EOI + testscript:2:3: error: teardown command inside 'if' + EOE +} + +: line-index +: +$* -l <<EOI >>EOO +if false + cmd + if true + cmd + end + cmd +elif false + cmd +else + cmd +end +EOI +? false # 1 +? false # 6 +cmd # 8 +EOO + +: var +: +$* <<EOI >>EOO +if true + x = foo +else + x = bar +end; +cmd $x +EOI +? 
true +cmd foo +EOO + +: leading-and-trailing-description +: +$* <<EOI 2>>EOE != 0 +: foo +if true + cmd +end : bar +EOI +testscript:4:1: error: both leading and trailing descriptions +EOE diff --git a/libbuild2/test/script/parser+command-re-parse.test.testscript b/libbuild2/test/script/parser+command-re-parse.test.testscript new file mode 100644 index 0000000..f5a67f3 --- /dev/null +++ b/libbuild2/test/script/parser+command-re-parse.test.testscript @@ -0,0 +1,12 @@ +# file : libbuild2/test/script/parser+command-re-parse.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: double-quote +: +$* <<EOI >>EOO +x = cmd \">-\" "'<-'" +$x +EOI +cmd '>-' '<-' +EOO diff --git a/libbuild2/test/script/parser+description.test.testscript b/libbuild2/test/script/parser+description.test.testscript new file mode 100644 index 0000000..d17a69f --- /dev/null +++ b/libbuild2/test/script/parser+description.test.testscript @@ -0,0 +1,486 @@ +# file : libbuild2/test/script/parser+description.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: id +: +{ + : lead + : + $* <<EOI >>EOO + : foo + cmd + EOI + : id:foo + cmd + EOO + + : trail + : + $* <<EOI >>EOO + cmd : foo + EOI + : id:foo + cmd + EOO + + : dup + : Id uniqueness + : + { + : test + : + { + : test + : + $* <<EOI 2>>EOE != 0 + : foo + cmd + : foo + cmd + EOI + testscript:3:1: error: duplicate id foo + testscript:1:1: info: previously used here + EOE + + : group + : + $* <<EOI 2>>EOE != 0 + : foo + cmd + : foo + { + cmd + cmd + } + EOI + testscript:3:1: error: duplicate id foo + testscript:1:1: info: previously used here + EOE + + : derived + : + $* <<EOI 2>>EOE != 0 + : 3 + cmd + cmd + EOI + testscript:3:1: error: duplicate id 3 + testscript:1:1: info: previously used here + EOE + } + + : group + : + { + : test + : + $* <<EOI 2>>EOE != 0 + : foo + { + cmd + cmd + } + : foo + cmd + EOI 
+ testscript:6:1: error: duplicate id foo + testscript:1:1: info: previously used here + EOE + + : group + : + $* <<EOI 2>>EOE != 0 + : foo + { + cmd + cmd + } + : foo + { + cmd + cmd + } + EOI + testscript:6:1: error: duplicate id foo + testscript:1:1: info: previously used here + EOE + + : derived + : + $* <<EOI 2>>EOE != 0 + : 3 + cmd + { + cmd + cmd + } + EOI + testscript:3:1: error: duplicate id 3 + testscript:1:1: info: previously used here + EOE + } + } +} + +: summary +{ + : lead + : + $* <<EOI >>EOO + : foo bar + cmd + EOI + : sm:foo bar + cmd + EOO + + : trail + : + $* <<EOI >>EOO + cmd: foo bar + EOI + : sm:foo bar + cmd + EOO + + : id + : + $* <<EOI >>EOO + : foo-bar + : foo bar + cmd + EOI + : id:foo-bar + : sm:foo bar + cmd + EOO +} + +: details +{ + : id + : + $* <<EOI >>EOO + : foo-bar + : + : foo bar + : bar baz + cmd + EOI + : id:foo-bar + : + : foo bar + : bar baz + cmd + EOO + + : summary + : + { + : only + : + $* <<EOI >>EOO + : foo bar + : + : foo bar + : bar baz + cmd + EOI + : sm:foo bar + : + : foo bar + : bar baz + cmd + EOO + + : assumed + : + $* <<EOI >>EOO + : foo bar + : bar baz + cmd + EOI + : foo bar + : bar baz + cmd + EOO + + : id + : + $* <<EOI >>EOO + : foo-bar + : foo bar + : + : foo bar + : bar baz + cmd + EOI + : id:foo-bar + : sm:foo bar + : + : foo bar + : bar baz + cmd + EOO + + : id-assumed + : + $* <<EOI >>EOO + : foo-bar + : bar baz + : baz fox + cmd + EOI + : foo-bar + : bar baz + : baz fox + cmd + EOO + } +} + +: legal +: +: Legal places for description. +: +{ + : var + : + $* <<EOI >>EOO + : foo bar + x = y; + cmd $x + EOI + : sm:foo bar + cmd y + EOO +} + +: illegal +: +: Illegal places for description. 
+: +{ + : eof + : + $* <": foo" 2>>EOE != 0 + testscript:2:1: error: description before <end of file> + EOE + + : rcbrace + : + $* <<EOI 2>>EOE != 0 + { + cmd + : foo + } + EOI + testscript:4:1: error: description before '}' + EOE + + : setup + : + $* <<EOI 2>>EOE != 0 + : foo + +cmd + EOI + testscript:2:1: error: description before setup command + EOE + + : tdown + : + $* <<EOI 2>>EOE != 0 + : foo + -cmd + EOI + testscript:2:1: error: description before teardown command + EOE + + : var + : + $* <<EOI 2>>EOE != 0 + : foo + x = y + EOI + testscript:2:1: error: description before setup/teardown variable + EOE + + : var-if + : + $* <<EOI 2>>EOE != 0 + : foo + if true + x = y + end + EOI + testscript:2:1: error: description before/after setup/teardown variable-if + EOE + + : var-if-after + : + $* <<EOI 2>>EOE != 0 + if true + x = y + end : foo + EOI + testscript:1:1: error: description before/after setup/teardown variable-if + EOE + + : test + : + $* <<EOI 2>>EOE != 0 + cmd1; + : foo + cmd2 + EOI + testscript:2:1: error: description inside test + EOE +} + +: test-scope +: +: Interaction with test scope merging. +: +{ + : both + : + : No merge since both have description. + : + $* -s -i <<EOI >>EOO + : foo + { + : bar + cmd + } + EOI + { + : id:foo + { # foo + : id:bar + { # foo/bar + cmd + } + } + } + EOO + + : test + : + : No merge since test has description. 
+ : + $* -s -i <<EOI >>EOO + { + : foo-bar + : foo bar + cmd + } + EOI + { + { # 1 + : id:foo-bar + : sm:foo bar + { # 1/foo-bar + cmd + } + } + } + EOO + + : group + : + $* -s -i <<EOI >>EOO + : foo-bar + : foo bar + { + cmd + } + EOI + { + : id:foo-bar + : sm:foo bar + { # foo-bar + cmd + } + } + EOO +} + +: blanks +: +$* <<EOI >>EOO +: +: +: foo bar +: bar baz +: +: baz fox +: +: +cmd +EOI +: foo bar +: bar baz +: +: baz fox +cmd +EOO + +: strip +: +$* <<EOI >>EOO +: foo-bar +: bar baz +: +: baz fox +: fox biz +:biz buz +: +cmd +EOI +: id:foo-bar +: sm:bar baz +: +: baz fox +: fox biz +: biz buz +cmd +EOO + +: trail-compound +: +$* <<EOI >>EOO +cmd1; +cmd2: foo +EOI +: id:foo +cmd1 +cmd2 +EOO + +: empty +: +$* <<EOI 2>>EOE != 0 +: +: +cmd +EOI +testscript:1:1: error: empty description +EOE + +: trail-empty +: +$* <<EOI 2>>EOE != 0 +cmd: +EOI +testscript:1:4: error: empty description +EOE + +: both +: +$* <<EOI 2>>EOE != 0 +: foo +cmd : bar +EOI +testscript:2:1: error: both leading and trailing descriptions +EOE diff --git a/libbuild2/test/script/parser+directive.test.testscript b/libbuild2/test/script/parser+directive.test.testscript new file mode 100644 index 0000000..9d04ce7 --- /dev/null +++ b/libbuild2/test/script/parser+directive.test.testscript @@ -0,0 +1,74 @@ +# file : libbuild2/test/script/parser+directive.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: not-directive +: +$* <<EOI >>EOO +x = x +".include" foo.testscript +\.include foo.testscript +EOI +.include foo.testscript +.include foo.testscript +EOO + +: expected-name +: +$* <<EOI 2>>EOE != 0 +.$ +EOI +testscript:1:2: error: expected directive name instead of '$' +EOE + +: unknown-name +: +$* <<EOI 2>>EOE != 0 +.bogus +EOI +testscript:1:2: error: unknown directive 'bogus' +EOE + +: separated +: +touch foo.testscript; +$* <<EOI +. 
include foo.testscript +EOI + +: not-separated +: +touch foo.testscript; +$* <<EOI +x = foo.testscript +.include$x +EOI + +: var-expansion +: +cat <<EOI >="foo-$(build.verson.project).testscript"; +cmd +EOI +$* <<EOI >>EOO +.include "foo-$(build.verson.project).testscript" +EOI +cmd +EOO + +: after-semi +: +$* <<EOI 2>>EOE != 0 +cmd; +.include foo.testscript +EOI +testscript:2:1: error: directive after ';' +EOE + +: semi-after +: +$* <<EOI 2>>EOE != 0 +.include foo.testscript; +cmd +EOI +testscript:1:24: error: ';' after directive +EOE diff --git a/libbuild2/test/script/parser+exit.test.testscript b/libbuild2/test/script/parser+exit.test.testscript new file mode 100644 index 0000000..284e9a7 --- /dev/null +++ b/libbuild2/test/script/parser+exit.test.testscript @@ -0,0 +1,27 @@ +# file : libbuild2/test/script/parser+exit.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: eq +: +$* <<EOI >>EOO +cmd == 1 +EOI +cmd == 1 +EOO + +: ne +: +$* <<EOI >>EOO +cmd!=1 +EOI +cmd != 1 +EOO + +: end +: +$* <<EOI 2>>EOE != 0 +cmd != 1 <"foo" +EOI +testscript:1:10: error: unexpected '<' after command exit status +EOE diff --git a/libbuild2/test/script/parser+expansion.test.testscript b/libbuild2/test/script/parser+expansion.test.testscript new file mode 100644 index 0000000..7ea92f9 --- /dev/null +++ b/libbuild2/test/script/parser+expansion.test.testscript @@ -0,0 +1,36 @@ +# file : libbuild2/test/script/parser+expansion.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: quote +: +: Make sure everything expanded as strings. 
+: +$* <<EOI >>EOO +x = dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd $x +EOI +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}} +EOO + +: unterm-quoted-seq +: +$* <<EOI 2>>EOE != 0 +x = "'a bc" +cmd xy$x +EOI +<string>:1:8: error: unterminated single-quoted sequence + testscript:2:5: info: while parsing string 'xy'a bc' +EOE + +: invalid-redirect +: +$* <<EOI 2>>EOE != 0 +x = "1>&a" +cmd $x +EOI +<string>:1:4: error: stdout merge redirect file descriptor must be 2 + testscript:2:5: info: while parsing string '1>&a' +EOE diff --git a/libbuild2/test/script/parser+here-document.test.testscript b/libbuild2/test/script/parser+here-document.test.testscript new file mode 100644 index 0000000..00f3fbd --- /dev/null +++ b/libbuild2/test/script/parser+here-document.test.testscript @@ -0,0 +1,213 @@ +# file : libbuild2/test/script/parser+here-document.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: end-marker +: +{ + : missing-newline + : + $* <'cmd <<' 2>>EOE != 0 + testscript:1:7: error: expected here-document end marker + EOE + + : missing-exit + : + $* <'cmd << != 0' 2>>EOE != 0 + testscript:1:8: error: expected here-document end marker + EOE + + : missing-empty + : + $* <'cmd <<""' 2>>EOE != 0 + testscript:1:7: error: expected here-document end marker + EOE + + : unseparated-expansion + : + $* <'cmd <<FOO$foo' 2>>EOE != 0 + testscript:1:10: error: here-document end marker must be literal + EOE + + : quoted-single-partial + : + $* <"cmd <<F'O'O" 2>>EOE != 0 + testscript:1:7: error: partially-quoted here-document end marker + EOE + + : quoted-double-partial + : + $* <'cmd <<"FO"O' 2>>EOE != 0 + testscript:1:7: error: partially-quoted here-document end marker + EOE + + : quoted-mixed + : + $* <"cmd <<\"FO\"'O'" 2>>EOE != 0 + testscript:1:7: 
error: partially-quoted here-document end marker + EOE + + : unseparated + : + $* <<EOI >>EOO + cmd <<EOF!=0 + foo + EOF + EOI + cmd <<EOF != 0 + foo + EOF + EOO + + : quoted-single + : + $* <<EOI >>EOO + cmd <<'EOF' + foo + EOF + EOI + cmd <<EOF + foo + EOF + EOO + + : quoted-double + : + $* <<EOI >>EOO + cmd <<"EOF" + foo + EOF + EOI + cmd <<EOF + foo + EOF + EOO +} + +: indent +: +{ + : basic + : + $* <<EOI >>EOO + cmd <<EOF + foo + bar + baz + EOF + EOI + cmd <<EOF + foo + bar + baz + EOF + EOO + + : blank + : + $* <<EOI >>EOO + cmd <<EOF + foo + + + bar + EOF + EOI + cmd <<EOF + foo + + + bar + EOF + EOO + + : non-ws-prefix + : + $* <<EOI >>EOO + cmd <<EOF + x EOF + EOF + EOI + cmd <<EOF + x EOF + EOF + EOO + + : whole-token + : Test the case where the indentation is a whole token + : + $* <<EOI >>EOO + x = foo bar + cmd <<"EOF" + $x + EOF + EOI + cmd <<EOF + foo bar + EOF + EOO + + : long-line + : Test the case where the line contains multiple tokens + : + $* <<EOI >>EOO + x = foo + cmd <<"EOF" + $x bar $x + EOF + EOI + cmd <<EOF + foo bar foo + EOF + EOO + + : unindented + : + $* <<EOI 2>>EOE != 0 + cmd <<EOF + bar + EOF + EOI + testscript:2:1: error: unindented here-document line + EOE +} + +: blank +: +$* <<EOI >>EOO +cmd <<EOF + +foo + +bar + +EOF +EOI +cmd <<EOF + +foo + +bar + +EOF +EOO + +: quote +: +: Note: they are still recognized in eval contexts. 
+: +$* <<EOI >>EOO +cmd <<"EOF" +'single' +"double" +b'o't"h" +('single' "double") +EOF +EOI +cmd <<EOF +'single' +"double" +b'o't"h" +single double +EOF +EOO diff --git a/libbuild2/test/script/parser+here-string.test.testscript b/libbuild2/test/script/parser+here-string.test.testscript new file mode 100644 index 0000000..785951d --- /dev/null +++ b/libbuild2/test/script/parser+here-string.test.testscript @@ -0,0 +1,19 @@ +# file : libbuild2/test/script/parser+here-string.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: empty +: +$* <<EOI >>EOO +cmd <"" +EOI +cmd <'' +EOO + +: empty-nn +: +$* <<EOI >>EOO +cmd <:"" +EOI +cmd <:'' +EOO diff --git a/libbuild2/test/script/parser+include.test.testscript b/libbuild2/test/script/parser+include.test.testscript new file mode 100644 index 0000000..c86b583 --- /dev/null +++ b/libbuild2/test/script/parser+include.test.testscript @@ -0,0 +1,104 @@ +# file : libbuild2/test/script/parser+include.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: none +: +$* <<EOI +.include +.include --once +EOI + +: empty +: +touch foo.testscript; +$* <<EOI +.include foo.testscript +.include --once foo.testscript +EOI + +: one +: +cat <"cmd" >=foo.testscript; +$* <<EOI >>EOO +.include foo.testscript +EOI +cmd +EOO + +: multiple +: +cat <"cmd foo" >=foo.testscript; +cat <"cmd bar" >=bar.testscript; +$* <<EOI >>EOO +.include foo.testscript bar.testscript +EOI +cmd foo +cmd bar +EOO + +: once +: +cat <"cmd" >=foo.testscript; +$* <<EOI >>EOO +.include foo.testscript +x +.include --once foo.testscript +.include --once bar/../foo.testscript +y +.include ../once/foo.testscript +EOI +cmd +x +y +cmd +EOO + +: group-id +: +cat <<EOI >=foo.testscript; +{ + x = b +} +EOI +$* -s -i <<EOI >>EOO +x = a +.include foo.testscript +EOI +{ + { # 2-foo-1 + } +} +EOO + +: test-id +: +cat <<EOI >=foo.testscript; +cmd 
+EOI +$* -s -i <<EOI >>EOO +x = a +.include foo.testscript +EOI +{ + { # 2-foo-1 + cmd + } +} +EOO + +: invalid-path +: +$* <<EOI 2>>EOE != 0 +.include "" +EOI +testscript:1:2: error: invalid testscript include path '' +EOE + +: unable-open +: +$* <<EOI 2>>~/EOE/ != 0 +.include foo.testscript +EOI +/testscript:1:2: error: unable to read testscript foo.testscript: .+/ +EOE diff --git a/libbuild2/test/script/parser+pipe-expr.test.testscript b/libbuild2/test/script/parser+pipe-expr.test.testscript new file mode 100644 index 0000000..8b6b4f9 --- /dev/null +++ b/libbuild2/test/script/parser+pipe-expr.test.testscript @@ -0,0 +1,133 @@ +# file : libbuild2/test/script/parser+pipe-expr.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: pipe +: +$* <<EOI >>EOO +cmd1 | cmd2|cmd3 +EOI +cmd1 | cmd2 | cmd3 +EOO + +: log +: +$* <<EOI >>EOO +cmd1 || cmd2&&cmd3 +EOI +cmd1 || cmd2 && cmd3 +EOO + +: pipe-log +: +$* <<EOI >>EOO +cmd1 | cmd2 && cmd3 | cmd4 +EOI +cmd1 | cmd2 && cmd3 | cmd4 +EOO + +: exit +: +$* <<EOI >>EOO +cmd1|cmd2==1&&cmd3!=0|cmd4 +EOI +cmd1 | cmd2 == 1 && cmd3 != 0 | cmd4 +EOO + +: here-doc +: +$* <<EOI >>EOO +cmd1 <<EOI1 | cmd2 >>EOO2 && cmd3 <<EOI3 2>&1 | cmd4 2>>EOE4 >>EOO4 +input +one +EOI1 +ouput +two +EOO2 +input +three +EOI3 +error +four +EOE4 +output +four +EOO4 +EOI +cmd1 <<EOI1 | cmd2 >>EOO2 && cmd3 <<EOI3 2>&1 | cmd4 >>EOO4 2>>EOE4 +input +one +EOI1 +ouput +two +EOO2 +input +three +EOI3 +output +four +EOO4 +error +four +EOE4 +EOO + +: leading +: +$* <<EOI 2>>EOE != 0 +| cmd +EOI +testscript:1:1: error: missing program +EOE + +: trailing +: +$* <<EOI 2>>EOE != 0 +cmd && +EOI +testscript:1:7: error: missing program +EOE + +: redirected +: +{ + : input + : + { + : first + : + $* <<EOI >>EOO + cmd1 <foo | cmd2 + EOI + cmd1 <foo | cmd2 + EOO + + : non-first + : + $* <<EOI 2>>EOE != 0 + cmd1 | cmd2 <foo + EOI + testscript:1:13: error: stdin is both piped and redirected + EOE + } + + : 
output + : + { + : last + : + $* <<EOI >>EOO + cmd1 | cmd2 >foo + EOI + cmd1 | cmd2 >foo + EOO + + : non-last + : + $* <<EOI 2>>EOE != 0 + cmd1 >foo | cmd2 + EOI + testscript:1:11: error: stdout is both redirected and piped + EOE + } +} diff --git a/libbuild2/test/script/parser+pre-parse.test.testscript b/libbuild2/test/script/parser+pre-parse.test.testscript new file mode 100644 index 0000000..f98512a --- /dev/null +++ b/libbuild2/test/script/parser+pre-parse.test.testscript @@ -0,0 +1,23 @@ +# file : libbuild2/test/script/parser+pre-parse.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: attribute +: +{ + : pair + : + $* <<EOI 2>>EOE != 0 + x = [foo=bar] + EOI + testscript:1:5: error: unknown value attribute foo=bar + EOE + + : pair-empty + : + $* <<EOI 2>>EOE != 0 + x = [foo=] + EOI + testscript:1:5: error: unknown value attribute foo + EOE +} diff --git a/libbuild2/test/script/parser+redirect.test.testscript b/libbuild2/test/script/parser+redirect.test.testscript new file mode 100644 index 0000000..a8691da --- /dev/null +++ b/libbuild2/test/script/parser+redirect.test.testscript @@ -0,0 +1,356 @@ +# file : libbuild2/test/script/parser+redirect.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# @@ Add tests for redirects other than trace, here-*, file and merge. +# @@ Does it make sense to split into separate files - one per redirect type? +# + +: trace +: +{ + $* <'cmd >!' >'cmd >!' : out + $* <'cmd 2>!' >'cmd 2>!' 
: err +} + +: str +: +{ + : literal + : + { + : portable-path + : + $* <<EOI >>EOO + cmd </foo >/bar 2>/baz + EOI + cmd </foo >/bar 2>/baz + EOO + } + + : regex + : + { + : portable-path + : + $* <<EOI >>EOO + cmd >/~%foo% 2>/~%bar% + EOI + cmd >/~%foo% 2>/~%bar% + EOO + } +} + +: doc +: +{ + : literal + : + { + : portable-path + : + $* <<EOI >>EOO + cmd <</EOI_ >/EOO_ 2>/EOE_ + foo + EOI_ + bar + EOO_ + baz + EOE_ + EOI + cmd <</EOI_ >/EOO_ 2>/EOE_ + foo + EOI_ + bar + EOO_ + baz + EOE_ + EOO + + : sharing + : + { + : in-out + : + $* <<EOI >>EOO + cmd <<:/EOF >>:/EOF + foo + EOF + EOI + cmd <<:/EOF >>:/EOF + foo + EOF + EOO + + : different + : + { + : modifiers + : + $* <<EOI 2>>EOE != 0 + cmd <<:/EOF >>:EOF + foo + EOF + EOI + testscript:1:16: error: different modifiers for shared here-document 'EOF' + EOE + + : quoting + : + $* <<EOI 2>>EOE != 0 + cmd <<EOF >>"EOF" + foo + EOF + EOI + testscript:1:13: error: different quoting for shared here-document 'EOF' + EOE + } + } + } + + : regex + : + { + : portable-path + : + $* <<EOI >>EOO + cmd >/~%EOF% 2>/~%EOE% + foo + EOF + bar + EOE + EOI + cmd >/~%EOF% 2>/~%EOE% + foo + EOF + bar + EOE + EOO + + : sharing + : + { + : in-out + : + $* <<EOI >>EOO + cmd >>~/EOF/ 2>>~/EOF/ + foo + EOF + EOI + cmd >>~/EOF/ 2>>~/EOF/ + foo + EOF + EOO + + : different + : + { + : introducers + : + $* <<EOI 2>>EOE != 0 + cmd >>~/EOF/ 2>>~%EOF% + foo + EOF + EOI + testscript:1:18: error: different introducers for shared here-document regex 'EOF' + EOE + + : flags + : + $* <<EOI 2>>EOE != 0 + cmd >>~/EOF/ 2>>~/EOF/i + foo + EOF + EOI + testscript:1:18: error: different global flags for shared here-document regex 'EOF' + EOE + } + } + } +} + +: file +: +{ + : cmp + : + $* <<EOI >>EOO + cmd 0<<<a 1>>>b 2>>>c + EOI + cmd <<<a >>>b 2>>>c + EOO + + : write + : + $* <<EOI >>EOO + cmd 1>=b 2>+c + EOI + cmd >=b 2>+c + EOO + + : quote + : + $* <<EOI >>EOO + cmd 0<<<"a f" 1>="b f" 2>+"c f" + EOI + cmd <<<'a f' >='b f' 2>+'c f' + EOO + + : in + : + { 
+ : missed + : + $* <<EOI 2>>EOE !=0 + cmd <<< + EOI + testscript:1:8: error: missing stdin file + EOE + + : empty + : + $* <<EOI 2>>EOE !=0 + cmd <<<"" + EOI + testscript:1:8: error: empty stdin redirect path + EOE + } + + : out + : + { + : missed + : + $* <<EOI 2>>EOE !=0 + cmd >= + EOI + testscript:1:7: error: missing stdout file + EOE + + : empty + : + $* <<EOI 2>>EOE !=0 + cmd >="" + EOI + testscript:1:7: error: empty stdout redirect path + EOE + } + + : err + : + { + : missed + : + $* <<EOI 2>>EOE !=0 + cmd 2>= + EOI + testscript:1:8: error: missing stderr file + EOE + + : empty + : + $* <<EOI 2>>EOE !=0 + cmd 2>="" + EOI + testscript:1:8: error: empty stderr redirect path + EOE + } +} + +: merge +{ + : out + : + { + : err + : + $* <<EOI >>EOO + cmd 1>&2 + EOI + cmd >&2 + EOO + + : no-mutual + : + $* <<EOI >>EOO + cmd 1>&2 2>&1 2>a + EOI + cmd >&2 2>a + EOO + + : not-descriptor + : + $* <<EOI 2>>EOE != 0 + cmd 1>&a + EOI + testscript:1:8: error: stdout merge redirect file descriptor must be 2 + EOE + + : self + : + $* <<EOI 2>>EOE != 0 + cmd 1>&1 + EOI + testscript:1:8: error: stdout merge redirect file descriptor must be 2 + EOE + + : missed + : + $* <<EOI 2>>EOE != 0 + cmd 1>& + EOI + testscript:1:8: error: missing stdout file descriptor + EOE + } + + : err + { + : out + : + $* <<EOI >>EOO + cmd 2>&1 + EOI + cmd 2>&1 + EOO + + : no-mutual + : + $* <<EOI >>EOO + cmd 1>&2 2>&1 >a + EOI + cmd >a 2>&1 + EOO + + : not-descriptor + : + $* <<EOI 2>>EOE != 0 + cmd 2>&a + EOI + testscript:1:8: error: stderr merge redirect file descriptor must be 1 + EOE + + : self + : + $* <<EOI 2>>EOE != 0 + cmd 2>&2 + EOI + testscript:1:8: error: stderr merge redirect file descriptor must be 1 + EOE + + : missed + : + $* <<EOI 2>>EOE != 0 + cmd 2>& + EOI + testscript:1:8: error: missing stderr file descriptor + EOE + } + + : mutual + : + $* <<EOI 2>>EOE != 0 + cmd 1>&2 2>&1 + EOI + testscript:1:14: error: stdout and stderr redirected to each other + EOE +} diff --git 
a/libbuild2/test/script/parser+regex.test.testscript b/libbuild2/test/script/parser+regex.test.testscript new file mode 100644 index 0000000..d5f899a --- /dev/null +++ b/libbuild2/test/script/parser+regex.test.testscript @@ -0,0 +1,223 @@ +# file : libbuild2/test/script/parser+regex.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: here-string +: +{ + : stdout + : + { + : missed + : + $* <'cmd >~' 2>>EOE != 0 + testscript:1:7: error: missing stdout here-string regex + EOE + + : no-introducer + : + $* <'cmd >~""' 2>>EOE != 0 + testscript:1:7: error: no introducer character in stdout regex redirect + EOE + + : no-term-introducer + : + $* <'cmd >~/' 2>>EOE != 0 + testscript:1:7: error: no closing introducer character in stdout regex redirect + EOE + + : portable-path-introducer + : + $* <'cmd >/~/foo/' 2>>EOE != 0 + testscript:1:8: error: portable path modifier and '/' introducer in stdout regex redirect + EOE + + : empty + : + $* <'cmd >~//' 2>>EOE != 0 + testscript:1:7: error: stdout regex redirect is empty + EOE + + : no-flags + : + $* <'cmd >~/fo*/' >'cmd >~/fo*/' + + : idot + : + $* <'cmd >~/fo*/d' >'cmd >~/fo*/d' + + : icase + : + $* <'cmd >~/fo*/i' >'cmd >~/fo*/i' + + : invalid-flags1 + : + $* <'cmd >~/foo/z' 2>>EOE != 0 + testscript:1:7: error: junk at the end of stdout regex redirect + EOE + + : invalid-flags2 + : + $* <'cmd >~/foo/iz' 2>>EOE != 0 + testscript:1:7: error: junk at the end of stdout regex redirect + EOE + + : no-newline + : + $* <'cmd >:~/fo*/' >'cmd >:~/fo*/' + } + + : stderr + : + { + : missed + : + $* <'cmd 2>~' 2>>EOE != 0 + testscript:1:8: error: missing stderr here-string regex + EOE + + : no-introducer + : + : Note that there is no need to reproduce all the errors as for stdout. + : All we need is to make sure that the proper description is passed to + : the parse_regex() function. 
+ : + $* <'cmd 2>~""' 2>>EOE != 0 + testscript:1:8: error: no introducer character in stderr regex redirect + EOE + } + + : modifier-last + : + $* <'cmd >~/x' 2>>EOE != 0 + testscript:1:7: error: no closing introducer character in stdout regex redirect + EOE +} + +: here-doc +: +{ + : stdout + : + { + : missed + : + $* <'cmd >>~' 2>>EOE != 0 + testscript:1:8: error: expected here-document regex end marker + EOE + + : portable-path-introducer + : + $* <<EOI 2>>EOE != 0 + cmd >>/~/EOO/ + foo + EOO + EOI + testscript:1:5: error: portable path modifier and '/' introducer in here-document regex end marker + EOE + + : unterminated-line-char + : + $* <<EOI 2>>EOE != 0 + cmd >>~/EOO/ + / + EOO + EOI + testscript:2:1: error: no syntax line characters + EOE + + : empty + : + $* <<EOI 2>>EOE != 0 + cmd >>:~/EOO/ + EOO + EOI + testscript:2:1: error: empty here-document regex + EOE + + : no-flags + : + $* <<EOI >>EOO + cmd 2>>~/EOE/ + foo + /? + /foo/ + /foo/* + /foo/i + /foo/i* + + // + //* + EOE + EOI + cmd 2>>~/EOE/ + foo + /? 
+ /foo/ + /foo/* + /foo/i + /foo/i* + + // + //* + EOE + EOO + + : no-newline + : + $* <'cmd >:~/fo*/' >'cmd >:~/fo*/' + $* <<EOI >>EOO + cmd 2>>:~/EOE/ + foo + EOE + EOI + cmd 2>>:~/EOE/ + foo + EOE + EOO + + : end-marker-restore + : + { + : idot + : + $* <<EOI >>EOO + cmd 2>>~/EOE/d + foo + EOE + EOI + cmd 2>>~/EOE/d + foo + EOE + EOO + + : icase + : + $* <<EOI >>EOO + cmd 2>>~/EOE/i + foo + EOE + EOI + cmd 2>>~/EOE/i + foo + EOE + EOO + } + } + + : stderr + : + { + : missed + : + $* <'cmd 2>>~' 2>>EOE != 0 + testscript:1:9: error: expected here-document regex end marker + EOE + } + + : modifier-last + : + $* <'cmd >>~:/FOO/' 2>>EOE != 0 + testscript:1:8: error: expected here-document regex end marker + EOE +} diff --git a/libbuild2/test/script/parser+scope-if.test.testscript b/libbuild2/test/script/parser+scope-if.test.testscript new file mode 100644 index 0000000..aad3f37 --- /dev/null +++ b/libbuild2/test/script/parser+scope-if.test.testscript @@ -0,0 +1,554 @@ +# file : libbuild2/test/script/parser+scope-if.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: if +: +{ + : true + : + $* -s <<EOI >>EOO + if true foo + { + cmd + } + EOI + { + ? true foo + { + cmd + } + } + EOO + + : false + : + $* -s <<EOI >>EOO + if false foo + { + cmd + } + EOI + { + ? false foo + } + EOO + + : not-true + : + $* -s <<EOI >>EOO + if! true + { + cmd + } + EOI + { + ? true + } + EOO + + : not-false + : + $* -s <<EOI >>EOO + if! false + { + cmd + } + EOI + { + ? false + { + cmd + } + } + EOO + + : eos-inside + : + $* <<EOI 2>>EOE != 0 + if + { + EOI + testscript:3:1: error: expected '}' at the end of the scope + EOE + +} + +: elif +: +{ + : true + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif true + { + cmd1 + } + EOI + { + ? false + ? true + { + cmd1 + } + } + EOO + + : false + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif false + { + cmd + } + EOI + { + ? false + ? 
false + } + EOO + + : not-false + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif! false + { + cmd1 + } + EOI + { + ? false + ? false + { + cmd1 + } + } + EOO + + : not-true + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif! true + { + cmd + } + EOI + { + ? false + ? true + } + EOO + + : after-else + : + $* <<EOI 2>>EOE != 0 + if false + { + cmd + } + else + { + cmd + } + elif true + { + cmd + } + EOI + testscript:9:1: error: 'elif' after 'else' + EOE +} + +: else +: +{ + : true + : + $* -s <<EOI >>EOO + if false + { + cmd + } + else + { + cmd1 + } + EOI + { + ? false + { + cmd1 + } + } + EOO + + : false + : + $* -s <<EOI >>EOO + if true + { + cmd1 + } + else + { + cmd + } + EOI + { + ? true + { + cmd1 + } + } + EOO + + : chain + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif false + { + cmd + cmd + } + elif false + { + cmd + } + elif true + { + cmd1 + cmd2 + } + elif false + { + cmd + } + else + { + cmd + cmd + } + EOI + { + ? false + ? false + ? false + ? true + { + { + cmd1 + } + { + cmd2 + } + } + } + EOO + + : scope-expected + : + $* <<EOI 2>>EOE != 0 + if + { + cmd + } + else + cmd + EOI + testscript:5:1: error: expected scope after 'else' + EOE + + : after-else + : + $* <<EOI 2>>EOE != 0 + if false + { + cmd + } + else + { + cmd + } + else + { + cmd + } + EOI + testscript:9:1: error: 'else' after 'else' + EOE +} + +: nested +: +{ + : take + : + $* -s <<EOI >>EOO + if true + { + cmd1 + if false + { + cmd + } + elif false + { + if true + { + cmd + } + } + else + { + cmd2 + } + cmd3 + } + EOI + { + ? true + { + { + cmd1 + } + ? false + ? false + { + { + cmd2 + } + } + { + cmd3 + } + } + } + EOO + + : skip + : + $* -s <<EOI >>EOO + if false + { + cmd1 + if false + { + cmd + } + elif false + { + if true + { + cmd + } + } + else + { + cmd2 + } + cmd3 + } + else + { + cmd + } + EOI + { + ? 
false + { + { + cmd + } + } + } + EOO +} + +: demote +: +{ + : group + : Chain remains a group + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif true + { + cmd1 + cmd2 + } + else + { + cmd + } + EOI + { + ? false + ? true + { + { + cmd1 + } + { + cmd2 + } + } + } + EOO + + : test + : Chain demoted to test + : + $* -s <<EOI >>EOO + if false + { + cmd + } + elif true + { + cmd1 + } + else + { + cmd + } + EOI + { + ? false + ? true + { + cmd1 + } + } + EOO +} + +: line-index +: Make sure command line index spans setup/if/teardown +: +$* -s -l <<EOI >>EOO ++setup # 1 + +if false one # 2 +{ + cmd +} +elif false two # 3 +{ + cmd +} +elif true # 4 +{ + cmd1 +} +elif false # 5 +{ + cmd +} +else +{ + cmd +} + +if false one # 6 +{ + cmd +} +elif false two # 7 +{ + cmd +} +else +{ + cmd2 +} + +-tdown # 8 +EOI +{ + +setup # 1 + ? false one # 2 + ? false two # 3 + ? true # 4 + { + cmd1 # 0 + } + ? false one # 6 + ? false two # 7 + { + cmd2 # 0 + } + -tdown # 8 +} +EOO + +: scope-comman-if +: +$* -s <<EOI >>EOO +if true +{ + cmd +} +if true + cmd1 + cmd2 +end +EOI +{ + ? true + { + cmd + } + { + ? true + cmd1 + cmd2 + } +} +EOO + +: shared-id-desc +: +$* -s -i <<EOI >>EOO +: test summary +: +if false +{ + cmd +} +else +{ + cmd1 +} +EOI +{ + ? 
false + : sm:test summary + { # 3 + cmd1 + } +} +EOO diff --git a/libbuild2/test/script/parser+scope.test.testscript b/libbuild2/test/script/parser+scope.test.testscript new file mode 100644 index 0000000..bfb1a59 --- /dev/null +++ b/libbuild2/test/script/parser+scope.test.testscript @@ -0,0 +1,280 @@ +# file : libbuild2/test/script/parser+scope.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +$* testscript <'cmd $@' >"cmd 1" : id-testscript +$* foo.testscript <'cmd $@' >"cmd foo/1" : id + +: wd-testscript +: +$* testscript <'cmd "$~"' >~"%cmd '?.+[/\\\\]test-driver[/\\\\]1'?%" + +: wd +: +$* foo.testscript <'cmd "$~"' >~"%cmd '?.+[/\\\\]test-driver[/\\\\]foo[/\\\\]1'?%" + +: group +: +{ + : empty + : + $* -s <<EOI + { + } + EOI + + : empty-empty + : + $* -s <<EOI + { + { + } + } + EOI + + : non-empty + : + $* -s <<EOI >>EOO + { + cmd1 + cmd2 + } + EOI + { + { + { + cmd1 + } + { + cmd2 + } + } + } + EOO +} + +: test +: +{ + : explicit + : + { + : one-level + : + $* -s -i <<EOI >>EOO + { + cmd + } + EOI + { + { # 1 + cmd + } + } + EOO + + : nested + : + $* -s -i <<EOI >>EOO + { + { + cmd + } + } + EOI + { + { # 1 + cmd + } + } + EOO + + : var + : + $* -s -i <<EOI >>EOO + { + x = abc + cmd $x + } + EOI + { + { # 1 + cmd abc + } + } + EOO + + : setup + : + $* -s -i <<EOI >>EOO + { + x = abc + +setup + cmd $x + } + EOI + { + { # 1 + +setup + { # 1/4 + cmd abc + } + } + } + EOO + } + + : implicit + { + : one-cmd + : + $* -s <<EOI >>EOO + cmd1 + EOI + { + { + cmd1 + } + } + EOO + + : two-cmd + : + $* -s <<EOI >>EOO + cmd1; + cmd2 + EOI + { + { + cmd1 + cmd2 + } + } + EOO + + : three-cmd + : + $* -s <<EOI >>EOO + cmd1; + cmd2; + cmd3 + EOI + { + { + cmd1 + cmd2 + cmd3 + } + } + EOO + + : var + : + $* -s <<EOI >>EOO + cmd1; + x = abc; + cmd2 $x + EOI + { + { + cmd1 + cmd2 abc + } + } + EOO + + : var-first + : + $* -s <<EOI >>EOO + x = abc; + cmd $x + EOI + { + { + cmd abc + } + } + EOO + + : 
var-setup-tdown + : + $* -s <<EOI >>EOO + x = abc + cmd $x + y = 123 + EOI + { + { + cmd abc + } + } + EOO + + : after-tdown + : + $* <<EOI 2>>EOE != 0 + cmd1 + x = abc + cmd2 + EOI + testscript:3:1: error: test after teardown + testscript:2:1: info: last teardown line appears here + EOE + } +} + +: expected +{ + : newline-lcbrace + : + $* <:"{x" 2>>EOE != 0 + testscript:1:2: error: expected newline after '{' + EOE + + : rcbrace + : + $* <"{" 2>>EOE != 0 + testscript:2:1: error: expected '}' at the end of the scope + EOE + + : line-rcbrace + : + $* <<EOI 2>>EOE != 0 + { + cmd; + } + EOI + testscript:3:1: error: expected another line after ';' + EOE + + : newline-rcbrace + : + $* <<:EOI 2>>EOE != 0 + { + } + EOI + testscript:2:2: error: expected newline after '}' + EOE + + : line-eof + : + $* <<EOI 2>>EOE != 0 + cmd; + EOI + testscript:2:1: error: expected another line after ';' + EOE + + : newline-cmd + : + $* <<:EOI 2>>EOE != 0 + cmd; + EOI + testscript:1:5: error: expected newline instead of <end of file> + EOE + + : newline-var + : + $* <:"x = abc;" 2>>EOE != 0 + testscript:1:9: error: expected newline instead of <end of file> + EOE +} diff --git a/libbuild2/test/script/parser+setup-teardown.test.testscript b/libbuild2/test/script/parser+setup-teardown.test.testscript new file mode 100644 index 0000000..5f1418a --- /dev/null +++ b/libbuild2/test/script/parser+setup-teardown.test.testscript @@ -0,0 +1,151 @@ +# file : libbuild2/test/script/parser+setup-teardown.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +: setup +: +{ + : followed + : + { + : semi + : + $* <"+cmd;" 2>>EOE != 0 + testscript:1:5: error: ';' after setup command + EOE + + : colon + : + $* <"+cmd:" 2>>EOE != 0 + testscript:1:5: error: ':' after setup command + EOE + } + + : after + : + { + : test + : + $* <<EOI 2>>EOE != 0 + cmd + +cmd + EOI + testscript:2:1: error: setup command after tests + EOE + + : after-tdownt + : + 
$* <<EOI 2>>EOE != 0 + -cmd + +cmd + EOI + testscript:2:1: error: setup command after teardown + EOE + } + + : in-test + : + $* <<EOI 2>>EOE != 0 + cmd; + +cmd + EOI + testscript:2:1: error: setup command in test + EOE +} + +: tdown +: +{ + : followed + : + { + : semi + : + $* <"-cmd;" 2>>EOE != 0 + testscript:1:5: error: ';' after teardown command + EOE + + : colon + : + $* <"-cmd:" 2>>EOE != 0 + testscript:1:5: error: ':' after teardown command + EOE + } + + : in-test + : + $* <<EOI 2>>EOE != 0 + cmd; + -cmd + EOI + testscript:2:1: error: teardown command in test + EOE +} + +: var +: +{ + : between-tests + : + $* <<EOI 2>>EOE != 0 + cmd + x = y + cmd + EOI + testscript:3:1: error: test after teardown + testscript:2:1: info: last teardown line appears here + EOE + + : between-tests-scope + : + $* <<EOI 2>>EOE != 0 + cmd + x = y + { + cmd + } + EOI + testscript:3:1: error: scope after teardown + testscript:2:1: info: last teardown line appears here + EOE + + : between-tests-command-if + : + $* <<EOI 2>>EOE != 0 + cmd + x = y + if true + cmd + end + EOI + testscript:3:1: error: test after teardown + testscript:2:1: info: last teardown line appears here + EOE + + : between-tests-scope-if + : + $* <<EOI 2>>EOE != 0 + cmd + x = y + if true + { + cmd + } + EOI + testscript:3:1: error: scope after teardown + testscript:2:1: info: last teardown line appears here + EOE + + : between-tests-variable-if + : + $* <<EOI >>EOO + cmd + x = y + if true + y = x + end + EOI + cmd + ? 
true + EOO +} diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx new file mode 100644 index 0000000..260bc88 --- /dev/null +++ b/libbuild2/test/script/parser.cxx @@ -0,0 +1,3451 @@ +// file : libbuild2/test/script/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/script/parser.hxx> + +#include <sstream> + +#include <libbuild2/context.hxx> // sched, keep_going + +#include <libbuild2/test/script/lexer.hxx> +#include <libbuild2/test/script/runner.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + using type = token_type; + + // Return true if the string contains only a single digit characters + // (used to detect the special $N variables). + // + static inline bool + digit (const string& s) + { + return s.size () == 1 && butl::digit (s[0]); + } + + // + // Pre-parse. + // + + void parser:: + pre_parse (script& s) + { + const path& p (s.script_target.path ()); + assert (!p.empty ()); // Should have been assigned. + + try + { + ifdstream ifs (p); + pre_parse (ifs, s); + } + catch (const io_error& e) + { + fail << "unable to read testscript " << p << ": " << e << endf; + } + } + + void parser:: + pre_parse (istream& is, script& s) + { + path_ = &*s.paths_.insert (s.script_target.path ()).first; + + pre_parse_ = true; + + lexer l (is, *path_, lexer_mode::command_line); + set_lexer (&l); + + id_prefix_.clear (); + + id_map idm; + include_set ins; + + script_ = &s; + runner_ = nullptr; + group_ = script_; + id_map_ = &idm; + include_set_ = &ins; + scope_ = nullptr; + + //@@ PAT TODO: set pbase_? + + // Start location of the implied script group is the beginning of + // the file. End location -- end of the file. 
+ // + group_->start_loc_ = location (path_, 1, 1); + + token t (pre_parse_scope_body ()); + + if (t.type != type::eos) + fail (t) << "stray " << t; + + group_->end_loc_ = get_location (t); + } + + bool parser:: + pre_parse_demote_group_scope (unique_ptr<scope>& s) + { + // See if this turned out to be an explicit test scope. An explicit + // test scope contains a single test, only variable assignments in + // setup and nothing in teardown. Plus only the group can have the + // description. Because we apply this recursively, also disqualify + // a test scope that has an if-condition. + // + // If we have a chain, then all the scopes must be demotable. So we + // first check if this scope is demotable and if so then recurse for + // the next in chain. + // + group& g (static_cast<group&> (*s)); + + auto& sc (g.scopes); + auto& su (g.setup_); + auto& td (g.tdown_); + + test* t; + if (sc.size () == 1 && + (t = dynamic_cast<test*> (sc.back ().get ())) != nullptr && + find_if ( + su.begin (), su.end (), + [] (const line& l) { + return l.type != line_type::var; + }) == su.end () && + + td.empty () && + !t->desc && + !t->if_cond_) + { + if (g.if_chain != nullptr && + !pre_parse_demote_group_scope (g.if_chain)) + return false; + + // It would have been nice to reuse the test object and only throw + // away the group. However, the merged scope has to use id_path and + // wd_path of the group. So to keep things simple we are going to + // throw away both and create a new test object. + // + // We always use the group's id since the test cannot have a + // user-provided one. + // + unique_ptr<test> m (new test (g.id_path.leaf ().string (), *group_)); + + // Move the description, if-condition, and if-chain. + // + m->desc = move (g.desc); + m->if_cond_ = move (g.if_cond_); + m->if_chain = move (g.if_chain); + + // Merge the lines of the group and the test. + // + if (su.empty ()) + m->tests_ = move (t->tests_); + else + { + m->tests_ = move (su); // Should come first. 
+ m->tests_.insert (m->tests_.end (), + make_move_iterator (t->tests_.begin ()), + make_move_iterator (t->tests_.end ())); + } + + // Use start/end locations of the outer scope. + // + m->start_loc_ = g.start_loc_; + m->end_loc_ = g.end_loc_; + + s = move (m); + return true; + } + + return false; + } + + token parser:: + pre_parse_scope_body () + { + // enter: next token is first token of scope body + // leave: rcbrace or eos (returned) + + token t; + type tt; + + // Parse lines (including nested scopes) until we see '}' or eos. + // + for (;;) + { + // Start lexing each line recognizing leading '.+-{}'. + // + tt = peek (lexer_mode::first_token); + + // Handle description. + // + optional<description> d; + if (tt == type::colon) + d = pre_parse_leading_description (t, tt); + + // Determine the line type by peeking at the first token. + // + switch (tt) + { + case type::eos: + case type::rcbrace: + { + next (t, tt); + + if (d) + fail (t) << "description before " << t; + + return t; + } + case type::lcbrace: + { + // Nested scope. + // + next (t, tt); // Get '{'. + const location sl (get_location (t)); + + // First check that we don't have any teardown commands yet. + // This will detect things like variable assignments between + // scopes. + // + if (!group_->tdown_.empty ()) + { + location tl ( + group_->tdown_.back ().tokens.front ().location ()); + + fail (sl) << "scope after teardown" << + info (tl) << "last teardown line appears here"; + } + + // If there is no user-supplied id, use the line number + // (prefixed with include id) as the scope id. + // + const string& id ( + d && !d->id.empty () + ? 
d->id + : insert_id (id_prefix_ + to_string (sl.line), sl)); + + unique_ptr<scope> g (pre_parse_scope_block (t, tt, id)); + g->desc = move (d); + + pre_parse_demote_group_scope (g); + group_->scopes.push_back (move (g)); + continue; + } + default: + { + pre_parse_line (t, tt, d); + assert (tt == type::newline); + break; + } + } + } + } + + unique_ptr<group> parser:: + pre_parse_scope_block (token& t, type& tt, const string& id) + { + // enter: lcbrace + // leave: newline after rcbrace + + const location sl (get_location (t)); + + if (next (t, tt) != type::newline) + fail (t) << "expected newline after '{'"; + + // Push group. + // + id_map idm; + include_set ins; + + unique_ptr<group> g (new group (id, *group_)); + + id_map* om (id_map_); + id_map_ = &idm; + + include_set* os (include_set_); + include_set_ = &ins; + + group* og (group_); + group_ = g.get (); + + // Parse body. + // + group_->start_loc_ = sl; + token e (pre_parse_scope_body ()); + group_->end_loc_ = get_location (e); + + // Pop group. + // + group_ = og; + include_set_ = os; + id_map_ = om; + + if (e.type != type::rcbrace) + fail (e) << "expected '}' at the end of the scope"; + + if (next (t, tt) != type::newline) + fail (t) << "expected newline after '}'"; + + return g; + } + + // Parse a logical line (as well as scope-if since the only way to + // recognize it is to parse the if line). + // + // If one is true then only parse one line returning an indication of + // whether the line ended with a semicolon. + // + bool parser:: + pre_parse_line (token& t, type& tt, + optional<description>& d, + lines* ls, + bool one) + { + // enter: next token is peeked at (type in tt) + // leave: newline + + // Note: token is only peeked at. + // + const location ll (get_location (peeked ())); + + // Determine the line type/start token. + // + line_type lt; + type st (type::eos); + + switch (tt) + { + case type::dot: + { + // Directive. + // + next (t, tt); // Skip dot. + next (t, tt); // Get the directive name. 
+ + if (tt != type::word || t.qtype != quote_type::unquoted) + fail (t) << "expected directive name instead of " << t; + + // Make sure we are not inside a test (i.e., after semi). + // + if (ls != nullptr) + fail (ll) << "directive after ';'"; + + const string& n (t.value); + + if (n == "include") + pre_parse_directive (t, tt); + else + fail (t) << "unknown directive '" << n << "'"; + + assert (tt == type::newline); + return false; + } + case type::plus: + case type::minus: + { + // Setup/teardown command. + // + st = tt; + + next (t, tt); // Start saving tokens from the next one. + replay_save (); + next (t, tt); + + // See if this is a special command. + // + lt = line_type::cmd; // Default. + + if (tt == type::word && t.qtype == quote_type::unquoted) + { + const string& n (t.value); + + if (n == "if") lt = line_type::cmd_if; + else if (n == "if!") lt = line_type::cmd_ifn; + } + + break; + } + default: + { + // Either variable assignment or test command. + // + replay_save (); // Start saving tokens from the current one. + next (t, tt); + + // Decide whether this is a variable assignment or a command. + // + // It is an assignment if the first token is an unquoted name and + // the next token is an assign/append/prepend operator. Assignment + // to a computed variable name must use the set builtin. + // + // Note also thatspecial commands take precedence over variable + // assignments. + // + lt = line_type::cmd; // Default. + + if (tt == type::word && t.qtype == quote_type::unquoted) + { + const string& n (t.value); + + if (n == "if") lt = line_type::cmd_if; + else if (n == "if!") lt = line_type::cmd_ifn; + else if (n == "elif") lt = line_type::cmd_elif; + else if (n == "elif!") lt = line_type::cmd_elifn; + else if (n == "else") lt = line_type::cmd_else; + else if (n == "end") lt = line_type::cmd_end; + else + { + // Switch the recognition of leading variable assignments for + // the next token. 
This is safe to do because we know we + // cannot be in the quoted mode (since the current token is + // not quoted). + // + type p (peek (lexer_mode::second_token)); + + if (p == type::assign || + p == type::prepend || + p == type::append) + { + lt = line_type::var; + st = p; + } + } + } + + break; + } + } + + // Pre-parse the line keeping track of whether it ends with a semi. + // + bool semi (false); + + line ln; + switch (lt) + { + case line_type::var: + { + // Check if we are trying to modify any of the special aliases + // ($*, $N, $~, $@). + // + string& n (t.value); + + if (n == "*" || n == "~" || n == "@" || digit (n)) + fail (t) << "attempt to set '" << n << "' variable directly"; + + // Pre-enter the variables now while we are executing serially. + // Once parallel, it becomes a lot harder to do. + // + ln.var = &script_->var_pool.insert (move (n)); + + next (t, tt); // Assignment kind. + parse_variable_line (t, tt); + + semi = (tt == type::semi); + + if (tt == type::semi) + next (t, tt); + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + break; + } + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + case line_type::cmd_end: + next (t, tt); // Skip to start of command. + // Fall through. + case line_type::cmd: + { + pair<command_expr, here_docs> p; + + if (lt != line_type::cmd_else && lt != line_type::cmd_end) + p = parse_command_expr (t, tt); + + // Colon and semicolon are only valid in test command lines and + // after 'end' in if-else. Note that we still recognize them + // lexically, they are just not valid tokens per the grammar. 
+ // + if (tt != type::newline) + { + if (lt != line_type::cmd && lt != line_type::cmd_end) + fail (t) << "expected newline instead of " << t; + + switch (st) + { + case type::plus: fail (t) << t << " after setup command" << endf; + case type::minus: fail (t) << t << " after teardown command" << endf; + } + } + + switch (tt) + { + case type::colon: + { + if (d) + fail (ll) << "both leading and trailing descriptions"; + + d = parse_trailing_description (t, tt); + break; + } + case type::semi: + { + semi = true; + next (t, tt); // Get newline. + break; + } + } + + if (tt != type::newline) + fail (t) << "expected newline instead of " << t; + + parse_here_documents (t, tt, p); + break; + } + } + + assert (tt == type::newline); + + // Stop saving and get the tokens. + // + lines ls_data; + + if (ls == nullptr) + ls = &ls_data; + + ln.type = lt; + ln.tokens = replay_data (); + ls->push_back (move (ln)); + + if (lt == line_type::cmd_if || lt == line_type::cmd_ifn) + { + semi = pre_parse_if_else (t, tt, d, *ls); + + // If this turned out to be scope-if, then ls is empty, semi is + // false, and none of the below logic applies. + // + if (ls->empty ()) + return semi; + } + + // Unless we were told where to put it, decide where it actually goes. + // + if (ls == &ls_data) + { + // First pre-check variable and variable-if: by themselves (i.e., + // without a trailing semicolon) they are treated as either setup or + // teardown without plus/minus. Also handle illegal line types. + // + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + case line_type::cmd_end: + { + fail (ll) << lt << " without preceding 'if'" << endf; + } + case line_type::cmd_if: + case line_type::cmd_ifn: + { + // See if this is a variable-only command-if. + // + if (find_if (ls_data.begin (), ls_data.end (), + [] (const line& l) { + return l.type == line_type::cmd; + }) != ls_data.end ()) + break; + } + // Fall through. 
+ case line_type::var: + { + // If there is a semicolon after the variable then we assume + // it is part of a test (there is no reason to use semicolons + // after variables in the group scope). Otherwise -- setup or + // teardown. + // + if (!semi) + { + if (d) + { + if (lt == line_type::var) + fail (ll) << "description before setup/teardown variable"; + else + fail (ll) << "description before/after setup/teardown " + << "variable-if"; + } + + // If we don't have any nested scopes or teardown commands, + // then we assume this is a setup, otherwise -- teardown. + // + ls = group_->scopes.empty () && group_->tdown_.empty () + ? &group_->setup_ + : &group_->tdown_; + } + break; + } + default: + break; + } + + // If pre-check didn't change the destination, then it's a test. + // + if (ls == &ls_data) + { + switch (st) + { + // Setup. + // + case type::plus: + { + if (d) + fail (ll) << "description before setup command"; + + if (!group_->scopes.empty ()) + fail (ll) << "setup command after tests"; + + if (!group_->tdown_.empty ()) + fail (ll) << "setup command after teardown"; + + ls = &group_->setup_; + break; + } + + // Teardown. + // + case type::minus: + { + if (d) + fail (ll) << "description before teardown command"; + + ls = &group_->tdown_; + break; + } + + // Test command or variable. + // + default: + { + // First check that we don't have any teardown commands yet. + // This will detect things like variable assignments between + // tests. + // + if (!group_->tdown_.empty ()) + { + location tl ( + group_->tdown_.back ().tokens.front ().location ()); + + fail (ll) << "test after teardown" << + info (tl) << "last teardown line appears here"; + } + break; + } + } + } + + // If the destination changed, then move the data over. + // + if (ls != &ls_data) + ls->insert (ls->end (), + make_move_iterator (ls_data.begin ()), + make_move_iterator (ls_data.end ())); + } + + // If this command ended with a semicolon, then the next one should + // go to the same place. 
+ // + if (semi && !one) + { + tt = peek (lexer_mode::first_token); + const location ll (get_location (peeked ())); + + switch (tt) + { + case type::colon: + fail (ll) << "description inside test" << endf; + case type::eos: + case type::rcbrace: + case type::lcbrace: + fail (ll) << "expected another line after ';'" << endf; + case type::plus: + fail (ll) << "setup command in test" << endf; + case type::minus: + fail (ll) << "teardown command in test" << endf; + default: + semi = pre_parse_line (t, tt, d, ls); + assert (tt == type::newline); // End of last test line. + } + } + + // If this is a test then create implicit test scope. + // + if (ls == &ls_data) + { + // If there is no user-supplied id, use the line number (prefixed + // with include id) as the scope id. + // + const string& id ( + d && !d->id.empty () + ? d->id + : insert_id (id_prefix_ + to_string (ll.line), ll)); + + unique_ptr<test> p (new test (id, *group_)); + + p->desc = move (d); + + p->start_loc_ = ll; + p->tests_ = move (ls_data); + p->end_loc_ = get_location (t); + + group_->scopes.push_back (move (p)); + } + + return semi; + } + + bool parser:: + pre_parse_if_else (token& t, type& tt, + optional<description>& d, + lines& ls) + { + // enter: <newline> (previous line) + // leave: <newline> + + tt = peek (lexer_mode::first_token); + + return tt == type::lcbrace + ? pre_parse_if_else_scope (t, tt, d, ls) + : pre_parse_if_else_command (t, tt, d, ls); + } + + bool parser:: + pre_parse_if_else_scope (token& t, type& tt, + optional<description>& d, + lines& ls) + { + // enter: peeked token of next line (lcbrace) + // leave: newline + + assert (ls.size () == 1); // The if/if! line. + + // Use if/if! as the entire scope chain location. + // + const location sl (ls.back ().tokens.front ().location ()); + + // First check that we don't have any teardown commands yet. This + // will detect things like variable assignments between scopes. 
+ // + if (!group_->tdown_.empty ()) + { + location tl ( + group_->tdown_.back ().tokens.front ().location ()); + + fail (sl) << "scope after teardown" << + info (tl) << "last teardown line appears here"; + } + + // If there is no user-supplied id, use the line number (prefixed with + // include id) as the scope id. Note that we use the same id for all + // scopes in the chain. + // + const string& id ( + d && !d->id.empty () + ? d->id + : insert_id (id_prefix_ + to_string (sl.line), sl)); + + unique_ptr<scope> root; + + // Parse the if-else scope chain. + // + line_type bt (line_type::cmd_if); // Current block. + + for (unique_ptr<scope>* ps (&root);; ps = &(*ps)->if_chain) + { + next (t, tt); // Get '{'. + + { + unique_ptr<group> g (pre_parse_scope_block (t, tt, id)); + + // If-condition. + // + g->if_cond_ = move (ls.back ()); + ls.clear (); + + // Description. For now we just duplicate it through the entire + // chain. + // + g->desc = (ps == &root ? d : root->desc); + + *ps = move (g); + } + + // See if what comes next is another chain element. + // + line_type lt (line_type::cmd_end); + + type pt (peek (lexer_mode::first_token)); + const token& p (peeked ()); + const location ll (get_location (p)); + + if (pt == type::word && p.qtype == quote_type::unquoted) + { + if (p.value == "elif") lt = line_type::cmd_elif; + else if (p.value == "elif!") lt = line_type::cmd_elifn; + else if (p.value == "else") lt = line_type::cmd_else; + } + + if (lt == line_type::cmd_end) + break; + + // Check if-else block sequencing. + // + if (bt == line_type::cmd_else) + { + if (lt == line_type::cmd_else || + lt == line_type::cmd_elif || + lt == line_type::cmd_elifn) + fail (ll) << lt << " after " << bt; + } + + // Parse just the condition line using pre_parse_line() in the "one" + // mode and into ls so that it is naturally picked up as if_cond_ on + // the next iteration. 
+ // + optional<description> td; + bool semi (pre_parse_line (t, (tt = pt), td, &ls, true)); + assert (ls.size () == 1 && ls.back ().type == lt); + assert (tt == type::newline); + + // For any of these lines trailing semi or description is illegal. + // + // @@ Not the exact location of semi/colon. + // + if (semi) + fail (ll) << "';' after " << lt; + + if (td) + fail (ll) << "description after " << lt; + + // Make sure what comes next is another scope. + // + tt = peek (lexer_mode::first_token); + + if (tt != type::lcbrace) + fail (ll) << "expected scope after " << lt; + + // Update current if-else block. + // + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: bt = line_type::cmd_elif; break; + case line_type::cmd_else: bt = line_type::cmd_else; break; + default: break; + } + } + + pre_parse_demote_group_scope (root); + group_->scopes.push_back (move (root)); + return false; // We never end with a semi. + } + + bool parser:: + pre_parse_if_else_command (token& t, type& tt, + optional<description>& d, + lines& ls) + { + // enter: peeked first token of next line (type in tt) + // leave: newline + + // Parse lines until we see closing 'end'. Nested if-else blocks are + // handled recursively. + // + for (line_type bt (line_type::cmd_if); // Current block. + ; + tt = peek (lexer_mode::first_token)) + { + const location ll (get_location (peeked ())); + + switch (tt) + { + case type::colon: + fail (ll) << "description inside " << bt << endf; + case type::eos: + case type::rcbrace: + case type::lcbrace: + fail (ll) << "expected closing 'end'" << endf; + case type::plus: + fail (ll) << "setup command inside " << bt << endf; + case type::minus: + fail (ll) << "teardown command inside " << bt << endf; + } + + // Parse one line. Note that this one line can still be multiple + // lines in case of if-else. In this case we want to view it as + // cmd_if, not cmd_end. Thus remember the start position of the + // next logical line. 
+ // + size_t i (ls.size ()); + + optional<description> td; + bool semi (pre_parse_line (t, tt, td, &ls, true)); + assert (tt == type::newline); + + line_type lt (ls[i].type); + + // First take care of 'end'. + // + if (lt == line_type::cmd_end) + { + if (td) + { + if (d) + fail (ll) << "both leading and trailing descriptions"; + + d = move (td); + } + + return semi; + } + + // For any other line trailing semi or description is illegal. + // + // @@ Not the exact location of semi/colon. + // + if (semi) + fail (ll) << "';' inside " << bt; + + if (td) + fail (ll) << "description inside " << bt; + + // Check if-else block sequencing. + // + if (bt == line_type::cmd_else) + { + if (lt == line_type::cmd_else || + lt == line_type::cmd_elif || + lt == line_type::cmd_elifn) + fail (ll) << lt << " after " << bt; + } + + // Update current if-else block. + // + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: bt = line_type::cmd_elif; break; + case line_type::cmd_else: bt = line_type::cmd_else; break; + default: break; + } + } + } + + void parser:: + pre_parse_directive (token& t, type& tt) + { + // enter: directive name + // leave: newline + + string d (t.value); + location l (get_location (t)); + next (t, tt); + + // Suspend pre-parsing since we want to really parse the line, with + // expansion, etc. Also parse the whole line in one go. + // + names args; + + if (tt != type::newline) + { + pre_parse_ = false; + args = parse_names (t, tt, + pattern_mode::expand, + false, + "directive argument", + nullptr); + pre_parse_ = true; + } + + if (tt != type::newline) + fail (t) << t << " after directive"; + + if (d == "include") + pre_parse_include_line (move (args), move (l)); + else + assert (false); // Unhandled directive. + } + + void parser:: + pre_parse_include_line (names args, location dl) + { + auto i (args.begin ()); + + // Process options. 
+ // + bool once (false); + for (; i != args.end () && i->simple (); ++i) + { + if (i->value == "--once") + once = true; + else + break; + } + + // Process arguments. + // + auto include = [&dl, once, this] (string n) // throw invalid_path + { + // It may be tempting to use relative paths in diagnostics but it + // most likely will be misguided. + // + auto enter_path = [this] (string n) -> const path& + { + path p (move (n)); + + if (p.relative ()) + p = path_->directory () / p; + + p.normalize (); + + return *script_->paths_.insert (move (p)).first; + }; + + const path& p (enter_path (move (n))); + + if (include_set_->insert (p).second || !once) + { + try + { + ifdstream ifs (p); + lexer l (ifs, p, lexer_mode::command_line); + + const path* op (path_); + path_ = &p; + + lexer* ol (lexer_); + set_lexer (&l); + + string oip (id_prefix_); + id_prefix_ += to_string (dl.line); + id_prefix_ += '-'; + id_prefix_ += p.leaf ().base ().string (); + id_prefix_ += '-'; + + token t (pre_parse_scope_body ()); + + if (t.type != type::eos) + fail (t) << "stray " << t; + + id_prefix_ = oip; + set_lexer (ol); + path_ = op; + } + catch (const io_error& e) + { + fail (dl) << "unable to read testscript " << p << ": " << e; + } + } + }; + + for (; i != args.end (); ++i) + { + name& n (*i); + + try + { + if (n.simple () && !n.empty ()) + { + include (move (n.value)); + continue; + } + } + catch (const invalid_path&) {} // Fall through. + + diag_record dr (fail (dl)); + dr << "invalid testscript include path "; + to_stream (dr.os, n, true); // Quote. + } + } + + description parser:: + pre_parse_leading_description (token& t, type& tt) + { + // enter: peeked at colon (type in tt) + // leave: peeked at in the first_token mode (type in tt) + + assert (tt == type::colon); + + description r; + location loc (get_location (peeked ())); + + string sp; // Strip prefix. + size_t sn (0); // Strip prefix length. + + for (size_t ln (1); tt == type::colon; ++ln) + { + next (t, tt); // Get ':'. 
+ + mode (lexer_mode::description_line); + next (t, tt); + + // If it is empty, then we get newline right away. + // + const string& l (tt == type::word ? t.value : string ()); + + if (tt == type::word) + next (t, tt); // Get newline. + + assert (tt == type::newline); + + // If this is the first line, then get the "strip prefix", i.e., + // the beginning of the line that contains only whitespaces. If + // the subsequent lines start with the same prefix, then we strip + // it. + // + if (ln == 1) + { + sn = l.find_first_not_of (" \t"); + sp.assign (l, 0, sn == string::npos ? (sn = 0) : sn); + } + + // Apply strip prefix. + // + size_t i (l.compare (0, sn, sp) == 0 ? sn : 0); + + // Strip trailing whitespaces, as a courtesy to the user. + // + size_t j (l.find_last_not_of (" \t")); + j = j != string::npos ? j + 1 : i; + + size_t n (j - i); // [i, j) is our data. + + if (ln == 1) + { + // First line. Ignore if it's blank. + // + if (n == 0) + --ln; // Stay as if on the first line. + else + { + // Otherwise, see if it is the id. Failing that, we assume it is + // the summary until we see the next line. + // + (l.find_first_of (" \t.", i) >= j ? r.id : r.summary). + assign (l, i, n); + + // If this is an id then validate it. + // + if (!r.id.empty ()) + { + for (char c: r.id) + { + if (!(alnum (c) || c == '_' || c == '-' || c == '+')) + fail (loc) << "illegal character '" << c + << "' in test id '" << r.id << "'"; + } + } + } + } + else if (ln == 2) + { + // If this is a blank then whatever we have in id/summary is good. + // Otherwise, if we have id, then assume this is summary until we + // see the next line. And if not, then move what we (wrongly) + // assumed to be the summary to details. + // + if (n != 0) + { + if (!r.id.empty ()) + r.summary.assign (l, i, n); + else + { + r.details = move (r.summary); + r.details += '\n'; + r.details.append (l, i, n); + + r.summary.clear (); + } + } + } + // Don't treat line 3 as special if we have given up on id/summary.
+ // + else if (ln == 3 && r.details.empty ()) + { + // If this is a blank and we have id and/or summary, then we are + // good. Otherwise, if we have both, then move what we (wrongly) + // assumed to be id and summary to details. + // + if (n != 0) + { + if (!r.id.empty () && !r.summary.empty ()) + { + r.details = move (r.id); + r.details += '\n'; + r.details += r.summary; + r.details += '\n'; + + r.id.clear (); + r.summary.clear (); + } + + r.details.append (l, i, n); + } + } + else + { + if (!r.details.empty ()) + r.details += '\n'; + + r.details.append (l, i, n); + } + + tt = peek (lexer_mode::first_token); + } + + // Zap trailing newlines in the details. + // + size_t p (r.details.find_last_not_of ('\n')); + if (p != string::npos && ++p != r.details.size ()) + r.details.resize (p); + + if (r.empty ()) + fail (loc) << "empty description"; + + // Insert id into the id map if we have one. + // + if (!r.id.empty ()) + insert_id (r.id, loc); + + return r; + } + + description parser:: + parse_trailing_description (token& t, type& tt) + { + // enter: colon + // leave: newline + + // Parse one-line trailing description. + // + description r; + + // @@ Would be nice to omit trailing description from replay. + // + const location loc (get_location (t)); + + mode (lexer_mode::description_line); + next (t, tt); + + // If it is empty, then we will get newline right away. + // + if (tt == type::word) + { + string l (move (t.value)); + trim (l); // Strip leading/trailing whitespaces. + + // Decide whether this is id or summary. + // + (l.find_first_of (" \t") == string::npos ? r.id : r.summary) = + move (l); + + next (t, tt); // Get newline. + } + + assert (tt == type::newline); // Lexer mode invariant. + + if (r.empty ()) + fail (loc) << "empty description"; + + // Insert id into the id map if we have one. 
+ // + if (pre_parse_ && !r.id.empty ()) + insert_id (r.id, loc); + + return r; + } + + value parser:: + parse_variable_line (token& t, type& tt) + { + // enter: assignment + // leave: newline or semi + + // We cannot reuse the value mode since it will recognize { which we + // want to treat as a literal. + // + mode (lexer_mode::variable_line); + next (t, tt); + + // Parse value attributes if any. Note that it's ok not to have + // anything after the attributes (e.g., foo=[null]). + // + attributes_push (t, tt, true); + + // @@ PAT: Should we expand patterns? Note that it will only be + // simple ones since we have disabled {}. Also, what would be the + // pattern base directory? + // + return tt != type::newline && tt != type::semi + ? parse_value (t, tt, + pattern_mode::ignore, + "variable value", + nullptr) + : value (names ()); + } + + command_expr parser:: + parse_command_line (token& t, type& tt) + { + // enter: first token of the command line + // leave: <newline> + + // Note: this one is only used during execution. + + pair<command_expr, here_docs> p (parse_command_expr (t, tt)); + + switch (tt) + { + case type::colon: parse_trailing_description (t, tt); break; + case type::semi: next (t, tt); break; // Get newline. + } + + assert (tt == type::newline); + + parse_here_documents (t, tt, p); + assert (tt == type::newline); + + return move (p.first); + } + + // Parse the regular expression representation (non-empty string value + // framed with introducer characters and optionally followed by flag + // characters from the {di} set, for example '/foo/id') into + // components. Also return end-of-parsing position if requested, + // otherwise treat any unparsed characters left as an error. + // + struct regex_parts + { + string value; + char intro; + string flags; // Combination of characters from {di} set. + + // Create a special empty object. 
+ // + regex_parts (): intro ('\0') {} + + regex_parts (string v, char i, string f) + : value (move (v)), intro (i), flags (move (f)) {} + }; + + static regex_parts + parse_regex (const string& s, + const location& l, + const char* what, + size_t* end = nullptr) + { + if (s.empty ()) + fail (l) << "no introducer character in " << what; + + size_t p (s.find (s[0], 1)); // Find terminating introducer. + + if (p == string::npos) + fail (l) << "no closing introducer character in " << what; + + size_t rn (p - 1); // Regex length. + if (rn == 0) + fail (l) << what << " is empty"; + + // Find end-of-flags position. + // + size_t fp (++p); // Save flags starting position. + for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; + + // If string end is not reached then report invalid flags, unless + // end-of-parsing position is requested (which means regex is just a + // prefix). + // + if (s[p] != '\0' && end == nullptr) + fail (l) << "junk at the end of " << what; + + if (end != nullptr) + *end = p; + + return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); + } + + pair<command_expr, parser::here_docs> parser:: + parse_command_expr (token& t, type& tt) + { + // enter: first token of the command line + // leave: <newline> + + command_expr expr; + + // OR-ed to an implied false for the first term. + // + expr.push_back ({expr_operator::log_or, command_pipe ()}); + + command c; // Command being assembled. + + // Make sure the command makes sense. + // + auto check_command = [&c, this] (const location& l, bool last) + { + if (c.out.type == redirect_type::merge && + c.err.type == redirect_type::merge) + fail (l) << "stdout and stderr redirected to each other"; + + if (!last && c.out.type != redirect_type::none) + fail (l) << "stdout is both redirected and piped"; + }; + + // Check that the introducer character differs from '/' if the + // portable path modifier is specified. 
Must be called before + // parse_regex() (see below) to make sure its diagnostics is + // meaningful. + // + // Note that the portable path modifier assumes '/' to be a valid + // regex character and so makes it indistinguishable from the + // terminating introducer. + // + auto check_regex_mod = [this] (const string& mod, + const string& re, + const location& l, + const char* what) + { + // Handles empty regex properly. + // + if (mod.find ('/') != string::npos && re[0] == '/') + fail (l) << "portable path modifier and '/' introducer in " + << what; + }; + + // Pending positions where the next word should go. + // + enum class pending + { + none, + program, + in_string, + in_document, + in_file, + out_merge, + out_string, + out_str_regex, + out_document, + out_doc_regex, + out_file, + err_merge, + err_string, + err_str_regex, + err_document, + err_doc_regex, + err_file, + clean + }; + pending p (pending::program); + string mod; // Modifiers for pending in_* and out_* positions. + here_docs hd; // Expected here-documents. + + // Add the next word to either one of the pending positions or to + // program arguments by default. + // + auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( + string&& w, const location& l) + { + auto add_merge = [&l, this] (redirect& r, const string& w, int fd) + { + try + { + size_t n; + if (stoi (w, &n) == fd && n == w.size ()) + { + r.fd = fd; + return; + } + } + catch (const exception&) {} // Fall through. + + fail (l) << (fd == 1 ? 
"stderr" : "stdout") << " merge redirect " + << "file descriptor must be " << fd; + }; + + auto add_here_str = [] (redirect& r, string&& w) + { + if (r.modifiers.find (':') == string::npos) + w += '\n'; + r.str = move (w); + }; + + auto add_here_str_regex = [&l, &check_regex_mod] ( + redirect& r, int fd, string&& w) + { + const char* what (nullptr); + switch (fd) + { + case 1: what = "stdout regex redirect"; break; + case 2: what = "stderr regex redirect"; break; + } + + check_regex_mod (r.modifiers, w, l, what); + + regex_parts rp (parse_regex (w, l, what)); + + regex_lines& re (r.regex); + re.intro = rp.intro; + + re.lines.emplace_back ( + l.line, l.column, move (rp.value), move (rp.flags)); + + // Add final blank line unless suppressed. + // + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + if (r.modifiers.find (':') == string::npos) + re.lines.emplace_back (l.line, l.column, string (), false); + }; + + auto parse_path = [&l, this] (string&& w, const char* what) -> path + { + try + { + path p (move (w)); + + if (!p.empty ()) + { + p.normalize (); + return p; + } + + fail (l) << "empty " << what << endf; + } + catch (const invalid_path& e) + { + fail (l) << "invalid " << what << " '" << e.path << "'" << endf; + } + }; + + auto add_file = [&parse_path] (redirect& r, int fd, string&& w) + { + const char* what (nullptr); + switch (fd) + { + case 0: what = "stdin redirect path"; break; + case 1: what = "stdout redirect path"; break; + case 2: what = "stderr redirect path"; break; + } + + r.file.path = parse_path (move (w), what); + }; + + switch (p) + { + case pending::none: c.arguments.push_back (move (w)); break; + case pending::program: + c.program = parse_path (move (w), "program path"); + break; + + case pending::out_merge: add_merge (c.out, w, 2); break; + case pending::err_merge: add_merge (c.err, w, 1); break; + + case pending::in_string: add_here_str (c.in, move (w)); break; + case 
pending::out_string: add_here_str (c.out, move (w)); break; + case pending::err_string: add_here_str (c.err, move (w)); break; + + case pending::out_str_regex: + { + add_here_str_regex (c.out, 1, move (w)); + break; + } + case pending::err_str_regex: + { + add_here_str_regex (c.err, 2, move (w)); + break; + } + + // These are handled specially below. + // + case pending::in_document: + case pending::out_document: + case pending::err_document: + case pending::out_doc_regex: + case pending::err_doc_regex: assert (false); break; + + case pending::in_file: add_file (c.in, 0, move (w)); break; + case pending::out_file: add_file (c.out, 1, move (w)); break; + case pending::err_file: add_file (c.err, 2, move (w)); break; + + case pending::clean: + { + cleanup_type t; + switch (mod[0]) // Ok, if empty + { + case '!': t = cleanup_type::never; break; + case '?': t = cleanup_type::maybe; break; + default: t = cleanup_type::always; break; + } + + c.cleanups.push_back ( + {t, parse_path (move (w), "cleanup path")}); + break; + } + } + + p = pending::none; + mod.clear (); + }; + + // Make sure we don't have any pending positions to fill. 
+ // + auto check_pending = [&p, this] (const location& l) + { + const char* what (nullptr); + + switch (p) + { + case pending::none: break; + case pending::program: what = "program"; break; + case pending::in_string: what = "stdin here-string"; break; + case pending::in_document: what = "stdin here-document end"; break; + case pending::in_file: what = "stdin file"; break; + case pending::out_merge: what = "stdout file descriptor"; break; + case pending::out_string: what = "stdout here-string"; break; + case pending::out_document: what = "stdout here-document end"; break; + case pending::out_file: what = "stdout file"; break; + case pending::err_merge: what = "stderr file descriptor"; break; + case pending::err_string: what = "stderr here-string"; break; + case pending::err_document: what = "stderr here-document end"; break; + case pending::err_file: what = "stderr file"; break; + case pending::clean: what = "cleanup path"; break; + + case pending::out_str_regex: + { + what = "stdout here-string regex"; + break; + } + case pending::err_str_regex: + { + what = "stderr here-string regex"; + break; + } + case pending::out_doc_regex: + { + what = "stdout here-document regex end"; + break; + } + case pending::err_doc_regex: + { + what = "stderr here-document regex end"; + break; + } + } + + if (what != nullptr) + fail (l) << "missing " << what; + }; + + // Parse the redirect operator. + // + auto parse_redirect = + [&c, &expr, &p, &mod, this] (token& t, const location& l) + { + // Our semantics is the last redirect seen takes effect. + // + assert (p == pending::none && mod.empty ()); + + // See if we have the file descriptor. 
+ // + unsigned long fd (3); + if (!t.separated) + { + if (c.arguments.empty ()) + fail (l) << "missing redirect file descriptor"; + + const string& s (c.arguments.back ()); + + try + { + size_t n; + fd = stoul (s, &n); + + if (n != s.size () || fd > 2) + throw invalid_argument (string ()); + } + catch (const exception&) + { + fail (l) << "invalid redirect file descriptor '" << s << "'"; + } + + c.arguments.pop_back (); + } + + type tt (t.type); + + // Validate/set default file descriptor. + // + switch (tt) + { + case type::in_pass: + case type::in_null: + case type::in_str: + case type::in_doc: + case type::in_file: + { + if ((fd = fd == 3 ? 0 : fd) != 0) + fail (l) << "invalid in redirect file descriptor " << fd; + + if (!expr.back ().pipe.empty ()) + fail (l) << "stdin is both piped and redirected"; + + break; + } + case type::out_pass: + case type::out_null: + case type::out_trace: + case type::out_merge: + case type::out_str: + case type::out_doc: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + if ((fd = fd == 3 ? 1 : fd) == 0) + fail (l) << "invalid out redirect file descriptor " << fd; + + break; + } + } + + mod = move (t.value); + + redirect_type rt (redirect_type::none); + switch (tt) + { + case type::in_pass: + case type::out_pass: rt = redirect_type::pass; break; + + case type::in_null: + case type::out_null: rt = redirect_type::null; break; + + case type::out_trace: rt = redirect_type::trace; break; + + case type::out_merge: rt = redirect_type::merge; break; + + case type::in_str: + case type::out_str: + { + bool re (mod.find ('~') != string::npos); + assert (tt == type::out_str || !re); + + rt = re + ? redirect_type::here_str_regex + : redirect_type::here_str_literal; + + break; + } + + case type::in_doc: + case type::out_doc: + { + bool re (mod.find ('~') != string::npos); + assert (tt == type::out_doc || !re); + + rt = re + ? 
redirect_type::here_doc_regex + : redirect_type::here_doc_literal; + + break; + } + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: rt = redirect_type::file; break; + } + + redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err); + r = redirect (rt); + + // Don't move as still may be used for pending here-document end + // marker processing. + // + r.modifiers = mod; + + switch (rt) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: + break; + case redirect_type::merge: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_merge; break; + case 2: p = pending::err_merge; break; + } + break; + case redirect_type::here_str_literal: + switch (fd) + { + case 0: p = pending::in_string; break; + case 1: p = pending::out_string; break; + case 2: p = pending::err_string; break; + } + break; + case redirect_type::here_str_regex: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_str_regex; break; + case 2: p = pending::err_str_regex; break; + } + break; + case redirect_type::here_doc_literal: + switch (fd) + { + case 0: p = pending::in_document; break; + case 1: p = pending::out_document; break; + case 2: p = pending::err_document; break; + } + break; + case redirect_type::here_doc_regex: + switch (fd) + { + case 0: assert (false); break; + case 1: p = pending::out_doc_regex; break; + case 2: p = pending::err_doc_regex; break; + } + break; + case redirect_type::file: + switch (fd) + { + case 0: p = pending::in_file; break; + case 1: p = pending::out_file; break; + case 2: p = pending::err_file; break; + } + + // Also sets for stdin, but this is harmless. + // + r.file.mode = tt == type::out_file_ovr + ? redirect_fmode::overwrite + : (tt == type::out_file_app + ? 
redirect_fmode::append + : redirect_fmode::compare); + + break; + + case redirect_type::here_doc_ref: assert (false); break; + } + }; + + // Set pending cleanup type. + // + auto parse_clean = [&p, &mod] (token& t) + { + p = pending::clean; + mod = move (t.value); + }; + + const location ll (get_location (t)); // Line location. + + // Keep parsing chunks of the command line until we see one of the + // "terminators" (newline, semicolon, exit status comparison, etc). + // + location l (ll); + names ns; // Reuse to reduce allocations. + + for (bool done (false); !done; l = get_location (t)) + { + switch (tt) + { + case type::semi: + case type::colon: + case type::newline: + { + done = true; + break; + } + + case type::equal: + case type::not_equal: + { + if (!pre_parse_) + check_pending (l); + + c.exit = parse_command_exit (t, tt); + + // Only a limited set of things can appear after the exit status + // so we check this here. + // + switch (tt) + { + case type::semi: + case type::colon: + case type::newline: + + case type::pipe: + case type::log_or: + case type::log_and: + break; + default: + fail (t) << "unexpected " << t << " after command exit status"; + } + + break; + } + + case type::pipe: + case type::log_or: + case type::log_and: + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::in_doc: + case type::out_str: + case type::out_doc: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + + case type::clean: + { + if (pre_parse_) + { + // The only things we need to handle here are the here-document + // and here-document regex end markers since we need to know + // how many of them to pre-parse after the command. + // + switch (tt) + { + case type::in_doc: + case type::out_doc: + mod = move (t.value); + + bool re (mod.find ('~') != string::npos); + const char* what (re + ?
"here-document regex end marker" + : "here-document end marker"); + + // We require the end marker to be a literal, unquoted word. + // In particular, we don't allow quoted because of cases + // like foo"$bar" (where we will see word 'foo'). + // + next (t, tt); + + // We require the end marker to be an unquoted or completely + // quoted word. The complete quoting becomes important for + // cases like foo"$bar" (where we will see word 'foo'). + // + // For good measure we could have also required it to be + // separated from the following token, but our grammar + // allows one to write >>EOO;. The problematic sequence + // would be >>FOO$bar -- on reparse it will be expanded + // as a single word. + // + if (tt != type::word || t.value.empty ()) + fail (t) << "expected " << what; + + peek (); + const token& p (peeked ()); + if (!p.separated) + { + switch (p.type) + { + case type::dollar: + case type::lparen: + fail (p) << what << " must be literal"; + } + } + + quote_type qt (t.qtype); + switch (qt) + { + case quote_type::unquoted: + qt = quote_type::single; // Treat as single-quoted. + break; + case quote_type::single: + case quote_type::double_: + if (t.qcomp) + break; + // Fall through. + case quote_type::mixed: + fail (t) << "partially-quoted " << what; + } + + regex_parts r; + string end (move (t.value)); + + if (re) + { + check_regex_mod (mod, end, l, what); + + r = parse_regex (end, l, what); + end = move (r.value); // The "cleared" end marker. + } + + bool literal (qt == quote_type::single); + bool shared (false); + + for (const auto& d: hd) + { + if (d.end == end) + { + auto check = [&t, &end, &re, this] (bool c, + const char* what) + { + if (!c) + fail (t) << "different " << what + << " for shared here-document " + << (re ?
"regex '" : "'") << end << "'"; + }; + + check (d.modifiers == mod, "modifiers"); + check (d.literal == literal, "quoting"); + + if (re) + { + check (d.regex == r.intro, "introducers"); + check (d.regex_flags == r.flags, "global flags"); + } + + shared = true; + break; + } + } + + if (!shared) + hd.push_back ( + here_doc { + {}, + move (end), + literal, + move (mod), + r.intro, move (r.flags)}); + + break; + } + + next (t, tt); + break; + } + + // If this is one of the operators/separators, check that we + // don't have any pending locations to be filled. + // + check_pending (l); + + // Note: there is another one in the inner loop below. + // + switch (tt) + { + case type::pipe: + case type::log_or: + case type::log_and: + { + // Check that the previous command makes sense. + // + check_command (l, tt != type::pipe); + expr.back ().pipe.push_back (move (c)); + + c = command (); + p = pending::program; + + if (tt != type::pipe) + { + expr_operator o (tt == type::log_or + ? expr_operator::log_or + : expr_operator::log_and); + expr.push_back ({o, command_pipe ()}); + } + + break; + } + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::in_doc: + case type::out_str: + case type::out_doc: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + parse_redirect (t, l); + break; + } + + case type::clean: + { + parse_clean (t); + break; + } + + default: assert (false); break; + } + + next (t, tt); + break; + } + default: + { + // Here-document end markers are literal (we verified that above + // during pre-parsing) and we need to know whether they were + // quoted. So handle this case specially. 
+ // + { + int fd; + switch (p) + { + case pending::in_document: fd = 0; break; + case pending::out_document: + case pending::out_doc_regex: fd = 1; break; + case pending::err_document: + case pending::err_doc_regex: fd = 2; break; + default: fd = -1; break; + } + + if (fd != -1) + { + here_redirect rd { + expr.size () - 1, expr.back ().pipe.size (), fd}; + + string end (move (t.value)); + + regex_parts r; + + if (p == pending::out_doc_regex || + p == pending::err_doc_regex) + { + // We can't fail here as we already parsed all the end + // markers during pre-parsing stage, and so no need in the + // description. + // + r = parse_regex (end, l, ""); + end = move (r.value); // The "cleared" end marker. + } + + bool shared (false); + for (auto& d: hd) + { + // No need to check that redirects that share here-document + // have the same modifiers, etc. That have been done during + // pre-parsing. + // + if (d.end == end) + { + d.redirects.emplace_back (rd); + shared = true; + break; + } + } + + if (!shared) + hd.push_back ( + here_doc { + {rd}, + move (end), + (t.qtype == quote_type::unquoted || + t.qtype == quote_type::single), + move (mod), + r.intro, move (r.flags)}); + + p = pending::none; + mod.clear (); + + next (t, tt); + break; + } + } + + // Parse the next chunk as simple names to get expansion, etc. + // Note that we do it in the chunking mode to detect whether + // anything in each chunk is quoted. + // + // @@ PAT: should we support pattern expansion? This is even + // fuzzier than the variable case above. Though this is the + // shell semantics. Think what happens when we do rm *.txt? + // + reset_quoted (t); + parse_names (t, tt, + ns, + pattern_mode::ignore, + true, + "command line", + nullptr); + + if (pre_parse_) // Nothing else to do if we are pre-parsing. + break; + + // Process what we got. Determine whether anything inside was + // quoted (note that the current token is "next" and is not part + // of this). 
+ // + bool q ((quoted () - + (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); + + for (name& n: ns) + { + string s; + + try + { + s = value_traits<string>::convert (move (n), nullptr); + } + catch (const invalid_argument&) + { + diag_record dr (fail (l)); + dr << "invalid string value "; + to_stream (dr.os, n, true); // Quote. + } + + // If it is a quoted chunk, then we add the word as is. + // Otherwise we re-lex it. But if the word doesn't contain any + // interesting characters (operators plus quotes/escapes), + // then no need to re-lex. + // + // NOTE: update quoting (script.cxx:to_stream_q()) if adding + // any new characters. + // + if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) + add_word (move (s), l); + else + { + // If the chunk re-parsing results in error, our diagnostics + // will look like this: + // + // <string>:1:4: error: stdout merge redirect file descriptor must be 2 + // testscript:2:5: info: while parsing string '1>&a' + // + auto df = make_diag_frame ( + [s, &l](const diag_record& dr) + { + dr << info (l) << "while parsing string '" << s << "'"; + }); + + // When re-lexing we do "effective escaping" and only for + // ['"\] (quotes plus the backslash itself). In particular, + // there is no way to escape redirects, operators, etc. The + // idea is to prefer quoting except for passing literal + // quotes, for example: + // + // args = \"&foo\" + // cmd $args # cmd &foo + // + // args = 'x=\"foo bar\"' + // cmd $args # cmd x="foo bar" + // + + path name ("<string>"); + istringstream is (s); + lexer lex (is, name, + lexer_mode::command_expansion, + "\'\"\\"); + + // Treat the first "sub-token" as always separated from what + // we saw earlier. + // + // Note that this is not "our" token so we cannot do + // fail(t). Rather we should do fail(l). + // + token t (lex.next ()); + location l (build2::get_location (t, name)); + t.separated = true; + + string w; + bool f (t.type == type::eos); // If the whole thing is empty. 
+ + for (; t.type != type::eos; t = lex.next ()) + { + type tt (t.type); + l = build2::get_location (t, name); + + // Re-lexing double-quotes will recognize $, ( inside as + // tokens so we have to reverse them back. Since we don't + // treat spaces as separators we can be sure we will get + // it right. + // + switch (tt) + { + case type::dollar: w += '$'; continue; + case type::lparen: w += '('; continue; + } + + // Retire the current word. We need to distinguish between + // empty and non-existent (e.g., > vs >""). + // + if (!w.empty () || f) + { + add_word (move (w), l); + f = false; + } + + if (tt == type::word) + { + w = move (t.value); + f = true; + continue; + } + + // If this is one of the operators/separators, check that + // we don't have any pending locations to be filled. + // + check_pending (l); + + // Note: there is another one in the outer loop above. + // + switch (tt) + { + case type::pipe: + case type::log_or: + case type::log_and: + { + // Check that the previous command makes sense. + // + check_command (l, tt != type::pipe); + expr.back ().pipe.push_back (move (c)); + + c = command (); + p = pending::program; + + if (tt != type::pipe) + { + expr_operator o (tt == type::log_or + ? expr_operator::log_or + : expr_operator::log_and); + expr.push_back ({o, command_pipe ()}); + } + + break; + } + + case type::in_pass: + case type::out_pass: + + case type::in_null: + case type::out_null: + + case type::out_trace: + + case type::out_merge: + + case type::in_str: + case type::out_str: + + case type::in_file: + case type::out_file_cmp: + case type::out_file_ovr: + case type::out_file_app: + { + parse_redirect (t, l); + break; + } + + case type::clean: + { + parse_clean (t); + break; + } + + case type::in_doc: + case type::out_doc: + { + fail (l) << "here-document redirect in expansion"; + break; + } + } + } + + // Don't forget the last word. 
+ // + if (!w.empty () || f) + add_word (move (w), l); + } + } + + ns.clear (); + break; + } + } + } + + if (!pre_parse_) + { + // Verify we don't have anything pending to be filled and the + // command makes sense. + // + check_pending (l); + check_command (l, true); + + expr.back ().pipe.push_back (move (c)); + } + + return make_pair (move (expr), move (hd)); + } + + command_exit parser:: + parse_command_exit (token& t, type& tt) + { + // enter: equal/not_equal + // leave: token after exit status (one parse_names() chunk) + + exit_comparison comp (tt == type::equal + ? exit_comparison::eq + : exit_comparison::ne); + + // The next chunk should be the exit status. + // + next (t, tt); + location l (get_location (t)); + names ns (parse_names (t, tt, + pattern_mode::ignore, + true, + "exit status", + nullptr)); + unsigned long es (256); + + if (!pre_parse_) + { + try + { + if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) + es = stoul (ns[0].value); + } + catch (const exception&) {} // Fall through. + + if (es > 255) + { + diag_record dr; + + dr << fail (l) << "expected exit status instead of "; + to_stream (dr.os, ns, true); // Quote. + + dr << info << "exit status is an unsigned integer less than 256"; + } + } + + return command_exit {comp, static_cast<uint8_t> (es)}; + } + + void parser:: + parse_here_documents (token& t, type& tt, + pair<command_expr, here_docs>& p) + { + // enter: newline + // leave: newline + + // Parse here-document fragments in the order they were mentioned on + // the command line. + // + for (here_doc& h: p.second) + { + // Switch to the here-line mode which is like single/double-quoted + // string but recognized the newline as a separator. + // + mode (h.literal + ? 
lexer_mode::here_line_single + : lexer_mode::here_line_double); + next (t, tt); + + parsed_doc v ( + parse_here_document (t, tt, h.end, h.modifiers, h.regex)); + + if (!pre_parse_) + { + assert (!h.redirects.empty ()); + auto i (h.redirects.cbegin ()); + + command& c (p.first[i->expr].pipe[i->pipe]); + redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err); + + if (v.re) + { + r.regex = move (v.regex); + r.regex.flags = move (h.regex_flags); + } + else + r.str = move (v.str); + + r.end = move (h.end); + r.end_line = v.end_line; + r.end_column = v.end_column; + + // Note that our references cannot be invalidated because the + // command_expr/command-pipe vectors already contain all their + // elements. + // + for (++i; i != h.redirects.cend (); ++i) + { + command& c (p.first[i->expr].pipe[i->pipe]); + + (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) = + redirect (redirect_type::here_doc_ref, r); + } + } + + expire_mode (); + } + } + + parser::parsed_doc parser:: + parse_here_document (token& t, type& tt, + const string& em, + const string& mod, + char re) + { + // enter: first token on first line + // leave: newline (after end marker) + + // String literal. Note that when decide if to terminate the previously + // added line with a newline, we need to distinguish a yet empty result + // and the one that has a single blank line added. + // + optional<string> rs; + + regex_lines rre; + + // Here-documents can be indented. The leading whitespaces of the end + // marker line (called strip prefix) determine the indentation. Every + // other line in the here-document should start with this prefix which + // is automatically stripped. The only exception is a blank line. + // + // The fact that the strip prefix is only known at the end, after + // seeing all the lines, is rather inconvenient. 
As a result, the way + // we implement this is a bit hackish (though there is also something + // elegant about it): at the end of the pre-parse stage we are going + // re-examine the sequence of tokens that comprise this here-document + // and "fix up" the first token of each line by stripping the prefix. + // + string sp; + + // Remember the position of the first token in this here-document. + // + size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0); + + // We will use the location of the first token on the line for the + // regex diagnostics. At the end of the loop it will point to the + // beginning of the end marker. + // + location l; + + while (tt != type::eos) + { + l = get_location (t); + + // Check if this is the end marker. For starters, it should be a + // single, unquoted word followed by a newline. + // + if (tt == type::word && + t.qtype == quote_type::unquoted && + peek () == type::newline) + { + const string& v (t.value); + + size_t vn (v.size ()); + size_t en (em.size ()); + + // Then check that it ends with the end marker. + // + if (vn >= en && v.compare (vn - en, en, em) == 0) + { + // Now check that the prefix only contains whitespaces. + // + size_t n (vn - en); + + if (v.find_first_not_of (" \t") >= n) + { + assert (pre_parse_ || n == 0); // Should have been stripped. + + if (n != 0) + sp.assign (v, 0, n); // Save the strip prefix. + + next (t, tt); // Get the newline. + break; + } + } + } + + // Expand the line (can be blank). + // + // @@ PAT: one could argue that if we do it in variables, then we + // should do it here as well. Though feels bizarre. + // + names ns (tt != type::newline + ? parse_names (t, tt, + pattern_mode::ignore, + false, + "here-document line", + nullptr) + : names ()); + + if (!pre_parse_) + { + // What shall we do if the expansion results in multiple names? + // For, example if the line contains just the variable expansion + // and it is of type strings. 
Adding all the elements space- + // separated seems like the natural thing to do. + // + string s; + for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) + { + string n; + + try + { + n = value_traits<string>::convert (move (*i), nullptr); + } + catch (const invalid_argument&) + { + fail (l) << "invalid string value '" << *i << "'"; + } + + if (i == b) + s = move (n); + else + { + s += ' '; + s += n; + } + } + + if (!re) + { + // Add newline after previous line. + // + if (rs) + { + *rs += '\n'; + *rs += s; + } + else + rs = move (s); + } + else + { + // Due to expansion we can end up with multiple lines. If empty + // then will add a blank textual literal. + // + for (size_t p (0); p != string::npos; ) + { + string ln; + size_t np (s.find ('\n', p)); + + if (np != string::npos) + { + ln = string (s, p, np - p); + p = np + 1; + } + else + { + ln = string (s, p); + p = np; + } + + if (ln[0] != re) // Line doesn't start with regex introducer. + { + // This is a line-char literal (covers blank lines as well). + // + // Append textual literal. + // + rre.lines.emplace_back (l.line, l.column, move (ln), false); + } + else // Line starts with the regex introducer. + { + // This is a char-regex, or a sequence of line-regex syntax + // characters or both (in this specific order). So we will + // add regex (with optional special characters) or special + // literal. + // + size_t p (ln.find (re, 1)); + if (p == string::npos) + { + // No regex, just a sequence of syntax characters. + // + string spec (ln, 1); + if (spec.empty ()) + fail (l) << "no syntax line characters"; + + // Append special literal. + // + rre.lines.emplace_back ( + l.line, l.column, move (spec), true); + } + else + { + // Regex (probably with syntax characters). + // + regex_parts re; + + // Empty regex is a special case representing a blank line. + // + if (p == 1) + // Position to optional special characters of an empty + // regex. 
+ // + ++p; + else + // Can't fail as all the pre-conditions verified + // (non-empty with both introducers in place), so no + // description required. + // + re = parse_regex (ln, l, "", &p); + + // Append regex with optional special characters. + // + rre.lines.emplace_back (l.line, l.column, + move (re.value), move (re.flags), + string (ln, p)); + } + } + } + } + } + + // We should expand the whole line at once so this would normally be + // a newline but can also be an end-of-stream. + // + if (tt == type::newline) + next (t, tt); + else + assert (tt == type::eos); + } + + if (tt == type::eos) + fail (t) << "missing here-document end marker '" << em << "'"; + + if (pre_parse_) + { + // Strip the indentation prefix if there is one. + // + assert (replay_ == replay::save); + + if (!sp.empty ()) + { + size_t sn (sp.size ()); + + for (; ri != replay_data_.size (); ++ri) + { + token& rt (replay_data_[ri].token); + + if (rt.type == type::newline) // Blank + continue; + + if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0) + fail (rt) << "unindented here-document line"; + + // If the word is equal to the strip prefix then we have to drop + // the token. Note that simply making it an empty word won't + // have the same semantics. For instance, it would trigger + // concatenated expansion. + // + if (rt.value.size () == sn) + replay_data_.erase (replay_data_.begin () + ri); + else + { + rt.value.erase (0, sn); + rt.column += sn; + ++ri; + } + + // Skip until next newline. + // + for (; replay_data_[ri].token.type != type::newline; ++ri) ; + } + } + } + else + { + // Add final newline unless suppressed. + // + if (mod.find (':') == string::npos) + { + if (re) + // Note that the position is synthetic, but that's ok as we don't + // expect any diagnostics to refer this line. + // + rre.lines.emplace_back (l.line, l.column, string (), false); + else if (rs) + *rs += '\n'; + else + rs = "\n"; + } + + // Finalize regex lines. 
+ // + if (re) + { + // Empty regex matches nothing, so not of much use. + // + if (rre.lines.empty ()) + fail (l) << "empty here-document regex"; + + rre.intro = re; + } + } + + return re + ? parsed_doc (move (rre), l.line, l.column) + : parsed_doc (rs ? move (*rs) : string (), l.line, l.column); + } + + // + // Execute. + // + + void parser:: + execute (script& s, runner& r) + { + assert (s.state == scope_state::unknown); + + auto g ( + make_exception_guard ( + [&s] () {s.state = scope_state::failed;})); + + if (!s.empty ()) + execute (s, s, r); + else + s.state = scope_state::passed; + } + + void parser:: + execute (scope& sc, script& s, runner& r) + { + path_ = nullptr; // Set by replays. + + pre_parse_ = false; + + set_lexer (nullptr); + + script_ = &s; + runner_ = &r; + group_ = nullptr; + id_map_ = nullptr; + include_set_ = nullptr; + scope_ = &sc; + + //@@ PAT TODO: set pbase_? + + exec_scope_body (); + } + + static void + execute_impl (scope& s, script& scr, runner& r) + { + try + { + parser p; + p.execute (s, scr, r); + } + catch (const failed&) + { + s.state = scope_state::failed; + } + } + + void parser:: + exec_scope_body () + { + size_t li (0); + + runner_->enter (*scope_, scope_->start_loc_); + + if (test* t = dynamic_cast<test*> (scope_)) + { + exec_lines ( + t->tests_.begin (), t->tests_.end (), li, command_type::test); + } + else if (group* g = dynamic_cast<group*> (scope_)) + { + bool exec_scope ( + exec_lines ( + g->setup_.begin (), g->setup_.end (), li, command_type::setup)); + + if (exec_scope) + { + atomic_count task_count (0); + wait_guard wg (task_count); + + // Start asynchronous execution of inner scopes keeping track of + // how many we have handled. + // + for (unique_ptr<scope>& chain: g->scopes) + { + // Check if this scope is ignored (e.g., via config.test). + // + if (!runner_->test (*chain) || !exec_scope) + { + chain = nullptr; + continue; + } + + // Pick a scope from the if-else chain. 
+ // + // In fact, we are going to drop all but the selected (if any) + // scope. This way we can re-examine the scope states later. It + // will also free some memory. + // + unique_ptr<scope>* ps; + for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain) + { + scope& s (**ps); + + if (!s.if_cond_) // Unconditional. + { + assert (s.if_chain == nullptr); + break; + } + + line l (move (*s.if_cond_)); + line_type lt (l.type); + + replay_data (move (l.tokens)); + + token t; + type tt; + + next (t, tt); + const location ll (get_location (t)); + next (t, tt); // Skip to start of command. + + bool take; + if (lt != line_type::cmd_else) + { + // Note: the line index count continues from setup. + // + command_expr ce (parse_command_line (t, tt)); + + try + { + take = runner_->run_if (*scope_, ce, ++li, ll); + } + catch (const exit_scope& e) + { + // Bail out if the scope is exited with the failure status. + // Otherwise leave the scope normally. + // + if (!e.status) + throw failed (); + + // Stop iterating through if conditions, and stop executing + // inner scopes. + // + exec_scope = false; + replay_stop (); + break; + } + + if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) + take = !take; + } + else + { + assert (tt == type::newline); + take = true; + } + + replay_stop (); + + if (take) + { + // Count the remaining conditions for the line index. + // + for (scope* r (s.if_chain.get ()); + r != nullptr && + r->if_cond_->type != line_type::cmd_else; + r = r->if_chain.get ()) + ++li; + + s.if_chain.reset (); // Drop remaining scopes. + break; + } + } + + chain.reset (*ps == nullptr || (*ps)->empty () || !exec_scope + ? nullptr + : ps->release ()); + + if (chain != nullptr) + { + // Hand it off to a sub-parser potentially in another thread. 
+ // But we could also have handled it serially in this parser: + // + // scope* os (scope_); + // scope_ = chain.get (); + // exec_scope_body (); + // scope_ = os; + + // Pass our diagnostics stack (this is safe since we are going + // to wait for completion before unwinding the diag stack). + // + // If the scope was executed synchronously, check the status + // and bail out if we weren't asked to keep going. + // + // UBSan workaround. + // + const diag_frame* df (diag_frame::stack ()); + if (!sched.async (task_count, + [] (const diag_frame* ds, + scope& s, + script& scr, + runner& r) + { + diag_frame::stack_guard dsg (ds); + execute_impl (s, scr, r); + }, + df, + ref (*chain), + ref (*script_), + ref (*runner_))) + { + // Bail out if the scope has failed and we weren't instructed + // to keep going. + // + if (chain->state == scope_state::failed && !keep_going) + throw failed (); + } + } + } + + wg.wait (); + + // Re-examine the scopes we have executed collecting their state. + // + for (const unique_ptr<scope>& chain: g->scopes) + { + if (chain == nullptr) + continue; + + switch (chain->state) + { + case scope_state::passed: break; + case scope_state::failed: throw failed (); + default: assert (false); + } + } + } + + exec_lines ( + g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown); + } + else + assert (false); + + runner_->leave (*scope_, scope_->end_loc_); + + scope_->state = scope_state::passed; + } + + bool parser:: + exec_lines (lines::iterator i, lines::iterator e, + size_t& li, + command_type ct) + { + try + { + token t; + type tt; + + for (; i != e; ++i) + { + line& ln (*i); + line_type lt (ln.type); + + assert (path_ == nullptr); + + // Set the tokens and start playing. + // + replay_data (move (ln.tokens)); + + // We don't really need to change the mode since we already know + // the line type. + // + next (t, tt); + const location ll (get_location (t)); + + switch (lt) + { + case line_type::var: + { + // Parse. 
+ // + string name (move (t.value)); + + next (t, tt); + type kind (tt); // Assignment kind. + + value rhs (parse_variable_line (t, tt)); + + if (tt == type::semi) + next (t, tt); + + assert (tt == type::newline); + + // Assign. + // + const variable& var (*ln.var); + + value& lhs (kind == type::assign + ? scope_->assign (var) + : scope_->append (var)); + + build2::parser::apply_value_attributes ( + &var, lhs, move (rhs), kind); + + // If we change any of the test.* values, then reset the $*, + // $N special aliases. + // + if (var.name == script_->test_var.name || + var.name == script_->options_var.name || + var.name == script_->arguments_var.name || + var.name == script_->redirects_var.name || + var.name == script_->cleanups_var.name) + { + scope_->reset_special (); + } + + replay_stop (); + break; + } + case line_type::cmd: + { + // We use the 0 index to signal that this is the only command. + // Note that we only do this for test commands. + // + if (ct == command_type::test && li == 0) + { + lines::iterator j (i); + for (++j; j != e && j->type == line_type::var; ++j) ; + + if (j != e) // We have another command. + ++li; + } + else + ++li; + + command_expr ce (parse_command_line (t, tt)); + runner_->run (*scope_, ce, ct, li, ll); + + replay_stop (); + break; + } + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + { + next (t, tt); // Skip to start of command. + + bool take; + if (lt != line_type::cmd_else) + { + // Assume if-else always involves multiple commands. + // + command_expr ce (parse_command_line (t, tt)); + take = runner_->run_if (*scope_, ce, ++li, ll); + + if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) + take = !take; + } + else + { + assert (tt == type::newline); + take = true; + } + + replay_stop (); + + // If end is true, then find the 'end' line. Otherwise, find + // the next if-else line. 
If skip is true then increment the + // command line index. + // + auto next = [e, &li] + (lines::iterator j, bool end, bool skip) -> lines::iterator + { + // We need to be aware of nested if-else chains. + // + size_t n (0); + + for (++j; j != e; ++j) + { + line_type lt (j->type); + + if (lt == line_type::cmd_if || + lt == line_type::cmd_ifn) + ++n; + + // If we are nested then we just wait until we get back + // to the surface. + // + if (n == 0) + { + switch (lt) + { + case line_type::cmd_elif: + case line_type::cmd_elifn: + case line_type::cmd_else: + if (end) break; + // Fall through. + case line_type::cmd_end: return j; + default: break; + } + } + + if (lt == line_type::cmd_end) + --n; + + if (skip) + { + // Note that we don't count else and end as commands. + // + switch (lt) + { + case line_type::cmd: + case line_type::cmd_if: + case line_type::cmd_ifn: + case line_type::cmd_elif: + case line_type::cmd_elifn: ++li; break; + default: break; + } + } + } + + assert (false); // Missing end. + return e; + }; + + // If we are taking this branch then we need to parse all the + // lines until the next if-else line and then skip all the + // lines until the end (unless next is already end). + // + // Otherwise, we need to skip all the lines until the next + // if-else line and then continue parsing. + // + if (take) + { + lines::iterator j (next (i, false, false)); // Next if-else. + if (!exec_lines (i + 1, j, li, ct)) + return false; + + i = j->type == line_type::cmd_end ? j : next (j, true, true); + } + else + { + i = next (i, false, true); + if (i->type != line_type::cmd_end) + --i; // Continue with this line (e.g., elif or else). + } + + break; + } + case line_type::cmd_end: + { + assert (false); + } + } + } + + return true; + } + catch (const exit_scope& e) + { + // Bail out if the scope is exited with the failure status. Otherwise + // leave the scope normally. 
+ // + if (!e.status) + throw failed (); + + replay_stop (); + return false; + } + } + + // + // The rest. + // + + lookup parser:: + lookup_variable (name&& qual, string&& name, const location& loc) + { + assert (!pre_parse_); + + if (!qual.empty ()) + fail (loc) << "qualified variable name"; + + // If we have no scope (happens when pre-parsing directives), then we + // only look for buildfile variables. + // + // Otherwise, every variable that is ever set in a script has been + // pre-entered during pre-parse or introduced with the set builtin + // during test execution. Which means that if one is not found in the + // script pool then it can only possibly be set in the buildfile. + // + // Note that we need to acquire the variable pool lock. The pool can + // be changed from multiple threads by the set builtin. The obtained + // variable pointer can safely be used with no locking as the variable + // pool is an associative container (underneath) and we are only adding + // new variables into it. + // + const variable* pvar (nullptr); + + if (scope_ != nullptr) + { + slock sl (script_->var_pool_mutex); + pvar = script_->var_pool.find (name); + } + + return pvar != nullptr + ? scope_->find (*pvar) + : script_->find_in_buildfile (name); + } + + size_t parser:: + quoted () const + { + size_t r (0); + + if (replay_ != replay::play) + r = lexer_->quoted (); + else + { + // Examine tokens we have replayed since last reset. + // + for (size_t i (replay_quoted_); i != replay_i_; ++i) + if (replay_data_[i].token.qtype != quote_type::unquoted) + ++r; + } + + return r; + } + + void parser:: + reset_quoted (token& cur) + { + if (replay_ != replay::play) + lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0); + else + { + replay_quoted_ = replay_i_ - 1; + + // Must be the same token. 
+ // + assert (replay_data_[replay_quoted_].token.qtype == cur.qtype); + } + } + + const string& parser:: + insert_id (string id, location l) + { + auto p (id_map_->emplace (move (id), move (l))); + + if (!p.second) + fail (l) << "duplicate id " << p.first->first << + info (p.first->second) << "previously used here"; + + return p.first->first; + } + + void parser:: + set_lexer (lexer* l) + { + lexer_ = l; + base_parser::lexer_ = l; + } + + void parser:: + apply_value_attributes (const variable* var, + value& lhs, + value&& rhs, + const string& attributes, + token_type kind, + const path& name) + { + path_ = &name; + + istringstream is (attributes); + lexer l (is, name, lexer_mode::attribute); + set_lexer (&l); + + token t; + type tt; + next (t, tt); + + if (tt != type::lsbrace && tt != type::eos) + fail (t) << "expected '[' instead of " << t; + + attributes_push (t, tt, true); + + if (tt != type::eos) + fail (t) << "trailing junk after ']'"; + + build2::parser::apply_value_attributes (var, lhs, move (rhs), kind); + } + + // parser::parsed_doc + // + parser::parsed_doc:: + parsed_doc (string s, uint64_t l, uint64_t c) + : str (move (s)), re (false), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (regex_lines&& r, uint64_t l, uint64_t c) + : regex (move (r)), re (true), end_line (l), end_column (c) + { + } + + parser::parsed_doc:: + parsed_doc (parsed_doc&& d) + : re (d.re), end_line (d.end_line), end_column (d.end_column) + { + if (re) + new (&regex) regex_lines (move (d.regex)); + else + new (&str) string (move (d.str)); + } + + parser::parsed_doc:: + ~parsed_doc () + { + if (re) + regex.~regex_lines (); + else + str.~string (); + } + } + } +} diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx new file mode 100644 index 0000000..1beee49 --- /dev/null +++ b/libbuild2/test/script/parser.hxx @@ -0,0 +1,250 @@ +// file : libbuild2/test/script/parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code 
Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_PARSER_HXX +#define LIBBUILD2_TEST_SCRIPT_PARSER_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/parser.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/test/script/token.hxx> +#include <libbuild2/test/script/script.hxx> + +namespace build2 +{ + namespace test + { + namespace script + { + class lexer; + class runner; + + class parser: protected build2::parser + { + // Pre-parse. Issue diagnostics and throw failed in case of an error. + // + public: + void + pre_parse (script&); + + void + pre_parse (istream&, script&); + + // Helpers. + // + // Parse attribute string and perform attribute-guided assignment. + // Issue diagnostics and throw failed in case of an error. + // + void + apply_value_attributes (const variable*, // Optional. + value& lhs, + value&& rhs, + const string& attributes, + token_type assign_kind, + const path& name); // For diagnostics. + + // Recursive descent parser. + // + // Usually (but not always) parse functions receive the token/type + // from which it should start consuming and in return the token/type + // should contain the first token that has not been consumed. + // + // Functions that are called parse_*() rather than pre_parse_*() are + // used for both stages. 
+ // + protected: + bool + pre_parse_demote_group_scope (unique_ptr<scope>&); + + token + pre_parse_scope_body (); + + unique_ptr<group> + pre_parse_scope_block (token&, token_type&, const string&); + + bool + pre_parse_line (token&, token_type&, + optional<description>&, + lines* = nullptr, + bool one = false); + + bool + pre_parse_if_else (token&, token_type&, + optional<description>&, + lines&); + + bool + pre_parse_if_else_scope (token&, token_type&, + optional<description>&, + lines&); + + bool + pre_parse_if_else_command (token&, token_type&, + optional<description>&, + lines&); + + void + pre_parse_directive (token&, token_type&); + + void + pre_parse_include_line (names, location); + + description + pre_parse_leading_description (token&, token_type&); + + description + parse_trailing_description (token&, token_type&); + + value + parse_variable_line (token&, token_type&); + + command_expr + parse_command_line (token&, token_type&); + + // Ordered sequence of here-document redirects that we can expect to + // see after the command line. + // + struct here_redirect + { + size_t expr; // Index in command_expr. + size_t pipe; // Index in command_pipe. + int fd; // Redirect fd (0 - in, 1 - out, 2 - err). + }; + + struct here_doc + { + // Redirects that share here_doc. Most of the time we will have no + // more than 2 (2 - for the roundtrip test cases). + // + small_vector<here_redirect, 2> redirects; + + string end; + bool literal; // Literal (single-quote). + string modifiers; + + // Regex introducer ('\0' if not a regex, so can be used as bool). + // + char regex; + + // Regex global flags. Meaningful if regex != '\0'. 
+ // + string regex_flags; + }; + using here_docs = vector<here_doc>; + + pair<command_expr, here_docs> + parse_command_expr (token&, token_type&); + + command_exit + parse_command_exit (token&, token_type&); + + void + parse_here_documents (token&, token_type&, + pair<command_expr, here_docs>&); + + struct parsed_doc + { + union + { + string str; // Here-document literal. + regex_lines regex; // Here-document regex. + }; + + bool re; // True if regex. + uint64_t end_line; // Here-document end marker location. + uint64_t end_column; + + parsed_doc (string, uint64_t line, uint64_t column); + parsed_doc (regex_lines&&, uint64_t line, uint64_t column); + parsed_doc (parsed_doc&&); // Note: move constructible-only type. + ~parsed_doc (); + }; + + parsed_doc + parse_here_document (token&, token_type&, + const string&, + const string& mode, + char re_intro); // '\0' if not a regex. + + // Execute. Issue diagnostics and throw failed in case of an error. + // + public: + void + execute (script& s, runner& r); + + void + execute (scope&, script&, runner&); + + protected: + void + exec_scope_body (); + + // Return false if the execution of the scope should be terminated + // with the success status (e.g., as a result of encountering the exit + // builtin). For unsuccessful termination the failed exception should + // be thrown. + // + bool + exec_lines (lines::iterator, lines::iterator, size_t&, command_type); + + // Customization hooks. + // + protected: + virtual lookup + lookup_variable (name&&, string&&, const location&) override; + + // Number of quoted tokens since last reset. Note that this includes + // the peeked token, if any. + // + protected: + size_t + quoted () const; + + void + reset_quoted (token& current); + + size_t replay_quoted_; + + // Insert id into the id map checking for duplicates. + // + protected: + const string& + insert_id (string, location); + + // Set lexer pointers for both the current and the base classes. 
+ // + protected: + void + set_lexer (lexer* l); + + protected: + using base_parser = build2::parser; + + script* script_; + + // Pre-parse state. + // + using id_map = std::unordered_map<string, location>; + using include_set = std::set<path>; + + group* group_; + id_map* id_map_; + include_set* include_set_; // Testscripts already included in this + // scope. Must be absolute and normalized. + lexer* lexer_; + string id_prefix_; // Auto-derived id prefix. + + // Execute state. + // + runner* runner_; + scope* scope_; + }; + } + } +} + +#endif // LIBBUILD2_TEST_SCRIPT_PARSER_HXX diff --git a/libbuild2/test/script/parser.test.cxx b/libbuild2/test/script/parser.test.cxx new file mode 100644 index 0000000..8702e18 --- /dev/null +++ b/libbuild2/test/script/parser.test.cxx @@ -0,0 +1,245 @@ +// file : libbuild2/test/script/parser.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> // reset() +#include <libbuild2/scheduler.hxx> + +#include <libbuild2/test/target.hxx> + +#include <libbuild2/test/script/token.hxx> +#include <libbuild2/test/script/parser.hxx> +#include <libbuild2/test/script/runner.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + // Here we assume we are running serially. 
+ // + class print_runner: public runner + { + public: + print_runner (bool scope, bool id, bool line) + : scope_ (scope), id_ (id), line_ (line) {} + + virtual bool + test (scope&) const override + { + return true; + } + + virtual void + enter (scope& s, const location&) override + { + if (s.desc) + { + const auto& d (*s.desc); + + if (!d.id.empty ()) + cout << ind_ << ": id:" << d.id << endl; + + if (!d.summary.empty ()) + cout << ind_ << ": sm:" << d.summary << endl; + + if (!d.details.empty ()) + { + if (!d.id.empty () || !d.summary.empty ()) + cout << ind_ << ":" << endl; // Blank. + + const auto& s (d.details); + for (size_t b (0), e (0), n; e != string::npos; b = e + 1) + { + e = s.find ('\n', b); + n = ((e != string::npos ? e : s.size ()) - b); + + cout << ind_ << ':'; + if (n != 0) + { + cout << ' '; + cout.write (s.c_str () + b, static_cast<streamsize> (n)); + } + cout << endl; + } + } + } + + if (scope_) + { + cout << ind_ << "{"; + + if (id_ && !s.id_path.empty ()) // Skip empty root scope id. + cout << " # " << s.id_path.string (); + + cout << endl; + + ind_ += " "; + } + } + + virtual void + run (scope&, + const command_expr& e, command_type t, + size_t i, + const location&) override + { + const char* s (nullptr); + + switch (t) + { + case command_type::test: s = ""; break; + case command_type::setup: s = "+"; break; + case command_type::teardown: s = "-"; break; + } + + cout << ind_ << s << e; + + if (line_) + cout << " # " << i; + + cout << endl; + } + + virtual bool + run_if (scope&, + const command_expr& e, + size_t i, + const location&) override + { + cout << ind_ << "? 
" << e; + + if (line_) + cout << " # " << i; + + cout << endl; + + return e.back ().pipe.back ().program.string () == "true"; + } + + virtual void + leave (scope&, const location&) override + { + if (scope_) + { + ind_.resize (ind_.size () - 2); + cout << ind_ << "}" << endl; + } + } + + private: + bool scope_; + bool id_; + bool line_; + string ind_; + }; + + // Usage: argv[0] [-s] [-i] [-l] [<testscript-name>] + // + int + main (int argc, char* argv[]) + { + tracer trace ("main"); + + // Fake build system driver, default verbosity. + // + init_diag (1); + init (argv[0]); + sched.startup (1); // Serial execution. + reset (strings ()); // No command line variables. + + bool scope (false); + bool id (false); + bool line (false); + path name; + + for (int i (1); i != argc; ++i) + { + string a (argv[i]); + + if (a == "-s") + scope = true; + else if (a == "-i") + id = true; + else if (a == "-l") + line = true; + else + { + name = path (move (a)); + break; + } + } + + if (name.empty ()) + name = path ("testscript"); + + assert (!id || scope); // Id can only be printed with scope. + + try + { + cin.exceptions (istream::failbit | istream::badbit); + + // Enter mock targets. Use fixed names and paths so that we can use + // them in expected results. Strictly speaking target paths should + // be absolute. However, the testscript implementation doesn't + // really care. + // + file& tt ( + targets.insert<file> (work, + dir_path (), + "driver", + string (), + trace)); + + value& v ( + tt.assign ( + var_pool.rw ().insert<target_triplet> ( + "test.target", variable_visibility::project))); + + v = cast<target_triplet> ((*global_scope)["build.host"]); + + testscript& st ( + targets.insert<testscript> (work, + dir_path (), + name.leaf ().base ().string (), + name.leaf ().extension (), + trace)); + + tt.path (path ("driver")); + st.path (name); + + // Parse and run. 
+ // + parser p; + script s (tt, st, dir_path (work) /= "test-driver"); + p.pre_parse (cin, s); + + print_runner r (scope, id, line); + p.execute (s, r); + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::test::script::main (argc, argv); +} diff --git a/libbuild2/test/script/regex.cxx b/libbuild2/test/script/regex.cxx new file mode 100644 index 0000000..20dfaa6 --- /dev/null +++ b/libbuild2/test/script/regex.cxx @@ -0,0 +1,440 @@ +// file : libbuild2/test/script/regex.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <locale> + +#include <libbuild2/test/script/regex.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + static_assert (alignof (char_string) % 4 == 0, + "unexpected char_string alignment"); + + static_assert (alignof (char_regex) % 4 == 0, + "unexpected char_regex alignment"); + + static_assert (sizeof (uintptr_t) > sizeof (int16_t), + "unexpected uintptr_t size"); + + const line_char line_char::nul (0); + const line_char line_char::eof (-1); + + // line_char + // + // We package the special character into uintptr_t with the following + // steps: + // + // - narrow down int value to int16_t (preserves all the valid values) + // + // - convert to uint16_t (bitwise representation stays the same, but no + // need to bother with signed value widening, leftmost bits loss on + // left shift, etc) + // + // - convert to uintptr_t (storage type) + // + // - shift left by two bits (the operation is fully reversible as + // uintptr_t is wider than uint16_t) + // + line_char:: + line_char (int c) + : data_ ( + (static_cast <uintptr_t> ( + static_cast<uint16_t> ( + static_cast<int16_t> (c))) << 2) | + static_cast <uintptr_t> (line_type::special)) + { + // @@ How can we allow anything for basic_regex but only subset + // for our own 
code? + // + const char ex[] = "pn\n\r"; + + assert (c == 0 || // Null character. + + // EOF. Note that it is also passed by msvcrt as _Meta_eos + // enum value. + // + c == -1 || + + // libstdc++ line/paragraph separators. + // + c == u'\u2028' || c == u'\u2029' || + + (c > 0 && c <= 255 && ( + // Supported regex special characters. + // + syntax (c) || + + // libstdc++ look-ahead tokens, newline chars. + // + string::traits_type::find (ex, 4, c) != nullptr))); + } + + line_char:: + line_char (const char_string& s, line_pool& p) + : line_char (&(*p.strings.emplace (s).first)) + { + } + + line_char:: + line_char (char_string&& s, line_pool& p) + : line_char (&(*p.strings.emplace (move (s)).first)) + { + } + + line_char:: + line_char (char_regex r, line_pool& p) + // Note: in C++17 can write as p.regexes.emplace_front(move (r)) + // + : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r)))) + { + } + + bool + line_char::syntax (char c) + { + return string::traits_type::find ( + "()|.*+?{}\\0123456789,=!", 23, c) != nullptr; + } + + bool + operator== (const line_char& l, const line_char& r) + { + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt == rt) + { + bool res (true); + + switch (lt) + { + case line_type::special: res = l.special () == r.special (); break; + case line_type::regex: assert (false); break; + + // Note that we use pointers (rather than values) comparison + // assuming that the strings must belong to the same pool. + // + case line_type::literal: res = l.literal () == r.literal (); break; + } + + return res; + } + + // Match literal with regex. 
+ // + if (lt == line_type::literal && rt == line_type::regex) + return regex_match (*l.literal (), *r.regex ()); + else if (rt == line_type::literal && lt == line_type::regex) + return regex_match (*r.literal (), *l.regex ()); + + return false; + } + + bool + operator< (const line_char& l, const line_char& r) + { + if (l == r) + return false; + + line_type lt (l.type ()); + line_type rt (r.type ()); + + if (lt != rt) + return lt < rt; + + bool res (false); + + switch (lt) + { + case line_type::special: res = l.special () < r.special (); break; + case line_type::literal: res = *l.literal () < *r.literal (); break; + case line_type::regex: assert (false); break; + } + + return res; + } + + // line_char_locale + // + + // An exemplar locale with the std::ctype<line_char> facet. It is used + // for the subsequent line char locale objects creation (see below) + // which normally ends up with a shallow copy of a reference-counted + // object. + // + // Note that creating the line char locales from the exemplar is not + // merely an optimization: there is a data race in the libstdc++ (at + // least as of GCC 9.1) implementation of the locale(const locale&, + // Facet*) constructor (bug #91057). + // + // Also note that we install the facet in init() rather than during + // the object creation to avoid a race with the std::locale-related + // global variables initialization. + // + static locale line_char_locale_exemplar; + + void + init () + { + line_char_locale_exemplar = + locale (locale (), + new std::ctype<line_char> ()); // Hidden by ctype bitmask. + } + + line_char_locale:: + line_char_locale () + : locale (line_char_locale_exemplar) + { + // Make sure init() has been called. + // + // Note: has_facet() is hidden by a private function in libc++. + // + assert (std::has_facet<std::ctype<line_char>> (*this)); + } + + // char_regex + // + // Transform regex according to the extended flags {idot}. 
If regex is + // malformed then keep transforming, so the resulting string is + // malformed the same way. We expect the error to be reported by the + // char_regex ctor. + // + static string + transform (const string& s, char_flags f) + { + assert ((f & char_flags::idot) != char_flags::none); + + string r; + bool escape (false); + bool cclass (false); + + for (char c: s) + { + // Inverse escaping for a dot which is out of the char class + // brackets. + // + bool inverse (c == '.' && !cclass); + + // Handle the escape case. Note that we delay adding the backslash + // since we may have to inverse things. + // + if (escape) + { + if (!inverse) + r += '\\'; + + r += c; + escape = false; + + continue; + } + else if (c == '\\') + { + escape = true; + continue; + } + + // Keep track of being inside the char class brackets, escape if + // inversion. Note that we never inverse square brackets. + // + if (c == '[' && !cclass) + cclass = true; + else if (c == ']' && cclass) + cclass = false; + else if (inverse) + r += '\\'; + + r += c; + } + + if (escape) // Regex is malformed but that's not our problem. + r += '\\'; + + return r; + } + + static char_regex::flag_type + to_std_flags (char_flags f) + { + // Note that ECMAScript flag is implied in the absence of a grammar + // flag. + // + return (f & char_flags::icase) != char_flags::none + ? char_regex::icase + : char_regex::flag_type (); + } + + char_regex:: + char_regex (const char_string& s, char_flags f) + : base_type ((f & char_flags::idot) != char_flags::none + ? 
transform (s, f) + : s, + to_std_flags (f)) + { + } + } + } + } +} + +namespace std +{ + using namespace build2::test::script::regex; + + // char_traits<line_char> + // + line_char* char_traits<line_char>:: + assign (char_type* s, size_t n, char_type c) + { + for (size_t i (0); i != n; ++i) + s[i] = c; + return s; + } + + line_char* char_traits<line_char>:: + move (char_type* d, const char_type* s, size_t n) + { + if (n > 0 && d != s) + { + // If d < s then it can't be in [s, s + n) range and so using copy() is + // safe. Otherwise d + n is out of (s, s + n] range and so using + // copy_backward() is safe. + // + if (d < s) + std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). + else + copy_backward (s, s + n, d + n); + } + + return d; + } + + line_char* char_traits<line_char>:: + copy (char_type* d, const char_type* s, size_t n) + { + std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). + return d; + } + + int char_traits<line_char>:: + compare (const char_type* s1, const char_type* s2, size_t n) + { + for (size_t i (0); i != n; ++i) + { + if (s1[i] < s2[i]) + return -1; + else if (s2[i] < s1[i]) + return 1; + } + + return 0; + } + + size_t char_traits<line_char>:: + length (const char_type* s) + { + size_t i (0); + while (s[i] != char_type::nul) + ++i; + + return i; + } + + const line_char* char_traits<line_char>:: + find (const char_type* s, size_t n, const char_type& c) + { + for (size_t i (0); i != n; ++i) + { + if (s[i] == c) + return s + i; + } + + return nullptr; + } + + // ctype<line_char> + // + locale::id ctype<line_char>::id; + + const line_char* ctype<line_char>:: + is (const char_type* b, const char_type* e, mask* m) const + { + while (b != e) + { + const char_type& c (*b++); + + *m++ = c.type () == line_type::special && c.special () >= 0 && + build2::digit (static_cast<char> (c.special ())) + ? 
digit + : 0; + } + + return e; + } + + const line_char* ctype<line_char>:: + scan_is (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (is (m, *b)) + return b; + } + + return e; + } + + const line_char* ctype<line_char>:: + scan_not (mask m, const char_type* b, const char_type* e) const + { + for (; b != e; ++b) + { + if (!is (m, *b)) + return b; + } + + return e; + } + + const char* ctype<line_char>:: + widen (const char* b, const char* e, char_type* c) const + { + while (b != e) + *c++ = widen (*b++); + + return e; + } + + const line_char* ctype<line_char>:: + narrow (const char_type* b, const char_type* e, char def, char* c) const + { + while (b != e) + *c++ = narrow (*b++, def); + + return e; + } + + // regex_traits<line_char> + // + int regex_traits<line_char>:: + value (char_type c, int radix) const + { + assert (radix == 8 || radix == 10 || radix == 16); + + if (c.type () != line_type::special) + return -1; + + const char digits[] = "0123456789ABCDEF"; + const char* d (string::traits_type::find (digits, radix, c.special ())); + return d != nullptr ? 
static_cast<int> (d - digits) : -1; + } +} diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx new file mode 100644 index 0000000..faec1fc --- /dev/null +++ b/libbuild2/test/script/regex.hxx @@ -0,0 +1,703 @@ +// file : libbuild2/test/script/regex.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX +#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX + +#include <list> +#include <regex> +#include <locale> +#include <string> // basic_string +#include <type_traits> // make_unsigned, enable_if, is_* +#include <unordered_set> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + using char_string = std::basic_string<char>; + + enum class char_flags: uint16_t + { + icase = 0x1, // Case-insensitive match. + idot = 0x2, // Invert '.' escaping. + + none = 0 + }; + + // Restricts valid standard flags to just {icase}, extends with custom + // flags {idot}. + // + class char_regex: public std::basic_regex<char> + { + public: + using base_type = std::basic_regex<char>; + + char_regex (const char_string&, char_flags = char_flags::none); + }; + + // Newlines are line separators and are not part of the line: + // + // line<newline>line<newline> + // + // Specifically, this means that a customary trailing newline creates a + // trailing blank line. + // + // All characters can inter-compare (though there cannot be regex + // characters in the output, only in line_regex). + // + // Note that we assume that line_regex and the input to regex_match() + // use the same pool. + // + struct line_pool + { + // Note that we assume the pool can be moved without invalidating + // pointers to any already pooled entities. 
+ // + std::unordered_set<char_string> strings; + std::list<char_regex> regexes; + }; + + enum class line_type + { + special, + literal, + regex + }; + + struct line_char + { + // Steal last two bits from the pointer to store the type. + // + private: + std::uintptr_t data_; + + public: + line_type + type () const {return static_cast<line_type> (data_ & 0x3);} + + int + special () const + { + // Stored as (shifted) int16_t. Perform steps reversed to those + // that are described in the comment for the corresponding ctor. + // Note that the intermediate cast to uint16_t is required to + // portably preserve the -1 special character. + // + return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2)); + } + + const char_string* + literal () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast<const char_string*> ( + data_ & ~std::uintptr_t (0x3)); + } + + const char_regex* + regex () const + { + // Note that 2 rightmost bits are used for packaging line_char + // type. Read the comment for the corresponding ctor for details. + // + return reinterpret_cast<const char_regex*> ( + data_ & ~std::uintptr_t (0x3)); + } + + static const line_char nul; + static const line_char eof; + + // Note: creates an uninitialized value. + // + line_char () = default; + + // Create a special character. The argument value must be one of the + // following ones: + // + // 0 (nul character) + // -1 (EOF) + // [()|.*+?{}\0123456789,=!] (excluding []) + // + // Note that the constructor is implicit to allow basic_regex to + // implicitly construct line_chars from special char literals (in + // particular libstdc++ appends them to an internal line_string). 
+ // + // Also note that we extend the valid characters set (see above) with + // 'p', 'n' (used by libstdc++ for positive/negative look-ahead + // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used + // by libstdc++ for newline/newparagraph matching). + // + line_char (int); + + // Create a literal character. + // + // Don't copy string if already pooled. + // + explicit + line_char (const char_string&, line_pool&); + + explicit + line_char (char_string&&, line_pool&); + + explicit + line_char (const char_string* s) // Assume already pooled. + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_string address is a + // multiple of four. + // + : data_ (reinterpret_cast <std::uintptr_t> (s) | + static_cast <std::uintptr_t> (line_type::literal)) {} + + // Create a regex character. + // + explicit + line_char (char_regex, line_pool&); + + explicit + line_char (const char_regex* r) // Assume already pooled. + // + // Steal two bits from the pointer to package line_char type. + // Assume (and statically assert) that char_regex address is a + // multiple of four. + // + : data_ (reinterpret_cast <std::uintptr_t> (r) | + static_cast <std::uintptr_t> (line_type::regex)) {} + + // Provide basic_regex with the ability to use line_char in a context + // where a char value is expected (e.g., as a function argument). + // + // libstdc++ seems to cast special line_chars only (and such a + // conversion is meaningful). + // + // msvcrt casts line_chars of arbitrary types instead. The only + // reasonable strategy is to return a value that differs from any + // other that can be encountered in a regex expression and so will + // unlikely be misinterpreted. + // + operator char () const + { + return type () == line_type::special ? special () : '\a'; // BELL. + } + + // Return true if the character is a syntax (special) one. 
+ // + static bool + syntax (char); + + // Provide basic_regex (such as from msvcrt) with the ability to + // explicitly cast line_chars to implementation-specific enums. + // + template <typename T> + explicit + operator T () const + { + assert (type () == line_type::special); + return static_cast<T> (special ()); + } + }; + + // Perform "deep" characters comparison (for example match literal + // character with a regex character), rather than just compare them + // literally. At least one argument must be of a type other than regex + // as there is no operator==() defined to compare regexes. Characters + // of the literal type must share the same pool (strings are compared + // by pointers not by values). + // + bool + operator== (const line_char&, const line_char&); + + // Return false if arguments are equal (operator==() returns true). + // Otherwise if types are different return the value implying that + // special < literal < regex. If types are special or literal return + // the result of the respective characters or strings comparison. At + // least one argument must be of a type other than regex as there is no + // operator<() defined to compare regexes. + // + // While not very natural operation for the class we have, we have to + // provide some meaningful semantics for such a comparison as it is + // required by the char_traits<line_char> specialization. While we + // could provide it right in that specialization, let's keep it here + // for basic_regex implementations that potentially can compare + // line_chars as they compare them with expressions of other types (see + // below). 
+ // + bool + operator< (const line_char&, const line_char&); + + inline bool + operator!= (const line_char& l, const line_char& r) + { + return !(l == r); + } + + inline bool + operator<= (const line_char& l, const line_char& r) + { + return l < r || l == r; + } + + // Provide basic_regex (such as from msvcrt) with the ability to + // compare line_char to a value of an integral or + // implementation-specific enum type. In the absence of the following + // template operators, such comparisons would be ambiguous for + // integral types (given that there are implicit conversions + // int->line_char and line_char->char) and impossible for enums. + // + // Note that these == and < operators can succeed only for a line_char + // of the special type. For other types they always return false. That + // in particular leads to the following case: + // + // (lc != c) != (lc < c || c < lc). + // + // Note that we can not assert line_char is of the special type as + // basic_regex (such as from libc++) may need the ability to check if + // arbitrary line_char belongs to some special characters range (like + // ['0', '9']). 
+ // + template <typename T> + struct line_char_cmp + : public std::enable_if<std::is_integral<T>::value || + (std::is_enum<T>::value && + !std::is_same<T, char_flags>::value)> {}; + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator== (const line_char& l, const T& r) + { + return l.type () == line_type::special && + static_cast<T> (l.special ()) == r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator== (const T& l, const line_char& r) + { + return r.type () == line_type::special && + static_cast<T> (r.special ()) == l; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator!= (const line_char& l, const T& r) + { + return !(l == r); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator!= (const T& l, const line_char& r) + { + return !(l == r); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator< (const line_char& l, const T& r) + { + return l.type () == line_type::special && + static_cast<T> (l.special ()) < r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + bool + operator< (const T& l, const line_char& r) + { + return r.type () == line_type::special && + l < static_cast<T> (r.special ()); + } + + template <typename T, typename = typename line_char_cmp<T>::type> + inline bool + operator<= (const line_char& l, const T& r) + { + return l < r || l == r; + } + + template <typename T, typename = typename line_char_cmp<T>::type> + inline bool + operator<= (const T& l, const line_char& r) + { + return l < r || l == r; + } + + using line_string = std::basic_string<line_char>; + + // Locale that has ctype<line_char> facet installed. Used in the + // regex_traits<line_char> specialization (see below). + // + class line_char_locale: public std::locale + { + public: + // Create a copy of the global C++ locale. 
+ // + line_char_locale (); + }; + + // Initialize the testscript regex global state. Should be called once + // prior to creating objects of types from this namespace. Note: not + // thread-safe. + // + void + init (); + } + } + } +} + +// Standard template specializations for line_char that are required for the +// basic_regex<line_char> instantiation. +// +namespace std +{ + template <> + class char_traits<build2::test::script::regex::line_char> + { + public: + using char_type = build2::test::script::regex::line_char; + using int_type = char_type; + using off_type = char_traits<char>::off_type; + using pos_type = char_traits<char>::pos_type; + using state_type = char_traits<char>::state_type; + + static void + assign (char_type& c1, const char_type& c2) {c1 = c2;} + + static char_type* + assign (char_type*, size_t, char_type); + + // Note that eq() and lt() are not constexpr (as required by C++11) + // because == and < operators for char_type are not constexpr. + // + static bool + eq (const char_type& l, const char_type& r) {return l == r;} + + static bool + lt (const char_type& l, const char_type& r) {return l < r;} + + static char_type* + move (char_type*, const char_type*, size_t); + + static char_type* + copy (char_type*, const char_type*, size_t); + + static int + compare (const char_type*, const char_type*, size_t); + + static size_t + length (const char_type*); + + static const char_type* + find (const char_type*, size_t, const char_type&); + + static constexpr char_type + to_char_type (const int_type& c) {return c;} + + static constexpr int_type + to_int_type (const char_type& c) {return int_type (c);} + + // Note that the following functions are not constexpr (as required by + // C++11) because their return expressions are not constexpr. 
+ // + static bool + eq_int_type (const int_type& l, const int_type& r) {return l == r;} + + static int_type eof () {return char_type::eof;} + + static int_type + not_eof (const int_type& c) + { + return c != char_type::eof ? c : char_type::nul; + } + }; + + // ctype<> must be derived from both ctype_base and locale::facet (the latter + // supports ref-counting used by the std::locale implementation internally). + // + // msvcrt for some reason also derives ctype_base from locale::facet which + // produces "already a base-class" warning and effectively breaks the + // reference counting. So we derive from ctype_base only in this case. + // + template <> + class ctype<build2::test::script::regex::line_char>: public ctype_base +#if !defined(_MSC_VER) || _MSC_VER >= 2000 + , public locale::facet +#endif + { + // Used by the implementation only. + // + using line_type = build2::test::script::regex::line_type; + + public: + using char_type = build2::test::script::regex::line_char; + + static locale::id id; + +#if !defined(_MSC_VER) || _MSC_VER >= 2000 + explicit + ctype (size_t refs = 0): locale::facet (refs) {} +#else + explicit + ctype (size_t refs = 0): ctype_base (refs) {} +#endif + + // While unnecessary, let's keep for completeness. + // + virtual + ~ctype () override = default; + + // The C++ standard requires the following functions to call their virtual + // (protected) do_*() counterparts that provide the real implementations. + // The only purpose for this indirection is to provide a user with the + // ability to customize existing (standard) ctype facets. As we do not + // provide such an ability, for simplicity we will omit the do_*() + // functions and provide the implementations directly. This should be safe + // as nobody except us could call those protected functions. + // + bool + is (mask m, char_type c) const + { + return m == + (c.type () == line_type::special && c.special () >= 0 && + build2::digit (static_cast<char> (c.special ())) + ? 
digit + : 0); + } + + const char_type* + is (const char_type*, const char_type*, mask*) const; + + const char_type* + scan_is (mask, const char_type*, const char_type*) const; + + const char_type* + scan_not (mask, const char_type*, const char_type*) const; + + char_type + toupper (char_type c) const {return c;} + + const char_type* + toupper (char_type*, const char_type* e) const {return e;} + + char_type + tolower (char_type c) const {return c;} + + const char_type* + tolower (char_type*, const char_type* e) const {return e;} + + char_type + widen (char c) const {return char_type (c);} + + const char* + widen (const char*, const char*, char_type*) const; + + char + narrow (char_type c, char def) const + { + return c.type () == line_type::special ? c.special () : def; + } + + const char_type* + narrow (const char_type*, const char_type*, char, char*) const; + }; + + // Note: the current application locale must be the POSIX one. Otherwise the + // behavior is undefined. + // + template <> + class regex_traits<build2::test::script::regex::line_char> + { + public: + using char_type = build2::test::script::regex::line_char; + using string_type = build2::test::script::regex::line_string; + using locale_type = build2::test::script::regex::line_char_locale; + using char_class_type = regex_traits<char>::char_class_type; + + // Workaround for msvcrt bugs. For some reason it assumes such members + // to be present in a regex_traits specialization. + // +#if defined(_MSC_VER) && _MSC_VER < 2000 + static const ctype_base::mask _Ch_upper = ctype_base::upper; + static const ctype_base::mask _Ch_alpha = ctype_base::alpha; + + // Unsigned char_type. msvcrt statically asserts the _Uelem type is + // unsigned, so we specialize is_unsigned<line_char> as well (see below). + // + using _Uelem = char_type; +#endif + + regex_traits () = default; // Unnecessary but let's keep for completeness. 
+ + static size_t + length (const char_type* p) {return string_type::traits_type::length (p);} + + char_type + translate (char_type c) const {return c;} + + // Case-insensitive matching is not supported by line_regex. So there is no + // reason for the function to be called. + // + char_type + translate_nocase (char_type c) const {assert (false); return c;} + + // Return a sort-key - the exact copy of [b, e). + // + template <typename I> + string_type + transform (I b, I e) const {return string_type (b, e);} + + // Return a case-insensitive sort-key. Case-insensitive matching is not + // supported by line_regex. So there is no reason for the function to be + // called. + // + template <typename I> + string_type + transform_primary (I b, I e) const + { + assert (false); + return string_type (b, e); + } + + // POSIX regex grammar and collating elements (e.g., [.tilde.]) in + // particular are not supported. So there is no reason for the function to + // be called. + // + template <typename I> + string_type + lookup_collatename (I, I) const {assert (false); return string_type ();} + + // Character classes (e.g., [:lower:]) are not supported. So there is no + // reason for the function to be called. + // + template <typename I> + char_class_type + lookup_classname (I, I, bool = false) const + { + assert (false); + return char_class_type (); + } + + // Return false as we don't support character classes (e.g., [:lower:]). + // + bool + isctype (char_type, char_class_type) const {return false;} + + int + value (char_type, int) const; + + // Return the locale passed as an argument as we do not expect anything + // other than POSIX locale, that we also assume to be imbued by default. + // + locale_type + imbue (locale_type l) {return l;} + + locale_type + getloc () const {return locale_type ();} + }; + + // We assume line_char to be an unsigned type and express that with the + // following specializations used by basic_regex implementations. 
+ // + // libstdc++ defines unsigned CharT type (regex_traits template parameter) + // to use as an index in some internal cache regardless if the cache is used + // for this specialization (and the cache is used only if CharT is char). + // + template <> + struct make_unsigned<build2::test::script::regex::line_char> + { + using type = build2::test::script::regex::line_char; + }; + + // msvcrt assumes regex_traits<line_char>::_Uelem to be present (see above) + // and statically asserts it is unsigned. + // + // And starting from VC 16.1, is_unsigned_v is not implemented in terms of + // is_unsigned so we have to get deeper into the implementation details. + // +#if defined(_MSC_VER) && _MSC_VER >= 1921 + template <> + struct _Sign_base<build2::test::script::regex::line_char, false> + { + static constexpr bool _Signed = false; + static constexpr bool _Unsigned = true; + }; +#else + template <> + struct is_unsigned<build2::test::script::regex::line_char> + { + static const bool value = true; + }; +#endif + + // When used with libc++ the linker complains that it can't find + // __match_any_but_newline<line_char>::__exec() function. The problem is + // that the function is only specialized for char and wchar_t + // (LLVM bug #31409). As line_char has no notion of the newline character we + // specialize the class template to behave as the __match_any<line_char> + // instantiation does (that luckily has all the functions in place). 
+ // +#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 8000 + template <> + class __match_any_but_newline<build2::test::script::regex::line_char> + : public __match_any<build2::test::script::regex::line_char> + { + public: + using base = __match_any<build2::test::script::regex::line_char>; + using base::base; + }; +#endif +} + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + class line_regex: public std::basic_regex<line_char> + { + public: + using base_type = std::basic_regex<line_char>; + + using base_type::base_type; + + line_regex () = default; + + // Move string regex together with the pool used to create it. + // + line_regex (line_string&& s, line_pool&& p) + // No move-string ctor for base_type, so emulate it. + // + : base_type (s), pool (move (p)) {s.clear ();} + + // Move constructible/assignable-only type. + // + line_regex (line_regex&&) = default; + line_regex (const line_regex&) = delete; + line_regex& operator= (line_regex&&) = default; + line_regex& operator= (const line_regex&) = delete; + + public: + line_pool pool; + }; + } + } + } +} + +#include <libbuild2/test/script/regex.ixx> + +#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX diff --git a/libbuild2/test/script/regex.ixx b/libbuild2/test/script/regex.ixx new file mode 100644 index 0000000..c5b638e --- /dev/null +++ b/libbuild2/test/script/regex.ixx @@ -0,0 +1,35 @@ +// file : libbuild2/test/script/regex.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace test + { + namespace script + { + namespace regex + { + inline char_flags + operator&= (char_flags& x, char_flags y) + { + return x = static_cast<char_flags> ( + static_cast<uint16_t> (x) & static_cast<uint16_t> (y)); + } + + inline char_flags + operator|= (char_flags& x, char_flags y) + { + return x = static_cast<char_flags> ( + static_cast<uint16_t> (x) | static_cast<uint16_t> (y)); + } + + inline 
char_flags + operator& (char_flags x, char_flags y) {return x &= y;} + + inline char_flags + operator| (char_flags x, char_flags y) {return x |= y;} + } + } + } +} diff --git a/libbuild2/test/script/regex.test.cxx b/libbuild2/test/script/regex.test.cxx new file mode 100644 index 0000000..f205154 --- /dev/null +++ b/libbuild2/test/script/regex.test.cxx @@ -0,0 +1,302 @@ +// file : libbuild2/test/script/regex.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <regex> +#include <type_traits> // is_pod, is_array + +#include <libbuild2/test/script/regex.hxx> + +using namespace std; +using namespace build2::test::script::regex; + +int +main () +{ + using lc = line_char; + using ls = line_string; + using lr = line_regex; + using cf = char_flags; + using cr = char_regex; + + init (); // Initializes the testscript regex global state. + + // Test line_char. + // + { + static_assert (is_pod<lc>::value && !is_array<lc>::value, + "line_char must be char-like"); + + // Zero-initialized line_char should be the null-char as required by + // char_traits<>::length() specification. + // + assert (lc () == lc::nul); + + line_pool p; + + assert (lc::eof == -1); + assert (lc::nul == 0); + + enum meta {mn = 'n', mp = 'p'}; + + // Special roundtrip. + // + assert (lc ('0').special () == '0'); + assert (lc (0).special () == 0); + assert (lc (-1).special () == -1); + assert (lc ('p').special () == 'p'); + assert (lc (u'\u2028').special () == u'\u2028'); + + // Special comparison. 
+ // + assert (lc ('0') == lc ('0')); + assert (lc ('0') == '0'); + assert (lc ('n') == mn); + assert (mn == static_cast<meta> (lc ('n'))); + + assert (lc ('0') != lc ('1')); + assert (lc ('0') != '1'); + assert (lc ('n') != mp); + assert (lc ('0') != lc ("0", p)); + assert (lc ('0') != lc (cr ("0"), p)); + + assert (lc ('0') < lc ('1')); + assert (lc ('0') < '1'); + assert (lc ('1') < lc ("0", p)); + assert (lc ('n') < mp); + + assert (lc ('0') <= '1'); + assert (lc ('0') <= lc ('1')); + assert (lc ('n') <= mn); + assert (lc ('1') <= lc ("0", p)); + + // Literal roundtrip. + // + assert (*lc ("abc", p).literal () == "abc"); + + // Literal comparison. + // + assert (lc ("a", p) == lc ("a", p)); + assert (lc ("a", p).literal () == lc ("a", p).literal ()); + assert (char (lc ("a", p)) == '\a'); + + assert (lc ("a", p) != lc ("b", p)); + assert (!(lc ("a", p) != lc (cr ("a"), p))); + assert (lc ("a", p) != lc (cr ("b"), p)); + + assert (lc ("a", p) < lc ("b", p)); + assert (!(lc ("a", p) < lc (cr ("a"), p))); + + assert (lc ("a", p) <= lc ("b", p)); + assert (lc ("a", p) <= lc (cr ("a"), p)); + assert (lc ("a", p) < lc (cr ("c"), p)); + + // Regex roundtrip. + // + assert (regex_match ("abc", *lc (cr ("abc"), p).regex ())); + + // Regex flags. + // + // icase + // + assert (regex_match ("ABC", cr ("abc", cf::icase))); + + // idot + // + assert (!regex_match ("a", cr ("[.]", cf::idot))); + assert (!regex_match ("a", cr ("[\\.]", cf::idot))); + + assert (regex_match ("a", cr ("."))); + assert (!regex_match ("a", cr (".", cf::idot))); + assert (regex_match ("a", cr ("\\.", cf::idot))); + assert (!regex_match ("a", cr ("\\."))); + + // regex::transform() + // + // The function is static and we can't test it directly. So we will test + // it indirectly via regex matches. + // + // @@ Would be nice to somehow address the inability to test internals (not + // exposed via headers). As a part of utility library support? 
+ // + assert (regex_match (".a[.", cr (".\\.\\[[.]", cf::idot))); + assert (regex_match (".a[.", cr (".\\.\\[[\\.]", cf::idot))); + assert (!regex_match ("ba[.", cr (".\\.\\[[.]", cf::idot))); + assert (!regex_match (".a[b", cr (".\\.\\[[.]", cf::idot))); + assert (!regex_match (".a[b", cr (".\\.\\[[\\.]", cf::idot))); + + // Regex comparison. + // + assert (lc ("a", p) == lc (cr ("a|b"), p)); + assert (lc (cr ("a|b"), p) == lc ("a", p)); + } + + // Test char_traits<line_char>. + // + { + using ct = char_traits<lc>; + using vc = vector<lc>; + + lc c; + ct::assign (c, '0'); + assert (c == ct::char_type ('0')); + + assert (ct::to_char_type (c) == c); + assert (ct::to_int_type (c) == c); + + assert (ct::eq_int_type (c, c)); + assert (!ct::eq_int_type (c, lc::eof)); + + assert (ct::eof () == lc::eof); + + assert (ct::not_eof (c) == c); + assert (ct::not_eof (lc::eof) != lc::eof); + + ct::assign (&c, 1, '1'); + assert (c == ct::int_type ('1')); + + assert (ct::eq (lc ('0'), lc ('0'))); + assert (ct::lt (lc ('0'), lc ('1'))); + + vc v1 ({'0', '1', '2'}); + vc v2 (3, lc::nul); + + assert (ct::find (v1.data (), 3, '1') == v1.data () + 1); + + ct::copy (v2.data (), v1.data (), 3); + assert (v2 == v1); + + v2.push_back (lc::nul); + assert (ct::length (v2.data ()) == 3); + + // Overlapping ranges. + // + ct::move (v1.data () + 1, v1.data (), 2); + assert (v1 == vc ({'0', '0', '1'})); + + v1 = vc ({'0', '1', '2'}); + ct::move (v1.data (), v1.data () + 1, 2); + assert (v1 == vc ({'1', '2', '2'})); + } + + // Test line_char_locale and ctype<line_char> (only non-trivial functions). + // + { + using ct = ctype<lc>; + + line_char_locale l; + + // It is better not to create a facet on the stack as it is + // reference-countable. + // + const ct& t (use_facet<ct> (l)); + line_pool p; + + assert (t.is (ct::digit, '0')); + assert (!t.is (ct::digit, '?')); + assert (!t.is (ct::digit, lc ("0", p))); + + const lc chars[] = { '0', '?' 
}; + ct::mask m[2]; + + const lc* b (chars); + const lc* e (chars + 2); + + // Cast flag value to mask type and compare to mask. + // + auto fl = [] (ct::mask m, ct::mask f) {return m == f;}; + + t.is (b, e, m); + assert (fl (m[0], ct::digit) && fl (m[1], 0)); + + assert (t.scan_is (ct::digit, b, e) == b); + assert (t.scan_is (0, b, e) == b + 1); + + assert (t.scan_not (ct::digit, b, e) == b + 1); + assert (t.scan_not (0, b, e) == b); + + { + char nr[] = "0?"; + lc wd[2]; + t.widen (nr, nr + 2, wd); + assert (wd[0] == b[0] && wd[1] == b[1]); + } + + { + lc wd[] = {'0', lc ("a", p)}; + char nr[2]; + t.narrow (wd, wd + 2, '-', nr); + assert (nr[0] == '0' && nr[1] == '-'); + } + } + + // Test regex_traits<line_char>. Functions other than value() are trivial. + // + { + regex_traits<lc> t; + + const int radix[] = {8, 10}; // Radix 16 is not supported by line_char. + const char digits[] = "0123456789ABCDEF"; + + for (size_t r (0); r < 2; ++r) + { + for (int i (0); i < radix[r]; ++i) + assert (t.value (digits[i], radix[r]) == i); + } + } + + // Test line_regex construction. + // + { + line_pool p; + lr r1 ({lc ("foo", p), lc (cr ("ba(r|z)"), p)}, move (p)); + + lr r2 (move (r1)); + assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2)); + assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2)); + } + + // Test line_regex match. 
+ // + { + line_pool p; + + const lc foo ("foo", p); + const lc bar ("bar", p); + const lc baz ("baz", p); + const lc blank ("", p); + + assert (regex_match (ls ({foo, bar}), lr ({foo, bar}))); + assert (!regex_match (ls ({foo, baz}), lr ({foo, bar}))); + + assert (regex_match (ls ({bar, foo}), + lr ({'(', foo, '|', bar, ')', '+'}))); + + assert (regex_match (ls ({foo, foo, bar}), + lr ({'(', foo, ')', '\\', '1', bar}))); + + assert (regex_match (ls ({foo}), lr ({lc (cr ("fo+"), p)}))); + assert (regex_match (ls ({foo}), lr ({lc (cr (".*"), p)}))); + assert (regex_match (ls ({blank}), lr ({lc (cr (".*"), p)}))); + + assert (regex_match (ls ({blank, blank, foo}), + lr ({blank, '*', foo, blank, '*'}))); + + assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'}))); + + assert (regex_match (ls ({blank, blank}), + lr ({blank, '*', foo, '?', blank, '*'}))); + + assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'}))); + assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'}))); + + assert (regex_match (ls ({foo, foo}), + lr ({foo, '{', '1', ',', '2', '}'}))); + + assert (!regex_match (ls ({foo, foo}), + lr ({foo, '{', '3', ',', '4', '}'}))); + + assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo}))); + assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo}))); + } +} diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx new file mode 100644 index 0000000..6c1becd --- /dev/null +++ b/libbuild2/test/script/runner.cxx @@ -0,0 +1,1891 @@ +// file : libbuild2/test/script/runner.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/script/runner.hxx> + +#include <set> +#include <ios> // streamsize + +#include <libbutl/regex.mxx> +#include <libbutl/fdstream.mxx> // fdopen_mode, fdnull(), fddup() + +#include <libbuild2/variable.hxx> +#include <libbuild2/filesystem.hxx> +#include 
<libbuild2/diagnostics.hxx> + +#include <libbuild2/test/common.hxx> + +#include <libbuild2/test/script/regex.hxx> +#include <libbuild2/test/script/parser.hxx> +#include <libbuild2/test/script/builtin.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + namespace script + { + // Normalize a path. Also make the relative path absolute using the + // scope's working directory unless it is already absolute. + // + static path + normalize (path p, const scope& sp, const location& l) + { + path r (p.absolute () ? move (p) : sp.wd_path / move (p)); + + try + { + r.normalize (); + } + catch (const invalid_path& e) + { + fail (l) << "invalid file path " << e.path; + } + + return r; + } + + // Check if a path is not empty, the referenced file exists and is not + // empty. + // + static bool + non_empty (const path& p, const location& ll) + { + if (p.empty () || !exists (p)) + return false; + + try + { + ifdstream is (p); + return is.peek () != ifdstream::traits_type::eof (); + } + catch (const io_error& e) + { + // While there can be no fault of the test command being currently + // executed let's add the location anyway to ease the + // troubleshooting. And let's stick to that principle down the road. + // + fail (ll) << "unable to read " << p << ": " << e << endf; + } + } + + // If the file exists, not empty and not larger than 4KB print it to the + // diag record. The file content goes from the new line and is not + // indented. + // + static void + print_file (diag_record& d, const path& p, const location& ll) + { + if (exists (p)) + { + try + { + ifdstream is (p, ifdstream::in, ifdstream::badbit); + + if (is.peek () != ifdstream::traits_type::eof ()) + { + char buf[4096 + 1]; // Extra byte is for terminating '\0'. + + // Note that the string is always '\0'-terminated with a maximum + // sizeof (buf) - 1 bytes read. + // + is.getline (buf, sizeof (buf), '\0'); + + // Print if the file fits 4KB-size buffer. 
Note that if it + // doesn't the failbit is set. + // + if (is.eof ()) + { + // Suppress the trailing newline character as the diag record + // adds its own one on flush. + // + streamsize n (is.gcount ()); + assert (n > 0); + + // Note that if the file contains '\0' it will also be counted + // by gcount(). But even in the worst case we will stay in the + // buffer boundaries (and so not crash). + // + if (buf[n - 1] == '\n') + buf[n - 1] = '\0'; + + d << '\n' << buf; + } + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << p << ": " << e; + } + } + } + + // Print first 10 directory sub-entries to the diag record. The directory + // must exist. + // + static void + print_dir (diag_record& d, const dir_path& p, const location& ll) + { + try + { + size_t n (0); + for (const dir_entry& de: dir_iterator (p, + false /* ignore_dangling */)) + { + if (n++ < 10) + d << '\n' << (de.ltype () == entry_type::directory + ? path_cast<dir_path> (de.path ()) + : de.path ()); + } + + if (n > 10) + d << "\nand " << n - 10 << " more file(s)"; + } + catch (const system_error& e) + { + fail (ll) << "unable to iterate over " << p << ": " << e; + } + } + + // Save a string to the file. Fail if exception is thrown by underlying + // operations. + // + static void + save (const path& p, const string& s, const location& ll) + { + try + { + ofdstream os (p); + os << s; + os.close (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write " << p << ": " << e; + } + } + + // Return the value of the test.target variable. + // + static inline const target_triplet& + test_target (const script& s) + { + // @@ Would be nice to use cached value from test::common_data. + // + if (auto r = cast_null<target_triplet> (s.test_target["test.target"])) + return *r; + + // We set it to default value in init() so it can only be NULL if the + // user resets it. 
+ // + fail << "invalid test.target value" << endf; + } + + // Transform string according to here-* redirect modifiers from the {/} + // set. + // + static string + transform (const string& s, + bool regex, + const string& modifiers, + const script& scr) + { + if (modifiers.find ('/') == string::npos) + return s; + + // For targets other than Windows leave the string intact. + // + if (test_target (scr).class_ != "windows") + return s; + + // Convert forward slashes to Windows path separators (escape for + // regex). + // + string r; + for (size_t p (0);;) + { + size_t sp (s.find ('/', p)); + + if (sp != string::npos) + { + r.append (s, p, sp - p); + r.append (regex ? "\\\\" : "\\"); + p = sp + 1; + } + else + { + r.append (s, p, sp); + break; + } + } + + return r; + } + + // Check if the test command output matches the expected result (redirect + // value). Noop for redirect types other than none, here_*. + // + static bool + check_output (const path& pr, + const path& op, + const path& ip, + const redirect& rd, + const location& ll, + scope& sp, + bool diag, + const char* what) + { + auto input_info = [&ip, &ll] (diag_record& d) + { + if (non_empty (ip, ll)) + d << info << "stdin: " << ip; + }; + + auto output_info = [&what, &ll] (diag_record& d, + const path& p, + const char* prefix = "", + const char* suffix = "") + { + if (non_empty (p, ll)) + d << info << prefix << what << suffix << ": " << p; + else + d << info << prefix << what << suffix << " is empty"; + }; + + if (rd.type == redirect_type::none) + { + // Check that there is no output produced. + // + assert (!op.empty ()); + + if (!non_empty (op, ll)) + return true; + + if (diag) + { + diag_record d (error (ll)); + d << pr << " unexpectedly writes to " << what << + info << what << ": " << op; + + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + + // Fall through (to return false). 
+ // + } + else if (rd.type == redirect_type::here_str_literal || + rd.type == redirect_type::here_doc_literal || + (rd.type == redirect_type::file && + rd.file.mode == redirect_fmode::compare)) + { + // The expected output is provided as a file or as a string. Save the + // string to a file in the latter case. + // + assert (!op.empty ()); + + path eop; + + if (rd.type == redirect_type::file) + eop = normalize (rd.file.path, sp, ll); + else + { + eop = path (op + ".orig"); + save (eop, transform (rd.str, false, rd.modifiers, *sp.root), ll); + sp.clean_special (eop); + } + + // Use the diff utility for comparison. + // + path dp ("diff"); + process_path pp (run_search (dp, true)); + + cstrings args {pp.recall_string (), "-u"}; + + // Ignore Windows newline fluff if that's what we are running on. + // + if (test_target (*sp.root).class_ == "windows") + args.push_back ("--strip-trailing-cr"); + + args.push_back (eop.string ().c_str ()); + args.push_back (op.string ().c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + try + { + // Save diff's stdout to a file for troubleshooting and for the + // optional (if not too large) printing (at the end of + // diagnostics). + // + path ep (op + ".diff"); + auto_fd efd; + + try + { + efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create); + sp.clean_special (ep); + } + catch (const io_error& e) + { + fail (ll) << "unable to write " << ep << ": " << e; + } + + // Diff utility prints the differences to stdout. But for the + // user it is a part of the test failure diagnostics so let's + // redirect stdout to stderr. + // + process p (pp, args.data (), 0, 2, efd.get ()); + efd.reset (); + + if (p.wait ()) + return true; + + assert (p.exit); + const process_exit& pe (*p.exit); + + // Note that both POSIX and GNU diff report error by exiting with + // the code > 1. 
+ // + if (!pe.normal () || pe.code () > 1) + { + diag_record d (fail (ll)); + print_process (d, args); + d << " " << pe; + } + + // Output doesn't match the expected result. + // + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match expected"; + + output_info (d, op); + output_info (d, eop, "expected "); + output_info (d, ep, "", " diff"); + input_info (d); + + print_file (d, ep, ll); + } + + // Fall through (to return false). + // + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << pp << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + } + else if (rd.type == redirect_type::here_str_regex || + rd.type == redirect_type::here_doc_regex) + { + // The overall plan is: + // + // 1. Create regex line string. While creating its line characters + // transform regex lines according to the redirect modifiers. + // + // 2. Create line regex using the line string. If creation fails + // then save the (transformed) regex redirect to a file for + // troubleshooting. + // + // 3. Parse the output into the literal line string. + // + // 4. Match the output line string with the line regex. + // + // 5. If match fails save the (transformed) regex redirect to a file + // for troubleshooting. + // + using namespace regex; + + assert (!op.empty ()); + + // Create regex line string. + // + line_pool pool; + line_string rls; + const regex_lines rl (rd.regex); + + // Parse regex flags. + // + // When adding support for new flags don't forget to update + // parse_regex(). + // + auto parse_flags = [] (const string& f) -> char_flags + { + char_flags r (char_flags::none); + + for (char c: f) + { + switch (c) + { + case 'd': r |= char_flags::idot; break; + case 'i': r |= char_flags::icase; break; + default: assert (false); // Error so should have been checked. + } + } + + return r; + }; + + // Return original regex line with the transformation applied. 
+ // + auto line = [&rl, &rd, &sp] (const regex_line& l) -> string + { + string r; + if (l.regex) // Regex (possibly empty). + { + r += rl.intro; + r += transform (l.value, true, rd.modifiers, *sp.root); + r += rl.intro; + r += l.flags; + } + else if (!l.special.empty ()) // Special literal. + r += rl.intro; + else // Textual literal. + r += transform (l.value, false, rd.modifiers, *sp.root); + + r += l.special; + return r; + }; + + // Return regex line location. + // + // Note that we rely on the fact that the command and regex lines + // always belong to the same testscript file. + // + auto loc = [&ll] (uint64_t line, uint64_t column) -> location + { + location r (ll); + r.line = line; + r.column = column; + return r; + }; + + // Save the regex to file for troubleshooting, return the file path + // it has been saved to. + // + // Note that we save the regex on line regex creation failure or if + // the program output doesn't match. + // + auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path + { + path rp (op + ".regex"); + + // Encode here-document regex global flags if present as a file + // name suffix. For example if icase and idot flags are specified + // the name will look like: + // + // test/1/stdout.regex-di + // + if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ()) + rp += '-' + rl.flags; + + // Note that it would be more efficient to directly write chunks + // to file rather than to compose a string first. However we don't + // bother (about performance) for the sake of the code as we + // already failed. + // + string s; + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) s += '\n'; + s += line (*i); + } + + save (rp, s, ll); + return rp; + }; + + // Finally create regex line string. + // + // Note that diagnostics doesn't refer to the program path as it is + // irrelevant to failures at this stage. + // + char_flags gf (parse_flags (rl.flags)); // Regex global flags. 
+ + for (const auto& l: rl.lines) + { + if (l.regex) // Regex (with optional special characters). + { + line_char c; + + // Empty regex is a special case representing the blank line. + // + if (l.value.empty ()) + c = line_char ("", pool); + else + { + try + { + string s (transform (l.value, true, rd.modifiers, *sp.root)); + + c = line_char ( + char_regex (s, gf | parse_flags (l.flags)), pool); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful. + // + diag_record d (fail (loc (l.line, l.column))); + + if (rd.type == redirect_type::here_str_regex) + d << "invalid " << what << " regex redirect" << e << + info << "regex: '" << line (l) << "'"; + else + d << "invalid char-regex in " << what << " regex redirect" + << e << + info << "regex line: '" << line (l) << "'"; + + d << endf; + } + } + + rls += c; // Append blank literal or regex line char. + } + else if (!l.special.empty ()) // Special literal. + { + // Literal can not be followed by special characters in the same + // line. + // + assert (l.value.empty ()); + } + else // Textual literal. + { + // Append literal line char. + // + rls += line_char ( + transform (l.value, false, rd.modifiers, *sp.root), pool); + } + + for (char c: l.special) + { + if (line_char::syntax (c)) + rls += line_char (c); // Append special line char. + else + fail (loc (l.line, l.column)) + << "invalid syntax character '" << c << "' in " << what + << " regex redirect" << + info << "regex line: '" << line (l) << "'"; + } + } + + // Create line regex. + // + line_regex regex; + + try + { + regex = line_regex (move (rls), move (pool)); + } + catch (const regex_error& e) + { + // Note that line regex creation can not fail for here-string + // redirect as it doesn't have syntax line chars. That in + // particular means that end_line and end_column are meaningful. 
+ // + assert (rd.type == redirect_type::here_doc_regex); + + diag_record d (fail (loc (rd.end_line, rd.end_column))); + + // Print regex_error description if meaningful. + // + d << "invalid " << what << " regex redirect" << e; + + output_info (d, save_regex (), "", " regex"); + } + + // Parse the output into the literal line string. + // + line_string ls; + + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (getline() failed to extract any character). + // + // Note that newlines are treated as line-chars separators. That + // in particular means that the trailing newline produces a blank + // line-char (empty literal). Empty output produces the zero-length + // line-string. + // + // Also note that we strip the trailing CR characters (otherwise + // can mismatch when cross-test). + // + ifdstream is (op, ifdstream::in, ifdstream::badbit); + is.peek (); // Sets eofbit for an empty stream. + + while (!is.eof ()) + { + string s; + getline (is, s); + + // It is safer to strip CRs in cycle, as msvcrt unexplainably + // adds too much trailing junk to the system_error descriptions, + // and so it can appear in programs output. For example: + // + // ...: Invalid data.\r\r\n + // + // Note that our custom operator<<(ostream&, const exception&) + // removes this junk. + // + while (!s.empty () && s.back () == '\r') + s.pop_back (); + + ls += line_char (move (s), regex.pool); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << op << ": " << e; + } + + // Match the output with the regex. + // + if (regex_match (ls, regex)) // Doesn't throw. + return true; + + // Output doesn't match the regex. We save the regex to file for + // troubleshooting regardless of whether we print the diagnostics or + // not. 
+ // + path rp (save_regex ()); + + if (diag) + { + diag_record d (error (ll)); + d << pr << " " << what << " doesn't match regex"; + + output_info (d, op); + output_info (d, rp, "", " regex"); + input_info (d); + + // Print cached output. + // + print_file (d, op, ll); + } + + // Fall through (to return false). + // + } + else // Noop. + return true; + + return false; + } + + bool default_runner:: + test (scope& s) const + { + return common_.test (s.root->test_target, s.id_path); + } + + void default_runner:: + enter (scope& sp, const location&) + { + auto df = make_diag_frame ( + [&sp](const diag_record& dr) + { + // Let's not depend on how the path representation can be improved + // for readability on printing. + // + dr << info << "test id: " << sp.id_path.posix_string (); + }); + + // Scope working directory shall be empty (the script working + // directory is cleaned up by the test rule prior the script + // execution). + // + // Create the root working directory containing the .buildignore file + // to make sure that it is ignored by name patterns (see buildignore + // description for details). + // + // @@ Shouldn't we add an optional location parameter to mkdir() and + // alike utility functions so the failure message can contain + // location info? + // + fs_status<mkdir_status> r ( + sp.parent == nullptr + ? mkdir_buildignore ( + sp.wd_path, + sp.root->target_scope.root_scope ()->root_extra->buildignore_file, + 2) + : mkdir (sp.wd_path, 2)); + + if (r == mkdir_status::already_exists) + fail << "working directory " << sp.wd_path << " already exists" << + info << "are tests stomping on each other's feet?"; + + // We don't change the current directory here but indicate that the + // scope test commands will be executed in that directory. 
+ // + if (verb >= 2) + text << "cd " << sp.wd_path; + + sp.clean ({cleanup_type::always, sp.wd_path}, true); + } + + void default_runner:: + leave (scope& sp, const location& ll) + { + auto df = make_diag_frame ( + [&sp](const diag_record& dr) + { + // Let's not depend on how the path representation can be improved + // for readability on printing. + // + dr << info << "test id: " << sp.id_path.posix_string (); + }); + + // Perform registered cleanups if requested. + // + if (common_.after == output_after::clean) + { + // Note that we operate with normalized paths here. + // + // Remove special files. The order is not important as we don't + // expect directories here. + // + for (const auto& p: sp.special_cleanups) + { + // Remove the file if exists. Fail otherwise. + // + if (rmfile (p, 3) == rmfile_status::not_exist) + fail (ll) << "registered for cleanup special file " << p + << " does not exist"; + } + + // Remove files and directories in the order opposite to the order of + // cleanup registration. + // + for (const auto& c: reverse_iterate (sp.cleanups)) + { + cleanup_type t (c.type); + + // Skip whenever the path exists or not. + // + if (t == cleanup_type::never) + continue; + + const path& cp (c.path); + + // Wildcard with the last component being '***' (without trailing + // separator) matches all files and sub-directories recursively as + // well as the start directories itself. So we will recursively + // remove the directories that match the parent (for the original + // path) directory wildcard. + // + bool recursive (cp.leaf ().representation () == "***"); + const path& p (!recursive ? cp : cp.directory ()); + + // Remove files or directories using wildcard. 
+ // + if (p.string ().find_first_of ("?*") != string::npos) + { + bool removed (false); + + auto rm = [&cp, recursive, &removed, &sp, &ll] (path&& pe, + const string&, + bool interm) + { + if (!interm) + { + // While removing the entry we can get not_exist due to + // racing conditions, but that's ok if somebody did our job. + // Note that we still set the removed flag to true in this + // case. + // + removed = true; // Will be meaningless on failure. + + if (pe.to_directory ()) + { + dir_path d (path_cast<dir_path> (pe)); + + if (!recursive) + { + rmdir_status r (rmdir (d, 3)); + + if (r != rmdir_status::not_empty) + return true; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << " is not empty"; + + print_dir (dr, d, ll); + dr << info << "wildcard: '" << cp << "'"; + } + else + { + // Don't remove the working directory (it will be removed + // by the dedicated cleanup). + // + // Cast to uint16_t to avoid ambiguity with + // libbutl::rmdir_r(). + // + rmdir_status r (rmdir_r (d, + d != sp.wd_path, + static_cast<uint16_t> (3))); + + if (r != rmdir_status::not_empty) + return true; + + // The directory is unlikely to be current but let's keep + // for completeness. + // + fail (ll) << "registered for cleanup wildcard " << cp + << " matches the current directory"; + } + } + else + rmfile (pe, 3); + } + + return true; + }; + + // Note that here we rely on the fact that recursive iterating + // goes depth-first (which make sense for the cleanup). + // + try + { + // Doesn't follow symlinks. + // + path_search (p, + rm, + dir_path () /* start */, + path_match_flags::none); + } + catch (const system_error& e) + { + fail (ll) << "unable to cleanup wildcard " << cp << ": " << e; + } + + // Removal of no filesystem entries is not an error for 'maybe' + // cleanup type. + // + if (removed || t == cleanup_type::maybe) + continue; + + fail (ll) << "registered for cleanup wildcard " << cp + << " doesn't match any " + << (recursive + ? 
"path" + : p.to_directory () + ? "directory" + : "file"); + } + + // Remove the directory if exists and empty. Fail otherwise. + // Removal of non-existing directory is not an error for 'maybe' + // cleanup type. + // + if (p.to_directory ()) + { + dir_path d (path_cast<dir_path> (p)); + bool wd (d == sp.wd_path); + + // Trace the scope working directory removal with the verbosity + // level 2 (that was used for its creation). For other + // directories use level 3 (as for other cleanups). + // + int v (wd ? 2 : 3); + + // Don't remove the working directory for the recursive cleanup + // (it will be removed by the dedicated one). + // + // Note that the root working directory contains the + // .buildignore file (see above). + // + // @@ If 'd' is a file then will fail with a diagnostics having + // no location info. Probably need to add an optional location + // parameter to rmdir() function. The same problem exists for + // a file cleanup when try to rmfile() directory instead of + // file. + // + rmdir_status r ( + recursive + ? rmdir_r (d, !wd, static_cast <uint16_t> (v)) + : (wd && sp.parent == nullptr + ? rmdir_buildignore ( + d, + sp.root->target_scope.root_scope ()->root_extra->buildignore_file, + v) + : rmdir (d, v))); + + if (r == rmdir_status::success || + (r == rmdir_status::not_exist && t == cleanup_type::maybe)) + continue; + + diag_record dr (fail (ll)); + dr << "registered for cleanup directory " << d + << (r == rmdir_status::not_exist + ? " does not exist" + : !recursive + ? " is not empty" + : " is current"); + + if (r == rmdir_status::not_empty) + print_dir (dr, d, ll); + } + + // Remove the file if exists. Fail otherwise. Removal of + // non-existing file is not an error for 'maybe' cleanup type. 
+ // + if (rmfile (p, 3) == rmfile_status::not_exist && + t == cleanup_type::always) + fail (ll) << "registered for cleanup file " << p + << " does not exist"; + } + } + + // Return to the parent scope directory or to the out_base one for the + // script scope. + // + if (verb >= 2) + text << "cd " << (sp.parent != nullptr + ? sp.parent->wd_path + : sp.wd_path.directory ()); + } + + // The exit pseudo-builtin: exit the current scope successfully, or + // print the diagnostics and exit the current scope and all the outer + // scopes unsuccessfully. Always throw exit_scope exception. + // + // exit [<diagnostics>] + // + [[noreturn]] static void + exit_builtin (const strings& args, const location& ll) + { + auto i (args.begin ()); + auto e (args.end ()); + + // Process arguments. + // + // If no argument is specified, then exit successfully. Otherwise, + // print the diagnostics and exit unsuccessfully. + // + if (i == e) + throw exit_scope (true); + + const string& s (*i++); + + if (i != e) + fail (ll) << "unexpected argument '" << *i << "'"; + + error (ll) << s; + throw exit_scope (false); + } + + // The set pseudo-builtin: set variable from the stdin input. + // + // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var> + // + static void + set_builtin (scope& sp, + const strings& args, + auto_fd in, + const location& ll) + { + try + { + // Do not throw when eofbit is set (end of stream reached), and + // when failbit is set (read operation failed to extract any + // character). + // + ifdstream cin (move (in), ifdstream::badbit); + + auto i (args.begin ()); + auto e (args.end ()); + + // Process options. 
+ // + bool exact (false); + bool newline (false); + bool whitespace (false); + + for (; i != e; ++i) + { + const string& o (*i); + + if (o == "-e" || o == "--exact") + exact = true; + else if (o == "-n" || o == "--newline") + newline = true; + else if (o == "-w" || o == "--whitespace") + whitespace = true; + else + { + if (*i == "--") + ++i; + + break; + } + } + + // Process arguments. + // + if (i == e) + fail (ll) << "missing variable name"; + + const string& a (*i++); // Either attributes or variable name. + const string* ats (i == e ? nullptr : &a); + const string& vname (i == e ? a : *i++); + + if (i != e) + fail (ll) << "unexpected argument '" << *i << "'"; + + if (ats != nullptr && ats->empty ()) + fail (ll) << "empty variable attributes"; + + if (vname.empty ()) + fail (ll) << "empty variable name"; + + // Read the input. + // + cin.peek (); // Sets eofbit for an empty stream. + + names ns; + while (!cin.eof ()) + { + // Read next element that depends on the whitespace mode being + // enabled or not. For the latter case it also makes sense to strip + // the trailing CRs that can appear while cross-testing Windows + // target or as a part of msvcrt junk production (see above). + // + string s; + if (whitespace) + cin >> s; + else + { + getline (cin, s); + + while (!s.empty () && s.back () == '\r') + s.pop_back (); + } + + // If failbit is set then we read nothing into the string as eof is + // reached. That in particular means that the stream has trailing + // whitespaces (possibly including newlines) if the whitespace mode + // is enabled, or the trailing newline otherwise. If so then + // we append the "blank" to the variable value in the exact mode + // prior to bailing out. + // + if (cin.fail ()) + { + if (exact) + { + if (whitespace || newline) + ns.emplace_back (move (s)); // Reuse empty string. 
+ else if (ns.empty ()) + ns.emplace_back ("\n"); + else + ns[0].value += '\n'; + } + + break; + } + + if (whitespace || newline || ns.empty ()) + ns.emplace_back (move (s)); + else + { + ns[0].value += '\n'; + ns[0].value += s; + } + } + + cin.close (); + + // Set the variable value and attributes. Note that we need to acquire + // unique lock before potentially changing the script's variable + // pool. The obtained variable reference can safely be used with no + // locking as the variable pool is an associative container + // (underneath) and we are only adding new variables into it. + // + ulock ul (sp.root->var_pool_mutex); + const variable& var (sp.root->var_pool.insert (move (vname))); + ul.unlock (); + + value& lhs (sp.assign (var)); + + // If there are no attributes specified then the variable assignment + // is straightforward. Otherwise we will use the build2 parser helper + // function. + // + if (ats == nullptr) + lhs.assign (move (ns), &var); + else + { + // If there is an error in the attributes string, our diagnostics + // will look like this: + // + // <attributes>:1:1 error: unknown value attribute x + // testscript:10:1 info: while parsing attributes '[x]' + // + auto df = make_diag_frame ( + [ats, &ll](const diag_record& dr) + { + dr << info (ll) << "while parsing attributes '" << *ats << "'"; + }); + + parser p; + p.apply_value_attributes (&var, + lhs, + value (move (ns)), + *ats, + token_type::assign, + path ("<attributes>")); + } + } + catch (const io_error& e) + { + fail (ll) << "set: " << e; + } + } + + static bool + run_pipe (scope& sp, + command_pipe::const_iterator bc, + command_pipe::const_iterator ec, + auto_fd ifd, + size_t ci, size_t li, const location& ll, + bool diag) + { + if (bc == ec) // End of the pipeline. 
+ return true; + + // The overall plan is to run the first command in the pipe, reading + // its input from the file descriptor passed (or, for the first + // command, according to stdin redirect specification) and redirecting + // its output to the right-hand part of the pipe recursively. Fail if + // the right-hand part fails. Otherwise check the process exit code, + // match stderr (and stdout for the last command in the pipe) according + // to redirect specification(s) and fail if any of the above fails. + // + const command& c (*bc); + + // Register the command explicit cleanups. Verify that the path being + // cleaned up is a sub-path of the testscript working directory. Fail + // if this is not the case. + // + for (const auto& cl: c.cleanups) + { + const path& p (cl.path); + path np (normalize (p, sp, ll)); + + const string& ls (np.leaf ().string ()); + bool wc (ls == "*" || ls == "**" || ls == "***"); + const path& cp (wc ? np.directory () : np); + const dir_path& wd (sp.root->wd_path); + + if (!cp.sub (wd)) + fail (ll) << (wc + ? "wildcard" + : p.to_directory () + ? "directory" + : "file") + << " cleanup " << p << " is out of working directory " + << wd; + + sp.clean ({cl.type, move (np)}, false); + } + + const redirect& in (c.in.effective ()); + const redirect& out (c.out.effective ()); + const redirect& err (c.err.effective ()); + bool eq (c.exit.comparison == exit_comparison::eq); + + // If stdin file descriptor is not open then this is the first pipeline + // command. + // + bool first (ifd.get () == -1); + + command_pipe::const_iterator nc (bc + 1); + bool last (nc == ec); + + // Prior to opening file descriptors for command input/output + // redirects let's check if the command is the exit builtin. Being a + // builtin syntactically it differs from the regular ones in a number + // of ways. It doesn't communicate with standard streams, so + // redirecting them is meaningless. It may appear only as a single + // command in a pipeline. 
It doesn't return any value and stops the + // scope execution, so checking its exit status is meaningless as + // well. That all means we can short-circuit here calling the builtin + // and bailing out right after that. Checking that the user didn't + // specify any redirects or exit code check sounds like a right thing + // to do. + // + if (c.program.string () == "exit") + { + // In case the builtin is erroneously pipelined from the other + // command, we will close stdin gracefully (reading out the stream + // content), to make sure that the command doesn't print any + // unwanted diagnostics about IO operation failure. + // + // Note that dtor will ignore any errors (which is what we want). + // + ifdstream is (move (ifd), fdstream_mode::skip); + + if (!first || !last) + fail (ll) << "exit builtin must be the only pipe command"; + + if (in.type != redirect_type::none) + fail (ll) << "exit builtin stdin cannot be redirected"; + + if (out.type != redirect_type::none) + fail (ll) << "exit builtin stdout cannot be redirected"; + + if (err.type != redirect_type::none) + fail (ll) << "exit builtin stderr cannot be redirected"; + + // We can't make sure that there is not exit code check. Let's, at + // least, check that non-zero code is not expected. + // + if (eq != (c.exit.code == 0)) + fail (ll) << "exit builtin exit code cannot be non-zero"; + + exit_builtin (c.arguments, ll); // Throws exit_scope exception. + } + + // Create a unique path for a command standard stream cache file. + // + auto std_path = [&sp, &ci, &li, &ll] (const char* n) -> path + { + path p (n); + + // 0 if belongs to a single-line test scope, otherwise is the + // command line number (start from one) in the test scope. + // + if (li > 0) + p += "-" + to_string (li); + + // 0 if belongs to a single-command expression, otherwise is the + // command number (start from one) in the expression. 
+ // + // Note that the name like stdin-N can relate to N-th command of a + // single-line test or to N-th single-command line of multi-line + // test. These cases are mutually exclusive and so are unambiguous. + // + if (ci > 0) + p += "-" + to_string (ci); + + return normalize (move (p), sp, ll); + }; + + // If this is the first pipeline command, then open stdin descriptor + // according to the redirect specified. + // + path isp; + + if (!first) + assert (in.type == redirect_type::none); // No redirect expected. + else + { + // Open a file for passing to the command stdin. + // + auto open_stdin = [&isp, &ifd, &ll] () + { + assert (!isp.empty ()); + + try + { + ifd = fdopen (isp, fdopen_mode::in); + } + catch (const io_error& e) + { + fail (ll) << "unable to read " << isp << ": " << e; + } + }; + + switch (in.type) + { + case redirect_type::pass: + { + try + { + ifd = fddup (0); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate stdin: " << e; + } + + break; + } + + case redirect_type::none: + // Somehow need to make sure that the child process doesn't read + // from stdin. That is tricky to do in a portable way. Here we + // suppose that the program which (erroneously) tries to read some + // data from stdin being redirected to /dev/null fails not being + // able to read the expected data, and so the test doesn't pass + // through. + // + // @@ Obviously doesn't cover the case when the process reads + // whatever available. + // @@ Another approach could be not to redirect stdin and let the + // process to hang which can be interpreted as a test failure. + // @@ Both ways are quite ugly. Is there some better way to do + // this? + // + // Fall through. 
+ // + case redirect_type::null: + { + try + { + ifd = fdnull (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to null device: " << e; + } + + break; + } + + case redirect_type::file: + { + isp = normalize (in.file.path, sp, ll); + + open_stdin (); + break; + } + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + // We could write to the command stdin directly but instead will + // cache the data for potential troubleshooting. + // + isp = std_path ("stdin"); + + save ( + isp, transform (in.str, false, in.modifiers, *sp.root), ll); + + sp.clean_special (isp); + + open_stdin (); + break; + } + + case redirect_type::trace: + case redirect_type::merge: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + case redirect_type::here_doc_ref: assert (false); break; + } + } + + assert (ifd.get () != -1); + + // Prior to opening file descriptors for command outputs redirects + // let's check if the command is the set builtin. Being a builtin + // syntactically it differs from the regular ones in a number of ways. + // It either succeeds or terminates abnormally, so redirecting stderr + // is meaningless. It also never produces any output and may appear + // only as a terminal command in a pipeline. That means we can + // short-circuit here calling the builtin and returning right after + // that. Checking that the user didn't specify any meaningless + // redirects or exit code check sounds as a right thing to do. 
+ // + if (c.program.string () == "set") + { + if (!last) + fail (ll) << "set builtin must be the last pipe command"; + + if (out.type != redirect_type::none) + fail (ll) << "set builtin stdout cannot be redirected"; + + if (err.type != redirect_type::none) + fail (ll) << "set builtin stderr cannot be redirected"; + + if (eq != (c.exit.code == 0)) + fail (ll) << "set builtin exit code cannot be non-zero"; + + set_builtin (sp, c.arguments, move (ifd), ll); + return true; + } + + // Open a file for command output redirect if requested explicitly + // (file overwrite/append redirects) or for the purpose of the output + // validation (none, here_*, file comparison redirects), register the + // file for cleanup, return the file descriptor. Interpret trace + // redirect according to the verbosity level (as null if below 2, as + // pass otherwise). Return nullfd, standard stream descriptor duplicate + // or null-device descriptor for merge, pass or null redirects + // respectively (not opening any file). + // + auto open = [&sp, &ll, &std_path] (const redirect& r, + int dfd, + path& p) -> auto_fd + { + assert (dfd == 1 || dfd == 2); + const char* what (dfd == 1 ? "stdout" : "stderr"); + + fdopen_mode m (fdopen_mode::out | fdopen_mode::create); + + auto_fd fd; + redirect_type rt (r.type != redirect_type::trace + ? r.type + : verb < 2 + ? redirect_type::null + : redirect_type::pass); + switch (rt) + { + case redirect_type::pass: + { + try + { + fd = fddup (dfd); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << what << ": " << e; + } + + return fd; + } + + case redirect_type::null: + { + try + { + fd = fdnull (); + } + catch (const io_error& e) + { + fail (ll) << "unable to write to null device: " << e; + } + + return fd; + } + + case redirect_type::merge: + { + // Duplicate the paired file descriptor later. 
+ // + return fd; // nullfd + } + + case redirect_type::file: + { + // For the cmp mode the user-provided path refers to a content to + // match against, rather than a content to be produced (as for + // overwrite and append modes). And so for cmp mode we redirect + // the process output to a temporary file. + // + p = r.file.mode == redirect_fmode::compare + ? std_path (what) + : normalize (r.file.path, sp, ll); + + m |= r.file.mode == redirect_fmode::append + ? fdopen_mode::at_end + : fdopen_mode::truncate; + + break; + } + + case redirect_type::none: + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + p = std_path (what); + m |= fdopen_mode::truncate; + break; + } + + case redirect_type::trace: + case redirect_type::here_doc_ref: assert (false); break; + } + + try + { + fd = fdopen (p, m); + + if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) + { + if (rt == redirect_type::file) + sp.clean ({cleanup_type::always, p}, true); + else + sp.clean_special (p); + } + } + catch (const io_error& e) + { + fail (ll) << "unable to write " << p << ": " << e; + } + + return fd; + }; + + path osp; + fdpipe ofd; + + // If this is the last command in the pipeline then redirect the + // command process stdout to a file. Otherwise create a pipe and + // redirect the stdout to the write-end of the pipe. The read-end will + // be passed as stdin for the next command in the pipeline. + // + // @@ Shouldn't we allow the here-* and file output redirects for a + // command with pipelined output? Say if such redirect is present + // then the process output is redirected to a file first (as it is + // when no output pipelined), and only after the process exit code + // and the output are validated the next command in the pipeline is + // executed taking the file as an input. This could be useful for + // test failures investigation and for tests "tightening". 
+ // + if (last) + ofd.out = open (out, 1, osp); + else + { + assert (out.type == redirect_type::none); // No redirect expected. + + try + { + ofd = fdopen_pipe (); + } + catch (const io_error& e) + { + fail (ll) << "unable to open pipe: " << e; + } + } + + path esp; + auto_fd efd (open (err, 2, esp)); + + // Merge standard streams. + // + bool mo (out.type == redirect_type::merge); + if (mo || err.type == redirect_type::merge) + { + auto_fd& self (mo ? ofd.out : efd); + auto_fd& other (mo ? efd : ofd.out); + + try + { + assert (self.get () == -1 && other.get () != -1); + self = fddup (other.get ()); + } + catch (const io_error& e) + { + fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout") + << ": " << e; + } + } + + // All descriptors should be open to the date. + // + assert (ofd.out.get () != -1 && efd.get () != -1); + + optional<process_exit> exit; + builtin_func* bf (builtins.find (c.program.string ())); + + bool success; + + auto process_args = [&c] () -> cstrings + { + cstrings args {c.program.string ().c_str ()}; + + for (const auto& a: c.arguments) + args.push_back (a.c_str ()); + + args.push_back (nullptr); + return args; + }; + + if (bf != nullptr) + { + // Execute the builtin. + // + if (verb >= 2) + print_process (process_args ()); + + try + { + uint8_t r; // Storage. + builtin b ( + bf (sp, r, c.arguments, move (ifd), move (ofd.out), move (efd))); + + success = run_pipe (sp, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + exit = process_exit (b.wait ()); + } + catch (const system_error& e) + { + fail (ll) << "unable to execute " << c.program << " builtin: " + << e << endf; + } + } + else + { + // Execute the process. + // + cstrings args (process_args ()); + + // Resolve the relative not simple program path against the scope's + // working directory. The simple one will be left for the process + // path search machinery. 
+ // + path p; + + try + { + p = path (args[0]); + + if (p.relative () && !p.simple ()) + { + p = sp.wd_path / p; + args[0] = p.string ().c_str (); + } + } + catch (const invalid_path& e) + { + fail (ll) << "invalid program path " << e.path; + } + + try + { + process_path pp (process::path_search (args[0])); + + if (verb >= 2) + print_process (args); + + process pr ( + pp, + args.data (), + {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, + sp.wd_path.string ().c_str ()); + + ifd.reset (); + ofd.out.reset (); + efd.reset (); + + success = run_pipe (sp, + nc, + ec, + move (ofd.in), + ci + 1, li, ll, diag); + + pr.wait (); + + exit = move (pr.exit); + } + catch (const process_error& e) + { + error (ll) << "unable to execute " << args[0] << ": " << e; + + if (e.child) + std::exit (1); + + throw failed (); + } + } + + assert (exit); + + // If the right-hand side pipeline failed then the whole pipeline fails, + // and no further checks are required. + // + if (!success) + return false; + + const path& pr (c.program); + + // If there is no valid exit code available by whatever reason then we + // print the proper diagnostics, dump stderr (if cached and not too + // large) and fail the whole test. Otherwise if the exit code is not + // correct then we print diagnostics if requested and fail the + // pipeline. + // + bool valid (exit->normal ()); + + // On Windows the exit code can be out of the valid codes range being + // defined as uint16_t. + // +#ifdef _WIN32 + if (valid) + valid = exit->code () < 256; +#endif + + success = valid && eq == (exit->code () == c.exit.code); + + if (!valid || (!success && diag)) + { + // In the presence of a valid exit code we print the diagnostics and + // return false rather than throw. + // + diag_record d (valid ? error (ll) : fail (ll)); + + if (!exit->normal ()) + d << pr << " " << *exit; + else + { + uint16_t ec (exit->code ()); // Make sure is printed as integer. 
+ + if (!valid) + d << pr << " exit code " << ec << " out of 0-255 range"; + else if (!success) + { + if (diag) + d << pr << " exit code " << ec << (eq ? " != " : " == ") + << static_cast<uint16_t> (c.exit.code); + } + else + assert (false); + } + + if (non_empty (esp, ll)) + d << info << "stderr: " << esp; + + if (non_empty (osp, ll)) + d << info << "stdout: " << osp; + + if (non_empty (isp, ll)) + d << info << "stdin: " << isp; + + // Print cached stderr. + // + print_file (d, esp, ll); + } + + // If exit code is correct then check if the standard outputs match the + // expectations. Note that stdout is only redirected to file for the + // last command in the pipeline. + // + if (success) + success = + (!last || + check_output (pr, osp, isp, out, ll, sp, diag, "stdout")) && + check_output (pr, esp, isp, err, ll, sp, diag, "stderr"); + + return success; + } + + static bool + run_expr (scope& sp, + const command_expr& expr, + size_t li, const location& ll, + bool diag) + { + // Print test id once per test expression. + // + auto df = make_diag_frame ( + [&sp](const diag_record& dr) + { + // Let's not depend on how the path representation can be improved + // for readability on printing. + // + dr << info << "test id: " << sp.id_path.posix_string (); + }); + + // Commands are numbered sequentially throughout the expression + // starting with 1. Number 0 means the command is a single one. + // + size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1 + ? 0 + : 1); + + // If there is no ORs to the right of a pipe then the pipe failure is + // fatal for the whole expression. In particular, the pipe must print + // the diagnostics on failure (if generally allowed). So we find the + // pipe that "switches on" the diagnostics potential printing. + // + command_expr::const_iterator trailing_ands; // Undefined if diag is + // disallowed. 
+ if (diag) + { + auto i (expr.crbegin ()); + for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ; + trailing_ands = i.base (); + } + + bool r (false); + bool print (false); + + for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i) + { + if (diag && i + 1 == trailing_ands) + print = true; + + const command_pipe& p (i->pipe); + bool or_op (i->op == expr_operator::log_or); + + // Short-circuit if the pipe result must be OR-ed with true or AND-ed + // with false. + // + if (!((or_op && r) || (!or_op && !r))) + r = run_pipe ( + sp, p.begin (), p.end (), auto_fd (), ci, li, ll, print); + + ci += p.size (); + } + + return r; + } + + void default_runner:: + run (scope& sp, + const command_expr& expr, command_type ct, + size_t li, + const location& ll) + { + // Noop for teardown commands if keeping tests output is requested. + // + if (ct == command_type::teardown && + common_.after == output_after::keep) + return; + + if (verb >= 3) + { + char c ('\0'); + + switch (ct) + { + case command_type::test: c = ' '; break; + case command_type::setup: c = '+'; break; + case command_type::teardown: c = '-'; break; + } + + text << ": " << c << expr; + } + + if (!run_expr (sp, expr, li, ll, true)) + throw failed (); // Assume diagnostics is already printed. + } + + bool default_runner:: + run_if (scope& sp, + const command_expr& expr, + size_t li, const location& ll) + { + if (verb >= 3) + text << ": ?" 
<< expr; + + return run_expr (sp, expr, li, ll, false); + } + } + } +} diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx new file mode 100644 index 0000000..9a3f91f --- /dev/null +++ b/libbuild2/test/script/runner.hxx @@ -0,0 +1,101 @@ +// file : libbuild2/test/script/runner.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_RUNNER_HXX +#define LIBBUILD2_TEST_SCRIPT_RUNNER_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/test/script/script.hxx> + +namespace build2 +{ + namespace test + { + struct common; + + namespace script + { + // An exception that can be thrown by a runner to exit the scope (for + // example, as a result of executing the exit builtin). The status + // indicates whether the scope should be considered to have succeeded + // or failed. + // + struct exit_scope + { + bool status; + + explicit + exit_scope (bool s): status (s) {} + }; + + class runner + { + public: + // Return false if this test/group should be skipped. + // + virtual bool + test (scope&) const = 0; + + // Location is the scope start location (for diagnostics, etc). + // + virtual void + enter (scope&, const location&) = 0; + + // Index is the 1-base index of this command line in the command list + // (e.g., in a compound test). If it is 0 then it means there is only + // one command (e.g., a simple test). This information can be used, + // for example, to derive file names. + // + // Location is the start position of this command line in the + // testscript. It can be used in diagnostics. + // + virtual void + run (scope&, + const command_expr&, command_type, + size_t index, + const location&) = 0; + + virtual bool + run_if (scope&, const command_expr&, size_t, const location&) = 0; + + // Location is the scope end location (for diagnostics, etc). 
+ // + virtual void + leave (scope&, const location&) = 0; + }; + + class default_runner: public runner + { + public: + explicit + default_runner (const common& c): common_ (c) {} + + virtual bool + test (scope& s) const override; + + virtual void + enter (scope&, const location&) override; + + virtual void + run (scope&, + const command_expr&, command_type, + size_t, + const location&) override; + + virtual bool + run_if (scope&, const command_expr&, size_t, const location&) override; + + virtual void + leave (scope&, const location&) override; + + private: + const common& common_; + }; + } + } +} + +#endif // LIBBUILD2_TEST_SCRIPT_RUNNER_HXX diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx new file mode 100644 index 0000000..b879eb4 --- /dev/null +++ b/libbuild2/test/script/script.cxx @@ -0,0 +1,741 @@ +// file : libbuild2/test/script/script.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/script/script.hxx> + +#include <sstream> + +#include <libbuild2/target.hxx> +#include <libbuild2/algorithm.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + ostream& + operator<< (ostream& o, line_type lt) + { + const char* s (nullptr); + + switch (lt) + { + case line_type::var: s = "variable"; break; + case line_type::cmd: s = "command"; break; + case line_type::cmd_if: s = "'if'"; break; + case line_type::cmd_ifn: s = "'if!'"; break; + case line_type::cmd_elif: s = "'elif'"; break; + case line_type::cmd_elifn: s = "'elif!'"; break; + case line_type::cmd_else: s = "'else'"; break; + case line_type::cmd_end: s = "'end'"; break; + } + + return o << s; + } + + // Quote if empty or contains spaces or any of the special characters. + // Note that we use single quotes since double quotes still allow + // expansion. + // + // @@ What if it contains single quotes? 
+ // + static void + to_stream_q (ostream& o, const string& s) + { + if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) + o << '\'' << s << '\''; + else + o << s; + }; + + void + to_stream (ostream& o, const command& c, command_to_stream m) + { + auto print_path = [&o] (const path& p) + { + using build2::operator<<; + + ostringstream s; + stream_verb (s, stream_verb (o)); + s << p; + + to_stream_q (o, s.str ()); + }; + + auto print_redirect = + [&o, print_path] (const redirect& r, const char* prefix) + { + o << ' ' << prefix; + + size_t n (string::traits_type::length (prefix)); + assert (n > 0); + + char d (prefix[n - 1]); // Redirect direction. + + switch (r.type) + { + case redirect_type::none: assert (false); break; + case redirect_type::pass: o << '|'; break; + case redirect_type::null: o << '-'; break; + case redirect_type::trace: o << '!'; break; + case redirect_type::merge: o << '&' << r.fd; break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + bool doc (r.type == redirect_type::here_doc_literal); + + // For here-document add another '>' or '<'. Note that here end + // marker never needs to be quoted. + // + if (doc) + o << d; + + o << r.modifiers; + + if (doc) + o << r.end; + else + { + const string& v (r.str); + to_stream_q (o, + r.modifiers.find (':') == string::npos + ? string (v, 0, v.size () - 1) // Strip newline. + : v); + } + + break; + } + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + bool doc (r.type == redirect_type::here_doc_regex); + + // For here-document add another '>' or '<'. Note that here end + // marker never needs to be quoted. + // + if (doc) + o << d; + + o << r.modifiers; + + const regex_lines& re (r.regex); + + if (doc) + o << re.intro + r.end + re.intro + re.flags; + else + { + assert (!re.lines.empty ()); // Regex can't be empty. 
+ + regex_line l (re.lines[0]); + to_stream_q (o, re.intro + l.value + re.intro + l.flags); + } + + break; + } + + case redirect_type::file: + { + // For stdin or stdout-comparison redirect add '>>' or '<<' (and + // so make it '<<<' or '>>>'). Otherwise add '+' or '=' (and so + // make it '>+' or '>='). + // + if (d == '<' || r.file.mode == redirect_fmode::compare) + o << d << d; + else + o << (r.file.mode == redirect_fmode::append ? '+' : '='); + + print_path (r.file.path); + break; + } + + case redirect_type::here_doc_ref: assert (false); break; + } + }; + + auto print_doc = [&o] (const redirect& r) + { + o << endl; + + if (r.type == redirect_type::here_doc_literal) + o << r.str; + else + { + assert (r.type == redirect_type::here_doc_regex); + + const regex_lines& rl (r.regex); + + for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); + i != e; ++i) + { + if (i != b) + o << endl; + + const regex_line& l (*i); + + if (l.regex) // Regex (possibly empty), + o << rl.intro << l.value << rl.intro << l.flags; + else if (!l.special.empty ()) // Special literal. + o << rl.intro; + else // Textual literal. + o << l.value; + + o << l.special; + } + } + + o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end; + }; + + if ((m & command_to_stream::header) == command_to_stream::header) + { + // Program. + // + to_stream_q (o, c.program.string ()); + + // Arguments. + // + for (const string& a: c.arguments) + { + o << ' '; + to_stream_q (o, a); + } + + // Redirects. + // + if (c.in.effective ().type != redirect_type::none) + print_redirect (c.in.effective (), "<"); + + if (c.out.effective ().type != redirect_type::none) + print_redirect (c.out.effective (), ">"); + + if (c.err.effective ().type != redirect_type::none) + print_redirect (c.err.effective (), "2>"); + + for (const auto& p: c.cleanups) + { + o << " &"; + + if (p.type != cleanup_type::always) + o << (p.type == cleanup_type::maybe ? '?' 
: '!'); + + print_path (p.path); + } + + if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0) + { + switch (c.exit.comparison) + { + case exit_comparison::eq: o << " == "; break; + case exit_comparison::ne: o << " != "; break; + } + + o << static_cast<uint16_t> (c.exit.code); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + // Here-documents. + // + if (c.in.type == redirect_type::here_doc_literal || + c.in.type == redirect_type::here_doc_regex) + print_doc (c.in); + + if (c.out.type == redirect_type::here_doc_literal || + c.out.type == redirect_type::here_doc_regex) + print_doc (c.out); + + if (c.err.type == redirect_type::here_doc_literal || + c.err.type == redirect_type::here_doc_regex) + print_doc (c.err); + } + } + + void + to_stream (ostream& o, const command_pipe& p, command_to_stream m) + { + if ((m & command_to_stream::header) == command_to_stream::header) + { + for (auto b (p.begin ()), i (b); i != p.end (); ++i) + { + if (i != b) + o << " | "; + + to_stream (o, *i, command_to_stream::header); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + for (const command& c: p) + to_stream (o, c, command_to_stream::here_doc); + } + } + + void + to_stream (ostream& o, const command_expr& e, command_to_stream m) + { + if ((m & command_to_stream::header) == command_to_stream::header) + { + for (auto b (e.begin ()), i (b); i != e.end (); ++i) + { + if (i != b) + { + switch (i->op) + { + case expr_operator::log_or: o << " || "; break; + case expr_operator::log_and: o << " && "; break; + } + } + + to_stream (o, i->pipe, command_to_stream::header); + } + } + + if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) + { + for (const expr_term& t: e) + to_stream (o, t.pipe, command_to_stream::here_doc); + } + } + + // redirect + // + redirect:: + redirect (redirect_type t) + : type (t) + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case 
redirect_type::null: + case redirect_type::trace: + case redirect_type::merge: break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: new (&str) string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + new (&regex) regex_lines (); + break; + } + + case redirect_type::file: new (&file) file_type (); break; + + case redirect_type::here_doc_ref: assert (false); break; + } + } + + redirect:: + redirect (redirect&& r) + : type (r.type), + modifiers (move (r.modifiers)), + end (move (r.end)), + end_line (r.end_line), + end_column (r.end_column) + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: break; + + case redirect_type::merge: fd = r.fd; break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: + { + new (&str) string (move (r.str)); + break; + } + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: + { + new (&regex) regex_lines (move (r.regex)); + break; + } + case redirect_type::file: + { + new (&file) file_type (move (r.file)); + break; + } + case redirect_type::here_doc_ref: + { + new (&ref) reference_wrapper<const redirect> (r.ref); + break; + } + } + } + + redirect:: + ~redirect () + { + switch (type) + { + case redirect_type::none: + case redirect_type::pass: + case redirect_type::null: + case redirect_type::trace: + case redirect_type::merge: break; + + case redirect_type::here_str_literal: + case redirect_type::here_doc_literal: str.~string (); break; + + case redirect_type::here_str_regex: + case redirect_type::here_doc_regex: regex.~regex_lines (); break; + + case redirect_type::file: file.~file_type (); break; + + case redirect_type::here_doc_ref: + { + ref.~reference_wrapper<const redirect> (); + break; + } + } + } + + redirect& redirect:: + operator= (redirect&& r) + { + if (this != &r) + { + this->~redirect (); + new (this) redirect (move 
(r)); // Assume noexcept move-constructor. + } + return *this; + } + + // scope + // + scope:: + scope (const string& id, scope* p, script* r) + : parent (p), + root (r), + vars (false /* global */), + id_path (cast<path> (assign (root->id_var) = path ())), + wd_path (cast<dir_path> (assign (root->wd_var) = dir_path ())) + + { + // Construct the id_path as a string to ensure POSIX form. In fact, + // the only reason we keep it as a path is to be able to easily get id + // by calling leaf(). + // + { + string s (p != nullptr ? p->id_path.string () : string ()); + + if (!s.empty () && !id.empty ()) + s += '/'; + + s += id; + const_cast<path&> (id_path) = path (move (s)); + } + + // Calculate the working directory path unless this is the root scope + // (handled in an ad hoc way). + // + if (p != nullptr) + const_cast<dir_path&> (wd_path) = dir_path (p->wd_path) /= id; + } + + void scope:: + clean (cleanup c, bool implicit) + { + using std::find; // Hidden by scope::find(). + + assert (!implicit || c.type == cleanup_type::always); + + const path& p (c.path); + if (!p.sub (root->wd_path)) + { + if (implicit) + return; + else + assert (false); // Error so should have been checked. + } + + auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;}; + auto i (find_if (cleanups.begin (), cleanups.end (), pr)); + + if (i == cleanups.end ()) + cleanups.emplace_back (move (c)); + else if (!implicit) + i->type = c.type; + } + + void scope:: + clean_special (path p) + { + special_cleanups.emplace_back (move (p)); + } + + // script_base + // + script_base:: + script_base () + : // Enter the test.* variables with the same variable types as in + // buildfiles except for test: while in buildfiles it can be a + // target name, in testscripts it should be resolved to a path. + // + // Note: entering in a custom variable pool. 
+ // + test_var (var_pool.insert<path> ("test")), + options_var (var_pool.insert<strings> ("test.options")), + arguments_var (var_pool.insert<strings> ("test.arguments")), + redirects_var (var_pool.insert<strings> ("test.redirects")), + cleanups_var (var_pool.insert<strings> ("test.cleanups")), + + wd_var (var_pool.insert<dir_path> ("~")), + id_var (var_pool.insert<path> ("@")), + cmd_var (var_pool.insert<strings> ("*")), + cmdN_var { + &var_pool.insert<path> ("0"), + &var_pool.insert<string> ("1"), + &var_pool.insert<string> ("2"), + &var_pool.insert<string> ("3"), + &var_pool.insert<string> ("4"), + &var_pool.insert<string> ("5"), + &var_pool.insert<string> ("6"), + &var_pool.insert<string> ("7"), + &var_pool.insert<string> ("8"), + &var_pool.insert<string> ("9")} {} + + // script + // + script:: + script (const target& tt, + const testscript& st, + const dir_path& rwd) + : group (st.name == "testscript" ? string () : st.name, this), + test_target (tt), + target_scope (tt.base_scope ()), + script_target (st) + { + // Set the script working dir ($~) to $out_base/test/<id> (id_path + // for root is just the id which is empty if st is 'testscript'). + // + const_cast<dir_path&> (wd_path) = dir_path (rwd) /= id_path.string (); + + // Set the test variable at the script level. We do it even if it's + // set in the buildfile since they use different types. + // + { + value& v (assign (test_var)); + + // Note that the test variable's visibility is target. + // + lookup l (find_in_buildfile ("test", false)); + + // Note that we have similar code for simple tests. + // + const target* t (nullptr); + + if (l.defined ()) + { + const name* n (cast_null<name> (l)); + + if (n == nullptr) + v = nullptr; + else if (n->empty ()) + v = path (); + else if (n->simple ()) + { + // Ignore the special 'true' value. + // + if (n->value != "true") + v = path (n->value); + else + t = &tt; + } + else if (n->directory ()) + v = path (n->dir); + else + { + // Must be a target name. 
+ // + // @@ OUT: what if this is a @-qualified pair of names? + // + t = search_existing (*n, target_scope); + + if (t == nullptr) + fail << "unknown target '" << *n << "' in test variable"; + } + } + else + // By default we set it to the test target's path. + // + t = &tt; + + // If this is a path-based target, then we use the path. If this + // is an alias target (e.g., dir{}), then we use the directory + // path. Otherwise, we leave it NULL expecting the testscript to + // set it to something appropriate, if used. + // + if (t != nullptr) + { + if (auto* pt = t->is_a<path_target> ()) + { + // Do some sanity checks: the target better be up-to-date with + // an assigned path. + // + v = pt->path (); + + if (v.empty ()) + fail << "target " << *pt << " specified in the test variable " + << "is out of date" << + info << "consider specifying it as a prerequisite of " << tt; + } + else if (t->is_a<alias> ()) + v = path (t->dir); + else if (t != &tt) + fail << "target " << *t << " specified in the test variable " + << "is not path-based"; + } + } + + // Set the special $*, $N variables. + // + reset_special (); + } + + lookup scope:: + find (const variable& var) const + { + // Search script scopes until we hit the root. + // + const scope* s (this); + + do + { + auto p (s->vars.find (var)); + if (p.first != nullptr) + return lookup (*p.first, p.second, s->vars); + } + while ((s->parent != nullptr ? (s = s->parent) : nullptr) != nullptr); + + return find_in_buildfile (var.name); + } + + + lookup scope:: + find_in_buildfile (const string& n, bool target_only) const + { + // Switch to the corresponding buildfile variable. Note that we don't + // want to insert a new variable into the pool (we might be running + // in parallel). Plus, if there is no such variable, then we cannot + // possibly find any value. 
+ // + const variable* pvar (build2::var_pool.find (n)); + + if (pvar == nullptr) + return lookup (); + + const script& s (static_cast<const script&> (*root)); + const variable& var (*pvar); + + // First check the target we are testing. + // + { + // Note that we skip applying the override if we did not find any + // value. In this case, presumably the override also affects the + // script target and we will pick it up there. A bit fuzzy. + // + auto p (s.test_target.find_original (var, target_only)); + + if (p.first) + { + if (var.overrides != nullptr) + p = s.target_scope.find_override (var, move (p), true); + + return p.first; + } + } + + // Then the script target followed by the scopes it is in. Note that + // while unlikely it is possible the test and script targets will be + // in different scopes which brings the question of which scopes we + // should search. + // + return s.script_target[var]; + } + + value& scope:: + append (const variable& var) + { + lookup l (find (var)); + + if (l.defined () && l.belongs (*this)) // Existing var in this scope. + return vars.modify (l); + + value& r (assign (var)); // NULL. + + if (l.defined ()) + r = *l; // Copy value (and type) from the outer scope. + + return r; + } + + void scope:: + reset_special () + { + // First assemble the $* value. + // + strings s; + + auto append = [&s] (const strings& v) + { + s.insert (s.end (), v.begin (), v.end ()); + }; + + if (lookup l = find (root->test_var)) + s.push_back (cast<path> (l).representation ()); + + if (lookup l = find (root->options_var)) + append (cast<strings> (l)); + + if (lookup l = find (root->arguments_var)) + append (cast<strings> (l)); + + // Keep redirects/cleanups out of $N. + // + size_t n (s.size ()); + + if (lookup l = find (root->redirects_var)) + append (cast<strings> (l)); + + if (lookup l = find (root->cleanups_var)) + append (cast<strings> (l)); + + // Set the $N values if present. 
+ // + for (size_t i (0); i <= 9; ++i) + { + value& v (assign (*root->cmdN_var[i])); + + if (i < n) + { + if (i == 0) + v = path (s[i]); + else + v = s[i]; + } + else + v = nullptr; // Clear any old values. + } + + // Set $*. + // + assign (root->cmd_var) = move (s); + } + } + } +} diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx new file mode 100644 index 0000000..e3f8251 --- /dev/null +++ b/libbuild2/test/script/script.hxx @@ -0,0 +1,559 @@ +// file : libbuild2/test/script/script.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX +#define LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX + +#include <set> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/variable.hxx> + +#include <libbuild2/test/target.hxx> + +#include <libbuild2/test/script/token.hxx> // replay_tokens + +namespace build2 +{ + class target; + + namespace test + { + namespace script + { + class parser; // Required by VC for 'friend class parser' declaration. + + // Pre-parse representation. + // + + enum class line_type + { + var, + cmd, + cmd_if, + cmd_ifn, + cmd_elif, + cmd_elifn, + cmd_else, + cmd_end + }; + + ostream& + operator<< (ostream&, line_type); + + struct line + { + line_type type; + replay_tokens tokens; + + union + { + const variable* var; // Pre-entered for line_type::var. + }; + }; + + // Most of the time we will have just one line (test command). + // + using lines = small_vector<line, 1>; + + // Parse object model. + // + + // redirect + // + enum class redirect_type + { + none, + pass, + null, + trace, + merge, + here_str_literal, + here_str_regex, + here_doc_literal, + here_doc_regex, + here_doc_ref, // Reference to here_doc literal or regex. + file, + }; + + // Pre-parsed (but not instantiated) regex lines. 
The idea here is that + // we should be able to re-create their (more or less) exact text + // representation for diagnostics but also instantiate without any + // re-parsing. + // + struct regex_line + { + // If regex is true, then value is the regex expression. Otherwise, it + // is a literal. Note that special characters can be present in both + // cases. For example, //+ is a regex, while /+ is a literal, both + // with '+' as a special character. Flags are only valid for regex. + // Literals fall into textual (no special characters) and + // special (only special characters) ones. For example + // foo is a textual literal, while /.+ is a special one. Note that + // a literal must not have value and special both non-empty. + // + bool regex; + + string value; + string flags; + string special; + + uint64_t line; + uint64_t column; + + // Create regex with optional special characters. + // + regex_line (uint64_t l, uint64_t c, + string v, string f, string s = string ()) + : regex (true), + value (move (v)), + flags (move (f)), + special (move (s)), + line (l), + column (c) {} + + // Create a literal, either text or special. + // + regex_line (uint64_t l, uint64_t c, string v, bool s) + : regex (false), + value (s ? string () : move (v)), + special (s ? move (v) : string ()), + line (l), + column (c) {} + }; + + struct regex_lines + { + char intro; // Introducer character. + string flags; // Global flags (here-document). + + small_vector<regex_line, 8> lines; + }; + + // Output file redirect mode. + // + enum class redirect_fmode + { + compare, + overwrite, + append + }; + + struct redirect + { + redirect_type type; + + struct file_type + { + using path_type = build2::path; + path_type path; + redirect_fmode mode; // Meaningless for input redirect. + }; + + union + { + int fd; // Merge-to descriptor. + string str; // Note: with trailing newline, if requested. + regex_lines regex; // Note: with trailing blank, if requested. 
+ file_type file; + reference_wrapper<const redirect> ref; // Note: no chains. + }; + + string modifiers; // Redirect modifiers. + string end; // Here-document end marker (no regex intro/flags). + uint64_t end_line; // Here-document end marker location. + uint64_t end_column; + + // Create redirect of a type other than reference. + // + explicit + redirect (redirect_type = redirect_type::none); + + // Create redirect of the reference type. + // + redirect (redirect_type t, const redirect& r) + : type (redirect_type::here_doc_ref), ref (r) + { + // There is no support (and need) for reference chains. + // + assert (t == redirect_type::here_doc_ref && + r.type != redirect_type::here_doc_ref); + } + + // Move constructible/assignable-only type. + // + redirect (redirect&&); + redirect& operator= (redirect&&); + + ~redirect (); + + const redirect& + effective () const noexcept + { + return type == redirect_type::here_doc_ref ? ref.get () : *this; + } + }; + + // cleanup + // + enum class cleanup_type + { + always, // &foo - cleanup, fail if does not exist. + maybe, // &?foo - cleanup, ignore if does not exist. + never // &!foo - don’t cleanup, ignore if doesn’t exist. + }; + + // File or directory to be automatically cleaned up at the end of the + // scope. If the path ends with a trailing slash, then it is assumed to + // be a directory, otherwise -- a file. A directory that is about to be + // cleaned up must be empty. 
+ // + // The last component in the path may contain a wildcard that has the + // following semantics: + // + // dir/* - remove all immediate files + // dir/*/ - remove all immediate sub-directories (must be empty) + // dir/** - remove all files recursively + // dir/**/ - remove all sub-directories recursively (must be empty) + // dir/*** - remove directory dir with all files and sub-directories + // recursively + // + struct cleanup + { + cleanup_type type; + build2::path path; + }; + using cleanups = vector<cleanup>; + + // command_exit + // + enum class exit_comparison {eq, ne}; + + struct command_exit + { + // C/C++ don't apply constraints on program exit code other than it + // being of type int. + // + // POSIX specifies that only the least significant 8 bits shall be + // available from wait() and waitpid(); the full value shall be + // available from waitid() (read more at _Exit, _exit Open Group + // spec). + // + // While the Linux man page for waitid() doesn't mention any + // deviations from the standard, the FreeBSD implementation (as of + // version 11.0) only returns 8 bits like the other wait*() calls. + // + // Windows supports 32-bit exit codes. + // + // Note that in shells some exit values can have special meaning so + // using them can be a source of confusion. For bash, values in the + // [126, 255] range are such special ones (see Appendix E, "Exit + // Codes With Special Meanings" in the Advanced Bash-Scripting Guide). + // + exit_comparison comparison; + uint8_t code; + }; + + // command + // + struct command + { + path program; + strings arguments; + + redirect in; + redirect out; + redirect err; + + script::cleanups cleanups; + + command_exit exit {exit_comparison::eq, 0}; + }; + + enum class command_to_stream: uint16_t + { + header = 0x01, + here_doc = 0x02, // Note: printed on a new line. 
+ all = header | here_doc + }; + + void + to_stream (ostream&, const command&, command_to_stream); + + ostream& + operator<< (ostream&, const command&); + + // command_pipe + // + using command_pipe = vector<command>; + + void + to_stream (ostream&, const command_pipe&, command_to_stream); + + ostream& + operator<< (ostream&, const command_pipe&); + + // command_expr + // + enum class expr_operator {log_or, log_and}; + + struct expr_term + { + expr_operator op; // OR-ed to an implied false for the first term. + command_pipe pipe; + }; + + using command_expr = vector<expr_term>; + + void + to_stream (ostream&, const command_expr&, command_to_stream); + + ostream& + operator<< (ostream&, const command_expr&); + + // command_type + // + enum class command_type {test, setup, teardown}; + + // description + // + struct description + { + string id; + string summary; + string details; + + bool + empty () const + { + return id.empty () && summary.empty () && details.empty (); + } + }; + + // scope + // + class script; + + enum class scope_state {unknown, passed, failed}; + + class scope + { + public: + scope* const parent; // NULL for the root (script) scope. + script* const root; // Self for the root (script) scope. + + // The chain of if-else scope alternatives. See also if_cond_ below. + // + unique_ptr<scope> if_chain; + + // Note that if we pass the variable name as a string, then it will + // be looked up in the wrong pool. + // + variable_map vars; + + const path& id_path; // Id path ($@, relative in POSIX form). + const dir_path& wd_path; // Working dir ($~, absolute and normalized). + + optional<description> desc; + + scope_state state = scope_state::unknown; + test::script::cleanups cleanups; + paths special_cleanups; + + // Variables. 
+ // + public: + // Lookup the variable starting from this scope, continuing with outer + // scopes, then the target being tested, then the testscript target, + // and then outer buildfile scopes (including testscript-type/pattern + // specific). + // + lookup + find (const variable&) const; + + // As above but only look for buildfile variables. If target_only is + // false then also look in scopes of the test target (this should only + // be done if the variable's visibility is target). + // + lookup + find_in_buildfile (const string&, bool target_only = true) const; + + // Return a value suitable for assignment. If the variable does not + // exist in this scope's map, then a new one with the NULL value is + // added and returned. Otherwise the existing value is returned. + // + value& + assign (const variable& var) {return vars.assign (var);} + + // Return a value suitable for append/prepend. If the variable does + // not exist in this scope's map, then outer scopes are searched for + // the same variable. If found then a new variable with the found + // value is added to this scope and returned. Otherwise this function + // proceeds as assign() above. + // + value& + append (const variable&); + + // Reset special $*, $N variables based on the test.* values. + // + void + reset_special (); + + // Cleanup. + // + public: + // Register a cleanup. If the cleanup is explicit, then override the + // cleanup type if this path is already registered. Ignore implicit + // registration of a path outside script working directory. + // + void + clean (cleanup, bool implicit); + + // Register cleanup of a special file. Such files are created to + // maintain testscript machinery and must be removed first, not to + // interfere with the user-defined wildcard cleanups. + // + void + clean_special (path p); + + public: + virtual + ~scope () = default; + + protected: + scope (const string& id, scope* parent, script* root); + + // Pre-parse data. 
+ // + public: + virtual bool + empty () const = 0; + + protected: + friend class parser; + + location start_loc_; + location end_loc_; + + optional<line> if_cond_; + }; + + // group + // + class group: public scope + { + public: + vector<unique_ptr<scope>> scopes; + + public: + group (const string& id, group& p): scope (id, &p, p.root) {} + + protected: + group (const string& id, script* r): scope (id, nullptr, r) {} + + // Pre-parse data. + // + public: + virtual bool + empty () const override + { + return + !if_cond_ && // The condition expression can have side-effects. + setup_.empty () && + tdown_.empty () && + find_if (scopes.begin (), scopes.end (), + [] (const unique_ptr<scope>& s) + { + return !s->empty (); + }) == scopes.end (); + } + + private: + friend class parser; + + lines setup_; + lines tdown_; + }; + + // test + // + class test: public scope + { + public: + test (const string& id, group& p): scope (id, &p, p.root) {} + + // Pre-parse data. + // + public: + virtual bool + empty () const override + { + return tests_.empty (); + } + + private: + friend class parser; + + lines tests_; + }; + + // script + // + class script_base // Make sure certain things are initialized early. 
+ { + protected: + script_base (); + + public: + variable_pool var_pool; + mutable shared_mutex var_pool_mutex; + + const variable& test_var; // test + const variable& options_var; // test.options + const variable& arguments_var; // test.arguments + const variable& redirects_var; // test.redirects + const variable& cleanups_var; // test.cleanups + + const variable& wd_var; // $~ + const variable& id_var; // $@ + const variable& cmd_var; // $* + const variable* cmdN_var[10]; // $N + }; + + class script: public script_base, public group + { + public: + script (const target& test_target, + const testscript& script_target, + const dir_path& root_wd); + + script (script&&) = delete; + script (const script&) = delete; + script& operator= (script&&) = delete; + script& operator= (const script&) = delete; + + public: + const target& test_target; // Target we are testing. + const build2::scope& target_scope; // Base scope of test target. + const testscript& script_target; // Target of the testscript file. + + // Pre-parse data. + // + private: + friend class parser; + + // Testscript file paths. Specifically, replay_token::file points to + // these paths. 
+ // + std::set<path> paths_; + }; + } + } +} + +#include <libbuild2/test/script/script.ixx> + +#endif // LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX diff --git a/libbuild2/test/script/script.ixx b/libbuild2/test/script/script.ixx new file mode 100644 index 0000000..d4a216a --- /dev/null +++ b/libbuild2/test/script/script.ixx @@ -0,0 +1,60 @@ +// file : libbuild2/test/script/script.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace test + { + namespace script + { + inline command_to_stream + operator&= (command_to_stream& x, command_to_stream y) + { + return x = static_cast<command_to_stream> ( + static_cast<uint16_t> (x) & static_cast<uint16_t> (y)); + } + + inline command_to_stream + operator|= (command_to_stream& x, command_to_stream y) + { + return x = static_cast<command_to_stream> ( + static_cast<uint16_t> (x) | static_cast<uint16_t> (y)); + } + + inline command_to_stream + operator& (command_to_stream x, command_to_stream y) {return x &= y;} + + inline command_to_stream + operator| (command_to_stream x, command_to_stream y) {return x |= y;} + + + // command + // + inline ostream& + operator<< (ostream& o, const command& c) + { + to_stream (o, c, command_to_stream::all); + return o; + } + + // command_pipe + // + inline ostream& + operator<< (ostream& o, const command_pipe& p) + { + to_stream (o, p, command_to_stream::all); + return o; + } + + // command_expr + // + inline ostream& + operator<< (ostream& o, const command_expr& e) + { + to_stream (o, e, command_to_stream::all); + return o; + } + } + } +} diff --git a/libbuild2/test/script/token.cxx b/libbuild2/test/script/token.cxx new file mode 100644 index 0000000..e38e227 --- /dev/null +++ b/libbuild2/test/script/token.cxx @@ -0,0 +1,57 @@ +// file : libbuild2/test/script/token.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include 
<libbuild2/test/script/token.hxx> + +using namespace std; + +namespace build2 +{ + namespace test + { + namespace script + { + void + token_printer (ostream& os, const token& t, bool d) + { + const string& v (t.value); + + // Only quote non-name tokens for diagnostics. + // + const char* q (d ? "'" : ""); + + switch (t.type) + { + case token_type::semi: os << q << ';' << q; break; + + case token_type::dot: os << q << '.' << q; break; + + case token_type::plus: os << q << '+' << q; break; + case token_type::minus: os << q << '-' << q; break; + + case token_type::clean: os << q << '&' << v << q; break; + case token_type::pipe: os << q << '|' << q; break; + + case token_type::in_pass: os << q << "<|" << q; break; + case token_type::in_null: os << q << "<-" << q; break; + case token_type::in_str: os << q << '<' << v << q; break; + case token_type::in_doc: os << q << "<<" << v << q; break; + case token_type::in_file: os << q << "<<<" << q; break; + + case token_type::out_pass: os << q << ">|" << q; break; + case token_type::out_null: os << q << ">-" << q; break; + case token_type::out_trace: os << q << ">!" 
<< q; break; + case token_type::out_merge: os << q << ">&" << q; break; + case token_type::out_str: os << q << '>' << v << q; break; + case token_type::out_doc: os << q << ">>" << v << q; break; + case token_type::out_file_cmp: os << q << ">>>" << v << q; break; + case token_type::out_file_ovr: os << q << ">=" << v << q; break; + case token_type::out_file_app: os << q << ">+" << v << q; break; + + default: build2::token_printer (os, t, d); + } + } + } + } +} diff --git a/libbuild2/test/script/token.hxx b/libbuild2/test/script/token.hxx new file mode 100644 index 0000000..4abe617 --- /dev/null +++ b/libbuild2/test/script/token.hxx @@ -0,0 +1,65 @@ +// file : libbuild2/test/script/token.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_SCRIPT_TOKEN_HXX +#define LIBBUILD2_TEST_SCRIPT_TOKEN_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/token.hxx> + +namespace build2 +{ + namespace test + { + namespace script + { + struct token_type: build2::token_type + { + using base_type = build2::token_type; + + enum + { + // NOTE: remember to update token_printer()! + + semi = base_type::value_next, // ; + + dot, // . + + plus, // + + minus, // - + + pipe, // | + clean, // &{?!} (modifiers in value) + + in_pass, // <| + in_null, // <- + in_str, // <{:} (modifiers in value) + in_doc, // <<{:} (modifiers in value) + in_file, // <<< + + out_pass, // >| + out_null, // >- + out_trace, // >! 
+ out_merge, // >& + out_str, // >{:~} (modifiers in value) + out_doc, // >>{:~} (modifiers in value) + out_file_cmp, // >>> + out_file_ovr, // >= + out_file_app // >+ + }; + + token_type () = default; + token_type (value_type v): base_type (v) {} + token_type (base_type v): base_type (v) {} + }; + + void + token_printer (ostream&, const token&, bool); + } + } +} + +#endif // LIBBUILD2_TEST_SCRIPT_TOKEN_HXX diff --git a/libbuild2/test/target.cxx b/libbuild2/test/target.cxx new file mode 100644 index 0000000..2707a89 --- /dev/null +++ b/libbuild2/test/target.cxx @@ -0,0 +1,63 @@ +// file : libbuild2/test/target.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/test/target.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace test + { + static const char* + testscript_target_extension (const target_key& tk, const scope*) + { + // If the name is special 'testscript', then there is no extension, + // otherwise it is .testscript. + // + return *tk.name == "testscript" ? 
"" : "testscript"; + } + + static bool + testscript_target_pattern (const target_type&, + const scope&, + string& v, + optional<string>& e, + const location& l, + bool r) + { + if (r) + { + assert (e); + e = nullopt; + } + else + { + e = target::split_name (v, l); + + if (!e && v != "testscript") + { + e = "testscript"; + return true; + } + } + + return false; + } + + const target_type testscript::static_type + { + "testscript", + &file::static_type, + &target_factory<testscript>, + &testscript_target_extension, + nullptr, /* default_extension */ + &testscript_target_pattern, + nullptr, + &file_search, + false + }; + } +} diff --git a/libbuild2/test/target.hxx b/libbuild2/test/target.hxx new file mode 100644 index 0000000..f633adf --- /dev/null +++ b/libbuild2/test/target.hxx @@ -0,0 +1,31 @@ +// file : libbuild2/test/target.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_TEST_TARGET_HXX +#define LIBBUILD2_TEST_TARGET_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> + +#include <libbuild2/export.hxx> + +namespace build2 +{ + namespace test + { + class LIBBUILD2_SYMEXPORT testscript: public file + { + public: + using file::file; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + } +} + +#endif // LIBBUILD2_TEST_TARGET_HXX |