// file : libbuild2/config/init.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/config/init.hxx> #include <sstream> #include <cstdlib> // getenv() #include <libbuild2/file.hxx> #include <libbuild2/rule.hxx> #include <libbuild2/lexer.hxx> #include <libbuild2/scope.hxx> #include <libbuild2/context.hxx> #include <libbuild2/function.hxx> #include <libbuild2/filesystem.hxx> // exists() #include <libbuild2/diagnostics.hxx> #include <libbuild2/config/module.hxx> #include <libbuild2/config/utility.hxx> #include <libbuild2/config/operation.hxx> using namespace std; using namespace butl; namespace build2 { namespace config { void functions (function_map&); // functions.cxx // Custom save function for config.config.environment. // // It tries to optimize the storage in subprojects by appending the // difference (compared to the amalgamation's values) instead of storing // the entire values. // static pair<names_view, const char*> save_environment (const value& d, const value* b, names& storage) { if (b == nullptr) return make_pair (reverse (d, storage), "="); // The plan is to iterator over environment variables adding those that // are not in base to storage. There is, however, a complication: we may // see multiple entries for the same variable and only the last entry // should have effect. So we need to iterate in reverse and check if // we've already seen this variable. // const strings& ds (d.as<strings> ()); const strings& bs (b->as<strings> ()); for (auto i (ds.rbegin ()), e (ds.rend ()); i != e; ++i) { // Note that we must only consider variable names (up to first '=' if // any). // const string& v (*i); size_t p (v.find ('=')); // Check if we have already seen this variable. // if (find_if (ds.rbegin (), i, [&v, p] (const string& v1) { return saved_environment::compare ( v, v1, p, v1.find ('=')); }) != i) continue; // Check if there is the same value in base. // auto j (find_if (bs.rbegin (), bs.rend (), [&v, p] (const string& v1) { return saved_environment::compare ( v, v1, p, v1.find ('=')); })); if (j == bs.rend () || *j != v) storage.push_back (name (v)); } return make_pair (names_view (storage), "+="); } void boot (scope& rs, const location&, module_boot_extra& extra) { tracer trace ("config::boot"); context& ctx (rs.ctx); l5 ([&]{trace << "for " << rs;}); // Note that the config.<name>* variables belong to the module/project // <name>. So the only "special" variables we can allocate in config.** // are config.config.**, names that have been "gifted" to us by other // modules (like config.version below) as well as names that we have // reserved to not be valid module names (`build`). We also currently // treat `import` as special. // // All the variables we enter are qualified so go straight for the // public variable pool. // auto& vp (rs.var_pool (true /* public */)); // NOTE: all config.** variables are by default made (via a pattern) to // be overridable with global visibility. So we must override this if a // different semantics is required. // const auto v_p (variable_visibility::project); // While config.config.load (see below) could theoretically be specified // in a buildfile, config.config.save is expected to always be specified // as a command line override. // // Note: must be entered during bootstrap since we need it in // configure_execute() even for the forward case. // vp.insert<path> ("config.config.save", true /* ovr */); // Configuration variables persistence mode. // // By default a config.* variable is saved in the config.build file if // (1) it is explicitly marked as persistent with save_variable() and // (2) it is not inherited from an amalgamation that also saves this // variable (however, there are some exception; see save_config() for // details). If the first condition is not met, then the variable is // presumed to be no longer used. // // The config.config.persist can be used to adjust this default logic. // It contains a list of key-value pairs with the key being a variable // name pattern and the value specifying the condition/action: // // <pair> = <pattern>@<condition>=<action> // <condition> = unused|inherited|inherited-used|inherited-unused // <action> = (save|drop)[+warn] // // The last pattern and condition that matches is used (we may want to // change this to more specific pattern later). // // Note that support for inherited conditions is still a @@ TODO. // // The create meta-operation by default (i.e., unless a custom value is // specified) saves unused config.import.* variables without a warning // (since there is no way to "use" such variables in a configuration). // // Note that variable patterns must be quoted, for example: // // b "config.config.persist='config.*'@unused=save+warn" // // Use the NULL value to clear. // // Note: must be entered during bootstrap since we need it in create's // save_config(). // vp.insert<vector<pair<string, string>>> ( "config.config.persist", true /* ovr */, v_p); // Only create the module if we are configuring, creating, or // disfiguring or if it was requested with config.config.module (useful // if we need to call $config.save() during other meta-operations). // // Detecting the former (configure/disfigure/creating) is a bit tricky // since the build2 core may not yet know if this is the case. But we // know. // auto& c_m (vp.insert<bool> ("config.config.module", false /*ovr*/, v_p)); bool d; if ((d = ctx.bootstrap_meta_operation ("disfigure")) || ctx.bootstrap_meta_operation ("configure") || ctx.bootstrap_meta_operation ("create") || cast_false<bool> (rs.vars[c_m])) { auto& m (extra.set_module (new module)); if (!d) { // Adjust priority for the config module and import pseudo-module so // that their variables come first in config.build. // m.save_module ("config", INT32_MIN); m.save_module ("import", INT32_MIN); } } // Register the config function family if this is the first instance of // the config module. // if (!function_family::defined (ctx.functions, "config")) functions (ctx.functions); // Register meta-operations. Note that we don't register create_id // since it will be pre-processed into configure. // rs.insert_meta_operation (configure_id, mo_configure); rs.insert_meta_operation (disfigure_id, mo_disfigure); // Initialize first (load config.build). // extra.init = module_boot_init::before_first; } // host-config.cxx.in // #ifndef BUILD2_BOOTSTRAP extern const char host_config[]; extern const char build2_config[]; #endif bool init (scope& rs, scope&, const location& l, bool first, bool, module_init_extra& extra) { tracer trace ("config::init"); if (!first) { warn (l) << "multiple config module initializations"; return true; } context& ctx (rs.ctx); l5 ([&]{trace << "for " << rs;}); // If we have the module, then we are configuring (or the project wishes // to call $config.save(); we don't get here on disfigure). // module* m (extra.module != nullptr ? &extra.module_as<module> () : nullptr); auto& vp (rs.var_pool (true /* public */)); // Note: config.* is pattern-typed to global visibility. // const auto v_p (variable_visibility::project); auto& c_v (vp.insert<uint64_t> ("config.version", false /*ovr*/, v_p)); auto& c_l (vp.insert<paths> ("config.config.load", true /* ovr */)); // Omit loading the configuration from the config.build file (it is // still loaded from config.config.load if specified). Similar to // config.config.load, only values specified on this project's root // scope and global scope are considered. // // Note that this variable is not saved in config.build and is expected // to always be specified as a command line override. // auto& c_u (vp.insert<bool> ("config.config.unload", true /*ovr*/)); // Configuration variables to disfigure. // // The exact semantics is to ignore these variables when loading // config.build (and any files specified in config.config.load), letting // them to take on the default values (more precisely, the current // implementation undefined them after loading config.build). See also // config.config.unload. // // Besides names, variables can also be specified as patterns in the // config.<prefix>.(*|**)[<suffix>] form where `*` matches single // component names (i.e., `foo` but not `foo.bar`), and `**` matches // single and multi-component names. Currently only single wildcard (`*` // or `**`) is supported. Additionally, a pattern in the // config.<prefix>(*|**) form (i.e., without `.` after <prefix>) matches // config.<prefix>.(*|**) plus config.<prefix> itself (but not // config.<prefix>foo). // // For example, to disfigure all the project configuration variables // (while preserving all the module configuration variables; note // quoting to prevent pattern expansion): // // b config.config.disfigure="'config.hello**'" // // Note that this variable is not saved in config.build and is expected // to always be specified as a command line override. // // We also had the idea of using NULL values as a more natural way to // undefine a configuration variable, which would only work for non- // nullable variables (such as project configuration variables) or for // those where NULL is the default value (most of the others). However, // this cannot work in our model since we cannot reset a NULL override // to a default value. So setting the variable itself to some special // value does not seem to be an option and we have to convey this in // some other way, such as in config.config.disfigure. Another idea is // to invent a parallel set of variables, such as disfig.*, that can be // used for that (though they would still have to be specified with some // dummy value, for example disfig.hello.fancy=). On the other hand, // this desire to disfigure individual variables does not seem to be // very common (we lived without it for years without noticing). So // it's not clear we need to do something like disfig.* which has a // wiff of hack to it. // auto& c_d (vp.insert<strings> ("config.config.disfigure", true /*ovr*/)); // Hermetic configurations. // // A hermetic configuration stores environment variables that affect the // project in config.config.environment. // // Note that this is essentially a tri-state value: true means keep // hermetizing (save the environment in config.config.environment), // false means keep de-hermetizing (clear config.config.environment) and // undefined/NULL means don't touch config.config.environment. // // During reconfiguration things stay hermetic unless re-hermetization // is explicitly requested with config.config.hermetic.reload=true (or // de-hermetization is requested with config.config.hermetic=false). // // Use the NULL value to clear. // auto& c_h (vp.insert<bool> ("config.config.hermetic", true /* ovr */)); if (m != nullptr) m->save_variable (c_h, save_null_omitted); // Request hermetic configuration re-hermetization. // // Note that this variable is not saved in config.build and is expected // to always be specified as a command line override. // auto& c_h_r ( vp.insert<bool> ("config.config.hermetic.reload", true /* ovr */)); // Hermetic configuration environment variables inclusion/exclusion. // // This configuration variable can be used to include additional or // exclude existing environment variables into/from the list that should // be saved in order to make the configuration hermetic. For example: // // config.config.hermetic.environment="LANG PATH@false" // // Use the NULL or empty value to clear. // auto& c_h_e ( vp.insert<hermetic_environment> ("config.config.hermetic.environment")); if (m != nullptr) m->save_variable (c_h_e, save_null_omitted | save_empty_omitted); // Configuration environment variables. // // Environment variables used by tools (e.g., compilers), buildfiles // (e.g., $getenv()), and the build system itself (e.g., to locate // tools) in ways that affect the build result are in essence part of // the project configuration. // // This variable allows storing environment variable overrides that // should be applied to the environment when executing tools, etc., as // part of a project build. Specifically, it contains a list of // environment variable "sets" (<name>=<value>) and "unsets" (<name>). // If multiple entries are specified for the same environment variable, // the last entry has effect. For example: // // config.config.environment="LC_ALL=C LANG" // // Note that a subproject inherits overrides from its amalgamation (this // semantics is the result of the way we optimize the storage of this // variable in subproject's config.build; the thinking is that if a // variable is not overridden by the subproject then it doesn't affect // the build result and therefore it's irrelevant whether it has a value // that came from the original environment of from the amalgamation // override). // // Use the NULL or empty value to clear. // // @@ We could use =<name> as a "pass-through" instruction (e.g., if // we need to use original value in subproject). // auto& c_e (vp.insert<strings> ("config.config.environment", true /* ovr */)); if (m != nullptr) m->save_variable (c_e, save_null_omitted | save_empty_omitted | save_base, &save_environment); // Load config.build if one exists (and unless config.config.unload is // specified) followed by extra files specified in config.config.load // (we don't need to worry about disfigure since we will never be // init'ed). // auto load_config = [&rs, &c_v] (istream& is, const path_name& in, const location& l) { // Check the config version. We assume that old versions cannot // understand new configs and new versions are incompatible with old // configs. // // We extract the value manually instead of loading and then checking // in order to be able to fixup/migrate the file which we may want to // do in the future. // // This is tricky for stdin since we cannot reopen it (or put more // than one character back). So what we are going to do is continue // reading after extracting the variable. One side effect of this is // that we won't have the config.version variable entered in the scope // but that is harmless (we could do it manually if necessary though // it's not clear which it should be if we load multiple files). // lexer lex (is, in); // Assume missing version is 0. // optional<value> ov (extract_variable (rs.ctx, lex, c_v)); uint64_t v (ov ? cast<uint64_t> (*ov) : 0); if (v != module::version) fail (l) << "incompatible config file " << in << info << "config file version " << v << (ov ? "" : " (missing)") << info << "config module version " << module::version << info << "consider reconfiguring " << project (rs) << '@' << rs.out_path (); // Treat it as continuation of bootstrap to avoid project switching // (see switch_scope() for details). // source (rs, rs, lex, load_stage::boot); }; auto load_config_file = [&load_config] (const path& f, const location& l) { path_name fn (f); ifdstream ifs; load_config (open_file_or_stdin (fn, ifs), fn, l); }; // Load config.build unless requested not to. // { // The same semantics as in config.config.load below. // bool u; { lookup l (rs[c_u]); u = (l && (l.belongs (rs) || l.belongs (ctx.global_scope)) && cast_false<bool> (l)); } if (!u) { path f (config_file (rs)); if (exists (f)) load_config_file (f, l); } } if (lookup l = rs[c_l]) { // Only load files that were specified on our root scope as well as // global overrides. This way we can use our override "positioning" // machinery (i.e., where the override applies) to decide where the // extra config is loaded. The resulting semantics feels quite natural // and consistent with command line variable overrides: // // b config.config.load=.../config.build # outermost amalgamation // b ./config.config.load=.../config.build # this project // b !config.config.load=.../config.build # every project // if (l.belongs (rs) || l.belongs (ctx.global_scope)) { for (const path& f: cast<paths> (l)) { location l (f); const string& s (f.string ()); if (s[0] != '~') load_config_file (f, l); else if (s == "~host" || s == "~build2") { #ifdef BUILD2_BOOTSTRAP assert (false); #else istringstream is (s[1] == 'h' ? host_config : build2_config); load_config (is, path_name (s), l); #endif } else fail << "unknown special configuration name '" << s << "' in " << "config.config.load"; } } } // Undefine variables specified with config.config.disfigure. // if (const strings* ns = cast_null<strings> (rs[c_d])) { auto p (rs.vars.lookup_namespace ("config")); for (auto i (p.first); i != p.second; ) { const variable& var (i->first); // This can be one of the overrides (__override, __prefix, etc), // which we skip. // if (!var.override ()) { bool m (false); for (const string& n: *ns) { if (n.compare (0, 7, "config.") != 0) fail << "config.* variable expected in " << "config.config.disfigure instead of '" << n << "'"; size_t p (n.find ('*')); if (p == string::npos) { if ((m = var.name == n)) break; } else { // Pattern in one of these forms: // // config.<prefix>.(*|**)[<suffix>] // config.<prefix>(*|**) // // BTW, an alternative way to handle this would be to // translate it to a path and use our path_match() machinery, // similar to how we do it for build config include/exclude. // Perhaps one day when/if we decide to support multiple // wildcards. // if (p == 7) fail << "config.<prefix>* pattern expected in " << "config.config.disfigure instead of '" << n << "'"; bool r (n[p + 1] == '*'); // Recursive. size_t pe; // Prefix end/size. if (n[p - 1] != '.') { // Second form should have no suffix. // if (p + (r ? 2 : 1) != n.size ()) fail << "config.<prefix>(*|**) pattern expected in " << "config.config.disfigure instead of '" << n << "'"; // Match just <prefix>. // if ((m = n.compare (0, p, var.name) == 0)) break; pe = p; } else pe = p - 1; // Match <prefix> followed by `.`. // if (n.compare (0, pe, var.name, 0, pe) != 0 || var.name[pe] != '.') continue; // Match suffix. // size_t sb (p + (r ? 2 : 1)); // Suffix begin. size_t sn (n.size () - sb); // Suffix size. size_t te; // Stem end. if (sn == 0) // No suffix. te = var.name.size (); else { if (var.name.size () < pe + 1 + sn) // Too short. continue; te = var.name.size () - sn; if (n.compare (sb, sn, var.name, te, sn) != 0) continue; } // Match stem. // if ((m = r || var.name.find ('.', pe + 1) >= te)) break; } } if (m) { i = rs.vars.erase (i); // Undefine. continue; } } ++i; } } // Save and cache the config.config.persist value, if any. // if (m != nullptr) { auto& c_p (*vp.find ("config.config.persist")); m->save_variable (c_p, save_null_omitted); m->persist = cast_null<vector<pair<string, string>>> (rs[c_p]); } // If we are configuring, handle config.config.hermetic. // // The overall plan is to either clear config.config.environment (if // c.c.h=false) or populate it with the values that affect this project // (if c.c.h=true). We have to do it half here (because c.c.e is used as // a source for the project environment and we would naturally want the // semantics to be equivalent to what will be saved in config.build) and // half in configure_execute() (because that's where we have the final // list of all the environment variables we need to save). // // So here we must deal with the cases where the current c.c.e value // will be changed: either cleared (c.c.h=false) or set to new values // from the "outer" environment (c.c.h.reload=true). Note also that even // then a c.c.e value from an amalgamation, if any, should be in effect. // if (ctx.current_mif->id == configure_id && (!cast_true<bool> (rs[c_h]) || // c.c.h=false cast_false<bool> (rs[c_h_r]))) // c.c.h.r=true { rs.vars.erase (c_e); // Undefine. } // Copy config.config.environment to scope::root_extra::environment and // calculate its checksum. // // Note that we store shallow copies that point to the c.c.environment // value which means it should not change. // if (const strings* src = cast_null<strings> (rs[c_e])) { sha256 cs; vector<const char*>& dst (rs.root_extra->environment); // The idea is to only copy entries that are effective, that is those // that actually override something in the environment. This should be // both more efficient and less noisy (e.g., if we need to print this // in diagnostics). // // Note that config.config.environment may contain duplicates and the // last entry should have effect. // // Note also that we use std::getenv() instead of butl::getenv() to // disregard any thread environment overrides. // for (auto i (src->rbegin ()), e (src->rend ()); i != e; ++i) { // Note that we must only consider variable names (up to first '=' // if any). // const string& v (*i); size_t p (v.find ('=')); // Check if we have already seen this variable. // if (find_if (src->rbegin (), i, [&v, p] (const string& v1) { return saved_environment::compare ( v, v1, p, v1.find ('=')); }) != i) continue; // If it's an unset, see if it actually unsets anything. // if (p == string::npos) { if (std::getenv (v.c_str ()) == nullptr) continue; } // // And if it's a set, see if it sets a different value. // else { const char* v1 (std::getenv (string (v, 0, p).c_str ())); if (v1 != nullptr && v.compare (p + 1, string::npos, v1) == 0) continue; } dst.push_back (v.c_str ()); cs.append (v); } if (!dst.empty ()) { dst.push_back (nullptr); rs.root_extra->environment_checksum = cs.string (); } } // Register alias and fallback rule for the configure meta-operation. // // We need this rule for out-of-any-project dependencies (for example, // libraries imported from /usr/lib). We are registering it on the // global scope similar to builtin rules. // // See a similar rule in the dist module. // rs.global_scope ().insert_rule<mtime_target> ( configure_id, 0, "config.file", file_rule::instance); rs.insert_rule<alias> (configure_id, 0, "config.alias", alias_rule::instance); // This allows a custom configure rule while doing nothing by default. // rs.insert_rule<target> (configure_id, 0, "config.noop", noop_rule::instance); rs.insert_rule<file> (configure_id, 0, "config.noop", noop_rule::instance); return true; } static const module_functions mod_functions[] = { {"config", &boot, &init}, {nullptr, nullptr, nullptr} }; const module_functions* build2_config_load () { // Initialize the config entry points in the build system core. // config_save_variable = &module::save_variable; config_save_environment = &module::save_environment; config_save_module = &module::save_module; config_preprocess_create = &preprocess_create; config_configure_post = &module::configure_post; config_disfigure_pre = &module::disfigure_pre; return mod_functions; } } }