diff options
-rw-r--r-- | doc/manual.cli | 328 | ||||
-rw-r--r-- | libbuild2/cc/compile-rule.cxx | 65 | ||||
-rw-r--r-- | libbuild2/cc/compiledb.cxx | 1099 | ||||
-rw-r--r-- | libbuild2/cc/compiledb.hxx | 226 | ||||
-rw-r--r-- | libbuild2/cc/init.cxx | 519 | ||||
-rw-r--r-- | libbuild2/cc/module.cxx | 9 | ||||
-rw-r--r-- | libbuild2/cc/module.hxx | 34 | ||||
-rw-r--r-- | libbuild2/cc/pkgconfig.cxx | 3 | ||||
-rw-r--r-- | libbuild2/context.hxx | 49 | ||||
-rw-r--r-- | libbuild2/dump.cxx | 3 | ||||
-rw-r--r-- | libbuild2/module.cxx | 2 | ||||
-rw-r--r-- | libbuild2/name.hxx | 2 | ||||
-rw-r--r-- | libbuild2/variable.cxx | 5 | ||||
-rw-r--r-- | libbuild2/variable.hxx | 3 |
14 files changed, 2283 insertions, 64 deletions
diff --git a/doc/manual.cli b/doc/manual.cli index 07d816a..03fa04a 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -2093,7 +2093,7 @@ If we forget to adjust the \c{missing-name} test, then this is what we could expect to see when running the tests: \ -b test +$ b test c++ hello/cxx{hello} -> hello/obje{hello} ld hello/exe{hello} test hello/exe{hello} + hello/testscript{testscript} @@ -6700,8 +6700,8 @@ quickly re-run a previously failed test), it can also be persisted in subset of tests by default. For example: \ -b test config.test=foo/exe{driver} # Only test foo/exe{driver} target. -b test config.test=bar/baz # Only run bar/baz testscript test. +$ b test config.test=foo/exe{driver} # Only test foo/exe{driver} target. +$ b test config.test=bar/baz # Only run bar/baz testscript test. \ The \c{config.test} variable contains a list of \c{@}-separated pairs with the @@ -6712,14 +6712,14 @@ name. Otherwise \- an id path. The targets are resolved relative to the root scope where the \c{config.test} value is set. For example: \ -b test config.test=foo/exe{driver}@bar +$ b test config.test=foo/exe{driver}@bar \ To specify multiple id paths for the same target we can use the pair generation syntax: \ -b test config.test=foo/exe{driver}@{bar baz} +$ b test config.test=foo/exe{driver}@{bar baz} \ If no targets are specified (only id paths), then all the targets are tested @@ -6741,9 +6741,9 @@ and the right hand side \- for individual tests. The zero value clears the previously set timeout. For example: \ -b test config.test.timeout=20 # Test operation. -b test config.test.timeout=20/5 # Test operation and individual tests. -b test config.test.timeout=/5 # Individual tests. +$ b test config.test.timeout=20 # Test operation. +$ b test config.test.timeout=20/5 # Test operation and individual tests. +$ b test config.test.timeout=/5 # Individual tests. \ The test timeout can be specified on multiple nested root scopes. For example, @@ -6759,7 +6759,7 @@ specifying the \c{config.test.runner} variable. Its value has the \c{<path> [<options>]} form. For example: \ -b test config.test.runner=\"valgrind -q\" +$ b test config.test.runner=\"valgrind -q\" \ When the runner program is specified, commands of simple and Testscript tests @@ -7648,6 +7648,12 @@ config.cc.reprocess cc.reprocess config.cc.pkgconfig.sysroot + +config.cc.compiledb +config.cc.compiledb.name +config.cc.compiledb.filter +config.cc.compiledb.filter.input +config.cc.compiledb.filter.output \ Note that the compiler mode options are \"cross-hinted\" between \c{config.c} @@ -8054,6 +8060,310 @@ As a result, it should only be used for dealing with issues in third-party installation} should be used instead.| +\h#cc-compiledb|Compilation Database| + +The \c{cc}-based modules provide support for generating and maintaining the +\l{https://clang.llvm.org/docs/JSONCompilationDatabase.html JSON Compilation +Database} which can be used by other tools (static analyzers, language +servers, IDEs, etc) to understand how a codebase is compiled. \"Maintaining\" +in the previous sentence means that if new source files get added to the +project or old ones removed, or if any compilation options change, then the +corresponding entries in the compilation database will be automatically +updated when you update your project. This helps maintain the database in sync +with the project state. + +The generation of compilation databases and their configuration are controlled +with a number of \c{config.cc.compiledb.*} variables. The +\c{config.cc.compiledb} variable provides a simplified interface that enables +the generation of one database per project with the resulting database +containing entries for all the source and object files. The rest of the +variables provide a more flexible interface that allows you to generate +multiple databases in different locations as well as filter the entries that +end up in each database. + +Let's start with the simplified interface as provided by +\c{config.cc.compiledb}. The value of this configuration variable is a single +\ci{name} or a \ci{name} and \ci{path} pair in the \c{\i{name}[@\i{path}]} +form. + +The \ci{name} part is the compilation database name that can be used to refer +to it in filters (see below). If \ci{path} is absent or is (syntactically) a +directory, then \ci{name} is also used to derive the compilation database file +by appending the \c{.json} extension to it. + +If \ci{path} is absent, then the compilation database is placed into the +top-level amalgamation that loads any \c{cc}-based module. Otherwise, the +database is placed into the specified location. + +The special \c{-} name is interpreted as an instruction to dump the database +to \c{stdout}. + +Let's see some examples of using \c{config.cc.compiledb} to handle a few +common scenarios. Here we will use \l{bdep(1)} to create amalgamations +(configurations) and configure (initialize) one or more projects. We will +assume we have \c{hello} and \c{libhello} as if created like this: + +\ +$ bdep new -t exe hello +$ bdep new -t lib libhello +\ + +The most common scenario is likely having a compilation database per +project: + +\ +$ cd libhello +$ bdep config create ../build-gcc @gcc cc config.cxx=g++ +$ bdep init @gcc config.cc.compiledb=libhello +$ cd .. + +$ cd hello +$ bdep config add ../build-gcc @gcc +$ bdep init @gcc config.cc.compiledb=hello +$ cd .. + +$ b hello/ libhello/ +\ + +\N|Or if you prefer to create/add configuration as part of \c{init} (notice +the \c{--} separator): + +\ +$ bdep init -C ../build-gcc @gcc cc config.cxx=g++ -- \\ + config.cc.compiledb=libhello + +$ bdep init -A ../build-gcc @gcc config.cc.compiledb=hello +\ + +| + +After the update (the last command), we will have \c{hello.json} and +\c{libhello.json} in \c{build-gcc/} which contain the compilation command +lines for each project. + +\N|Only source files that are compiled end up being added to the compilation +database. + +To illustrate this point, let's assume our \c{hello} project imports and links +\c{libhello}. And instead of updating both as in the above example, we will +first update only \c{hello}: + +\ +$ b hello/ +\ + +In this case \c{libhello.json} will still be generated but it will only +contain a subset of the expected entries \- only those that were caused to be +compiled by \c{hello}. The missing entries can be added by updating +\c{libhello}: + +\ +$ b libhello/ +\ + +| + +In the above setup it feels natural to call each database after the project +and place them into the output directory. However, some consumers, such as +IDEs, may not handle this setup well. Specifically, they may only recognize +the canonical \c{compile_commands.json} file as the compilation database, +opening all other files as generic JSON. They may also assume the directory +where this file resides to be the project source directory root. To accommodate +these assumptions we can instead place each database into the project's +source directory and call it \c{compile_commands.json}: + +\ +$ bdep init @gcc config.cc.compiledb=libhello@./compile_commands.json + +$ bdep init @gcc config.cc.compiledb=hello@./compile_commands.json +\ + +Note that in this case it will be your responsibility to remove the database +files if and when necessary. \N{\l{bdep-new(1)} adds \c{compile_commands.json} +to \c{.gitignore} it generates.} + +If instead of having a separate database for each project we wanted to place +all the entries into a single database, then the relevant commands would +change as follows: + +\ +$ bdep init @gcc config.cc.compiledb=compiledb + +$ bdep init @gcc config.cc.compiledb=compiledb +\ + +This would give us a single \c{build-gcc/compiledb.json} that contains the +compilation command lines for both projects. + +In the above example only \c{hello} and \c{libhello} will end up in the +database, but not any of their dependencies. What if we wanted entries for +everything in \c{build-gcc/}? In this case, we should enable the compilation +database for the entire configuration rather than for individual projects: + +\ +$ bdep config create ../build-gcc @gcc cc \\ + config.cxx=g++ \\ + config.cc.compiledb=compiledb +$ bdep init @gcc + +$ bdep config add ../build-gcc @gcc +$ bdep init @gcc +\ + +If multiple linked configurations are involved, then we would often want +projects initialized in different configurations share the compilation +database. The representative scenario here is a tool, such as a source code +generator, which is initialized in the host configuration, and its runtime +library plus tests/examples, which are initialized in the target +configuration. Let's assume that in our example \c{hello} is the tool and +\c{libhello} is the runtime library and both are part of the same project. +This is how we can arrange for them to share the compilation database: + +\ +$ bdep config create @host ../host-gcc --type host cc config.cxx=g++ +$ bdep config create @target ../build-gcc cc config.cxx=g++ + +$ bdep init @host -d hello config.cc.compiledb=hello@../build-gcc/ +$ bdep init @target -d libhello config.cc.compiledb=hello + +$ bdep update @host @target +\ + +With this setup the \c{hello.json} database in \c{build-gcc/} will contain +entries for both \c{hello} and \c{libhello}. + +If instead of configuring and maintaining the compilation database in a file +you want to dump it somewhere once, the recommended approach is to write it +to \c{stdout}. For example: + +\ +$ b -n hello/ libhello/ config.cc.compiledb=- >/tmp/compiledb.json +\ + +Note that writing to \c{stdout} forces recompilation of all the targets that +would be updated in order to make sure their entries end up in the database. +If you don't want the actual recompilation, then you can use the dry run mode +(\c{-n} option above). + +\N|If your projects are spread across multiple linked configurations and you +would like to get compilation command lines for all of them, then use the +global override for \c{config.cc.compiledb}: + +\ +$ b '!config.cc.compiledb=-' ... +\ + +As mentioned earlier, the entries that will end up in such a database are +determined by what gets updated.| + +Let's now turn to the rest of the \c{config.cc.compiledb.*} configuration +variables that provide a lower-level but more flexible interface. The +following listing shows their synopsis: + +\ +config.cc.compiledb.name = <name>[@<path>]... +config.cc.compiledb.filter = [<name>@]<bool>... +config.cc.compiledb.filter.input = [<name>@]<target-type>... +config.cc.compiledb.filter.output = [<name>@]<target-type>... +\ + +The \c{config.cc.compiledb.name} variable specifies the name and location of +one or more compilation databases. The semantics of the +\c{\i{name}[@\i{path}]} pair is the same as in \c{config.cc.compiledb} +discussed above, except that if \ci{path} is absent, then the database is +placed into the project being configured rather than into the top-level +amalgamation. + +Also, unlike \c{config.cc.compiledb}, this variable does not automatically +enable writing to the specified databases. Instead, this is the job of +\c{config.cc.compiledb.filter}. Splitting this logic into two steps allows us +to configure the database name/location in one place, typically an outer +amalgamation, and then enable writing to it in other places, typically +specific subprojects. + +The \c{config.cc.compiledb.filter.{input,output\}} variables allow us to +filter the entries that end up in the databases based on the input (\c{c{\}}, +\c{cxx{\}}, etc) and output (\c{obja{\}}, \c{objs{\}}, etc) target types. + +Note that in all three \c{.filter} variables the values are examined in the +reverse order and the first entry that matches determines the outcome. +Entries without \ci{name} apply to all databases and the target types are +matched taking into account inheritance (so \c{target{\}} will match any type) +and groups (so \c{obj{\}} will match any \c{obj[eas]{\}}). If no target type +filter (input or output) is specified, then no corresponding target filtering +is performed. + +\N|The \c{config.cc.compiledb=<name>} semantics can be expressed as the +following set of lower-level variables: + +\ +config.cc.compiledb.name = <name>@../path/to/amalgamation/ +config.cc.compiledb.filter += <name>@true +config.cc.compiledb.filter.input += <name>@target +config.cc.compiledb.filter.output += <name>@target +\ + +The last three assignments only apply if the corresponding variable is not set +to a custom value for this project.| + +Let's look at a few examples of using these lower-level configuration +variables. The common use for the output target filtering is getting rid of +\c{obja{\}} or \c{objs{\}} entries in libraries. Unless configured otherwise, +when we build a library we end up with both static and shared variants. And +this means that each source file for the library is compiled twice, once to +produce \c{obja{\}} that goes to the static library and once -- \c{objs{\}}. +And that, in turn, means that we will end up with two compilation database +entries for each such source file. If we don't want that for some reason (for +instance, because the consumer of the database does not handle this well), +then we can filter one of them out. For example, below is how we can +initialize \c{libhello} to achieve this (notice that we also include +\c{obje{\}} to keep object files for executables, such as tests): + +\ +$ bdep init @gcc \\ + config.cc.compiledb=libhello \\ + config.cc.compiledb.filter.output='obje objs' +\ + +As an example of the input target type filtering, below is how we can keep +entries only for the C and C++ source files, filtering out everything else +(assembler, Objective-C/C++), for instance, because the consumer of our +database does not recognize them: + +\ +$ bdep init @gcc \\ + config.cc.compiledb=libhello \\ + config.cc.compiledb.filter.input='c cxx' +\ + +As an example of a more advanced configuration, consider a compilation +database for a project that use C++ modules. To know how such a project is +compiled we not only need to know how its own source files are compiled, but +also how to compile all the module interfaces that it consumes, including from +other projects, transitively. One way to set this up would be to enable +writing entries of the \c{bmi{\}} output target type to any database in the +amalgamation: + +\ +$ bdep config create ../build-gcc @gcc cc \\ + config.cxx=g++ \\ + config.cc.compiledb.filter=true \\ + config.cc.compiledb.filter.output=bmi \\ + + +$ bdep init @gcc config.cc.compiledb=libhello + +$ bdep init @gcc config.cc.compiledb=hello +\ + +With this setup \c{libhello.json} and \c{hello.json} will contain module +interface entries from all the dependencies. + +\N|When debugging complex compilation database setups it can be helpful to +increase diagnostics verbosity to level 6 in order to get a trace of filtering +decisions (the relevant lines will contain the \c{compiledb} keyword).| + + \h#cc-gcc|GCC Compiler Toolchain| The GCC compiler id is \c{gcc}. diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index 99c3b90..29a26b5 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -25,6 +25,7 @@ #include <libbuild2/cc/target.hxx> // h #include <libbuild2/cc/module.hxx> #include <libbuild2/cc/utility.hxx> +#include <libbuild2/cc/compiledb.hxx> using std::exit; using std::strlen; @@ -1181,6 +1182,11 @@ namespace build2 fsdir_rule::perform_update_direct (a, *dir); } + // Use the subset of the depdb checks to detect changes to the + // compilation database entry. + // + bool compiledb_changed (false); + // Note: the leading '@' is reserved for the module map prefix (see // extract_modules()) and no other line must start with it. // @@ -1198,8 +1204,14 @@ namespace build2 // but only in what it targets, then the checksum will still change. // if (dd.expect (cast<string> (rs[x_checksum])) != nullptr) + { l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); + // The checksum includes the absolute compiler path. + // + compiledb_changed = true; + } + // Then the compiler environment checksum. // if (dd.expect (env_checksum) != nullptr) @@ -1263,7 +1275,17 @@ namespace build2 append_sys_hdr_options (cs); // Extra system header dirs (last). if (dd.expect (cs.string ()) != nullptr) + { l4 ([&]{trace << "options mismatch forcing update of " << t;}); + + // Note that this doesn't include any of the "plumbing" options + // like -x, -c, -o, etc. In the unlikely event that there are + // changes in this area that also affect the semantics of the + // compilation database (options reordering doesn't, for example), + // then we can resort to incrementing the rule version. + // + compiledb_changed = true; + } } // Finally the source file. @@ -1273,7 +1295,10 @@ namespace build2 assert (!p.empty ()); // Sanity check. if (dd.expect (p) != nullptr) + { l4 ([&]{trace << "source file mismatch forcing update of " << t;}); + compiledb_changed = true; + } } // If any of the above checks resulted in a mismatch (different @@ -1296,6 +1321,14 @@ namespace build2 u = dd.mtime > mt; } + // Confirm the entry in the compilation database, if any. + // + if (compiledb::match (bs, t, tp, src, compiledb_changed) && !u) + { + l4 ([&]{trace << "compilation database forcing update of " << t;}); + u = true; + } + // If updating for any of the above reasons, treat it as if doesn't // exist. // @@ -7354,8 +7387,11 @@ namespace build2 // apply()). For named modules there may be no obj*{} if this is a // sidebuild (obj*{} is already in the library binary). // - path relm; + const path* abso (nullptr); + const path* absm (nullptr); path relo; + path relm; + switch (ut) { case unit_type::module_header: @@ -7365,12 +7401,18 @@ namespace build2 case unit_type::module_impl_part: { if (const file* o = find_adhoc_member<file> (t, tts.obj)) - relo = relative (o->path ()); + { + abso = &o->path (); + relo = relative (*abso); + } break; } default: - relo = relative (tp); + { + abso = &tp; + relo = relative (tp); + } } // Build the command line. @@ -7400,6 +7442,9 @@ namespace build2 small_vector<string, 2> header_args; // Header unit options storage. small_vector<string, 2> module_args; // Module options storage. + // NOTE: see a note in apply() on the compilation database implications + // if changing anything below. + // switch (cclass) { case compiler_class::msvc: @@ -7534,6 +7579,7 @@ namespace build2 { assert (ut != unit_type::module_header); // @@ MODHDR + absm = &tp; relm = relative (tp); args.push_back ("/ifcOutput"); @@ -7747,6 +7793,9 @@ namespace build2 // Output module file is specified in the mapping file, the // same as input. // + // We set neither relm nor absm since they are not on the + // command line. + // if (ut == unit_type::module_header) // No obj, -c implied. break; @@ -7775,6 +7824,7 @@ namespace build2 { assert (ut != unit_type::module_header); // @@ MODHDR + absm = &tp; relm = relative (tp); // Without this option Clang's .pcm will reference source @@ -7886,6 +7936,15 @@ namespace build2 else if (verb == 2) print_process (args); + // Insert or update the entry in the compilation database, if any. + // + compiledb::execute ( + bs, + t, tp, s, *sp, + cpath, args, + relo, abso != nullptr ? *abso : empty_path, + relm, absm != nullptr ? *absm : empty_path); + // If we have the (partially) preprocessed output, switch to that. // // But we remember the original source/position to restore later. diff --git a/libbuild2/cc/compiledb.cxx b/libbuild2/cc/compiledb.cxx new file mode 100644 index 0000000..7414eb2 --- /dev/null +++ b/libbuild2/cc/compiledb.cxx @@ -0,0 +1,1099 @@ +// file : libbuild2/cc/compiledb.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/compiledb.hxx> + +#include <cstring> // strlen() +#include <iostream> // cout + +#ifndef BUILD2_BOOTSTRAP +# include <libbutl/json/parser.hxx> +#endif + +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/cc/module.hxx> + +#include <libbuild2/cc/target.hxx> +#include <libbuild2/bin/target.hxx> + +using namespace std; + +namespace build2 +{ + namespace cc + { + compiledb_set compiledbs; + + // compiledb + // + compiledb:: + ~compiledb () + { + } + + // Return true if this entry should be written to the database with the + // specified name. + // + static bool + filter (const scope& rs, + const core_module& m, + const string& name, + const file& ot, const file& it) + { + tracer trace ("cc::compiledb_filter"); + + bool r (true); + const char* w (nullptr); // Why r is false. + + // First check if writing to this database is enabled. + // + // No filter means not enabled. + // + if (m.cdb_filter_ == nullptr) + { + r = false; + w = "no database name filter"; + } + else + { + // Iterate in reverse (so that later values override earlier) and take + // the first name match. + // + r = false; + for (const pair<optional<string>, bool>& p: + reverse_iterate (*m.cdb_filter_)) + { + if (!p.first || *p.first == name) + { + r = p.second; + break; + } + } + + if (!r) + w = "no match in database name filter"; + } + + // Verify the name is known in this amalgamation. Note that without + // this check we may end up writing to unrelated databases in other + // amalgamations (think linked configurations). + // + if (r) + { + r = false; + for (const core_module* pm (&m); + pm != nullptr; + pm = pm->outer_module_) + { + const strings& ns (pm->cdb_names_); + + if (find (ns.begin (), ns.end (), name) != ns.end ()) + { + r = true; + break; + } + } + + if (!r) + w = "database name unknown in amalgamation"; + } + + // Filter based on the output target. + // + // If there is no filter specified, then accept all targets. + // + if (r && m.cdb_filter_output_ != nullptr) + { + // If the filter is empty, then there is no match. + // + if (m.cdb_filter_output_->empty ()) + { + r = false; + w = "empty output target type filter"; + } + else + { + const target_type& ott (ot.type ()); + + // Iterate in reverse (so that later values override earlier) and + // take the first name match. + // + r = false; + for (const pair<optional<string>, string>& p: + reverse_iterate (*m.cdb_filter_output_)) + { + if (p.first && *p.first != name) + continue; + + using namespace bin; + + const string& n (p.second); + + if (ott.name == n || n == "target") + { + r = true; + } + // + // Handle obj/bmi/hbmi{} groups ad hoc. + // + else if (n == "obj") + { + r = ott.is_a<obje> () || ott.is_a<objs> () || ott.is_a<obja> (); + } + else if (n == "bmi") + { + r = ott.is_a<bmie> () || ott.is_a<bmis> () || ott.is_a<bmia> (); + } + else if (n == "hbmi") + { + r = ott.is_a<hbmie> () || ott.is_a<hbmis> () || ott.is_a<hbmia> (); + } + else + { + // Handle the commonly-used, well-known targets directly (see + // note in core_config_init() for why we cannot pre-lookup + // them). + // + const target_type* tt ( + n == "obje" ? &obje::static_type : + n == "objs" ? &objs::static_type : + n == "obja" ? &obja::static_type : + n == "bmie" ? &bmie::static_type : + n == "bmis" ? &bmis::static_type : + n == "bmia" ? &bmia::static_type : + n == "hbmie" ? &hbmie::static_type : + n == "hbmis" ? &hbmis::static_type : + n == "hbmia" ? &hbmia::static_type : + rs.find_target_type (n)); + + if (tt == nullptr) + fail << "unknown target type '" << n << "' in " + << "config.cc.compiledb.filter.output value"; + + r = ott.is_a (*tt); + } + + if (r) + break; + } + + if (!r) + w = "no match in output target type filter"; + } + } + + // Filter based on the input target. + // + // If there is no filter specified, then accept all targets. + // + if (r && m.cdb_filter_input_ != nullptr) + { + // If the filter is empty, then there is no match. + // + if (m.cdb_filter_input_->empty ()) + { + r = false; + w = "empty input target type filter"; + } + else + { + const target_type& itt (it.type ()); + + // Iterate in reverse (so that later values override earlier) and + // take the first name match. + // + r = false; + for (const pair<optional<string>, string>& p: + reverse_iterate (*m.cdb_filter_input_)) + { + if (p.first && *p.first != name) + continue; + + const string& n (p.second); + + if (itt.name == n || n == "target") + r = true; + else + { + // The same optimization as above. Note: cxx{}, etc., are in the + // cxx module so we have to look them up. + // + const target_type* tt ( + n == "c" ? &c::static_type : + n == "m" ? &m::static_type : + n == "S" ? &m::static_type : + rs.find_target_type (n)); + + if (tt == nullptr) + fail << "unknown target type '" << n << "' in " + << "config.cc.compiledb.filter.input value"; + + r = itt.is_a (*tt); + } + + if (r) + break; + } + + if (!r) + w = "no match in input target type filter"; + } + } + + l6 ([&] + { + if (r) + trace << "keep " << ot << " in " << name; + else + trace << "omit " << ot << " from " << name << ": " << w; + }); + + return r; + } + + bool compiledb:: + match (const scope& bs, + const file& ot, const path_type& op, + const file& it, + bool changed) + { + if (compiledbs.empty ()) + return false; + + const scope& rs (*bs.root_scope ()); + const auto* m (rs.find_module<core_module> (core_module::name)); + + assert (m != nullptr); + + bool u (false); + + for (const unique_ptr<compiledb>& db: compiledbs) + { + if (filter (rs, *m, db->name, ot, it)) + u = db->match (ot, op, changed) || u; + } + + return u; + } + + void compiledb:: + execute (const scope& bs, + const file& ot, const path_type& op, + const file& it, const path_type& ip, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) + { + if (compiledbs.empty ()) + return; + + const scope& rs (*bs.root_scope ()); + const auto* m (rs.find_module<core_module> (core_module::name)); + + assert (m != nullptr); + + assert (relo.empty () == abso.empty () && + relm.empty () == absm.empty ()); + + for (const unique_ptr<compiledb>& db: compiledbs) + { + if (filter (rs, *m, db->name, ot, it)) + db->execute (ot, op, it, ip, cpath, args, relo, abso, relm, absm); + } + } + + void + compiledb_pre (context& ctx, action a, const action_targets&) + { + // Note: won't be registered if compiledbs is empty. + + // Note: may be called directly with empty action_targets. + + assert (a.inner_action () == perform_update_id); + + tracer trace ("cc::compiledb_pre"); + + bool mctx (ctx.module_context == &ctx); + + l6 ([&]{trace << (mctx ? "module" : "normal") << " context " << &ctx;}); + + for (const unique_ptr<compiledb>& db: compiledbs) + db->pre (ctx); + } + + void + compiledb_post (context& ctx, + action a, + const action_targets& ts, + bool failed) + { + // Note: won't be registered if compiledbs is empty. + + assert (a.inner_action () == perform_update_id); + + tracer trace ("cc::compiledb_post"); + + bool mctx (ctx.module_context == &ctx); + + l6 ([&]{trace << (mctx ? "module" : "normal") << " context " << &ctx + << ", failed: " << failed;}); + + for (const unique_ptr<compiledb>& db: compiledbs) + db->post (ctx, ts, failed); + } + +#ifndef BUILD2_BOOTSTRAP + + namespace json = butl::json; + + // compiledb_stdout + // + compiledb_stdout:: + compiledb_stdout (string n) + : compiledb (move (n), path_type ()), + state_ (state::init), + nesting_ (0), + js_ (cout, 0 /* indentation */, "" /* multi_value_separator */) + { + } + + void compiledb_stdout:: + pre (context&) + { + // If the previous operation batch failed, then we shouldn't be here. + // + assert (state_ != state::failed); + + // The module context (used to build build system modules) poses a + // problem: we can receive its callbacks before the main context's or + // nested in the pre/post calls of the main context (or both, in + // fact). Plus there may be multiple pre/post sequences corresponding to + // the module context of both kinds. The three distinct cases are: + // + // 1. Module is loaded as part of the initial buildfile load (e.g., from + // root.build) -- in this case we will observe module pre/post before + // the main context's pre/post. + // + // In fact, to be precise, we will only observe them if cc is loaded + // before such a module. + // + // 2. Module is loaded via the interrupting load (e.g., from a directory + // buildfile that is loaded implicitly during match) -- in this case + // we will observe pre/post calls nested into the main context's + // pre/post. + // + // 3. The module context is used to build an ad hoc C++ recipe -- in + // this case we also get nested calls like in (2) since this happens + // during the recipe's match(). + // + // One thing to keep in mind (and which we rely upon quite a bit below) + // is that the main context's post will always be last (within any given + // operation; there could be another for the subsequent operation in a + // batch). + // + // Handling the nested case is relatively straightforward: we can keep + // track and ignore all the nested calls. + // + // The before case is where things get complicated. We could "take" the + // first module pre call and then wait until the main post, unless we + // see a module post call with failed=true, in which case there will be + // no further pre/post calls. There is, however, a nuance: the module is + // loaded and build for any operation, not just update, which means that + // if the main operation is not update (say, it's clean), we won't see + // any of the main context's pre/post calls. + // + // The way we are going to resolve this problem is different for the + // stdout and file implementations: + // + // For stdout we will just say that it should only be used with the + // update operation. There is really no good reason to use it with + // anything else anyway. See compiledb_stdout::post() for additional + // details. + // + // For file we will rely on its persistence and simply close and reopen + // the database for each pre/post sequence, the same way as if they were + // separate operations in a batch. + // + if (nesting_++ != 0) // Nested pre() call. + return; + + if (state_ == state::init) // First pre() call. + { + state_ = state::empty; + cout << "[\n"; + } + } + + bool compiledb_stdout:: + match (const file&, const path_type&, bool) + { + return true; + } + + static inline const char* + rel_to_abs (const char* a, + const string& rs, const string& as, + string& buf) + { + if (size_t rn = rs.size ()) + { + size_t an (strlen (a)); + + if (an >= rn && rs.compare (0, rn, a, rn) == 0) + { + if (an == rn) + return as.c_str (); + + buf = as; + buf.append (a + rn, an - rn); + + return buf.c_str (); + } + } + + return nullptr; + } + + void compiledb_stdout:: + execute (const file&, const path_type& op, + const file&, const path_type& ip, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) + { + const string& ro (relo.string ()); + const string& ao (abso.string ()); + + const string& rm (relm.string ()); + const string& am (absm.string ()); + + mlock l (mutex_); + + switch (state_) + { + case state::full: + { + cout << ",\n"; + break; + } + case state::empty: + { + state_ = state::full; + break; + } + case state::failed: + return; + case state::init: + assert (false); + return; + } + + try + { + // Duplicate what we have in the file implementation (instead of + // factoring it out to something common) in case here we need to + // adjust things (change order, omit some values; for example to + // accommodate broken consumers). We have this freedom here but not + // there. + // + js_.begin_object (); + { + js_.member ("output", op.string ()); + js_.member ("file", ip.string ()); + + js_.member_begin_array ("arguments"); + { + string buf; // Reuse. + for (auto b (args.begin ()), i (b), e (args.end ()); + i != e && *i != nullptr; + ++i) + { + const char* r; + + if (i == b) + r = cpath.effect_string (); + else + { + // Untranslate relative paths back to absolute. + // + const char* a (*i); + + if ((r = rel_to_abs (a, ro, ao, buf)) == nullptr && + (r = rel_to_abs (a, rm, am, buf)) == nullptr) + r = a; + } + + js_.value (r); + } + } + js_.end_array (); + + js_.member ("directory", work.string ()); + } + js_.end_object (); + } + catch (const json::invalid_json_output& e) + { + // There is no way (nor reason; the output will most likely be invalid + // anyway) to reuse the failed json serializer so make sure we ignore + // all the subsequent callbacks. + // + state_ = state::failed; + + l.unlock (); + + fail << "invalid compilation database json output: " << e; + } + } + + void compiledb_stdout:: + post (context& ctx, const action_targets&, bool failed) + { + assert (nesting_ != 0); + if (--nesting_ != 0) // Nested post() call. + return; + + bool mctx (ctx.module_context == &ctx); + + switch (state_) + { + case state::empty: + case state::full: + { + // If this is a module context's post, wait for the main context's + // post (last) unless the module load failed (in which case there + // will be no main pre/post). + // + // Note that there is no easy way to diagnose the case where we + // won't get the main pre/post calls. Instead, we will just produce + // invalid JSON (array won't be closed). In a somewhat hackish way, + // this actually makes the `b [-n] clean update` sequence work: we + // will take the pre() call from clean and the main post() from + // update. + // + if (mctx && !failed) + return; + + if (state_ == state::full) + cout << '\n'; + + cout << "]\n"; + break; + } + case state::failed: + return; + case state::init: + assert (false); + } + + state_ = state::init; + } + + // compiledb_file + // + compiledb_file:: + compiledb_file (string n, path_type p) + : compiledb (move (n), move (p)), + state_ (state::closed), + nesting_ (0) + { + } + + void compiledb_file:: + pre (context&) + { + // If the previous operation batch failed, then we shouldn't be here. + // + assert (state_ != state::failed); + + // See compiledb_stdout::pre() for background on dealing with the module + // context. Here are some file-specific nuances: + // + // We are going to load the database on the first pre call and flush + // (but not close) it on the matching post. Flushing means that we will + // update the file but still keep the in-memory state, in case there is + // another pre/post session coming. This is both a performance + // optimization but also the way we handle prunning no longer present + // entries, which gets tricky across multiple pre/post sessions (see + // post() for details). + // + if (nesting_++ != 0) // Nested pre() call. + return; + + if (state_ == state::closed) // First pre() call. + { + // Load the contents of the file if it exists, marking all the entries + // as (presumed) absent. + // + if (exists (path)) + { + uint64_t line (1); + try + { + ifdstream ifs (path, ifdstream::badbit); + + // Parse the top-level array manually (see post() for the expected + // format). + // + auto throw_invalid_input = [] (const string& d) + { + throw json::invalid_json_input ("", 0, 1, 0, d); + }; + + enum {first, second, next, last, end} s (first); + + for (string l; !eof (getline (ifs, l)); line++) + { + switch (s) + { + case first: + { + if (l != "[") + throw_invalid_input ("beginning of array expected"); + + s = second; + continue; + } + case second: + { + if (l == "]") + { + s = end; + continue; + } + + s = next; + } + // Fall through. + case next: + { + if (!l.empty () && l.back () == ',') + l.pop_back (); + else + s = last; + + break; + } + case last: + { + if (l != "]") + throw_invalid_input ("end of array expected"); + + s = end; + continue; + } + case end: + { + throw_invalid_input ("junk after end of array"); + } + } + + // Parse just the output target path, which must come first. + // + json::parser jp (l, "" /* name */); + + jp.next_expect (json::event::begin_object); + string op (move (jp.next_expect_member_string ("output"))); + + auto r (db_.emplace (move (op), entry {entry_status::absent, l})); + if (!r.second) + throw_invalid_input ( + "duplicate output value '" + r.first->first + '\''); + } + + if (s != end) + throw_invalid_input ("corrupt input text"); + } + catch (const json::invalid_json_input& e) + { + location l (path, line, e.column); + fail (l) << "invalid compilation database json input: " << e; + state_ = state::failed; + } + catch (const io_error& e) + { + fail << "unable to read " << path << ": " << e; + state_ = state::failed; + } + } + + absent_ = db_.size (); + changed_ = false; + + state_ = state::open; + } + } + + bool compiledb_file:: + match (const file&, const path_type& op, bool changed) + { + mlock l (mutex_); + + switch (state_) + { + case state::open: + break; + case state::failed: + return false; + case state::closed: + assert (false); + return false; + } + + // Mark an existing entry as present or changed. And if one does not + // exist, then (for now) as missing. + // + auto i (db_.find (op.string ())); + + if (i != db_.end ()) + { + entry& e (i->second); + + // Note: we can end up with present entries via the module context + // (see post() below). And we can see changed entries in a subsequent + // nested module context. + // + switch (e.status) + { + case entry_status::present: + case entry_status::changed: + assert (!changed); + break; + case entry_status::absent: + { + e.status = changed ? entry_status::changed : entry_status::present; + + absent_--; + changed_ = changed_ || (e.status == entry_status::changed); + break; + } + case entry_status::missing: + assert (false); + } + + return false; + } + else + { + db_.emplace (op.string (), entry {entry_status::missing, string ()}); + + changed_ = true; + + return true; + } + } + + void compiledb_file:: + execute (const file&, const path_type& op, + const file&, const path_type& ip, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) + { + const string& ro (relo.string ()); + const string& ao (abso.string ()); + + const string& rm (relm.string ()); + const string& am (absm.string ()); + + mlock l (mutex_); + + switch (state_) + { + case state::open: + break; + case state::failed: + return; + case state::closed: + assert (false); + return; + } + + auto i (db_.find (op.string ())); + + // We should have had the match() call before execute(). + // + assert (i != db_.end () && i->second.status != entry_status::absent); + + entry& e (i->second); + + if (e.status == entry_status::present) // Present and unchanged. + return; + + // The entry is either missing or changed. + // + try + { + e.json.clear (); + json::buffer_serializer js (e.json, 0 /* indentation */); + + js.begin_object (); + { + js.member ("output", op.string ()); // Note: must come first. + js.member ("file", ip.string ()); + + js.member_begin_array ("arguments"); + { + string buf; // Reuse. + for (auto b (args.begin ()), i (b), e (args.end ()); + i != e && *i != nullptr; + ++i) + { + const char* r; + + if (i == b) + r = cpath.effect_string (); + else + { + // Untranslate relative paths back to absolute. + // + const char* a (*i); + + if ((r = rel_to_abs (a, ro, ao, buf)) == nullptr && + (r = rel_to_abs (a, rm, am, buf)) == nullptr) + r = a; + } + + js.value (r); + } + } + js.end_array (); + + js.member ("directory", work.string ()); + } + js.end_object (); + } + catch (const json::invalid_json_output& e) + { + // There is no way (nor reason; the output will most likely be invalid + // anyway) to reuse the failed json serializer so make sure we ignore + // all the subsequent callbacks. + // + state_ = state::failed; + + l.unlock (); + + fail << "invalid compilation database json output: " << e; + } + + e.status = entry_status::changed; + } + + void compiledb_file:: + post (context& ctx, const action_targets& ts, bool failed) + { + assert (nesting_ != 0); + if (--nesting_ != 0) // Nested post() call. + return; + + switch (state_) + { + case state::open: + break; + case state::failed: + return; + case state::closed: + assert (false); + return; + } + + bool mctx (ctx.module_context == &ctx); + + tracer trace ("cc::compiledb_file::post"); + + // See if we need to update the file. + // + if (changed_) + l6 ([&]{trace << "updating due to missing/changed entries: " << path;}); + + // Don't prune the stale entries if the operation failed since we may + // not have gotten to execute some of them. + // + // And if this is a module context's post, then also don't prune the + // stale entries, instead waiting for the main context's post (if there + // will be one; this means we will only prune on update). + // + // Actually, this pruning business is even trickier than that: if we + // are not updating the entire project (say, rather only a subdirectory + // or even a specific target), then we will naturally not get any + // match/execute calls for targets of this project that don't get pulled + // into this build. Which means that we cannot just prune entries that + // we did not match/execute. It feels the correct semantics is to only + // prune the entries if they are in a subdirectory of the dir{} targets + // which we are building. + // + // What do we do about the module context, where we always update a + // specific libs{}? We could use its directory instead but that may lead + // to undesirable results. For example, if there are unit tests in the + // same directory, we will end up dropping their entries. It feels like + // the correct approach is to just ignore module context's entries + // entirely. If someone wants to prune the compilation database of a + // module, they will just need to update it directly (i.e., via the main + // context). Note that we cannot apply the same "simplification" to the + // changed entries since we will only observe the change once. + // + bool absent (false); + + if (!failed && !mctx && absent_ != 0) + { + // Pre-scan the entries and drop the appropriate absent ones. + // + for (auto i (db_.begin ()); i != db_.end (); ) + { + const entry& e (i->second); + + if (e.status == entry_status::absent) + { + // Absent entries should be rare enough during the normal + // development that we don't need to bother with caching the + // directories. + // + bool a (false); + for (const action_target& at: ts) + { + const target& t (at.as<target> ()); + if (t.is_a<dir> ()) + { + const string& p (i->first); + const string& d (t.out_dir ().string ()); + + if (path_traits::sub (p.c_str (), p.size (), + d.c_str (), d.size ())) + { + // Remove this entry from the in-memory state so that it + // matches the file state. + // + i = db_.erase (i); + --absent_; + a = absent = true; + break; + } + } + } + + if (a) + continue; + } + + ++i; + } + } + + if (absent) + l6 ([&]{trace << "updating due to absent entries: " << path;}); + + try + { + auto_rmfile rm; + ofdstream ofs; + + bool u (changed_ || absent); // Update the file. + + if (u) + { + rm = auto_rmfile (path); + ofs.open (path); + + // We parse the top-level array manually (see pre() above) and the + // expected format is as follows: + // + // [ + // {"output":...}, + // ... + // {"output":...} + // ] + // + ofs.write ("[\n", 2); + } + + // Iterate over the entries resetting their status and writing them to + // the file if necessary. + // + bool first (true); + for (auto& p: db_) + { + entry& e (p.second); + + // First sort out the status also skipping appropriate entries. + // + switch (e.status) + { + case entry_status::absent: + { + // This is an absent entry that we should keep (see pre-scan + // above). + // + break; + } + case entry_status::missing: + { + // This should only happen if this operation has failed (see + // also below) or we are in the match-only mode. + // + assert (failed || ctx.match_only); + continue; + } + case entry_status::present: + case entry_status::changed: + { + // This is tricky: if this is a module context, then we don't + // want to mark the entries as absent since they will then get + // dropped by the main operation context. + // + if (mctx) + e.status = entry_status::present; + else + { + // Note: this is necessary for things to work across multiple + // operations in a batch. + // + e.status = entry_status::absent; + absent_++; + } + } + } + + if (u) + { + if (first) + first = false; + else + ofs.write (",\n", 2); + + ofs.write (e.json.c_str (), e.json.size ()); + } + } + + if (u) + { + ofs.write (first ? "]\n" : "\n]\n", first ? 2 : 3); + + ofs.close (); + rm.cancel (); + } + } + catch (const io_error& e) + { + fail << "unable to write to " << path << ": " << e; + state_ = state::failed; + return; + } + + // If this operation has failed, then our state may not be accurate + // (e.g., entries with missing status) but we also don't expect any + // further pre calls. Let's change out state to failed as a sanity + // check. + // + if (failed) + state_ = state::failed; + else + changed_ = false; + + // Note: keep in the open state (see pre() for details). + } + +#endif // BUILD2_BOOTSTRAP + } +} diff --git a/libbuild2/cc/compiledb.hxx b/libbuild2/cc/compiledb.hxx new file mode 100644 index 0000000..edfd1ee --- /dev/null +++ b/libbuild2/cc/compiledb.hxx @@ -0,0 +1,226 @@ +// file : libbuild2/cc/compiledb.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_COMPILEDB_HXX +#define LIBBUILD2_CC_COMPILEDB_HXX + +#include <unordered_map> + +#ifndef BUILD2_BOOTSTRAP +# include <libbutl/json/serializer.hxx> +#endif + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> +#include <libbuild2/action.hxx> +#include <libbuild2/context.hxx> + +namespace build2 +{ + namespace cc + { + using compiledb_name_filter = vector<pair<optional<string>, bool>>; + using compiledb_type_filter = vector<pair<optional<string>, string>>; + + class compiledb + { + public: + // Match callback where we confirm an entry in the database and also + // signal whether it has changes (based on change tracking in depdb). + // Return true to force compilation of this target and thus make sure + // the below execute() is called (unless something before that failed). + // + // Besides noticing changes, this callback is also necessary to notice + // and delete entries that should no longer be in the database (e.g., a + // source file was removed from the project). + // + // Note that output is either obj*{}, bmi*{}, of hbmi*{}. + // + static bool + match (const scope& bs, + const file& output, const path& output_path, + const file& input, + bool changed); + + // Execute callback where we insert or update an entry in the database. + // + // The {relo, abso}, and {relm, absm} pairs are used to "untranslate" + // relative paths to absolute. Specifically, any argument that has rel? + // as a prefix has this prefix replaced with the corresponding abs?. + // Note that this means we won't be able to handle old MSVC and + // clang-cl, which don't support the `/F?: <path>` form, only + // `/F?<path>`. Oh, well. Note also that either relo or relm (but not + // both) could be empty if unused. + // + // Note also that we assume the source file is always absolute and is + // the last argument. + // + static void + execute (const scope& bs, + const file& output, const path& output_path, + const file& input, const path& input_path, + const process_path& cpath, const cstrings& args, + const path& relo, const path& abso, + const path& relm, const path& absm); + + public: + using path_type = build2::path; + + string name; + path_type path; + + // The path is expected to be absolute and normalized or empty if the + // name is `-` (stdout). + // + compiledb (string n, path_type p) + : name (move (n)), path (move (p)) + { + } + + virtual void + pre (context&) = 0; + + virtual bool + match (const file& output, const path_type& output_path, + bool changed) = 0; + + virtual void + execute (const file& output, const path_type& output_path, + const file& input, const path_type& input_path, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) = 0; + + virtual void + post (context&, const action_targets&, bool failed) = 0; + + virtual + ~compiledb (); + }; + + using compiledb_set = vector<unique_ptr<compiledb>>; + + // Populated by core_config_init() during serial load. + // + extern compiledb_set compiledbs; + + // Context operation callbacks. + // + void + compiledb_pre (context&, action, const action_targets&); + + void + compiledb_post (context&, action, const action_targets&, bool failed); + +#ifndef BUILD2_BOOTSTRAP + + // Implementation that writes to stdout. + // + // Note that this implementation forces compilation of all the targets for + // which it is called to make sure their entries are in the database. So + // typically used in the dry run mode. + // + class compiledb_stdout: public compiledb + { + public: + // The path is expected to be empty. + // + explicit + compiledb_stdout (string name); + + virtual void + pre (context&) override; + + virtual bool + match (const file& output, const path_type& output_path, + bool changed) override; + + virtual void + execute (const file& output, const path_type& output_path, + const file& input, const path_type& input_path, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) override; + + virtual void + post (context&, const action_targets&, bool failed) override; + + private: + mutex mutex_; + enum class state {init, empty, full, failed} state_; + size_t nesting_; + butl::json::stream_serializer js_; + }; + + // Implementation that maintains a file. + // + class compiledb_file: public compiledb + { + public: + compiledb_file (string name, path_type path); + + virtual void + pre (context&) override; + + virtual bool + match (const file& output, const path_type& output_path, + bool changed) override; + + virtual void + execute (const file& output, const path_type& output_path, + const file& input, const path_type& input_path, + const process_path& cpath, const cstrings& args, + const path_type& relo, const path_type& abso, + const path_type& relm, const path_type& absm) override; + + virtual void + post (context&, const action_targets&, bool failed) override; + + private: + mutex mutex_; + enum class state {closed, open, failed} state_; + size_t nesting_; + + // We want to optimize the performance for the incremental update case + // where only a few files will be recompiled and most of the time there + // will be no change in the command line, which means we won't need to + // rewrite the file. + // + // As a result, our in-memory representation is a hashmap (we could have + // thousands of entries) of absolute and normalized output file paths + // (stored as strings for lookup efficiency) to their serialized JSON + // text lines plus the status: absent, present, changed, or missing + // (entry should be there but is not). This way we don't waste + // (completely) parsing (and re-serializing) each line knowing that we + // won't need to touch most of them. + // + // In fact, we could have gone even further and used a sorted vector + // since insertions will be rare in this case. But we will need to + // lookup every entry on each update, so it's unclear this is a win. + // + enum class entry_status {absent, present, changed, missing}; + + struct entry + { + entry_status status; + string json; + }; + + using map_type = std::unordered_map<string, entry>; + map_type db_; + + // Number/presence of various entries in the database (used to determine + // whether we need to update the file without iterating over all the + // entries). + // + size_t absent_; // Number of absent entries. + bool changed_; // Presence of changed or missing entries. + }; + +#endif // BUILD2_BOOTSTRAP + } +} + +#endif // LIBBUILD2_CC_COMPILEDB_HXX diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx index e124450..1ddeca8 100644 --- a/libbuild2/cc/init.cxx +++ b/libbuild2/cc/init.cxx @@ -10,8 +10,10 @@ #include <libbuild2/config/utility.hxx> +#include <libbuild2/cc/module.hxx> #include <libbuild2/cc/target.hxx> #include <libbuild2/cc/utility.hxx> +#include <libbuild2/cc/compiledb.hxx> using namespace std; using namespace butl; @@ -23,7 +25,7 @@ namespace build2 // Scope operation callback that cleans up module sidebuilds. // static target_state - clean_module_sidebuilds (action, const scope& rs, const dir&) + clean_module_sidebuilds (const scope& rs) { context& ctx (rs.ctx); @@ -67,6 +69,81 @@ namespace build2 return target_state::unchanged; } + // Scope operation callback that cleans up compilation databases. + // + static target_state + clean_compiledb (const scope& rs) + { + context& ctx (rs.ctx); + + target_state r (target_state::unchanged); + + for (const unique_ptr<compiledb>& db: compiledbs) + { + const path& p (db->path); + + if (p.empty () || + ctx.scopes.find_out (p.directory ()).root_scope () != &rs) + continue; + + if (rmfile (ctx, p)) + r = target_state::changed; + } + + return r; + } + + // Scope operation callback for cleaning module sidebuilds and compilation + // databases. + // + static target_state + clean_callback (action, const scope& rs, const dir&) + { + target_state r (clean_module_sidebuilds (rs)); + + if (!compiledbs.empty ()) + r |= clean_compiledb (rs); + + return r; + } + + // Custom save function that completes relative paths in the + // config.cc.compiledb and config.cc.compiledb.name values. + // + static pair<names_view, const char*> + save_compiledb_name (const scope&, + const value& v, + const value*, + names& storage) + { + const names& ns (v.as<names> ()); // Value is untyped. + + if (find_if (ns.begin (), ns.end (), + [] (const name& n) {return n.pair;}) == ns.end ()) + { + return make_pair (names_view (ns), "="); + } + + storage = ns; + for (auto i (storage.begin ()); i != storage.end (); ++i) + { + if (i->pair) + { + name& n (*++i); + + if (!n.directory ()) + n.canonicalize (); + + if (n.dir.relative ()) + n.dir.complete (); + + n.dir.normalize (); + } + } + + return make_pair (names_view (storage), "="); + } + bool core_vars_init (scope& rs, scope&, @@ -107,6 +184,22 @@ namespace build2 vp.insert<abs_dir_path> ("config.cc.pkgconfig.sysroot"); + // Compilation database. + // + // See the manual for the semantics. + // + // config.cc.compiledb -- <name>[@<path>] (untyped) + // config.cc.compiledb.name -- <name>[@<path>]... (untyped) + // config.cc.compiledb.filter -- [<name>@]<bool>... + // config.cc.compiledb.filter.input -- [<name>@]<target-type>... + // config.cc.compiledb.filter.output -- [<name>@]<target-type>... + // + vp.insert ("config.cc.compiledb"); + vp.insert ("config.cc.compiledb.name"); + vp.insert<compiledb_name_filter> ("config.cc.compiledb.filter"); + vp.insert<compiledb_type_filter> ("config.cc.compiledb.filter.input"); + vp.insert<compiledb_type_filter> ("config.cc.compiledb.filter.output"); + vp.insert<strings> ("cc.poptions"); vp.insert<strings> ("cc.coptions"); vp.insert<strings> ("cc.loptions"); @@ -192,16 +285,6 @@ namespace build2 // vp.insert<bool> ("cc.serialize"); - // Register scope operation callback. - // - // It feels natural to clean up sidebuilds as a post operation but that - // prevents the (otherwise-empty) out root directory to be cleaned up - // (via the standard fsdir{} chain). - // - rs.operation_callbacks.emplace ( - perform_clean_id, - scope::operation_callback {&clean_module_sidebuilds, nullptr /*post*/}); - return true; } @@ -292,6 +375,8 @@ namespace build2 assert (first); + context& ctx (rs.ctx); + // Load cc.core.guess. // load_module (rs, rs, "cc.core.guess", loc); @@ -312,7 +397,6 @@ namespace build2 // // @@ Same nonsense as in module. // - // rs.assign ("cc.poptions") += cast_null<strings> ( lookup_config (rs, "config.cc.poptions", nullptr)); @@ -363,21 +447,16 @@ namespace build2 if (!cast_false<bool> (rs["bin.config.loaded"])) { // Prepare configuration hints (pretend it belongs to root scope). - // They are only used on the first load of bin.config so we only - // populate them on our first load. // variable_map h (rs); - if (first) - { - // Note that all these variables have already been registered. - // - h.assign ("config.bin.target") = - cast<target_triplet> (rs["cc.target"]).representation (); + // Note that all these variables have already been registered. + // + h.assign ("config.bin.target") = + cast<target_triplet> (rs["cc.target"]).representation (); - if (auto l = extra.hints["config.bin.pattern"]) - h.assign ("config.bin.pattern") = cast<string> (l); - } + if (auto l = extra.hints["config.bin.pattern"]) + h.assign ("config.bin.pattern") = cast<string> (l); init_module (rs, rs, "bin.config", loc, false /* optional */, h); } @@ -386,7 +465,6 @@ namespace build2 // ourselves since the target can come from the configuration and not // our hint). // - if (first) { const auto& ct (cast<target_triplet> (rs["cc.target"])); const auto& bt (cast<target_triplet> (rs["bin.target"])); @@ -416,6 +494,399 @@ namespace build2 if (tsys == "mingw32") load_module (rs, rs, "bin.rc.config", loc); + // Find the innermost outer core_module, if any. + // + const core_module* om (nullptr); + for (const scope* s (&rs); + (s = s->parent_scope ()->root_scope ()) != nullptr; ) + { + if ((om = s->find_module<core_module> (core_module::name)) != nullptr) + break; + } + + auto& m (extra.set_module (new core_module (om))); + + // config.cc.compiledb.* + // + { + // For config.cc.compiledb and config.cc.compiledb.name we only + // consider a value in this root scope (if it's inherited from the + // outer scope, then that's where it will be handled). One special + // case is when it's specified on a scope that doesn't load the cc + // module (including, ultimately, the global scope for a global + // override). We handle it by assuming the value belongs to the + // outermost amalgamation that loads the cc module. + // + // Note: cache the result. + // + auto find_outermost = + [&rs, o = optional<pair<scope*, core_module*>> ()] () mutable + { + if (!o) + { + o = pair<scope*, core_module*> (&rs, nullptr); + for (scope* s (&rs); + (s = s->parent_scope ()->root_scope ()) != nullptr; ) + { + if (auto* m = s->find_module<core_module> (core_module::name)) + { + o->first = s; + o->second = m; + } + } + } + + return *o; + }; + + auto belongs = [&rs, &find_outermost] (const lookup& l) + { + return l.belongs (rs) || find_outermost ().first == &rs; + }; + + // Add compilation databases specified in ns as <name>[@<path>] pairs, + // appending their names to cdb_names. If <path> is absent, then place + // the database into the base directory. Return the last added name. + // + auto add_cdbs = [&ctx, + &loc, + &trace] (strings& cdb_names, + const names& ns, + const dir_path& base) -> const string& + { + // Check that names and paths match. Return false if this entry + // already exist. + // + // Note that before we also checked that the same paths are not used + // across contexts. But, actually, there doesn't seem to be anything + // wrong with that and this can actually be useful, for example, + // when developing build system modules. + // + auto check = [&loc] (const string& n, const path& p) + { + for (const unique_ptr<compiledb>& db: compiledbs) + { + bool nm (db->name == n); + bool pm (db->path == p); + + if (nm != pm) + fail (loc) << "inconsistent compilation database names/paths" << + info << p << " is called " << n << + info << db->path << " is called " << db->name; + + if (nm) + return false; + } + + return true; + }; + + const string* r (&empty_string); + + bool reg (false); + size_t j (compiledbs.size ()); // First newly added database. + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + // Each element has the <name>[@<path>] form. + // + // The special `-` <name> signifies stdout. + // + // If <path> is absent, then the file is called <name>.json and + // placed into the output directory of the amalgamation or project + // root scope (passed as the base argument). + // + // If <path> is (syntactically) a directory, then the file path is + // <path>/<name>.json. + // + if (!i->simple () || i->empty ()) + fail (loc) << "invalid compilation database name '" << *i << "'"; + + string n (i->value); + + path p; + if (i->pair) + { + ++i; + + if (n == "-") + fail (loc) << "compilation database path specified for stdout " + << "name"; + try + { + if (i->directory ()) + p = i->dir / n + ".json"; + else if (i->file ()) + { + if (i->dir.empty ()) + p = path (i->value); + else + p = i->dir / i->value; + } + else + throw invalid_path (""); + + if (p.relative ()) + p.complete (); + + p.normalize (); + } + catch (const invalid_path&) + { + fail (loc) << "invalid compilation database path '" << *i + << "'"; + } + } + else if (n != "-") + { + p = base / n + ".json"; + } + + if (check (n, p)) + { + reg = compiledbs.empty (); // First time. + +#ifdef BUILD2_BOOTSTRAP + fail (loc) << "compilation database requested during bootstrap"; +#else + if (n == "-") + compiledbs.push_back ( + unique_ptr<compiledb> ( + new compiledb_stdout (n))); + else + compiledbs.push_back ( + unique_ptr<compiledb> ( + new compiledb_file (n, move (p)))); +#endif + } + + // We may end up with duplicates via the config.cc.compiledb + // logic. + // + auto k (find (cdb_names.begin (), cdb_names.end (), n)); + + if (k == cdb_names.end ()) + { + cdb_names.push_back (move (n)); + r = &cdb_names.back (); + } + else + r = &*k; + } + + // Register context operation callback for compiledb generation. + // + // We have two complications here: + // + // 1. We could be performing all this from the load phase that + // interrupted the match phase, which means the point where the + // pre callback would have been called is already gone (but the + // post callback will still be called). This will happen if we, + // say, import a project that has a compilation database from a + // project that doesn't. + // + // (Note that if you think that this can be solved by simply + // always registering the callbacks, regardless of whether we + // have any databases or not, consider a slightly different + // scenario where we import a project that loads the cc module + // from a project that does not). + // + // What we are going to do in this case is simply call the pre + // callback manually. + // + // 2. We could again be performing all this from the load phase that + // interrupted the match phase, but this time the pre callback + // has already been called, which means there will be no pre() + // call for the newly added database(s). This will happen if we, + // say, import a project that has a compilation database from a + // project that also has one. + // + // Again, what we are going to do in this case is simply call the + // pre callback for the new database(s) manually. + // + if (reg) + ctx.operation_callbacks.emplace ( + perform_update_id, + context::operation_callback {&compiledb_pre, &compiledb_post}); + + if (ctx.load_generation > 1) + { + action a (ctx.current_action ()); + + if (a.inner_action () == perform_update_id) + { + if (reg) // Case #1. + { + l6 ([&]{trace << "direct compiledb_pre for context " << &ctx;}); + compiledb_pre (ctx, a, action_targets {}); + } + else // Case #2. + { + size_t n (compiledbs.size ()); + + if (j != n) + { + l6 ([&]{trace << "additional compiledb for context " << &ctx;}); + + for (; j != n; ++j) + compiledbs[j]->pre (ctx); + } + } + } + } + + return *r; + }; + + lookup l; + + // config.cc.compiledb + // + // The semantics of this value is as follows: + // + // Location: outermost amalgamation that loads the cc module. + // Name filter: enable from this scope unless specified explicitly. + // Type filter: enable from this scope unless specified explicitly. + // + // Note: save omitted. + // + optional<string> enable_filter; + + l = lookup_config (rs, "config.cc.compiledb", 0, &save_compiledb_name); + if (l && belongs (l)) + { + l6 ([&]{trace << "config.cc.compiledb specified on " << rs;}); + + const names& ns (cast<names> (l)); + + // Make sure it's one name/path. + // + if (ns.empty () || ns.size () != (ns.front ().pair ? 2 : 1)) + fail (loc) << "invalid compilation database name '" << ns << "'"; + + // We inject the database directly into the outer amalgamation's + // module, as-if config.cc.compiledb.name was specified in its + // scope. Unless there isn't one, in which case it's us. + // + pair<scope*, core_module*> p (find_outermost ()); + + // Save the name for the name filter below. + // + enable_filter = add_cdbs ( + (p.second != nullptr ? *p.second : m).cdb_names_, + ns, + p.first->out_path ()); + } + + // config.cc.compiledb.name + // + // Note: save omitted. + // + l = lookup_config (rs, + "config.cc.compiledb.name", + 0, + &save_compiledb_name); + if (l && belongs (l)) + { + l6 ([&]{trace << "config.cc.compiledb.name specified on " << rs;}); + + add_cdbs (m.cdb_names_, cast<names> (l), rs.out_path ()); + } + + // config.cc.compiledb.filter + // + // Note: save omitted. + // + l = lookup_config (rs, "config.cc.compiledb.filter"); + if (l && belongs (l)) // Custom. + { + m.cdb_filter_ = &cast<compiledb_name_filter> (l); + } + else if (enable_filter) // Override. + { + // Inherit outer filter. + // + if (om != nullptr && om->cdb_filter_ != nullptr) + m.cdb_filter_storage_ = *om->cdb_filter_; + + m.cdb_filter_storage_.emplace_back (*enable_filter, true); + m.cdb_filter_ = &m.cdb_filter_storage_; + } + else if (om != nullptr) // Inherit. + { + m.cdb_filter_ = om->cdb_filter_; + } + + // config.cc.compiledb.filter.input + // config.cc.compiledb.filter.output + // + // Note that filtering happens before we take into account the change + // status, which means for larger projects there would be a lot of + // targets to filter even during the incremental update. So it feels + // it would have been better to pre-lookup the target types. However, + // the targets that would normally be used are registered by other + // modules (bin, c/cxx) and which haven't been loaded yet. So instead + // we try to optimize the lookup for the commonly used targets. + // + // Note: save omitted. + // + l = lookup_config (rs, "config.cc.compiledb.filter.input"); + if (l && belongs (l)) // Custom. + { + m.cdb_filter_input_ = &cast<compiledb_type_filter> (l); + } + else if (enable_filter) // Override. + { + // Inherit outer filter. + // + if (om != nullptr && om->cdb_filter_input_ != nullptr) + { + m.cdb_filter_input_storage_ = *om->cdb_filter_input_; + m.cdb_filter_input_storage_.emplace_back (*enable_filter, "target"); + m.cdb_filter_input_ = &m.cdb_filter_input_storage_; + } + else + m.cdb_filter_input_ = nullptr; // Enable all. + } + else if (om != nullptr) // Inherit. + { + m.cdb_filter_input_ = om->cdb_filter_input_; + } + + l = lookup_config (rs, "config.cc.compiledb.filter.output"); + if (l && belongs (l)) // Custom. + { + m.cdb_filter_output_ = &cast<compiledb_type_filter> (l); + } + else if (enable_filter) // Override. + { + // Inherit outer filter. + // + if (om != nullptr && om->cdb_filter_output_ != nullptr) + { + m.cdb_filter_output_storage_ = *om->cdb_filter_output_; + m.cdb_filter_output_storage_.emplace_back (*enable_filter, "target"); + m.cdb_filter_output_ = &m.cdb_filter_output_storage_; + } + else + m.cdb_filter_output_ = nullptr; // Enable all. + } + else if (om != nullptr) // Inherit. + { + m.cdb_filter_output_ = om->cdb_filter_output_; + } + } + + // Register scope operation callback for cleaning module sidebuilds and + // compilation databases. + // + // It feels natural to clean this stuff up as a post operation but that + // prevents the (otherwise-empty) out root directory to be cleaned up + // (via the standard fsdir{} chain). + // + rs.operation_callbacks.emplace ( + perform_clean_id, + scope::operation_callback {&clean_callback, nullptr /*post*/}); + return true; } diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx index cf6c6e4..a3c64d9 100644 --- a/libbuild2/cc/module.cxx +++ b/libbuild2/cc/module.cxx @@ -22,6 +22,12 @@ namespace build2 { namespace cc { + // cc.core_module + // + const string core_module::name ("cc.core.config"); + + // x.config_module + // void config_module:: guess (scope& rs, const location& loc, const variable_map&) { @@ -891,6 +897,9 @@ namespace build2 config::save_environment (rs, xi.platform_environment); } + // x module + // + // Global cache of ad hoc importable headers. // // The key is a hash of the system header search directories diff --git a/libbuild2/cc/module.hxx b/libbuild2/cc/module.hxx index 2ef07d6..d4e9a67 100644 --- a/libbuild2/cc/module.hxx +++ b/libbuild2/cc/module.hxx @@ -14,6 +14,8 @@ #include <libbuild2/cc/common.hxx> +#include <libbuild2/cc/compiledb.hxx> + #include <libbuild2/cc/compile-rule.hxx> #include <libbuild2/cc/link-rule.hxx> #include <libbuild2/cc/install-rule.hxx> @@ -27,6 +29,35 @@ namespace build2 { struct compiler_info; + // cc.core module + // + class core_module: public build2::module + { + public: + static const string name; + + explicit + core_module (const core_module* om) + : outer_module_ (om) + { + } + + public: + const core_module* outer_module_; + + strings cdb_names_; + + const compiledb_name_filter* cdb_filter_ = nullptr; + const compiledb_type_filter* cdb_filter_input_ = nullptr; + const compiledb_type_filter* cdb_filter_output_ = nullptr; + + compiledb_name_filter cdb_filter_storage_; + compiledb_type_filter cdb_filter_input_storage_; + compiledb_type_filter cdb_filter_output_storage_; + }; + + // x.config module + // class LIBBUILD2_CC_SYMEXPORT config_module: public build2::module, public config_data { @@ -153,6 +184,8 @@ namespace build2 msvc_library_search_dirs (const compiler_info&, scope&) const; }; + // x module + // class LIBBUILD2_CC_SYMEXPORT module: public build2::module, public virtual common, public link_rule, @@ -162,7 +195,6 @@ namespace build2 public predefs_rule { public: - explicit module (data&& d, const scope& rs) : common (move (d)), link_rule (move (d)), diff --git a/libbuild2/cc/pkgconfig.cxx b/libbuild2/cc/pkgconfig.cxx index ecef61c..79a38ea 100644 --- a/libbuild2/cc/pkgconfig.cxx +++ b/libbuild2/cc/pkgconfig.cxx @@ -1,6 +1,8 @@ // file : libbuild2/cc/pkgconfig.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include <libbuild2/cc/pkgconfig.hxx> + #include <libbuild2/scope.hxx> #include <libbuild2/target.hxx> #include <libbuild2/context.hxx> @@ -18,7 +20,6 @@ #include <libbuild2/cc/utility.hxx> #include <libbuild2/cc/common.hxx> -#include <libbuild2/cc/pkgconfig.hxx> #include <libbuild2/cc/compile-rule.hxx> #include <libbuild2/cc/link-rule.hxx> diff --git a/libbuild2/context.hxx b/libbuild2/context.hxx index db126bc..81ac970 100644 --- a/libbuild2/context.hxx +++ b/libbuild2/context.hxx @@ -178,34 +178,35 @@ namespace build2 // match - search prerequisites and match rules // execute - execute the matched rule // - // The build system starts with a "serial load" phase and then continues - // with parallel match and execute. Match, however, can be interrupted - // both with load and execute. + // The build system starts with a serial "initial load" phase and then + // continues with parallel match and execute. Match, however, can be + // interrupted both with load and execute. // - // Match can be interrupted with "exclusive load" in order to load - // additional buildfiles. Similarly, it can be interrupted with (parallel) - // execute in order to build targetd required to complete the match (for - // example, generated source code or source code generators themselves). + // Match can be interrupted with a (serial) "interrupting load" in order + // to load additional buildfiles. Similarly, it can be interrupted with + // (parallel) execute in order to build targetd required to complete the + // match (for example, generated source code or source code generators + // themselves). // // Such interruptions are performed by phase change that is protected by // phase_mutex (which is also used to synchronize the state changes // between phases). // - // Serial load can perform arbitrary changes to the build state. Exclusive - // load, however, can only perform "island appends". That is, it can - // create new "nodes" (variables, scopes, etc) but not (semantically) - // change already existing nodes or invalidate any references to such (the - // idea here is that one should be able to load additional buildfiles as - // long as they don't interfere with the existing build state). The - // "islands" are identified by the load_generation number (1 for the - // initial/serial load). It is incremented in case of a phase switch and - // can be stored in various "nodes" to verify modifications are only done - // "within the islands". Another example of invalidation would be - // insertion of a new scope "under" an existing target thus changing its - // scope hierarchy (and potentially even its base scope). This would be - // bad because we may have made decisions based on the original hierarchy, - // for example, we may have queried a variable which in the new hierarchy - // would "see" a new value from the newly inserted scope. + // Initial load can perform arbitrary changes to the build state. + // Interrupting load, however, can only perform what we call "island + // appends". That is, it can create new "nodes" (variables, scopes, etc) + // but not (semantically) change already existing nodes or invalidate any + // references to such (the idea here is that one should be able to load + // additional buildfiles as long as they don't interfere with the existing + // build state). The "islands" are identified by the load_generation + // number (1 for the initial load). It is incremented in case of a phase + // switch and can be stored in various "nodes" to verify modifications are + // only done "within the islands". Another example of invalidation would + // be insertion of a new scope "under" an existing target thus changing + // its scope hierarchy (and potentially even its base scope). This would + // be bad because we may have made decisions based on the original + // hierarchy, for example, we may have queried a variable which in the new + // hierarchy would "see" a new value from the newly inserted scope. // // The special load_generation value 0 indicates initialization before // anything has been loaded. Currently, it is changed to 1 at the end @@ -370,6 +371,10 @@ namespace build2 // system module or an ad hoc C++ recipe. See create_module_context() for // details. // + // Note also that if the callbacks are registered from a module load + // function, then there are nuances with interrupted load phases. See the + // compilation database handling in the cc module for details. + // // See also scope::operation_callback. // struct operation_callback diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx index 9b7f5b1..9fcfca8 100644 --- a/libbuild2/dump.cxx +++ b/libbuild2/dump.cxx @@ -242,7 +242,8 @@ namespace build2 h_pair = true; } else if (t.is_a<map<optional<string>, string>> () || - t.is_a<vector<pair<optional<string>, string>>> ()) + t.is_a<vector<pair<optional<string>, string>>> () || + t.is_a<vector<pair<optional<string>, bool>>> ()) { h_array = true; h_pair = false; diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx index 520b993..36a7ce5 100644 --- a/libbuild2/module.cxx +++ b/libbuild2/module.cxx @@ -480,7 +480,7 @@ namespace build2 // if (nested) { - // This could be initial or exclusive load. + // This could be initial or interrupting load. // // @@ TODO: see the ad hoc recipe case as a reference. // diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx index f5cb2c5..c6aac45 100644 --- a/libbuild2/name.hxx +++ b/libbuild2/name.hxx @@ -136,7 +136,7 @@ namespace build2 // value to dir. Throw invalid_argument if value would become empty. May // also throw invalid_path. // - void + LIBBUILD2_SYMEXPORT void canonicalize (); }; diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx index 0ec23d3..0abc360 100644 --- a/libbuild2/variable.cxx +++ b/libbuild2/variable.cxx @@ -3320,10 +3320,13 @@ namespace build2 value_traits<vector<pair<string, optional<string>>>>; template struct LIBBUILD2_DEFEXPORT + value_traits<vector<pair<string, optional<bool>>>>; + + template struct LIBBUILD2_DEFEXPORT value_traits<vector<pair<optional<string>, string>>>; template struct LIBBUILD2_DEFEXPORT - value_traits<vector<pair<string, optional<bool>>>>; + value_traits<vector<pair<optional<string>, bool>>>; template struct LIBBUILD2_DEFEXPORT value_traits<set<string>>; template struct LIBBUILD2_DEFEXPORT value_traits<set<json_value>>; diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx index a14c52b..e55a121 100644 --- a/libbuild2/variable.hxx +++ b/libbuild2/variable.hxx @@ -1380,6 +1380,9 @@ namespace build2 extern template struct LIBBUILD2_DECEXPORT value_traits<vector<pair<string, optional<bool>>>>; + extern template struct LIBBUILD2_DECEXPORT + value_traits<vector<pair<optional<string>, bool>>>; + extern template struct LIBBUILD2_DECEXPORT value_traits<set<string>>; extern template struct LIBBUILD2_DECEXPORT value_traits<set<json_value>>; |