diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2019-08-24 17:41:30 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2019-08-28 15:01:48 +0300 |
commit | 4bdf53837e010073de802070d4e6087410662d3e (patch) | |
tree | 2820d3964877d1a7d498833da325aa3d3a699353 /libbuild2 | |
parent | ea24f530048cbce0c5335ca3fd3632c8ce34315a (diff) |
Move cc build system module to separate library
Diffstat (limited to 'libbuild2')
43 files changed, 20869 insertions, 1 deletions
diff --git a/libbuild2/buildfile b/libbuild2/buildfile index 57f4895..aad4e78 100644 --- a/libbuild2/buildfile +++ b/libbuild2/buildfile @@ -5,7 +5,7 @@ # NOTE: remember to update bundled_modules in libbuild2/modules.cxx if adding # a new module. # -./: lib{build2} bash/ bin/ in/ version/ +./: lib{build2} bash/ bin/ cc/ in/ version/ import int_libs = libbutl%lib{butl} diff --git a/libbuild2/cc/buildfile b/libbuild2/cc/buildfile new file mode 100644 index 0000000..5b3d8eb --- /dev/null +++ b/libbuild2/cc/buildfile @@ -0,0 +1,74 @@ +# file : libbuild2/cc/buildfile +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +import int_libs = libbutl%lib{butl} +import imp_libs = libpkgconf%lib{pkgconf} + +include ../ +int_libs += ../lib{build2} + +include ../bin/ +int_libs += ../bin/lib{build2-bin} + +./: lib{build2-cc}: libul{build2-cc}: {hxx ixx txx cxx}{** -**.test...} \ + $imp_libs $int_libs + +# Unit tests. +# +exe{*.test}: +{ + test = true + install = false +} + +for t: cxx{**.test...} +{ + d = $directory($t) + n = $name($t)... + b = $path.base($name($t)) + + ./: $d/exe{$n}: $t $d/{hxx ixx txx}{+$n} $d/testscript{+$n +$b+*.test...} + $d/exe{$n}: libul{build2-cc}: bin.whole = false +} + +# Build options. +# +obja{*}: cxx.poptions += -DLIBBUILD2_CC_STATIC_BUILD +objs{*}: cxx.poptions += -DLIBBUILD2_CC_SHARED_BUILD + +# Export options. +# +lib{build2-cc}: +{ + cxx.export.poptions = "-I$out_root" "-I$src_root" + cxx.export.libs = $int_libs +} + +liba{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_STATIC +libs{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_SHARED + +# For pre-releases use the complete version to make sure they cannot be used +# in place of another pre-release or the final version. See the version module +# for details on the version.* variable values. 
+# +# And because this is a build system module, we also embed the same value as +# the interface version (note that we cannot use build.version.interface for +# bundled modules because we could be built with a different version of the +# build system). +# +ver = ($version.pre_release \ + ? "$version.project_id" \ + : "$version.major.$version.minor") + +lib{build2-cc}: bin.lib.version = @"-$ver" +libs{build2-cc}: bin.lib.load_suffix = "-$ver" + +# Install into the libbuild2/cc/ subdirectory of, say, /usr/include/ +# recreating subdirectories. +# +{hxx ixx txx}{*}: +{ + install = include/libbuild2/cc/ + install.subdirs = true +} diff --git a/libbuild2/cc/common.cxx b/libbuild2/cc/common.cxx new file mode 100644 index 0000000..bfcb00c --- /dev/null +++ b/libbuild2/cc/common.cxx @@ -0,0 +1,1031 @@ +// file : libbuild2/cc/common.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/common.hxx> + +#include <libbuild2/file.hxx> // import() +#include <libbuild2/scope.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/cc/utility.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + // Recursively process prerequisite libraries. If proc_impl returns false, + // then only process interface (*.export.libs), otherwise -- interface and + // implementation (prerequisite and from *.libs, unless overriden). + // + // Note that here we assume that an interface library is also an + // implementation (since we don't use *.export.libs in static link). We + // currently have this restriction to make sure the target in + // *.export.libs is up-to-date (which will happen automatically if it is + // listed as a prerequisite of this library). 
+ // + // Storing a reference to library path in proc_lib is legal (it comes + // either from the target's path or from one of the *.libs variables + // neither of which should change on this run). + // + // Note that the order of processing is: + // + // 1. options + // 2. lib itself (if self is true) + // 3. dependency libs (prerequisite_targets, left to right, depth-first) + // 4. dependency libs (*.libs variables). + // + // The first argument to proc_lib is a pointer to the last element of an + // array that contains the current library dependency chain all the way to + // the library passed to process_libraries(). The first element of this + // array is NULL. + // + void common:: + process_libraries ( + action a, + const scope& top_bs, + linfo top_li, + const dir_paths& top_sysd, + const file& l, + bool la, + lflags lf, + const function<bool (const file&, + bool la)>& proc_impl, // Implementation? + const function<void (const file* const*, // Can be NULL. + const string& path, // Library path. + lflags, // Link flags. + bool sys)>& proc_lib, // True if system library. + const function<void (const file&, + const string& type, // cc.type + bool com, // cc. or x. + bool exp)>& proc_opt, // *.export. + bool self /*= false*/, // Call proc_lib on l? + small_vector<const file*, 16>* chain) const + { + small_vector<const file*, 16> chain_storage; + if (chain == nullptr) + { + chain = &chain_storage; + chain->push_back (nullptr); + } + + // See what type of library this is (C, C++, etc). Use it to decide + // which x.libs variable name to use. If it's unknown, then we only + // look into prerequisites. Note: lookup starting from rule-specific + // variables (target should already be matched). 
+ // + const string* t (cast_null<string> (l.state[a][c_type])); + + bool impl (proc_impl && proc_impl (l, la)); + bool cc (false), same (false); + + auto& vp (top_bs.ctx.var_pool); + lookup c_e_libs; + lookup x_e_libs; + + if (t != nullptr) + { + cc = *t == "cc"; + same = !cc && *t == x; + + // The explicit export override should be set on the liba/libs{} + // target itself. Note also that we only check for *.libs. If one + // doesn't have any libraries but needs to set, say, *.loptions, then + // *.libs should be set to NULL or empty (this is why we check for + // the result being defined). + // + if (impl) + c_e_libs = l.vars[c_export_libs]; // Override. + else if (l.group != nullptr) // lib{} group. + c_e_libs = l.group->vars[c_export_libs]; + + if (!cc) + { + const variable& var (same + ? x_export_libs + : vp[*t + ".export.libs"]); + + if (impl) + x_e_libs = l.vars[var]; // Override. + else if (l.group != nullptr) // lib{} group. + x_e_libs = l.group->vars[var]; + } + + // Process options first. + // + if (proc_opt) + { + // If all we know is it's a C-common library, then in both cases we + // only look for cc.export.*. + // + if (cc) + proc_opt (l, *t, true, true); + else + { + if (impl) + { + // Interface and implementation: as discussed above, we can have + // two situations: overriden export or default export. + // + if (c_e_libs.defined () || x_e_libs.defined ()) + { + // NOTE: should this not be from l.vars rather than l? Or + // perhaps we can assume non-common values will be set on + // libs{}/liba{}. + // + proc_opt (l, *t, true, true); + proc_opt (l, *t, false, true); + } + else + { + // For default export we use the same options as were used to + // build the library. + // + proc_opt (l, *t, true, false); + proc_opt (l, *t, false, false); + } + } + else + { + // Interface: only add *.export.* (interface dependencies). 
+ // + proc_opt (l, *t, true, true); + proc_opt (l, *t, false, true); + } + } + } + } + + // Determine if an absolute path is to a system library. Note that + // we assume both paths to be normalized. + // + auto sys = [] (const dir_paths& sysd, const string& p) -> bool + { + size_t pn (p.size ()); + + for (const dir_path& d: sysd) + { + const string& ds (d.string ()); // Can be "/", otherwise no slash. + size_t dn (ds.size ()); + + if (pn > dn && + p.compare (0, dn, ds) == 0 && + (path::traits_type::is_separator (ds[dn - 1]) || + path::traits_type::is_separator (p[dn]))) + return true; + } + + return false; + }; + + // Next process the library itself if requested. + // + if (self && proc_lib) + { + chain->push_back (&l); + + // Note that while normally the path is assigned, in case of an import + // stub the path to the DLL may not be known and so the path will be + // empty (but proc_lib() will use the import stub). + // + const path& p (l.path ()); + + bool s (t != nullptr // If cc library (matched or imported). + ? cast_false<bool> (l.vars[c_system]) + : !p.empty () && sys (top_sysd, p.string ())); + + proc_lib (&chain->back (), p.string (), lf, s); + } + + const scope& bs (t == nullptr || cc ? top_bs : l.base_scope ()); + optional<linfo> li; // Calculate lazily. + const dir_paths* sysd (nullptr); // Resolve lazily. + + // Find system search directories corresponding to this library, i.e., + // from its project and for its type (C, C++, etc). + // + auto find_sysd = [&top_sysd, t, cc, same, &bs, &sysd, this] () + { + // Use the search dirs corresponding to this library scope/type. + // + sysd = (t == nullptr || cc) + ? &top_sysd // Imported library, use importer's sysd. + : &cast<dir_paths> ( + bs.root_scope ()->vars[same + ? x_sys_lib_dirs + : bs.ctx.var_pool[*t + ".sys_lib_dirs"]]); + }; + + auto find_linfo = [top_li, t, cc, &bs, &l, &li] () + { + li = (t == nullptr || cc) + ? 
top_li + : link_info (bs, link_type (l).type); + }; + + // Only go into prerequisites (implementation) if instructed and we are + // not using explicit export. Otherwise, interface dependencies come + // from the lib{}:*.export.libs below. + // + if (impl && !c_e_libs.defined () && !x_e_libs.defined ()) + { + for (const prerequisite_target& pt: l.prerequisite_targets[a]) + { + // Note: adhoc prerequisites are not part of the library meta- + // information protocol. + // + if (pt == nullptr || pt.adhoc) + continue; + + bool la; + const file* f; + + if ((la = (f = pt->is_a<liba> ())) || + (la = (f = pt->is_a<libux> ())) || + ( f = pt->is_a<libs> ())) + { + if (sysd == nullptr) find_sysd (); + if (!li) find_linfo (); + + process_libraries (a, bs, *li, *sysd, + *f, la, pt.data, + proc_impl, proc_lib, proc_opt, true, chain); + } + } + } + + // Process libraries (recursively) from *.export.libs (of type names) + // handling import, etc. + // + // If it is not a C-common library, then it probably doesn't have any of + // the *.libs. + // + if (t != nullptr) + { + optional<dir_paths> usrd; // Extract lazily. + + // Determine if a "simple path" is a system library. + // + auto sys_simple = [&sysd, &sys, &find_sysd] (const string& p) -> bool + { + bool s (!path::traits_type::absolute (p)); + + if (!s) + { + if (sysd == nullptr) find_sysd (); + + s = sys (*sysd, p); + } + + return s; + }; + + auto proc_int = [&l, + &proc_impl, &proc_lib, &proc_opt, chain, + &sysd, &usrd, + &find_sysd, &find_linfo, &sys_simple, + &bs, a, &li, this] (const lookup& lu) + { + const vector<name>* ns (cast_null<vector<name>> (lu)); + if (ns == nullptr || ns->empty ()) + return; + + for (const name& n: *ns) + { + if (n.simple ()) + { + // This is something like -lpthread or shell32.lib so should be + // a valid path. But it can also be an absolute library path + // (e.g., something that may come from our .static/shared.pc + // files). 
+ // + if (proc_lib) + proc_lib (nullptr, n.value, 0, sys_simple (n.value)); + } + else + { + // This is a potentially project-qualified target. + // + if (sysd == nullptr) find_sysd (); + if (!li) find_linfo (); + + const file& t (resolve_library (a, bs, n, *li, *sysd, usrd)); + + if (proc_lib) + { + // This can happen if the target is mentioned in *.export.libs + // (i.e., it is an interface dependency) but not in the + // library's prerequisites (i.e., it is not an implementation + // dependency). + // + // Note that we used to just check for path being assigned but + // on Windows import-installed DLLs may legally have empty + // paths. + // + if (t.mtime () == timestamp_unknown) + fail << "interface dependency " << t << " is out of date" << + info << "mentioned in *.export.libs of target " << l << + info << "is it a prerequisite of " << l << "?"; + } + + // Process it recursively. + // + // @@ Where can we get the link flags? Should we try to find + // them in the library's prerequisites? What about installed + // stuff? + // + process_libraries (a, bs, *li, *sysd, + t, t.is_a<liba> () || t.is_a<libux> (), 0, + proc_impl, proc_lib, proc_opt, true, chain); + } + } + }; + + // Process libraries from *.libs (of type strings). + // + auto proc_imp = [&proc_lib, &sys_simple] (const lookup& lu) + { + const strings* ns (cast_null<strings> (lu)); + if (ns == nullptr || ns->empty ()) + return; + + for (const string& n: *ns) + { + // This is something like -lpthread or shell32.lib so should be a + // valid path. + // + proc_lib (nullptr, n, 0, sys_simple (n)); + } + }; + + // Note: the same structure as when processing options above. + // + // If all we know is it's a C-common library, then in both cases we + // only look for cc.export.libs. + // + if (cc) + { + if (c_e_libs) proc_int (c_e_libs); + } + else + { + if (impl) + { + // Interface and implementation: as discussed above, we can have + // two situations: overriden export or default export. 
+ // + if (c_e_libs.defined () || x_e_libs.defined ()) + { + if (c_e_libs) proc_int (c_e_libs); + if (x_e_libs) proc_int (x_e_libs); + } + else + { + // For default export we use the same options/libs as were used + // to build the library. Since libraries in (non-export) *.libs + // are not targets, we don't need to recurse. + // + if (proc_lib) + { + proc_imp (l[c_libs]); + proc_imp (l[same ? x_libs : vp[*t + ".libs"]]); + } + } + } + else + { + // Interface: only add *.export.* (interface dependencies). + // + if (c_e_libs) proc_int (c_e_libs); + if (x_e_libs) proc_int (x_e_libs); + } + } + } + + // Remove this library from the chain. + // + if (self && proc_lib) + chain->pop_back (); + } + + // The name can be an absolute or relative target name (for example, + // /tmp/libfoo/lib{foo} or ../libfoo/lib{foo}) or a project-qualified + // relative target name (e.g., libfoo%lib{foo}). + // + // Note that in case of the relative target that comes from export.libs, + // the resolution happens relative to the base scope of the target from + // which this export.libs came, which is exactly what we want. + // + // Note that the scope, search paths, and the link order should all be + // derived from the library target that mentioned this name. This way we + // will select exactly the same target as the library's matched rule and + // that's the only way to guarantee it will be up-to-date. + // + const file& common:: + resolve_library (action a, + const scope& s, + name n, + linfo li, + const dir_paths& sysd, + optional<dir_paths>& usrd) const + { + if (n.type != "lib" && n.type != "liba" && n.type != "libs") + fail << "target name " << n << " is not a library"; + + const target* xt (nullptr); + + if (!n.qualified ()) + { + // Search for an existing target with this name "as if" it was a + // prerequisite. + // + xt = search_existing (n, s); + + if (xt == nullptr) + fail << "unable to find library " << n; + } + else + { + // This is import. 
+ // + auto rp (s.find_target_type (n, location ())); // Note: changes name. + const target_type* tt (rp.first); + optional<string>& ext (rp.second); + + if (tt == nullptr) + fail << "unknown target type '" << n.type << "' in library " << n; + + // @@ OUT: for now we assume out is undetermined, just like in + // search (name, scope). + // + dir_path out; + + prerequisite_key pk {n.proj, {tt, &n.dir, &out, &n.value, ext}, &s}; + xt = search_library_existing (a, sysd, usrd, pk); + + if (xt == nullptr) + { + if (n.qualified ()) + xt = import_existing (s.ctx, pk); + } + + if (xt == nullptr) + fail << "unable to find library " << pk; + } + + // If this is lib{}/libu*{}, pick appropriate member. + // + if (const libx* l = xt->is_a<libx> ()) + xt = link_member (*l, a, li); // Pick lib*{e,a,s}{}. + + return xt->as<file> (); + } + + // Insert a target verifying that it already exists if requested. Return + // the lock. + // + template <typename T> + ulock common:: + insert_library (context& ctx, + T*& r, + const string& name, + const dir_path& d, + optional<string> ext, + bool exist, + tracer& trace) + { + auto p (ctx.targets.insert_locked (T::static_type, + d, + dir_path (), + name, + move (ext), + true, // Implied. + trace)); + + assert (!exist || !p.second.owns_lock ()); + r = &p.first.template as<T> (); + return move (p.second); + } + + // Note that pk's scope should not be NULL (even if dir is absolute). + // + target* common:: + search_library (action act, + const dir_paths& sysd, + optional<dir_paths>& usrd, + const prerequisite_key& p, + bool exist) const + { + tracer trace (x, "search_library"); + + assert (p.scope != nullptr); + + // @@ This is hairy enough to warrant a separate implementation for + // Windows. + + // Note: since we are searching for a (presumably) installed library, + // utility libraries do not apply. + // + bool l (p.is_a<lib> ()); + const optional<string>& ext (l ? nullopt : p.tk.ext); // Only liba/libs. 
+ + // First figure out what we need to search for. + // + const string& name (*p.tk.name); + + // liba + // + path an; + optional<string> ae; + + if (l || p.is_a<liba> ()) + { + // We are trying to find a library in the search paths extracted from + // the compiler. It would only be natural if we used the library + // prefix/extension that correspond to this compiler and/or its + // target. + // + // Unlike MinGW, VC's .lib/.dll.lib naming is by no means standard and + // we might need to search for other names. In fact, there is no + // reliable way to guess from the file name what kind of library it + // is, static or import and we will have to do deep inspection of such + // alternative names. However, if we did find .dll.lib, then we can + // assume that .lib is the static library without any deep inspection + // overhead. + // + const char* e (""); + + if (tsys == "win32-msvc") + { + an = path (name); + e = "lib"; + } + else + { + an = path ("lib" + name); + e = "a"; + } + + ae = ext ? ext : string (e); + if (!ae->empty ()) + { + an += '.'; + an += *ae; + } + } + + // libs + // + path sn; + optional<string> se; + + if (l || p.is_a<libs> ()) + { + const char* e (""); + + if (tsys == "win32-msvc") + { + sn = path (name); + e = "dll.lib"; + } + else + { + sn = path ("lib" + name); + + if (tsys == "darwin") e = "dylib"; + else if (tsys == "mingw32") e = "dll.a"; // See search code below. + else e = "so"; + } + + se = ext ? ext : string (e); + if (!se->empty ()) + { + sn += '.'; + sn += *se; + } + } + + // Now search. + // + liba* a (nullptr); + libs* s (nullptr); + + pair<path, path> pc; // pkg-config .pc file paths. + path f; // Reuse the buffer. + + auto search =[&a, &s, &pc, + &an, &ae, + &sn, &se, + &name, ext, + &p, &f, exist, &trace, this] (const dir_path& d) -> bool + { + context& ctx (p.scope->ctx); + + timestamp mt; + + // libs + // + // Look for the shared library first. 
The order is important for VC: + // only if we found .dll.lib can we safely assume that just .lib is a + // static library. + // + if (!sn.empty ()) + { + f = d; + f /= sn; + mt = mtime (f); + + if (mt != timestamp_nonexistent) + { + // On Windows what we found is the import library which we need + // to make the first ad hoc member of libs{}. + // + if (tclass == "windows") + { + libi* i (nullptr); + insert_library (ctx, i, name, d, se, exist, trace); + + ulock l ( + insert_library (ctx, s, name, d, nullopt, exist, trace)); + + if (!exist) + { + if (l.owns_lock ()) + { + s->member = i; // We are first. + l.unlock (); + } + else + assert (find_adhoc_member<libi> (*s) == i); + + i->mtime (mt); + i->path (move (f)); + + // Presumably there is a DLL somewhere, we just don't know + // where (and it's possible we might have to look for one if we + // decide we need to do rpath emulation for installed + // libraries as well). We will represent this as empty path + // but valid timestamp (aka "trust me, it's there"). + // + s->mtime (mt); + s->path (empty_path); + } + } + else + { + insert_library (ctx, s, name, d, se, exist, trace); + + s->mtime (mt); + s->path (move (f)); + } + } + else if (!ext && tsys == "mingw32") + { + // Above we searched for the import library (.dll.a) but if it's + // not found, then we also search for the .dll (unless the + // extension was specified explicitly) since we can link to it + // directly. Note also that the resulting libs{} would end up + // being the .dll. + // + se = string ("dll"); + f = f.base (); // Remove .a from .dll.a. + mt = mtime (f); + + if (mt != timestamp_nonexistent) + { + insert_library (ctx, s, name, d, se, exist, trace); + + s->mtime (mt); + s->path (move (f)); + } + } + } + + // liba + // + // If we didn't find .dll.lib then we cannot assume .lib is static. 
+ // + if (!an.empty () && (s != nullptr || tsys != "win32-msvc")) + { + f = d; + f /= an; + + if ((mt = mtime (f)) != timestamp_nonexistent) + { + // Enter the target. Note that because the search paths are + // normalized, the result is automatically normalized as well. + // + // Note that this target is outside any project which we treat + // as out trees. + // + insert_library (ctx, a, name, d, ae, exist, trace); + a->mtime (mt); + a->path (move (f)); + } + } + + // Alternative search for VC. + // + if (tsys == "win32-msvc") + { + const scope& rs (*p.scope->root_scope ()); + const process_path& ld (cast<process_path> (rs["bin.ld.path"])); + + if (s == nullptr && !sn.empty ()) + s = msvc_search_shared (ld, d, p, exist); + + if (a == nullptr && !an.empty ()) + a = msvc_search_static (ld, d, p, exist); + } + + // Look for binary-less libraries via pkg-config .pc files. Note that + // it is possible we have already found one of them as binfull but the + // other is binless. + // + { + bool na (a == nullptr && !an.empty ()); // Need static. + bool ns (s == nullptr && !sn.empty ()); // Need shared. + + if (na || ns) + { + // Only consider the common .pc file if we can be sure there + // is no binfull variant. + // + pair<path, path> r ( + pkgconfig_search (d, p.proj, name, na && ns /* common */)); + + if (na && !r.first.empty ()) + { + insert_library (ctx, a, name, d, nullopt, exist, trace); + a->mtime (timestamp_unreal); + a->path (empty_path); + } + + if (ns && !r.second.empty ()) + { + insert_library (ctx, s, name, d, nullopt, exist, trace); + s->mtime (timestamp_unreal); + s->path (empty_path); + } + + // Only keep these .pc paths if we found anything via them. + // + if ((na && a != nullptr) || (ns && s != nullptr)) + pc = move (r); + } + } + + return a != nullptr || s != nullptr; + }; + + // First try user directories (i.e., -L). 
+ // + bool sys (false); + + if (!usrd) + usrd = extract_library_dirs (*p.scope); + + const dir_path* pd (nullptr); + for (const dir_path& d: *usrd) + { + if (search (d)) + { + pd = &d; + break; + } + } + + // Next try system directories (i.e., those extracted from the compiler). + // + if (pd == nullptr) + { + for (const dir_path& d: sysd) + { + if (search (d)) + { + pd = &d; + break; + } + } + + sys = true; + } + + if (pd == nullptr) + return nullptr; + + // Enter (or find) the lib{} target group. + // + lib* lt; + insert_library ( + p.scope->ctx, lt, name, *pd, l ? p.tk.ext : nullopt, exist, trace); + + // Result. + // + target* r (l ? lt : (p.is_a<liba> () ? static_cast<target*> (a) : s)); + + // Assume the rest is already done if existing. + // + if (exist) + return r; + + // If we cannot acquire the lock then this means the target has already + // been matched (though not clear by whom) and we assume all of this + // has already been done. + // + target_lock ll (lock (act, *lt)); + + // Set lib{} group members to indicate what's available. Note that we + // must be careful here since it's possible we have already imported some + // of its members. + // + if (ll) + { + if (a != nullptr) lt->a = a; + if (s != nullptr) lt->s = s; + } + + target_lock al (a != nullptr ? lock (act, *a) : target_lock ()); + target_lock sl (s != nullptr ? lock (act, *s) : target_lock ()); + + if (!al) a = nullptr; + if (!sl) s = nullptr; + + if (a != nullptr) a->group = lt; + if (s != nullptr) s->group = lt; + + // Mark as a "cc" library (unless already marked) and set the system + // flag. + // + auto mark_cc = [sys, this] (target& t) -> bool + { + auto p (t.vars.insert (c_type)); + + if (p.second) + { + p.first.get () = string ("cc"); + + if (sys) + t.vars.assign (c_system) = true; + } + + return p.second; + }; + + // If the library already has cc.type, then assume it was either + // already imported or was matched by a rule. 
+ // + if (a != nullptr && !mark_cc (*a)) a = nullptr; + if (s != nullptr && !mark_cc (*s)) s = nullptr; + + // Add the "using static/shared library" macro (used, for example, to + // handle DLL export). The absence of either of these macros would + // mean some other build system that cannot distinguish between the + // two (and no pkg-config information). + // + auto add_macro = [this] (target& t, const char* suffix) + { + // If there is already a value (either in cc.export or x.export), + // don't add anything: we don't want to be accumulating defines nor + // messing with custom values. And if we are adding, then use the + // generic cc.export. + // + // The only way we could already have this value is if this same + // library was also imported as a project (as opposed to installed). + // Unlikely but possible. In this case the values were set by the + // export stub and we shouldn't touch them. + // + if (!t.vars[x_export_poptions]) + { + auto p (t.vars.insert (c_export_poptions)); + + if (p.second) + { + // The "standard" macro name will be LIB<NAME>_{STATIC,SHARED}, + // where <name> is the target name. Here we want to strike a + // balance between being unique and not too noisy. + // + string d ("-DLIB"); + + d += sanitize_identifier ( + ucase (const_cast<const string&> (t.name))); + + d += '_'; + d += suffix; + + strings o; + o.push_back (move (d)); + p.first.get () = move (o); + } + } + }; + + if (ll && (a != nullptr || s != nullptr)) + { + // Try to extract library information from pkg-config. We only add the + // default macro if we could not extract more precise information. The + // idea is that in .pc files that we generate, we copy those macros + // (or custom ones) from *.export.poptions. 
+ // + if (pc.first.empty () && pc.second.empty ()) + { + if (!pkgconfig_load (act, *p.scope, + *lt, a, s, + p.proj, name, + *pd, sysd, *usrd)) + { + if (a != nullptr) add_macro (*a, "STATIC"); + if (s != nullptr) add_macro (*s, "SHARED"); + } + } + else + pkgconfig_load (act, *p.scope, *lt, a, s, pc, *pd, sysd, *usrd); + } + + // If we have the lock (meaning this is the first time), set the + // target's recipe to noop. Failing that, we will keep re-locking it, + // updating its members, etc. + // + if (al) match_recipe (al, noop_recipe); + if (sl) match_recipe (sl, noop_recipe); + if (ll) match_recipe (ll, noop_recipe); + + return r; + } + + dir_paths common:: + extract_library_dirs (const scope& bs) const + { + dir_paths r; + + // Extract user-supplied search paths (i.e., -L, /LIBPATH). + // + auto extract = [&bs, &r, this] (const value& val, const variable& var) + { + const auto& v (cast<strings> (val)); + + for (auto i (v.begin ()), e (v.end ()); i != e; ++i) + { + const string& o (*i); + + dir_path d; + + try + { + if (cclass == compiler_class::msvc) + { + // /LIBPATH:<dir> (case-insensitive). + // + if ((o[0] == '/' || o[0] == '-') && + casecmp (o.c_str () + 1, "LIBPATH:", 8) == 0) + d = dir_path (o, 9, string::npos); + else + continue; + } + else + { + // -L can either be in the "-L<dir>" or "-L <dir>" form. + // + if (o == "-L") + { + if (++i == e) + break; // Let the compiler complain. + + d = dir_path (*i); + } + else if (o.compare (0, 2, "-L") == 0) + d = dir_path (o, 2, string::npos); + else + continue; + } + } + catch (const invalid_path& e) + { + fail << "invalid directory '" << e.path << "'" + << " in option '" << o << "'" + << " in variable " << var + << " for scope " << bs; + } + + // Ignore relative paths. Or maybe we should warn? 
+ // + if (!d.relative ()) + r.push_back (move (d)); + } + }; + + if (auto l = bs[c_loptions]) extract (*l, c_loptions); + if (auto l = bs[x_loptions]) extract (*l, x_loptions); + + return r; + } + } +} diff --git a/libbuild2/cc/common.hxx b/libbuild2/cc/common.hxx new file mode 100644 index 0000000..31219a3 --- /dev/null +++ b/libbuild2/cc/common.hxx @@ -0,0 +1,358 @@ +// file : libbuild2/cc/common.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_COMMON_HXX +#define LIBBUILD2_CC_COMMON_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/types.hxx> +#include <libbuild2/cc/guess.hxx> // compiler_id +#include <libbuild2/cc/target.hxx> // h{} + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + // Data entries that define a concrete c-family module (e.g., c or cxx). + // These classes are used as virtual bases by the rules as well as the + // modules. This way the member variables can be referenced as is, without + // any extra decorations (in other words, it is a bunch of data members + // that can be shared between several classes/instances). + // + struct config_data + { + lang x_lang; + + const char* x; // Module name ("c", "cxx"). + const char* x_name; // Compiler name ("c", "c++"). + const char* x_default; // Compiler default ("gcc", "g++"). + const char* x_pext; // Preprocessed source extension (".i", ".ii"). + + // Array of modules that can hint us the toolchain, terminate with + // NULL. 
+ // + const char* const* x_hinters; + + const variable& config_x; + const variable& config_x_id; // <type>[-<variant>] + const variable& config_x_version; + const variable& config_x_target; + const variable& config_x_std; + const variable& config_x_poptions; + const variable& config_x_coptions; + const variable& config_x_loptions; + const variable& config_x_aoptions; + const variable& config_x_libs; + const variable* config_x_importable_headers; + + const variable& x_path; // Compiler process path. + const variable& x_sys_lib_dirs; // System library search directories. + const variable& x_sys_inc_dirs; // System header search directories. + + const variable& x_std; + const variable& x_poptions; + const variable& x_coptions; + const variable& x_loptions; + const variable& x_aoptions; + const variable& x_libs; + const variable* x_importable_headers; + + const variable& c_poptions; // cc.* + const variable& c_coptions; + const variable& c_loptions; + const variable& c_aoptions; + const variable& c_libs; + + const variable& x_export_poptions; + const variable& x_export_coptions; + const variable& x_export_loptions; + const variable& x_export_libs; + + const variable& c_export_poptions; // cc.export.* + const variable& c_export_coptions; + const variable& c_export_loptions; + const variable& c_export_libs; + + const variable& x_stdlib; // x.stdlib + + const variable& c_runtime; // cc.runtime + const variable& c_stdlib; // cc.stdlib + + const variable& c_type; // cc.type + const variable& c_system; // cc.system + const variable& c_module_name; // cc.module_name + const variable& c_reprocess; // cc.reprocess + + const variable& x_preprocessed; // x.preprocessed + const variable* x_symexport; // x.features.symexport + + const variable& x_id; + const variable& x_id_type; + const variable& x_id_variant; + + const variable& x_class; + + const variable& x_version; + const variable& x_version_major; + const variable& x_version_minor; + const variable& x_version_patch; + const 
variable& x_version_build; + + const variable& x_signature; + const variable& x_checksum; + + const variable& x_pattern; + + const variable& x_target; + const variable& x_target_cpu; + const variable& x_target_vendor; + const variable& x_target_system; + const variable& x_target_version; + const variable& x_target_class; + }; + + struct data: config_data + { + const char* x_compile; // Rule names. + const char* x_link; + const char* x_install; + const char* x_uninstall; + + // Cached values for some commonly-used variables/values. + // + + compiler_type ctype; // x.id.type + const string& cvariant; // x.id.variant + compiler_class cclass; // x.class + uint64_t cmaj; // x.version.major + uint64_t cmin; // x.version.minor + const process_path& cpath; // x.path + + const target_triplet& ctgt; // x.target + const string& tsys; // x.target.system + const string& tclass; // x.target.class + + const strings& tstd; // Translated x_std value (options). + + bool modules; // x.features.modules + bool symexport; // x.features.symexport + + const strings* import_hdr; // x.importable_headers (NULL if unused/empty). + + const dir_paths& sys_lib_dirs; // x.sys_lib_dirs + const dir_paths& sys_inc_dirs; // x.sys_inc_dirs + + size_t sys_lib_dirs_extra; // First extra path (size if none). + size_t sys_inc_dirs_extra; // First extra path (size if none). + + const target_type& x_src; // Source target type (c{}, cxx{}). + const target_type* x_mod; // Module target type (mxx{}), if any. + + // Array of target types that are considered the X-language headers + // (excluding h{} except for C). Keep them in the most likely to appear + // order with the "real header" first and terminated with NULL. 
+ // + const target_type* const* x_hdr; + + template <typename T> + bool + x_header (const T& t, bool c_hdr = true) const + { + for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht) + if (t.is_a (**ht)) + return true; + + return c_hdr && t.is_a (h::static_type); + } + + // Array of target types that can be #include'd. Used to reverse-lookup + // extensions to target types. Keep them in the most likely to appear + // order and terminate with NULL. + // + const target_type* const* x_inc; + + // Aggregate-like constructor with from-base support. + // + data (const config_data& cd, + const char* compile, + const char* link, + const char* install, + const char* uninstall, + compiler_type ct, + const string& cv, + compiler_class cl, + uint64_t mj, uint64_t mi, + const process_path& path, + const target_triplet& tgt, + const strings& std, + bool fm, + bool fs, + const dir_paths& sld, + const dir_paths& sid, + size_t sle, + size_t sie, + const target_type& src, + const target_type* mod, + const target_type* const* hdr, + const target_type* const* inc) + : config_data (cd), + x_compile (compile), + x_link (link), + x_install (install), + x_uninstall (uninstall), + ctype (ct), cvariant (cv), cclass (cl), + cmaj (mj), cmin (mi), + cpath (path), + ctgt (tgt), tsys (ctgt.system), tclass (ctgt.class_), + tstd (std), + modules (fm), + symexport (fs), + import_hdr (nullptr), + sys_lib_dirs (sld), sys_inc_dirs (sid), + sys_lib_dirs_extra (sle), sys_inc_dirs_extra (sie), + x_src (src), x_mod (mod), x_hdr (hdr), x_inc (inc) {} + }; + + class LIBBUILD2_CC_SYMEXPORT common: public data + { + public: + common (data&& d): data (move (d)) {} + + // Library handling. 
+ // + public: + void + process_libraries ( + action, + const scope&, + linfo, + const dir_paths&, + const file&, + bool, + lflags, + const function<bool (const file&, bool)>&, + const function<void (const file* const*, const string&, lflags, bool)>&, + const function<void (const file&, const string&, bool, bool)>&, + bool = false, + small_vector<const file*, 16>* = nullptr) const; + + const target* + search_library (action a, + const dir_paths& sysd, + optional<dir_paths>& usrd, + const prerequisite& p) const + { + const target* r (p.target.load (memory_order_consume)); + + if (r == nullptr) + { + if ((r = search_library (a, sysd, usrd, p.key ())) != nullptr) + { + const target* e (nullptr); + if (!p.target.compare_exchange_strong ( + e, r, + memory_order_release, + memory_order_consume)) + assert (e == r); + } + } + + return r; + } + + public: + const file& + resolve_library (action, + const scope&, + name, + linfo, + const dir_paths&, + optional<dir_paths>&) const; + + template <typename T> + static ulock + insert_library (context&, + T*&, + const string&, + const dir_path&, + optional<string>, + bool, + tracer&); + + target* + search_library (action, + const dir_paths&, + optional<dir_paths>&, + const prerequisite_key&, + bool existing = false) const; + + const target* + search_library_existing (action a, + const dir_paths& sysd, + optional<dir_paths>& usrd, + const prerequisite_key& pk) const + { + return search_library (a, sysd, usrd, pk, true); + } + + dir_paths + extract_library_dirs (const scope&) const; + + // Alternative search logic for VC (msvc.cxx). 
+ // + bin::liba* + msvc_search_static (const process_path&, + const dir_path&, + const prerequisite_key&, + bool existing) const; + + bin::libs* + msvc_search_shared (const process_path&, + const dir_path&, + const prerequisite_key&, + bool existing) const; + + // The pkg-config file searching and loading (pkgconfig.cxx) + // + using pkgconfig_callback = function<bool (dir_path&& d)>; + + bool + pkgconfig_search (const dir_path&, const pkgconfig_callback&) const; + + pair<path, path> + pkgconfig_search (const dir_path&, + const optional<project_name>&, + const string&, + bool) const; + + void + pkgconfig_load (action, const scope&, + bin::lib&, bin::liba*, bin::libs*, + const pair<path, path>&, + const dir_path&, + const dir_paths&, + const dir_paths&) const; + + bool + pkgconfig_load (action, const scope&, + bin::lib&, bin::liba*, bin::libs*, + const optional<project_name>&, + const string&, + const dir_path&, + const dir_paths&, + const dir_paths&) const; + }; + } +} + +#endif // LIBBUILD2_CC_COMMON_HXX diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx new file mode 100644 index 0000000..8cebef0 --- /dev/null +++ b/libbuild2/cc/compile-rule.cxx @@ -0,0 +1,6098 @@ +// file : libbuild2/cc/compile-rule.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/compile-rule.hxx> + +#include <cstdlib> // exit() +#include <cstring> // strlen(), strchr() + +#include <libbuild2/file.hxx> +#include <libbuild2/depdb.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> // mtime() +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/config/utility.hxx> // create_project() + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/parser.hxx> +#include <libbuild2/cc/target.hxx> // h +#include <libbuild2/cc/module.hxx> 
+#include <libbuild2/cc/utility.hxx> + +using std::exit; +using std::strlen; + +using namespace butl; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + // Module type/info string serialization. + // + // The string representation is a space-separated list of module names + // or quoted paths for header units with the following rules: + // + // 1. If this is a module unit, then the first name is the module name + // itself followed by either '!' for an interface or header unit and + // by '+' for an implementation unit. + // + // 2. If an imported module is re-exported, then the module name is + // followed by '*'. + // + // For example: + // + // foo! foo.core* foo.base* foo.impl + // foo.base+ foo.impl + // foo.base foo.impl + // "/usr/include/stdio.h"! + // "/usr/include/stdio.h"! "/usr/include/stddef.h" + // + // NOTE: currently we omit the imported header units since we have no need + // for this information (everything is handled by the mapper). Plus, + // resolving an import declaration to an absolute path would require + // some effort. + // + static string + to_string (unit_type ut, const module_info& mi) + { + string s; + + if (ut != unit_type::non_modular) + { + if (ut == unit_type::module_header) s += '"'; + s += mi.name; + if (ut == unit_type::module_header) s += '"'; + + s += (ut == unit_type::module_impl ? '+' : '!'); + } + + for (const module_import& i: mi.imports) + { + if (!s.empty ()) + s += ' '; + + if (i.type == unit_type::module_header) s += '"'; + s += i.name; + if (i.type == unit_type::module_header) s += '"'; + + if (i.exported) + s += '*'; + } + + return s; + } + + static pair<unit_type, module_info> + to_module_info (const string& s) + { + unit_type ut (unit_type::non_modular); + module_info mi; + + for (size_t b (0), e (0), n (s.size ()), m; e < n; ) + { + // Let's handle paths with spaces seeing that we already quote them. + // + char d (s[b = e] == '"' ? 
'"' : ' '); + + if ((m = next_word (s, n, b, e, d)) == 0) + break; + + char c (d == ' ' ? s[e - 1] : // Before delimiter. + e + 1 < n ? s[e + 1] : // After delimiter. + '\0'); + + switch (c) + { + case '!': + case '+': + case '*': break; + default: c = '\0'; + } + + string w (s, b, m - (d == ' ' && c != '\0' ? 1 : 0)); + + unit_type t (c == '+' ? unit_type::module_impl : + d == ' ' ? unit_type::module_iface : + unit_type::module_header); + + if (c == '!' || c == '+') + { + ut = t; + mi.name = move (w); + } + else + mi.imports.push_back (module_import {t, move (w), c == '*', 0}); + + // Skip to the next word (quote and space or just space). + // + e += (d == '"' ? 2 : 1); + } + + return pair<unit_type, module_info> (move (ut), move (mi)); + } + + // preprocessed + // + template <typename T> + inline bool + operator< (preprocessed l, T r) // Template because of VC14 bug. + { + return static_cast<uint8_t> (l) < static_cast<uint8_t> (r); + } + + preprocessed + to_preprocessed (const string& s) + { + if (s == "none") return preprocessed::none; + if (s == "includes") return preprocessed::includes; + if (s == "modules") return preprocessed::modules; + if (s == "all") return preprocessed::all; + throw invalid_argument ("invalid preprocessed value '" + s + "'"); + } + + struct compile_rule::match_data + { + explicit + match_data (unit_type t, const prerequisite_member& s) + : type (t), src (s) {} + + unit_type type; + preprocessed pp = preprocessed::none; + bool symexport = false; // Target uses __symexport. + bool touch = false; // Target needs to be touched. + timestamp mt = timestamp_unknown; // Target timestamp. + prerequisite_member src; + auto_rmfile psrc; // Preprocessed source, if any. + path dd; // Dependency database path. + size_t headers = 0; // Number of imported header units. + module_positions modules = {0, 0, 0}; // Positions of imported modules. 
+ }; + + compile_rule:: + compile_rule (data&& d) + : common (move (d)), + rule_id (string (x) += ".compile 4") + { + static_assert (sizeof (match_data) <= target::data_size, + "insufficient space"); + } + + size_t compile_rule:: + append_lang_options (cstrings& args, const match_data& md) const + { + size_t r (args.size ()); + + // Normally there will be one or two options/arguments. + // + const char* o1 (nullptr); + const char* o2 (nullptr); + + switch (cclass) + { + case compiler_class::msvc: + { + switch (x_lang) + { + case lang::c: o1 = "/TC"; break; + case lang::cxx: o1 = "/TP"; break; + } + break; + } + case compiler_class::gcc: + { + // For GCC we ignore the preprocessed value since it is handled via + // -fpreprocessed -fdirectives-only. + // + // Clang has *-cpp-output (but not c++-module-cpp-output) and they + // handle comments and line continuations. However, currently this + // is only by accident since these modes are essentially equivalent + // to their cpp-output-less versions. + // + switch (md.type) + { + case unit_type::non_modular: + case unit_type::module_impl: + { + o1 = "-x"; + switch (x_lang) + { + case lang::c: o2 = "c"; break; + case lang::cxx: o2 = "c++"; break; + } + break; + } + case unit_type::module_iface: + case unit_type::module_header: + { + // Here things get rather compiler-specific. We also assume + // the language is C++. + // + bool h (md.type == unit_type::module_header); + + //@@ MODHDR TODO: should we try to distinguish c-header vs + // c++-header based on the source target type? + + switch (ctype) + { + case compiler_type::gcc: + { + // In GCC compiling a header unit required -fmodule-header + // in addition to -x c/c++-header. Probably because relying + // on just -x would be ambiguous with its PCH support. + // + if (h) + args.push_back ("-fmodule-header"); + + o1 = "-x"; + o2 = h ? "c++-header" : "c++"; + break; + } + case compiler_type::clang: + { + o1 = "-x"; + o2 = h ? 
"c++-header" : "c++-module"; + break; + } + default: + assert (false); + } + break; + } + } + break; + } + } + + if (o1 != nullptr) args.push_back (o1); + if (o2 != nullptr) args.push_back (o2); + + return args.size () - r; + } + + inline void compile_rule:: + append_symexport_options (cstrings& args, const target& t) const + { + // With VC if a BMI is compiled with dllexport, then when such BMI is + // imported, it is auto-magically treated as dllimport. Let's hope + // other compilers follow suit. + // + args.push_back (t.is_a<bmis> () && tclass == "windows" + ? "-D__symexport=__declspec(dllexport)" + : "-D__symexport="); + } + + bool compile_rule:: + match (action a, target& t, const string&) const + { + tracer trace (x, "compile_rule::match"); + + // Note: unit type will be refined in apply(). + // + unit_type ut (t.is_a<hbmix> () ? unit_type::module_header : + t.is_a<bmix> () ? unit_type::module_iface : + unit_type::non_modular); + + // Link-up to our group (this is the obj/bmi{} target group protocol + // which means this can be done whether we match or not). + // + if (t.group == nullptr) + t.group = &search (t, + (ut == unit_type::module_header ? hbmi::static_type: + ut == unit_type::module_iface ? bmi::static_type : + obj::static_type), + t.dir, t.out, t.name); + + // See if we have a source file. Iterate in reverse so that a source + // file specified for a member overrides the one specified for the + // group. Also "see through" groups. + // + for (prerequisite_member p: reverse_group_prerequisite_members (a, t)) + { + // If excluded or ad hoc, then don't factor it into our tests. + // + if (include (a, t, p) != include_type::normal) + continue; + + // For a header unit we check the "real header" plus the C header. + // + if (ut == unit_type::module_header ? p.is_a (**x_hdr) || p.is_a<h> () : + ut == unit_type::module_iface ? p.is_a (*x_mod) : + p.is_a (x_src)) + { + // Save in the target's auxiliary storage. 
+ // + t.data (match_data (ut, p)); + return true; + } + } + + l4 ([&]{trace << "no " << x_lang << " source file for target " << t;}); + return false; + } + + // Append or hash library options from a pair of *.export.* variables + // (first one is cc.export.*) recursively, prerequisite libraries first. + // + void compile_rule:: + append_lib_options (const scope& bs, + cstrings& args, + action a, + const target& t, + linfo li) const + { + // See through utility libraries. + // + auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();}; + + auto opt = [&args, this] ( + const file& l, const string& t, bool com, bool exp) + { + // Note that in our model *.export.poptions are always "interface", + // even if set on liba{}/libs{}, unlike loptions. + // + if (!exp) // Ignore libux. + return; + + const variable& var ( + com + ? c_export_poptions + : (t == x + ? x_export_poptions + : l.ctx.var_pool[t + ".export.poptions"])); + + append_options (args, l, var); + }; + + // In case we don't have the "small function object" optimization. + // + const function<bool (const file&, bool)> impf (imp); + const function<void (const file&, const string&, bool, bool)> optf (opt); + + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + // Should be already searched and matched for libraries. + // + if (const target* pt = p.load ()) + { + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + + bool la; + if (!((la = pt->is_a<liba> ()) || + (la = pt->is_a<libux> ()) || + pt->is_a<libs> ())) + continue; + + process_libraries (a, bs, li, sys_lib_dirs, + pt->as<file> (), la, 0, // Hack: lflags unused. 
+ impf, nullptr, optf); + } + } + } + + void compile_rule:: + hash_lib_options (const scope& bs, + sha256& cs, + action a, + const target& t, + linfo li) const + { + auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();}; + + auto opt = [&cs, this] ( + const file& l, const string& t, bool com, bool exp) + { + if (!exp) + return; + + const variable& var ( + com + ? c_export_poptions + : (t == x + ? x_export_poptions + : l.ctx.var_pool[t + ".export.poptions"])); + + hash_options (cs, l, var); + }; + + // The same logic as in append_lib_options(). + // + const function<bool (const file&, bool)> impf (imp); + const function<void (const file&, const string&, bool, bool)> optf (opt); + + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (const target* pt = p.load ()) + { + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + + bool la; + if (!((la = pt->is_a<liba> ()) || + (la = pt->is_a<libux> ()) || + pt->is_a<libs> ())) + continue; + + process_libraries (a, bs, li, sys_lib_dirs, + pt->as<file> (), la, 0, // Hack: lflags unused. + impf, nullptr, optf); + } + } + } + + // Append library prefixes based on the *.export.poptions variables + // recursively, prerequisite libraries first. + // + void compile_rule:: + append_lib_prefixes (const scope& bs, + prefix_map& m, + action a, + target& t, + linfo li) const + { + auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();}; + + auto opt = [&m, this] ( + const file& l, const string& t, bool com, bool exp) + { + if (!exp) + return; + + const variable& var ( + com + ? c_export_poptions + : (t == x + ? x_export_poptions + : l.ctx.var_pool[t + ".export.poptions"])); + + append_prefixes (m, l, var); + }; + + // The same logic as in append_lib_options(). 
+ // + const function<bool (const file&, bool)> impf (imp); + const function<void (const file&, const string&, bool, bool)> optf (opt); + + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (const target* pt = p.load ()) + { + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + + bool la; + if (!((la = pt->is_a<liba> ()) || + (la = pt->is_a<libux> ()) || + pt->is_a<libs> ())) + continue; + + process_libraries (a, bs, li, sys_lib_dirs, + pt->as<file> (), la, 0, // Hack: lflags unused. + impf, nullptr, optf); + } + } + } + + // Update the target during the match phase. Return true if it has changed + // or if the passed timestamp is not timestamp_unknown and is older than + // the target. + // + // This function is used to make sure header dependencies are up to date. + // + // There would normally be a lot of headers for every source file (think + // all the system headers) and just calling execute_direct() on all of + // them can get expensive. At the same time, most of these headers are + // existing files that we will never be updating (again, system headers, + // for example) and the rule that will match them is the fallback + // file_rule. That rule has an optimization: it returns noop_recipe (which + // causes the target state to be automatically set to unchanged) if the + // file is known to be up to date. So we do the update "smartly". + // + static bool + update (tracer& trace, action a, const target& t, timestamp ts) + { + const path_target* pt (t.is_a<path_target> ()); + + if (pt == nullptr) + ts = timestamp_unknown; + + target_state os (t.matched_state (a)); + + if (os == target_state::unchanged) + { + if (ts == timestamp_unknown) + return false; + else + { + // We expect the timestamp to be known (i.e., existing file). 
+ // + timestamp mt (pt->mtime ()); + assert (mt != timestamp_unknown); + return mt > ts; + } + } + else + { + // We only want to return true if our call to execute() actually + // caused an update. In particular, the target could already have been + // in target_state::changed because of a dependency extraction run for + // some other source file. + // + // @@ MT perf: so we are going to switch the phase and execute for + // any generated header. + // + phase_switch ps (t.ctx, run_phase::execute); + target_state ns (execute_direct (a, t)); + + if (ns != os && ns != target_state::unchanged) + { + l6 ([&]{trace << "updated " << t + << "; old state " << os + << "; new state " << ns;}); + return true; + } + else + return ts != timestamp_unknown ? pt->newer (ts) : false; + } + } + + recipe compile_rule:: + apply (action a, target& xt) const + { + tracer trace (x, "compile_rule::apply"); + + file& t (xt.as<file> ()); // Either obj*{} or bmi*{}. + + match_data& md (t.data<match_data> ()); + + context& ctx (t.ctx); + + // Note: until refined below, non-BMI-generating translation unit is + // assumed non-modular. + // + unit_type ut (md.type); + + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + + otype ot (compile_type (t, ut)); + linfo li (link_info (bs, ot)); // Link info for selecting libraries. + compile_target_types tts (compile_types (ot)); + + // Derive file name from target name. + // + string e; // Primary target extension (module or object). + { + const char* o ("o"); // Object extension (.o or .obj). 
+ + if (tsys == "win32-msvc") + { + switch (ot) + { + case otype::e: e = "exe."; break; + case otype::a: e = "lib."; break; + case otype::s: e = "dll."; break; + } + o = "obj"; + } + else if (tsys == "mingw32") + { + switch (ot) + { + case otype::e: e = "exe."; break; + case otype::a: e = "a."; break; + case otype::s: e = "dll."; break; + } + } + else if (tsys == "darwin") + { + switch (ot) + { + case otype::e: e = ""; break; + case otype::a: e = "a."; break; + case otype::s: e = "dylib."; break; + } + } + else + { + switch (ot) + { + case otype::e: e = ""; break; + case otype::a: e = "a."; break; + case otype::s: e = "so."; break; + } + } + + switch (ctype) + { + case compiler_type::gcc: + { + e += (ut != unit_type::non_modular ? "gcm" : o); + break; + } + case compiler_type::clang: + { + e += (ut != unit_type::non_modular ? "pcm" : o); + break; + } + case compiler_type::msvc: + { + e += (ut != unit_type::non_modular ? "ifc" : o); + break; + } + case compiler_type::icc: + { + assert (ut == unit_type::non_modular); + e += o; + } + } + + // If we are compiling a module, then the obj*{} is an ad hoc member + // of bmi*{}. For now neither GCC nor Clang produce an object file + // for a header unit (but something tells me this is going to change). + // + if (ut == unit_type::module_iface) + { + // The module interface unit can be the same as an implementation + // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could + // collide. So we add the module extension to the target name. + // + file& obj (add_adhoc_member<file> (t, tts.obj, e.c_str ())); + + if (obj.path ().empty ()) + obj.derive_path (o); + } + } + + const path& tp (t.derive_path (e.c_str ())); + + // Inject dependency on the output directory. + // + const fsdir* dir (inject_fsdir (a, t)); + + // Match all the existing prerequisites. The injection code takes care + // of the ones it is adding. 
+ // + // When cleaning, ignore prerequisites that are not in the same or a + // subdirectory of our project root. + // + auto& pts (t.prerequisite_targets[a]); + optional<dir_paths> usr_lib_dirs; // Extract lazily. + + // Start asynchronous matching of prerequisites. Wait with unlocked + // phase to allow phase switching. + // + wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true); + + size_t start (pts.size ()); // Index of the first to be added. + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + const target* pt (nullptr); + include_type pi (include (a, t, p)); + + if (!pi) + continue; + + // A dependency on a library is there so that we can get its + // *.export.poptions, modules, etc. This is the library + // meta-information protocol. See also append_lib_options(). + // + if (pi == include_type::normal && + (p.is_a<libx> () || + p.is_a<liba> () || + p.is_a<libs> () || + p.is_a<libux> ())) + { + if (a.operation () == update_id) + { + // Handle (phase two) imported libraries. We know that for such + // libraries we don't need to do match() in order to get options + // (if any, they would be set by search_library()). + // + if (p.proj ()) + { + if (search_library (a, + sys_lib_dirs, + usr_lib_dirs, + p.prerequisite) != nullptr) + continue; + } + + pt = &p.search (t); + + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + } + else + continue; + } + // + // For modules we pick only what we import which is done below so + // skip it here. One corner case is clean: we assume that someone + // else (normally library/executable) also depends on it and will + // clean it up. 
+ // + else if (pi == include_type::normal && + (p.is_a<bmi> () || p.is_a (tts.bmi) || + p.is_a<hbmi> () || p.is_a (tts.hbmi))) + continue; + else + { + pt = &p.search (t); + + if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ())) + continue; + } + + match_async (a, *pt, ctx.count_busy (), t[a].task_count); + pts.push_back (prerequisite_target (pt, pi)); + } + + wg.wait (); + + // Finish matching all the targets that we have started. + // + for (size_t i (start), n (pts.size ()); i != n; ++i) + { + const target*& pt (pts[i]); + + // Making sure a library is updated before us will only restrict + // parallelism. But we do need to match it in order to get its imports + // resolved and prerequisite_targets populated. So we match it but + // then unmatch if it is safe. And thanks to the two-pass prerequisite + // match in link::apply() it will be safe unless someone is building + // an obj?{} target directory. + // + if (build2::match ( + a, + *pt, + pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> () + ? unmatch::safe + : unmatch::none)) + pt = nullptr; // Ignore in execute. + } + + // Inject additional prerequisites. We only do it when performing update + // since chances are we will have to update some of our prerequisites in + // the process (auto-generated source code, header units). + // + if (a == perform_update_id) + { + // The cached prerequisite target should be the same as what is in + // t.prerequisite_targets since we used standard search() and match() + // above. + // + const file& src (*md.src.search (t).is_a<file> ()); + + // Figure out if __symexport is used. While normally it is specified + // on the project root (which we cached), it can be overridden with + // a target-specific value for installed modules (which we sidebuild + // as part of our project). + // + // @@ MODHDR MSVC: are we going to do the same for header units? I + // guess we will figure it out when MSVC supports header units. + // Also see hashing below. 
+ // + if (ut == unit_type::module_iface) + { + lookup l (src.vars[x_symexport]); + md.symexport = l ? cast<bool> (l) : symexport; + } + + // Make sure the output directory exists. + // + // Is this the right thing to do? It does smell a bit, but then we do + // worse things in inject_prerequisites() below. There is also no way + // to postpone this until update since we need to extract and inject + // header dependencies now (we don't want to be calling search() and + // match() in update), which means we need to cache them now as well. + // So the only alternative, it seems, is to cache the updates to the + // database until later which will sure complicate (and slow down) + // things. + // + if (dir != nullptr) + { + // We can do it properly by using execute_direct(). But this means + // we will be switching to the execute phase with all the associated + // overheads. At the same time, in case of update, creation of a + // directory is not going to change the external state in any way + // that would affect any parallel efforts in building the internal + // state. So we are just going to create the directory directly. + // Note, however, that we cannot modify the fsdir{} target since + // this can very well be happening in parallel. But that's not a + // problem since fsdir{}'s update is idempotent. + // + fsdir_rule::perform_update_direct (a, t); + } + + // Note: the leading '@' is reserved for the module map prefix (see + // extract_modules()) and no other line must start with it. + // + depdb dd (tp + ".d"); + + // First should come the rule name/version. + // + if (dd.expect (rule_id) != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); + + // Then the compiler checksum. Note that here we assume it + // incorporates the (default) target so that if the compiler changes + // but only in what it targets, then the checksum will still change. 
+ // + if (dd.expect (cast<string> (rs[x_checksum])) != nullptr) + l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); + + // Then the options checksum. + // + // The idea is to keep them exactly as they are passed to the compiler + // since the order may be significant. + // + { + sha256 cs; + + // These flags affect how we compile the source and/or the format of + // depdb so factor them in. + // + cs.append (&md.pp, sizeof (md.pp)); + + if (ut == unit_type::module_iface) + cs.append (&md.symexport, sizeof (md.symexport)); + + if (import_hdr != nullptr) + hash_options (cs, *import_hdr); + + if (md.pp != preprocessed::all) + { + hash_options (cs, t, c_poptions); + hash_options (cs, t, x_poptions); + + // Hash *.export.poptions from prerequisite libraries. + // + hash_lib_options (bs, cs, a, t, li); + + // Extra system header dirs (last). + // + assert (sys_inc_dirs_extra <= sys_inc_dirs.size ()); + hash_option_values ( + cs, "-I", + sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (), + [] (const dir_path& d) {return d.string ();}); + } + + hash_options (cs, t, c_coptions); + hash_options (cs, t, x_coptions); + hash_options (cs, tstd); + + if (ot == otype::s) + { + // On Darwin, Win32 -fPIC is the default. + // + if (tclass == "linux" || tclass == "bsd") + cs.append ("-fPIC"); + } + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "options mismatch forcing update of " << t;}); + } + + // Finally the source file. + // + if (dd.expect (src.path ()) != nullptr) + l4 ([&]{trace << "source file mismatch forcing update of " << t;}); + + // If any of the above checks resulted in a mismatch (different + // compiler, options, or source file) or if the depdb is newer than + // the target (interrupted update), then do unconditional update. + // + // Note that load_mtime() can only be used in the execute phase so we + // have to check for a cached value manually. 
+ // + bool u; + timestamp mt; + + if (dd.writing ()) + u = true; + else + { + if ((mt = t.mtime ()) == timestamp_unknown) + t.mtime (mt = mtime (tp)); // Cache. + + u = dd.mtime > mt; + } + + if (u) + mt = timestamp_nonexistent; // Treat as if it doesn't exist. + + // Update prerequisite targets (normally just the source file). + // + // This is an unusual place and time to do it. But we have to do it + // before extracting dependencies. The reasoning for source file is + // pretty clear. What other prerequisites could we have? While + // normally they will be some other sources (as in, static content + // from src_root), it's possible they are some auto-generated stuff. + // And it's possible they affect the preprocessor result. Say some ad + // hoc/out-of-band compiler input file that is passed via the command + // line. So, to be safe, we make sure everything is up to date. + // + for (const target* pt: pts) + { + if (pt == nullptr || pt == dir) + continue; + + u = update (trace, a, *pt, u ? timestamp_unknown : mt) || u; + } + + // Check if the source is already preprocessed to a certain degree. + // This determines which of the following steps we perform and on + // what source (original or preprocessed). + // + // Note: must be set on the src target. + // + if (const string* v = cast_null<string> (src[x_preprocessed])) + try + { + md.pp = to_preprocessed (*v); + } + catch (const invalid_argument& e) + { + fail << "invalid " << x_preprocessed.name << " variable value " + << "for target " << src << ": " << e; + } + + // If we have no #include directives (or header unit imports), then + // skip header dependency extraction. + // + pair<auto_rmfile, bool> psrc (auto_rmfile (), false); + if (md.pp < preprocessed::includes) + { + // Note: trace is used in a test. + // + l5 ([&]{trace << "extracting headers from " << src;}); + psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt); + } + + // Next we "obtain" the translation unit information. 
What exactly + // "obtain" entails is tricky: If things changed, then we re-parse the + // translation unit. Otherwise, we re-create this information from + // depdb. We, however, have to do it here and now in case the database + // is invalid and we still have to fallback to re-parse. + // + // Store the translation unit's checksum to detect ignorable changes + // (whitespaces, comments, etc). + // + { + optional<string> cs; + if (string* l = dd.read ()) + cs = move (*l); + else + u = true; // Database is invalid, force re-parse. + + unit tu; + for (bool first (true);; first = false) + { + if (u) + { + // Flush depdb since it can be used (as a module map) by + // parse_unit(). + // + if (dd.writing ()) + dd.flush (); + + auto p (parse_unit (a, t, li, src, psrc.first, md, dd.path)); + + if (!cs || *cs != p.second) + { + assert (first); // Unchanged TU has a different checksum? + dd.write (p.second); + } + // + // Don't clear if it was forced or the checksum should not be + // relied upon. + // + else if (first && !p.second.empty ()) + { + // Clear the update flag and set the touch flag. Unless there + // is no object file, of course. See also the md.mt logic + // below. + // + if (mt != timestamp_nonexistent) + { + u = false; + md.touch = true; + } + } + + tu = move (p.first); + } + + if (modules) + { + if (u || !first) + { + string s (to_string (tu.type, tu.module_info)); + + if (first) + dd.expect (s); + else + dd.write (s); + } + else + { + if (string* l = dd.read ()) + { + auto p (to_module_info (*l)); + tu.type = p.first; + tu.module_info = move (p.second); + } + else + { + u = true; // Database is invalid, force re-parse. + continue; + } + } + } + + break; + } + + // Make sure the translation unit type matches the resulting target + // type. 
+ // + switch (tu.type) + { + case unit_type::non_modular: + case unit_type::module_impl: + { + if (ut != unit_type::non_modular) + fail << "translation unit " << src << " is not a module interface" << + info << "consider using " << x_src.name << "{} instead"; + break; + } + case unit_type::module_iface: + { + if (ut != unit_type::module_iface) + fail << "translation unit " << src << " is a module interface" << + info << "consider using " << x_mod->name << "{} instead"; + break; + } + case unit_type::module_header: + { + assert (ut == unit_type::module_header); + break; + } + } + + // Refine the non-modular/module-impl decision from match(). + // + ut = md.type = tu.type; + + // Note: trace is used in a test. + // + l5 ([&]{trace << "extracting modules from " << t;}); + + // Extract the module dependency information in addition to header + // dependencies. + // + // NOTE: assumes that no further targets will be added into + // t.prerequisite_targets! + // + if (modules) + { + extract_modules (a, bs, t, li, + tts, src, + md, move (tu.module_info), dd, u); + + // Currently in VC module interface units must be compiled from + // the original source (something to do with having to detect and + // store header boundaries in the .ifc files). + // + // @@ MODHDR MSVC: should we do the same for header units? I guess + // we will figure it out when MSVC supports header units. + // + if (ctype == compiler_type::msvc) + { + if (ut == unit_type::module_iface) + psrc.second = false; + } + } + } + + // If anything got updated, then we didn't rely on the cache. However, + // the cached data could actually have been valid and the compiler run + // in extract_headers() as well as the code above merely validated it. + // + // We do need to update the database timestamp, however. Failed that, + // we will keep re-validating the cached data over and over again. + // + // @@ DRYRUN: note that for dry-run we would keep re-touching the + // database on every run (because u is true). 
So for now we suppress + // it (the file will be re-validated on the real run anyway). It feels + // like support for reusing the (partially) preprocessed output (see + // note below) should help solve this properly (i.e., we don't want + // to keep re-validating the file on every subsequent dry-run as well + // on the real run). + // + if (u && dd.reading () && !ctx.dry_run) + dd.touch = true; + + dd.close (); + md.dd = move (dd.path); + + // If the preprocessed output is suitable for compilation, then pass + // it along. + // + if (psrc.second) + { + md.psrc = move (psrc.first); + + // Without modules keeping the (partially) preprocessed output + // around doesn't buy us much: if the source/headers haven't changed + // then neither will the object file. Modules make things more + // interesting: now we may have to recompile an otherwise unchanged + // translation unit because a BMI it depends on has changed. In this + // case re-processing the translation unit would be a waste and + // compiling the original source would break distributed + // compilation. + // + // Note also that the long term trend will (hopefully) be for + // modularized projects to get rid of #include's which means the + // need for producing this partially preprocessed output will + // (hopefully) gradually disappear. + // + if (modules) + md.psrc.active = false; // Keep. + } + + // Above we may have ignored changes to the translation unit. The + // problem is, unless we also update the target's timestamp, we will + // keep re-checking this on subsequent runs and it is not cheap. + // Updating the target's timestamp is not without problems either: it + // will cause a re-link on a subsequent run. So, essentially, we + // somehow need to remember two timestamps: one for checking + // "preprocessor prerequisites" above and one for checking other + // prerequisites (like modules) below. 
So what we are going to do is + // store the first in the target file (so we do touch it) and the + // second in depdb (which is never newer that the target). + // + // Perhaps when we start keeping the partially preprocessed this will + // fall away? Yes, please. + // + md.mt = u ? timestamp_nonexistent : dd.mtime; + } + + switch (a) + { + case perform_update_id: return [this] (action a, const target& t) + { + return perform_update (a, t); + }; + case perform_clean_id: return [this] (action a, const target& t) + { + return perform_clean (a, t); + }; + default: return noop_recipe; // Configure update. + } + } + + // Reverse-lookup target type(s) from extension. + // + small_vector<const target_type*, 2> compile_rule:: + map_extension (const scope& s, const string& n, const string& e) const + { + // We will just have to try all of the possible ones, in the "most + // likely to match" order. + // + auto test = [&s, &n, &e] (const target_type& tt) -> bool + { + // Call the extension derivation function. Here we know that it will + // only use the target type and name from the target key so we can + // pass bogus values for the rest. + // + target_key tk {&tt, nullptr, nullptr, &n, nullopt}; + + // This is like prerequisite search. + // + optional<string> de (tt.default_extension (tk, s, nullptr, true)); + + return de && *de == e; + }; + + small_vector<const target_type*, 2> r; + + for (const target_type* const* p (x_inc); *p != nullptr; ++p) + if (test (**p)) + r.push_back (*p); + + return r; + } + + void compile_rule:: + append_prefixes (prefix_map& m, const target& t, const variable& var) const + { + tracer trace (x, "compile_rule::append_prefixes"); + + // If this target does not belong to any project (e.g, an "imported as + // installed" library), then it can't possibly generate any headers for + // us. 
+ // + const scope& bs (t.base_scope ()); + const scope* rs (bs.root_scope ()); + if (rs == nullptr) + return; + + const dir_path& out_base (t.dir); + const dir_path& out_root (rs->out_path ()); + + if (auto l = t[var]) + { + const auto& v (cast<strings> (l)); + + for (auto i (v.begin ()), e (v.end ()); i != e; ++i) + { + // -I can either be in the "-Ifoo" or "-I foo" form. For VC it can + // also be /I. + // + const string& o (*i); + + if (o.size () < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I') + continue; + + dir_path d; + + try + { + if (o.size () == 2) + { + if (++i == e) + break; // Let the compiler complain. + + d = dir_path (*i); + } + else + d = dir_path (*i, 2, string::npos); + } + catch (const invalid_path& e) + { + fail << "invalid directory '" << e.path << "'" + << " in option '" << o << "'" + << " in variable " << var + << " for target " << t; + } + + l6 ([&]{trace << "-I " << d;}); + + if (d.relative ()) + fail << "relative directory " << d + << " in option '" << o << "'" + << " in variable " << var + << " for target " << t; + + // If the directory is not normalized, we can complain or normalize + // it. Let's go with normalizing to minimize questions/complaints. + // + if (!d.normalized (false)) // Allow non-canonical dir separators. + d.normalize (); + + // If we are not inside our project root, then ignore. + // + if (!d.sub (out_root)) + continue; + + // If the target directory is a sub-directory of the include + // directory, then the prefix is the difference between the + // two. Otherwise, leave it empty. + // + // The idea here is to make this "canonical" setup work auto- + // magically: + // + // 1. We include all files with a prefix, e.g., <foo/bar>. + // 2. The library target is in the foo/ sub-directory, e.g., + // /tmp/foo/. + // 3. The poptions variable contains -I/tmp. + // + dir_path p (out_base.sub (d) ? 
out_base.leaf (d) : dir_path ()); + + // We use the target's directory as out_base but that doesn't work + // well for targets that are stashed in subdirectories. So as a + // heuristics we are going to also enter the outer directories of + // the original prefix. It is, however, possible, that another -I + // option after this one will produce one of these outer prefixes as + // its original prefix in which case we should override it. + // + // So we are going to assign the original prefix priority value 0 + // (highest) and then increment it for each outer prefix. + // + auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio) + { + auto j (m.find (p)); + + if (j != m.end ()) + { + prefix_value& v (j->second); + + // We used to reject duplicates but it seems this can be + // reasonably expected to work according to the order of the + // -I options. + // + // Seeing that we normally have more "specific" -I paths first, + // (so that we don't pick up installed headers, etc), we ignore + // it. + // + if (v.directory == d) + { + if (v.priority > prio) + v.priority = prio; + } + else if (v.priority <= prio) + { + if (verb >= 4) + trace << "ignoring mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " another mapping to " << d + << " priority " << prio; + } + else + { + if (verb >= 4) + trace << "overriding mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " new mapping to " << d + << " priority " << prio; + + v.directory = move (d); + v.priority = prio; + } + } + else + { + l6 ([&]{trace << "'" << p << "' -> " << d << " priority " + << prio;}); + m.emplace (move (p), prefix_value {move (d), prio}); + } + }; + +#if 1 + // Enter all outer prefixes, including prefixless. 
// Return the next make prerequisite starting from the specified position
// and update the position to point to the start of the following
// prerequisite or to l.size() if there are none left.
//
// Spaces before and after the prerequisite are skipped. While scanning,
// the following escape sequences are recognized:
//
//   $$   becomes a single '$'
//   \\   becomes a single '\'
//   \    followed by a space becomes a literal space (part of the name)
//
// A backslash followed by anything else is kept as is. A backslash that is
// the very last character of the line and that follows the spaces after the
// prerequisite (line continuation) is consumed.
//
// A position that is past the end of the line is treated as "nothing left":
// it is clamped to l.size() and an empty string is returned (rather than
// reading outside of the line).
//
static string
next_make (const string& l, size_t& p)
{
  const size_t n (l.size ());

  // Clamp an out-of-range position so that none of the loops below can
  // index past the end of the line.
  //
  if (p > n)
    p = n;

  // Skip leading spaces.
  //
  for (; p < n && l[p] == ' '; ++p) ;

  // The result cannot be longer than what is left of the line (lines
  // containing multiple prerequisites are 80 characters max).
  //
  string r;
  r.reserve (n - p);

  // Scan the next prerequisite while watching out for escape sequences.
  //
  for (; p < n && l[p] != ' '; ++p)
  {
    char c (l[p]);

    // An escape sequence needs at least one more character.
    //
    if (p + 1 < n)
    {
      if (c == '$')
      {
        // Got to be another (escaped) '$'.
        //
        if (l[p + 1] == '$')
          ++p;
      }
      else if (c == '\\')
      {
        // This may or may not be an escape sequence depending on whether
        // what follows is "escapable".
        //
        switch (c = l[++p])
        {
        case '\\': break;
        case ' ': break;
        default: c = '\\'; --p; // Restore.
        }
      }
    }

    r += c;
  }

  // Skip trailing spaces.
  //
  for (; p < n && l[p] == ' '; ++p) ;

  // Skip final '\'. Note: written as p + 1 == n so that an empty line does
  // not rely on unsigned wrap-around of n - 1.
  //
  if (p + 1 == n && l[p] == '\\')
    ++p;

  return r;
}
Set + // the good_error flag if it is an include error (which means the process + // will terminate with the error status that needs to be ignored). + // + static string + next_show (const string& l, bool& good_error) + { + // The include error should be the last line that we handle. + // + assert (!good_error); + + size_t p (msvc_sense_diag (l, 'C')); + if (p == string::npos) + { + // Include note. + // + // We assume the path is always at the end but need to handle both + // absolute Windows and POSIX ones. + // + // Note that VC appears to always write the absolute path to the + // included file even if it is ""-included and the source path is + // relative. Aren't we lucky today? + // + p = l.rfind (':'); + + if (p != string::npos) + { + // See if this one is part of the Windows drive letter. + // + if (p > 1 && p + 1 < l.size () && // 2 chars before, 1 after. + l[p - 2] == ' ' && + alpha (l[p - 1]) && + path::traits_type::is_separator (l[p + 1])) + p = l.rfind (':', p - 2); + } + + if (p != string::npos) + { + // VC uses indentation to indicate the include nesting so there + // could be any number of spaces after ':'. Skip them. + // + p = l.find_first_not_of (' ', p + 1); + } + + if (p == string::npos) + fail << "unable to parse /showIncludes include note line \"" + << l << '"'; + + return string (l, p); + } + else if (l.compare (p, 4, "1083") == 0 && + l.compare (0, 5, "c1xx:") != 0 /* Not the main source file. */ ) + { + // Include error. + // + // The path is conveniently quoted with ''. Or so we thought: turns + // out different translations (e.g., Chinese) can use different quote + // characters. But the overall structure seems to be stable: + // + // ...C1083: <translated>: 'd/h.hpp': <translated> + // + // Plus, it seems the quote character could to be multi-byte. + // + size_t p1 (l.find (':', p + 5)); + size_t p2 (l.rfind (':')); + + if (p1 != string::npos && + p2 != string::npos && + (p2 - p1) > 4 && // At least ": 'x':". 
+ l[p1 + 1] == ' ' && + l[p2 + 1] == ' ') + { + p1 += 3; // First character of the path. + p2 -= 1; // One past last character of the path. + + // Skip any non-printable ASCII characters before/after (the mutli- + // byte quote case). + // + auto printable = [] (char c) { return c >= 0x20 && c <= 0x7e; }; + + for (; p1 != p2 && !printable (l[p1]); ++p1) ; + for (; p2 != p1 && !printable (l[p2 - 1]); --p2) ; + + if (p1 != p2) + { + good_error = true; + return string (l, p1 , p2 - p1); + } + } + + fail << "unable to parse /showIncludes include error line \"" + << l << '"' << endf; + } + else + { + // Some other error. + // + return string (); + } + } + + void + msvc_sanitize_cl (cstrings&); // msvc.cxx + + // GCC module mapper handler. + // + // Note that the input stream is non-blocking while output is blocking + // and this function should be prepared to handle closed input stream. + // Any unhandled io_error is handled by the caller as a generic module + // mapper io error. + // + struct compile_rule::module_mapper_state + { + size_t headers = 0; // Number of header units imported. + size_t skip; // Number of depdb entries to skip. + string data; // Auxiliary data. + + explicit + module_mapper_state (size_t skip_count): skip (skip_count) {} + }; + + void compile_rule:: + gcc_module_mapper (module_mapper_state& st, + action a, const scope& bs, file& t, linfo li, + ifdstream& is, + ofdstream& os, + depdb& dd, bool& update, bool& bad_error, + optional<prefix_map>& pfx_map, srcout_map& so_map) const + { + tracer trace (x, "compile_rule::gcc_module_mapper"); + + // Read in the request line. + // + // Because the dynamic mapper is only used during preprocessing, we + // can assume there is no batching and expect to see one line at a + // time. + // + string rq; +#if 1 + if (!eof (getline (is, rq))) + { + if (rq.empty ()) + rq = "<empty>"; // Not to confuse with EOF. 
+ } +#else + for (char buf[4096]; !is.eof (); ) + { + streamsize n (is.readsome (buf, sizeof (buf) - 1)); + buf[n] = '\0'; + + if (char* p = strchr (buf, '\n')) + { + *p = '\0'; + + if (++p != buf + n) + fail << "batched module mapper request: '" << p << "'"; + + rq += buf; + break; + } + else + rq += buf; + } +#endif + + if (rq.empty ()) // EOF + return; + + // @@ MODHDR: Should we print the pid we are talking to? It gets hard to + // follow once things get nested. But if all our diag will + // include some kind of id (chain, thread?), then this will + // not be strictly necessary. + // + if (verb >= 3) + text << " > " << rq; + + // Check for a command. If match, remove it and the following space from + // the request string saving it in cmd (for diagnostics) unless the + // second argument is false, and return true. + // + const char* cmd (nullptr); + auto command = [&rq, &cmd] (const char* c, bool r = true) + { + size_t n (strlen (c)); + bool m (rq.compare (0, n, c) == 0 && rq[n] == ' '); + + if (m && r) + { + cmd = c; + rq.erase (0, n + 1); + } + + return m; + }; + + string rs; + for (;;) // Breakout loop. + { + // Each command is reponsible for handling its auxiliary data while we + // just clear it. + // + string data (move (st.data)); + + if (command ("HELLO")) + { + // HELLO <ver> <kind> <ident> + // + //@@ MODHDR TODO: check protocol version. + + // We don't use "repository path" (whatever it is) so we pass '.'. + // + rs = "HELLO 0 build2 ."; + } + // + // Turns out it's easiest to handle IMPORT together with INCLUDE since + // it can also trigger a re-search, etc. In a sense, IMPORT is all of + // the INCLUDE logic (skipping translation) plus the BMI dependency + // synthesis. + // + else if (command ("INCLUDE") || command ("IMPORT")) + { + // INCLUDE [<"']<name>[>"'] <path> + // IMPORT [<"']<name>[>"'] <path> + // IMPORT '<path>' + // + // <path> is the resolved path or empty if the header is not found. 
+ // It can be relative if it is derived from a relative path (either + // via -I or includer). If <name> is single-quoted, then it cannot + // be re-searched (e.g., implicitly included stdc-predef.h) and in + // this case <path> is never empty. + // + // In case of re-search or include translation we may have to split + // handling the same include or import across multiple commands. + // Here are the scenarios in question: + // + // INCLUDE --> SEARCH -?-> INCLUDE + // IMPORT --> SEARCH -?-> IMPORT + // INCLUDE --> IMPORT -?-> IMPORT + // + // The problem is we may not necessarily get the "followup" command + // (the question marks above). We may not get the followup after + // SEARCH because, for example, the newly found header has already + // been included/imported using a different style/path. Similarly, + // the IMPORT response may not be followed up with the IMPORT + // command because this header has already been imported, for + // example, using an import declaration. Throw into this #pragma + // once, include guards, and how exactly the compiler deals with + // them and things become truly unpredictable and hard to reason + // about. As a result, for each command we have to keep the build + // state consistent, specifically, without any "dangling" matched + // targets (which would lead to skew dependency counts). Note: the + // include translation is no longer a problem since we respond with + // an immediate BMI. + // + // To keep things simple we are going to always add a target that we + // matched to our prerequisite_targets. This includes the header + // target when building the BMI: while not ideal, this should be + // harmless provided we don't take its state/mtime into account. + // + // One thing we do want to handle specially is the "maybe-followup" + // case discussed above. It is hard to distinguish from an unrelated + // INCLUDE/IMPORT (we could have saved <name> and maybe correlated + // based on that). 
But if we don't, then we will keep matching and + // adding each target twice. What we can do, however, is check + // whether this target is already in prerequisite_targets and skip + // it if that's the case, which is a valid thing to do whether it is + // a followup or an unrelated command. In fact, for a followup, we + // only need to check the last element in prerequisite_targets. + // + // This approach strikes a reasonable balance between keeping things + // simple and handling normal cases without too much overhead. Note + // that we may still end up matching and adding the same targets + // multiple times for pathological cases, like when the same header + // is included using a different style/path, etc. We could, however, + // take care of this by searching the entire prerequisite_targets, + // which is always an option (and which would probably be required + // if the compiler were to send the INCLUDE command before checking + // for #pragma once or include guards, which GCC does not do). + // + // One thing that we cannot do without distinguishing followup and + // unrelated commands is verify the remapped header found by the + // compiler resolves to the expected target. So we will also do the + // correlation via <name>. + // + bool imp (cmd[1] == 'M'); + + path f; // <path> or <name> if doesn't exist + string n; // [<"']<name>[>"'] + bool exists; // <path> is not empty + bool searchable; // <name> is not single-quoted + { + char q (rq[0]); // Opening quote. + q = (q == '<' ? '>' : + q == '"' ? '"' : + q == '\'' ? '\'' : '\0'); // Closing quote. + + size_t s (rq.size ()), qp; // Quote position. + if (q == '\0' || (qp = rq.find (q, 1)) == string::npos) + break; // Malformed command. + + n.assign (rq, 0, qp + 1); + + size_t p (qp + 1); + if (imp && q == '\'' && p == s) // IMPORT '<path>' + { + exists = true; + // Leave f empty and fall through. + } + else + { + if (p != s && rq[p++] != ' ') // Skip following space, if any. 
+ break; + + exists = (p != s); + + if (exists) + { + rq.erase (0, p); + f = path (move (rq)); + assert (!f.empty ()); + } + //else // Leave f empty and fall through. + } + + if (f.empty ()) + { + rq.erase (0, 1); // Opening quote. + rq.erase (qp - 1); // Closing quote and trailing space, if any. + f = path (move (rq)); + } + + // Complete relative paths not to confuse with non-existent. + // + if (exists && !f.absolute ()) + f.complete (); + + searchable = (q != '\''); + } + + // The skip_count logic: in a nutshell (and similar to the non- + // mapper case), we may have "processed" some portion of the headers + // based on the depdb cache and we need to avoid re-processing them + // here. See the skip_count discussion for details. + // + // Note also that we need to be careful not to decrementing the + // count for re-searches and include translation. + // + bool skip (st.skip != 0); + + // The first part is the same for both INCLUDE and IMPORT: resolve + // the header path to target, update it, and trigger re-search if + // necessary. + // + const file* ht (nullptr); + auto& pts (t.prerequisite_targets[a]); + + // If this is a followup command (or indistinguishable from one), + // then as a sanity check verify the header found by the compiler + // resolves to the expected target. + // + if (data == n) + { + assert (!skip); // We shouldn't be re-searching while skipping. + + if (exists) + { + pair<const file*, bool> r ( + enter_header (a, bs, t, li, + move (f), false /* cache */, + pfx_map, so_map)); + + if (!r.second) // Shouldn't be remapped. + ht = r.first; + } + + if (ht != pts.back ()) + { + ht = static_cast<const file*> (pts.back ().target); + rs = "ERROR expected header '" + ht->path ().string () + + "' to be found instead"; + bad_error = true; // We expect an error from the compiler. + break; + } + + // Fall through. + } + else + { + // Enter, update, and see if we need to re-search this header. 
+ // + bool updated (false), remapped; + try + { + pair<const file*, bool> er ( + enter_header (a, bs, t, li, + move (f), false /* cache */, + pfx_map, so_map)); + + ht = er.first; + remapped = er.second; + + if (remapped && !searchable) + { + rs = "ERROR remapping non-re-searchable header " + n; + bad_error = true; + break; + } + + // If we couldn't enter this header as a target (as opposed to + // not finding a rule to update it), then our diagnostics won't + // really add anything to the compiler's. + // + if (ht == nullptr) + { + assert (!exists); // Sanity check. + throw failed (); + } + + // Note that we explicitly update even for IMPORT (instead of, + // say, letting the BMI rule do it implicitly) since we may need + // to cause a re-search (see below). + // + if (!skip) + { + if (pts.empty () || pts.back () != ht) + { + optional<bool> ir (inject_header (a, t, + *ht, false /* cache */, + timestamp_unknown)); + assert (ir); // Not from cache. + updated = *ir; + } + else + assert (exists); + } + else + assert (exists && !remapped); // Maybe this should be an error. + } + catch (const failed&) + { + // If the header does not exist or could not be updated, do we + // want our diagnostics, the compiler's, or both? We definitely + // want the compiler's since it points to the exact location. + // Ours could also be helpful. So while it will look a bit + // messy, let's keep both (it would have been nicer to print + // ours after the compiler's but that isn't easy). + // + rs = !exists + ? string ("INCLUDE") + : ("ERROR unable to update header '" + + (ht != nullptr ? ht->path () : f).string () + "'"); + + bad_error = true; + break; + } + + if (!imp) // Indirect prerequisite (see above). + update = updated || update; + + // A mere update is not enough to cause a re-search. It either had + // to also not exist or be remapped. + // + if ((updated && !exists) || remapped) + { + rs = "SEARCH"; + st.data = move (n); // Followup correlation. 
+ break; + } + + // Fall through. + } + + // Now handle INCLUDE and IMPORT differences. + // + const string& hp (ht->path ().string ()); + + // Reduce include translation to the import case. + // + if (!imp && import_hdr != nullptr) + { + const strings& ih (*import_hdr); + + auto i (lower_bound (ih.begin (), + ih.end (), + hp, + [] (const string& x, const string& y) + { + return path::traits_type::compare (x, y) < 0; + })); + + imp = (i != ih.end () && *i == hp); + } + + if (imp) + { + try + { + // Synthesize the BMI dependency then update and add the BMI + // target as a prerequisite. + // + const file& bt (make_header_sidebuild (a, bs, li, *ht)); + + if (!skip) + { + optional<bool> ir (inject_header (a, t, + bt, false /* cache */, + timestamp_unknown)); + assert (ir); // Not from cache. + update = *ir || update; + } + + const string& bp (bt.path ().string ()); + + if (!skip) + { + // @@ MODHDR: we write normalized path while the compiler will + // look for the original. In particular, this means + // that paths with `..` won't work. Maybe write + // original for mapping and normalized for our use? + // + st.headers++; + dd.expect ("@ '" + hp + "' " + bp); + } + else + st.skip--; + + rs = "IMPORT " + bp; + } + catch (const failed&) + { + rs = "ERROR unable to update header unit '" + hp + "'"; + bad_error = true; + break; + } + } + else + { + if (!skip) + dd.expect (hp); + else + st.skip--; + + rs = "INCLUDE"; + } + } + + break; + } + + if (rs.empty ()) + { + rs = "ERROR unexpected command '"; + + if (cmd != nullptr) + { + rs += cmd; // Add the command back. + rs += ' '; + } + + rs += rq; + rs += "'"; + + bad_error = true; + } + + if (verb >= 3) + text << " < " << rs; + + os << rs << endl; + } + + // Enter as a target a header file. Depending on the cache flag, the file + // is assumed to either have come from the depdb cache or from the + // compiler run. 
//
// Return the header target and an indication of whether it was remapped
// or NULL if the header does not exist and cannot be generated. In the
// latter case the passed header path is guaranteed to be still valid but
// might have been adjusted (e.g., normalized, etc).
//
// Arguments, as used by the body below:
//
//   f       - The header path. A relative path is treated as a
//             non-existent (possibly auto-generated) header and is
//             resolved via the include prefix map. An absolute path is
//             normalized/realized (unless cached) and entered as is or
//             remapped to the out tree. Modified in place.
//
//   cache   - If true, the path normalization and the src-out remapping
//             are skipped (they were already done when the path was
//             written to the depdb).
//
//   pfx_map - Include prefix map. Built on first use with
//             build_prefix_map() and left populated for the caller.
//
//   so_map  - The src-out map. Only consulted for non-cached absolute
//             paths.
//
// The second half of the result is only set to true if the src-out
// remapping found a target in the out tree.
//
// Note: this used to be a lambda inside extract_headers() so refer to the
// body of that function for the overall picture.
//
pair<const file*, bool> compile_rule::
enter_header (action a, const scope& bs, file& t, linfo li,
              path&& f, bool cache,
              optional<prefix_map>& pfx_map, srcout_map& so_map) const
{
  tracer trace (x, "compile_rule::enter_header");

  // Find or maybe insert the target. The directory is only moved from if
  // insert is true.
  //
  // Note that the f parameter and the bs local of this lambda are distinct
  // from (and hide) the enclosing function's f and bs, which are not
  // captured.
  //
  auto find = [&trace, &t, this] (dir_path&& d,
                                  path&& f,
                                  bool insert) -> const file*
  {
    // Split the file into its name part and extension. Here we can assume
    // the name part is a valid filesystem name.
    //
    // Note that if the file has no extension, we record an empty
    // extension rather than NULL (which would signify that the default
    // extension should be added).
    //
    string e (f.extension ());
    string n (move (f).string ());

    if (!e.empty ())
      n.resize (n.size () - e.size () - 1); // One for the dot.

    // See if this directory is part of any project out_root hierarchy and
    // if so determine the target type.
    //
    // Note that this will miss all the headers that come from src_root
    // (so they will be treated as generic C headers below). Generally, we
    // don't have the ability to determine that some file belongs to
    // src_root of some project. But that's not a problem for our
    // purposes: it is only important for us to accurately determine
    // target types for headers that could be auto-generated.
    //
    // While at it also try to determine if this target is from the src or
    // out tree of said project.
    //
    dir_path out;

    // It's possible the extension-to-target type mapping is ambiguous
    // (usually because both C and X-language headers use the same .h
    // extension). In this case we will first try to find one that matches
    // an explicit target (similar logic to when insert is false).
    //
    small_vector<const target_type*, 2> tts;

    const scope& bs (t.ctx.scopes.find (d));
    if (const scope* rs = bs.root_scope ())
    {
      tts = map_extension (bs, n, e);

      // Only calculate out if this is an out-of-source build and the
      // directory is inside the scope's src tree.
      //
      if (bs.out_path () != bs.src_path () && d.sub (bs.src_path ()))
        out = out_src (d, *rs);
    }

    // If it is outside any project, or the project doesn't have such an
    // extension, assume it is a plain old C header.
    //
    if (tts.empty ())
    {
      // If the project doesn't "know" this extension then we can't
      // possibly find an explicit target of this type.
      //
      if (!insert)
        return nullptr;

      tts.push_back (&h::static_type);
    }

    // Find or insert target.
    //
    // Note that in case of the target type ambiguity we first try to find
    // an explicit target that resolves this ambiguity.
    //
    const target* r (nullptr);

    if (!insert || tts.size () > 1)
    {
      // Note that we skip any target type-specific searches (like for an
      // existing file) and go straight for the target object since we
      // need to find the target explicitly spelled out.
      //
      // Also, it doesn't feel like we should be able to resolve an
      // absolute path with a spelled-out extension to multiple targets.
      //
      for (const target_type* tt: tts)
        if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr)
          break;

      // Note: we can't do this because of the in-source builds where
      // there won't be explicit targets for non-generated headers.
      //
      // This should be harmless, however, since in our world generated
      // headers are normally spelled-out as explicit targets. And if not,
      // we will still get an error, just a bit less specific.
      //
#if 0
      if (r == nullptr && insert)
      {
        f = d / n;
        if (!e.empty ())
        {
          f += '.';
          f += e;
        }

        diag_record dr (fail);
        dr << "mapping of header " << f << " to target type is ambiguous";
        for (const target_type* tt: tts)
          dr << info << "could be " << tt->name << "{}";
        dr << info << "spell-out its target to resolve this ambiguity";
      }
#endif
    }

    // If nothing explicit was found and we are allowed to insert, then
    // enter the target using the first (or only) candidate type.
    //
    // @@ OPT: move d, out, n
    //
    if (r == nullptr && insert)
      r = &search (t, *tts[0], d, out, n, &e, nullptr);

    return static_cast<const file*> (r);
  };

  // If it's not absolute then it either does not (yet) exist or is a
  // relative ""-include (see init_args() for details). Reduce the second
  // case to absolute.
  //
  // Note: we now always use absolute path to the translation unit so this
  // no longer applies. But let's keep it for posterity.
  //
#if 0
  if (f.relative () && rels.relative ())
  {
    // If the relative source path has a directory component, make sure
    // it matches since ""-include will always start with that (none of
    // the compilers we support try to normalize this path). Failing that
    // we may end up searching for a generated header in a random
    // (working) directory.
    //
    const string& fs (f.string ());
    const string& ss (rels.string ());

    size_t p (path::traits::rfind_separator (ss));

    if (p == string::npos || // No directory.
        (fs.size () > p + 1 &&
         path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0))
    {
      path t (work / f); // The rels path is relative to work.

      if (exists (t))
        f = move (t);
    }
  }
#endif

  const file* pt (nullptr);
  bool remapped (false);

  // If still relative then it does not exist.
  //
  if (f.relative ())
  {
    // This is probably as often an error as an auto-generated file, so
    // trace at level 4.
    //
    l4 ([&]{trace << "non-existent header '" << f << "'";});

    f.normalize ();

    // The relative path might still contain '..' (e.g., ../foo.hxx;
    // presumably ""-include'ed). We don't attempt to support auto-
    // generated headers with such inclusion styles.
    //
    if (f.normalized ())
    {
      // Build the prefix map lazily, on the first non-existent header.
      //
      if (!pfx_map)
        pfx_map = build_prefix_map (bs, a, t, li);

      // First try the whole file. Then just the directory.
      //
      // @@ Has to be a separate map since the prefix can be the same as
      //    the file name.
      //
      // auto i (pfx_map->find (f));

      // Find the most qualified prefix of which we are a sub-path.
      //
      if (!pfx_map->empty ())
      {
        dir_path d (f.directory ());
        auto i (pfx_map->find_sup (d));

        if (i != pfx_map->end ())
        {
          const dir_path& pd (i->second.directory);

          l4 ([&]{trace << "prefix '" << d << "' mapped to " << pd;});

          // If this is a prefixless mapping, then only use it if we can
          // resolve it to an existing target (i.e., it is explicitly
          // spelled out in a buildfile).
          //
          // Note that at some point we will probably have a list of
          // directories.
          //
          pt = find (pd / d, f.leaf (), !i->first.empty ());
          if (pt != nullptr)
          {
            f = pd / f;
            l4 ([&]{trace << "mapped as auto-generated " << f;});
          }
          else
            l4 ([&]{trace << "no explicit target in " << pd;});
        }
        else
          l4 ([&]{trace << "no prefix map entry for '" << d << "'";});
      }
      else
        l4 ([&]{trace << "prefix map is empty";});
    }
  }
  else
  {
    // We used to just normalize the path but that could result in an
    // invalid path (e.g., for some system/compiler headers on CentOS 7
    // with Clang 3.4) because of the symlinks (if a directory component
    // is a symlink, then any following `..` are resolved relative to the
    // target; see path::normalize() for background).
    //
    // Initially, to fix this, we realized (i.e., realpath(3)) it instead.
    // But that turned out also not to be quite right since now we have
    // all the symlinks resolved: conceptually it feels correct to keep
    // the original header names since that's how the user chose to
    // arrange things and practically this is how the compilers see/report
    // them (e.g., the GCC module mapper).
    //
    // So now we have a pretty elaborate scheme where we try to use the
    // normalized path if possible and fallback to realized. Normalized
    // paths will work for situations where `..` does not cross symlink
    // boundaries, which is the sane case. And for the insane case we only
    // really care about out-of-project files (i.e., system/compiler
    // headers). In other words, if you have the insane case inside your
    // project, then you are on your own.
    //
    // All of this is unless the path comes from the depdb, in which case
    // we've already done that. This is also where we handle src-out remap
    // (again, not needed if cached).
    //
    if (!cache)
    {
      // Interestingly, on most platforms and with most compilers (Clang
      // on Linux being a notable exception) most system/compiler headers
      // are already normalized.
      //
      // Note: from here to the end of this block the name a refers to the
      // path abnormality flags and hides the action argument (which is
      // not used in this block).
      //
      path_abnormality a (f.abnormalities ());
      if (a != path_abnormality::none)
      {
        // While we can reasonably expect this path to exist, things do go
        // south from time to time (like compiling under wine with file
        // wlantypes.h included as WlanTypes.h).
        //
        try
        {
          // If we have any parent components, then we have to verify the
          // normalized path matches realized.
          //
          path r;
          if ((a & path_abnormality::parent) == path_abnormality::parent)
          {
            r = f;
            r.realize ();
          }

          try
          {
            f.normalize ();

            // Note that we might still need to resolve symlinks in the
            // normalized path.
            //
            if (!r.empty () && f != r && path (f).realize () != r)
              f = move (r);
          }
          catch (const invalid_path&)
          {
            assert (!r.empty ()); // Shouldn't have failed if no `..`.
            f = move (r);         // Fallback to realize.
          }
        }
        catch (const invalid_path&)
        {
          fail << "invalid header path '" << f.string () << "'";
        }
        catch (const system_error& e)
        {
          fail << "invalid header path '" << f.string () << "': " << e;
        }
      }

      if (!so_map.empty ())
      {
        // Find the most qualified prefix of which we are a sub-path.
        //
        auto i (so_map.find_sup (f));
        if (i != so_map.end ())
        {
          // Ok, there is an out tree for this header. Remap to a path
          // from the out tree and see if there is a target for it.
          //
          dir_path d (i->second);
          d /= f.leaf (i->first).directory ();
          pt = find (move (d), f.leaf (), false); // d is not moved from.

          if (pt != nullptr)
          {
            // Since insert was false above, d is still intact and can be
            // used to form the remapped path.
            //
            path p (d / f.leaf ());
            l4 ([&]{trace << "remapping " << f << " to " << p;});
            f = move (p);
            remapped = true;
          }
        }
      }
    }

    // Not remapped (or cached): enter the header under its own path,
    // inserting the target if necessary.
    //
    if (pt == nullptr)
    {
      l6 ([&]{trace << "entering " << f;});
      pt = find (f.directory (), f.leaf (), true);
    }
  }

  return make_pair (pt, remapped);
}

// Update and add to the list of prerequisite targets a header or header
// unit target. Depending on the cache flag, the target is assumed to
// either have come from the depdb cache or from the compiler run.
//
// Note that there is no "add" flag in this signature: the target is
// appended to t.prerequisite_targets[a] whenever it has been matched
// (that is, on every path that does not return nullopt).
//
// Return the indication of whether it has changed or, if the passed
// timestamp is not timestamp_unknown, is older than the target. If the
// header came from the cache and it no longer exists nor can be
// generated, then return nullopt.
//
// Note: this used to be a lambda inside extract_headers() so refer to the
// body of that function for the overall picture.
//
optional<bool> compile_rule::
inject_header (action a, file& t,
               const file& pt, bool cache, timestamp mt) const
{
  tracer trace (x, "compile_rule::inject_header");

  // Match to a rule.
  //
  // If we are reading the cache, then it is possible the file has since
  // been removed (think of a header in /usr/local/include that has been
  // uninstalled and now we need to use one from /usr/include). This will
  // lead to the match failure which we translate to a restart.
  //
  if (!cache)
    build2::match (a, pt);
  else if (!build2::try_match (a, pt).first)
    return nullopt;

  bool r (update (trace, a, pt, mt));

  // Add to our prerequisite target list.
  //
  t.prerequisite_targets[a].push_back (&pt);

  return r;
}

// Extract and inject header dependencies. Return the preprocessed source
// file as well as an indication if it is usable for compilation (see
// below for details).
//
// This is also the place where we handle header units which are a lot
// more like auto-generated headers than modules. In particular, if a
// header unit BMI is out-of-date, then we have to re-preprocess this
// translation unit.
//
pair<auto_rmfile, bool> compile_rule::
extract_headers (action a,
                 const scope& bs,
                 file& t,
                 linfo li,
                 const file& src,
                 match_data& md,
                 depdb& dd,
                 bool& update,
                 timestamp mt) const
{
  tracer trace (x, "compile_rule::extract_headers");

  otype ot (li.type);

  bool reprocess (cast_false<bool> (t[c_reprocess]));

  auto_rmfile psrc;
  bool puse (true);

  // If things go wrong (and they often do in this area), give the user a
  // bit extra context.
  //
  auto df = make_diag_frame (
    [&src](const diag_record& dr)
    {
      if (verb != 0)
        dr << info << "while extracting header dependencies from " << src;
    });

  const scope& rs (*bs.root_scope ());

  // Preprocessor mode that preserves as much information as possible while
  // still performing inclusions. Also serves as a flag indicating whether
  // this compiler uses the separate preprocess and compile setup.
+ // + const char* pp (nullptr); + + switch (ctype) + { + case compiler_type::gcc: + { + // -fdirectives-only is available since GCC 4.3.0. + // + if (cmaj > 4 || (cmaj == 4 && cmin >= 3)) + pp = "-fdirectives-only"; + + break; + } + case compiler_type::clang: + { + // -frewrite-includes is available since vanilla Clang 3.2.0. + // + // Apple Clang 5.0 is based on LLVM 3.3svn so it should have this + // option (4.2 is based on 3.2svc so it may or may not have it and, + // no, we are not going to try to find out). + // + if (cvariant == "apple" + ? (cmaj >= 5) + : (cmaj > 3 || (cmaj == 3 && cmin >= 2))) + pp = "-frewrite-includes"; + + break; + } + case compiler_type::msvc: + { + // Asking MSVC to preserve comments doesn't really buy us anything + // but does cause some extra buggy behavior. + // + //pp = "/C"; + break; + } + case compiler_type::icc: + break; + } + + // Initialize lazily, only if required. + // + environment env; + cstrings args; + string out; // Storage. + + // Some compilers in certain modes (e.g., when also producing the + // preprocessed output) are incapable of writing the dependecy + // information to stdout. In this case we use a temporary file. + // + auto_rmfile drm; + + // Here is the problem: neither GCC nor Clang allow -MG (treat missing + // header as generated) when we produce any kind of other output (-MD). + // And that's probably for the best since otherwise the semantics gets + // pretty hairy (e.g., what is the exit code and state of the output)? + // + // One thing to note about generated headers: if we detect one, then, + // after generating it, we re-run the compiler since we need to get + // this header's dependencies. + // + // So this is how we are going to work around this problem: we first run + // with -E but without -MG. If there are any errors (maybe because of + // generated headers maybe not), we restart with -MG and without -E. 
If + // this fixes the error (so it was a generated header after all), then + // we have to restart at which point we go back to -E and no -MG. And we + // keep yo-yoing like this. Missing generated headers will probably be + // fairly rare occurrence so this shouldn't be too expensive. + // + // Actually, there is another error case we would like to handle: an + // outdated generated header that is now causing an error (e.g., because + // of a check that is now triggering #error or some such). So there are + // actually three error cases: outdated generated header, missing + // generated header, and some other error. To handle the outdated case + // we need the compiler to produce the dependency information even in + // case of an error. Clang does it, for VC we parse diagnostics + // ourselves, but GCC does not (but a patch has been submitted). + // + // So the final plan is then as follows: + // + // 1. Start wothout -MG and with suppressed diagnostics. + // 2. If error but we've updated a header, then repeat step 1. + // 3. Otherwise, restart with -MG and diagnostics. + // + // Note that below we don't even check if the compiler supports the + // dependency info on error. We just try to use it and if it's not + // there we ignore the io error since the compiler has failed. + // + bool args_gen; // Current state of args. + size_t args_i (0); // Start of the -M/-MD "tail". + + // Ok, all good then? Not so fast, the rabbit hole is deeper than it + // seems: When we run with -E we have to discard diagnostics. This is + // not a problem for errors since they will be shown on the re-run but + // it is for (preprocessor) warnings. + // + // Clang's -frewrite-includes is nice in that it preserves the warnings + // so they will be shown during the compilation of the preprocessed + // source. They are also shown during -E but that we discard. 
And unlike + // GCC, in Clang -M does not imply -w (disable warnings) so it would + // have been shown in -M -MG re-runs but we suppress that with explicit + // -w. All is good in the Clang land then (even -Werror works nicely). + // + // GCC's -fdirective-only, on the other hand, processes all the + // directives so they are gone from the preprocessed source. Here is + // what we are going to do to work around this: we will detect if any + // diagnostics has been written to stderr on the -E run. If that's the + // case (but the compiler indicated success) then we assume they are + // warnings and disable the use of the preprocessed output for + // compilation. This in turn will result in compilation from source + // which will display the warnings. Note that we may still use the + // preprocessed output for other things (e.g., C++ module dependency + // discovery). BTW, another option would be to collect all the + // diagnostics and then dump it if the run is successful, similar to + // the VC semantics (and drawbacks) described below. + // + // Finally, for VC, things are completely different: there is no -MG + // equivalent and we handle generated headers by analyzing the + // diagnostics. This means that unlike in the above two cases, the + // preprocessor warnings are shown during dependency extraction, not + // compilation. Not ideal but that's the best we can do. Or is it -- we + // could implement ad hoc diagnostics sensing... It appears warnings are + // in the C4000-C4999 code range though there can also be note lines + // which don't have any C-code. + // + // BTW, triggering a warning in the VC preprocessor is not easy; there + // is no #warning and pragmas are passed through to the compiler. 
One + // way to do it is to redefine a macro, for example: + // + // hello.cxx(4): warning C4005: 'FOO': macro redefinition + // hello.cxx(3): note: see previous definition of 'FOO' + // + // So seeing that it is hard to trigger a legitimate VC preprocessor + // warning, for now, we will just treat them as errors by adding /WX. + // + // Finally, if we are using the module mapper, then all this mess falls + // away: we only run the compiler once, we let the diagnostics through, + // we get a compiler error (with location information) if a header is + // not found, and there is no problem with outdated generated headers + // since we update/remap them before the compiler has a chance to read + // them. Overall, this "dependency mapper" approach is how it should + // have been done from the beginning. + + // Note: diagnostics sensing is currently only supported if dependency + // info is written to a file (see above). + // + bool sense_diag (false); + + // And here is another problem: if we have an already generated header + // in src and the one in out does not yet exist, then the compiler will + // pick the one in src and we won't even notice. Note that this is not + // only an issue with mixing in- and out-of-tree builds (which does feel + // wrong but is oh so convenient): this is also a problem with + // pre-generated headers, a technique we use to make installing the + // generator by end-users optional by shipping pre-generated headers. + // + // This is a nasty problem that doesn't seem to have a perfect solution + // (except, perhaps, C++ modules). So what we are going to do is try to + // rectify the situation by detecting and automatically remapping such + // mis-inclusions. It works as follows. + // + // First we will build a map of src/out pairs that were specified with + // -I. Here, for performance and simplicity, we will assume that they + // always come in pairs with out first and src second. 
We build this + // map lazily only if we are running the preprocessor and reuse it + // between restarts. + // + // With the map in hand we can then check each included header for + // potentially having a doppelganger in the out tree. If this is the + // case, then we calculate a corresponding header in the out tree and, + // (this is the most important part), check if there is a target for + // this header in the out tree. This should be fairly accurate and not + // require anything explicit from the user except perhaps for a case + // where the header is generated out of nothing (so there is no need to + // explicitly mention its target in the buildfile). But this probably + // won't be very common. + // + // One tricky area in this setup are target groups: if the generated + // sources are mentioned in the buildfile as a group, then there might + // be no header target (yet). The way we solve this is by requiring code + // generator rules to cooperate and create at least the header target as + // part of the group creation. While not all members of the group may be + // generated depending on the options (e.g., inline files might be + // suppressed), headers are usually non-optional. + // + // Note that we use path_map instead of dir_path_map to allow searching + // using path (file path). + // + srcout_map so_map; // path_map<dir_path> + + // Dynamic module mapper. + // + bool mod_mapper (false); + + // The gen argument to init_args() is in/out. The caller signals whether + // to force the generated header support and on return it signals + // whether this support is enabled. The first call to init_args is + // expected to have gen false. + // + // Return NULL if the dependency information goes to stdout and a + // pointer to the temporary file path otherwise. 
+ // + auto init_args = [a, &t, ot, li, reprocess, + &src, &md, &psrc, &sense_diag, &mod_mapper, + &rs, &bs, + pp, &env, &args, &args_gen, &args_i, &out, &drm, + &so_map, this] + (bool& gen) -> const path* + { + const path* r (nullptr); + + if (args.empty ()) // First call. + { + assert (!gen); + + // We use absolute/relative paths in the dependency output to + // distinguish existing headers from (missing) generated. Which + // means we have to (a) use absolute paths in -I and (b) pass + // absolute source path (for ""-includes). That (b) is a problem: + // if we use an absolute path, then all the #line directives will be + // absolute and all the diagnostics will have long, noisy paths + // (actually, we will still have long paths for diagnostics in + // headers). + // + // To work around this we used to pass a relative path to the source + // file and then check every relative path in the dependency output + // for existence in the source file's directory. This is not without + // issues: it is theoretically possible for a generated header that + // is <>-included and found via -I to exist in the source file's + // directory. Note, however, that this is a lot more likely to + // happen with prefix-less inclusion (e.g., <foo>) and in this case + // we assume the file is in the project anyway. And if there is a + // conflict with a prefixed include (e.g., <bar/foo>), then, well, + // we will just have to get rid of quoted includes (which are + // generally a bad idea, anyway). + // + // But then this approach (relative path) fell apart further when we + // tried to implement precise changed detection: the preprocessed + // output would change depending from where it was compiled because + // of #line (which we could work around) and __FILE__/assert() + // (which we can't really do anything about). So it looks like using + // the absolute path is the lesser of all the evils (and there are + // many). 
+ // + // Note that we detect and diagnose relative -I directories lazily + // when building the include prefix map. + // + args.push_back (cpath.recall_string ()); + + // If we are re-processing the translation unit, then allow the + // translation unit to detect header/module dependency extraction. + // This can be used to work around separate preprocessing bugs in + // the compiler. + // + if (reprocess) + args.push_back ("-D__build2_preprocess"); + + append_options (args, t, c_poptions); + append_options (args, t, x_poptions); + + // Add *.export.poptions from prerequisite libraries. + // + append_lib_options (bs, args, a, t, li); + + // Populate the src-out with the -I$out_base -I$src_base pairs. + // + { + // Try to be fast and efficient by reusing buffers as much as + // possible. + // + string ds; + + // Previous -I innermost scope if out_base plus the difference + // between the scope path and the -I path (normally empty). + // + const scope* s (nullptr); + dir_path p; + + for (auto i (args.begin ()), e (args.end ()); i != e; ++i) + { + // -I can either be in the "-Ifoo" or "-I foo" form. For VC it + // can also be /I. + // + const char* o (*i); + size_t n (strlen (o)); + + if (n < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I') + { + s = nullptr; + continue; + } + + if (n == 2) + { + if (++i == e) + break; // Let the compiler complain. + + ds = *i; + } + else + ds.assign (o + 2, n - 2); + + if (!ds.empty ()) + { + // Note that we don't normalize the paths since it would be + // quite expensive and normally the pairs we are inerested in + // are already normalized (since they are usually specified as + // -I$src/out_*). We just need to add a trailing directory + // separator if it's not already there. + // + if (!dir_path::traits_type::is_separator (ds.back ())) + ds += dir_path::traits_type::directory_separator; + + dir_path d (move (ds), dir_path::exact); // Move the buffer in. + + // Ignore invalid paths (buffer is not moved). 
+ // + if (!d.empty ()) + { + // Ignore any paths containing '.', '..' components. Allow + // any directory separators thought (think -I$src_root/foo + // on Windows). + // + if (d.absolute () && d.normalized (false)) + { + // If we have a candidate out_base, see if this is its + // src_base. + // + if (s != nullptr) + { + const dir_path& bp (s->src_path ()); + + if (d.sub (bp)) + { + if (p.empty () || d.leaf (bp) == p) + { + // We've got a pair. + // + so_map.emplace (move (d), s->out_path () / p); + s = nullptr; // Taken. + continue; + } + } + + // Not a pair. Fall through to consider as out_base. + // + s = nullptr; + } + + // See if this path is inside a project with an out-of- + // tree build and is in the out directory tree. + // + const scope& bs (t.ctx.scopes.find (d)); + if (bs.root_scope () != nullptr) + { + const dir_path& bp (bs.out_path ()); + if (bp != bs.src_path ()) + { + bool e; + if ((e = (d == bp)) || d.sub (bp)) + { + s = &bs; + if (e) + p.clear (); + else + p = d.leaf (bp); + } + } + } + } + else + s = nullptr; + + ds = move (d).string (); // Move the buffer out. + } + else + s = nullptr; + } + else + s = nullptr; + } + } + + // Extra system header dirs (last). + // + assert (sys_inc_dirs_extra <= sys_inc_dirs.size ()); + append_option_values ( + args, "-I", + sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (), + [] (const dir_path& d) {return d.string ().c_str ();}); + + if (md.symexport) + append_symexport_options (args, t); + + // Some compile options (e.g., -std, -m) affect the preprocessor. + // + // Currently Clang supports importing "header modules" even when in + // the TS mode. And "header modules" support macros which means + // imports have to be resolved during preprocessing. Which poses a + // bit of a chicken and egg problem for us. For now, the workaround + // is to remove the -fmodules-ts option when preprocessing. Hopefully + // there will be a "pure modules" mode at some point. 
+ // + // @@ MODHDR Clang: should be solved with the dynamic module mapper + // if/when Clang supports it? + // + + // Don't treat warnings as errors. + // + const char* werror (nullptr); + switch (cclass) + { + case compiler_class::gcc: werror = "-Werror"; break; + case compiler_class::msvc: werror = "/WX"; break; + } + + bool clang (ctype == compiler_type::clang); + + append_options (args, t, c_coptions, werror); + append_options (args, t, x_coptions, werror); + append_options (args, tstd, + tstd.size () - (modules && clang ? 1 : 0)); + + switch (cclass) + { + case compiler_class::msvc: + { + args.push_back ("/nologo"); + + // See perform_update() for details on overriding the default + // exceptions and runtime. + // + if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + args.push_back ("/EHsc"); + + if (!find_option_prefixes ({"/MD", "/MT"}, args)) + args.push_back ("/MD"); + + args.push_back ("/P"); // Preprocess to file. + args.push_back ("/showIncludes"); // Goes to stdout (with diag). + if (pp != nullptr) + args.push_back (pp); // /C (preserve comments). + args.push_back ("/WX"); // Warning as error (see above). + + msvc_sanitize_cl (args); + + psrc = auto_rmfile (t.path () + x_pext); + + if (cast<uint64_t> (rs[x_version_major]) >= 18) + { + args.push_back ("/Fi:"); + args.push_back (psrc.path.string ().c_str ()); + } + else + { + out = "/Fi" + psrc.path.string (); + args.push_back (out.c_str ()); + } + + append_lang_options (args, md); // Compile as. + gen = args_gen = true; + break; + } + case compiler_class::gcc: + { + if (ot == otype::s) + { + // On Darwin, Win32 -fPIC is the default. + // + if (tclass == "linux" || tclass == "bsd") + args.push_back ("-fPIC"); + } + + // Setup the dynamic module mapper if needed. + // + // Note that it's plausible in the future we will use it even if + // modules are disabled, for example, to implement better -MG. + // In which case it will have probably be better called a + // "dependency mapper". 
+ // + if (modules) + { + if (ctype == compiler_type::gcc) + { + args.push_back ("-fmodule-mapper=<>"); + mod_mapper = true; + } + } + + // Depending on the compiler, decide whether (and how) we can + // produce preprocessed output as a side effect of dependency + // extraction. + // + // Note: -MM -MG skips missing <>-included. + + // Clang's -M does not imply -w (disable warnings). We also + // don't need them in the -MD case (see above) so disable for + // both. + // + if (clang) + args.push_back ("-w"); + + append_lang_options (args, md); + + if (pp != nullptr) + { + // With the GCC module mapper the dependency information is + // written directly to depdb by the mapper. + // + if (ctype == compiler_type::gcc && mod_mapper) + { + // Note that in this mode we don't have -MG re-runs. In a + // sense we are in the -MG mode (or, more precisely, the "no + // -MG required" mode) right away. + // + args.push_back ("-E"); + args.push_back (pp); + gen = args_gen = true; + r = &drm.path; // Bogus/hack to force desired process start. + } + else + { + // Previously we used '*' as a target name but it gets + // expanded to the current directory file names by GCC (4.9) + // that comes with MSYS2 (2.4). Yes, this is the (bizarre) + // behavior of GCC being executed in the shell with -MQ '*' + // option and not just -MQ *. + // + args.push_back ("-MQ"); // Quoted target name. + args.push_back ("^"); // Old versions can't do empty. + + // Note that the options are carefully laid out to be easy + // to override (see below). + // + args_i = args.size (); + + args.push_back ("-MD"); + args.push_back ("-E"); + args.push_back (pp); + + // Dependency output. + // + // GCC until version 8 was not capable of writing the + // dependency information to stdout. We also either need to + // sense the diagnostics on the -E runs (which we currently + // can only do if we don't need to read stdout) or we could + // be communicating with the module mapper via stdin/stdout. 
+ // + if (ctype == compiler_type::gcc) + { + // Use the .t extension (for "temporary"; .d is taken). + // + r = &(drm = auto_rmfile (t.path () + ".t")).path; + } + + args.push_back ("-MF"); + args.push_back (r != nullptr ? r->string ().c_str () : "-"); + + sense_diag = (ctype == compiler_type::gcc); + gen = args_gen = false; + } + + // Preprocessor output. + // + psrc = auto_rmfile (t.path () + x_pext); + args.push_back ("-o"); + args.push_back (psrc.path.string ().c_str ()); + } + else + { + args.push_back ("-MQ"); + args.push_back ("^"); + args.push_back ("-M"); + args.push_back ("-MG"); // Treat missing headers as generated. + gen = args_gen = true; + } + + break; + } + } + + args.push_back (src.path ().string ().c_str ()); + args.push_back (nullptr); + + // Note: only doing it here. + // + if (!env.empty ()) + env.push_back (nullptr); + } + else + { + assert (gen != args_gen && args_i != 0); + + size_t i (args_i); + + if (gen) + { + // Overwrite. + // + args[i++] = "-M"; + args[i++] = "-MG"; + args[i++] = src.path ().string ().c_str (); + args[i] = nullptr; + + if (ctype == compiler_type::gcc) + { + sense_diag = false; + } + } + else + { + // Restore. + // + args[i++] = "-MD"; + args[i++] = "-E"; + args[i++] = pp; + args[i] = "-MF"; + + if (ctype == compiler_type::gcc) + { + r = &drm.path; + sense_diag = true; + } + } + + args_gen = gen; + } + + return r; + }; + + // Build the prefix map lazily only if we have non-existent files. + // Also reuse it over restarts since it doesn't change. + // + optional<prefix_map> pfx_map; + + // If any prerequisites that we have extracted changed, then we have to + // redo the whole thing. The reason for this is auto-generated headers: + // the updated header may now include a yet-non-existent header. Unless + // we discover this and generate it (which, BTW, will trigger another + // restart since that header, in turn, can also include auto-generated + // headers), we will end up with an error during compilation proper. 
+ // + // One complication with this restart logic is that we will see a + // "prefix" of prerequisites that we have already processed (i.e., they + // are already in our prerequisite_targets list) and we don't want to + // keep redoing this over and over again. One thing to note, however, is + // that the prefix that we have seen on the previous run must appear + // exactly the same in the subsequent run. The reason for this is that + // none of the files that it can possibly be based on have changed and + // thus it should be exactly the same. To put it another way, the + // presence or absence of a file in the dependency output can only + // depend on the previous files (assuming the compiler outputs them as + // it encounters them and it is hard to think of a reason why would + // someone do otherwise). And we have already made sure that all those + // files are up to date. And here is the way we are going to exploit + // this: we are going to keep track of how many prerequisites we have + // processed so far and on restart skip right to the next one. + // + // And one more thing: most of the time this list of headers would stay + // unchanged and extracting them by running the compiler every time is a + // bit wasteful. So we are going to cache them in the depdb. If the db + // hasn't been invalidated yet (e.g., because the compiler options have + // changed), then we start by reading from it. If anything is out of + // date then we use the same restart and skip logic to switch to the + // compiler run. + // + size_t skip_count (0); + + // Enter as a target, update, and add to the list of prerequisite + // targets a header file. Depending on the cache flag, the file is + // assumed to either have come from the depdb cache or from the compiler + // run. Return true if the extraction process should be restarted. 
+ // + auto add = [a, &bs, &t, li, + &pfx_map, &so_map, + &dd, &skip_count, + this] (path hp, bool cache, timestamp mt) -> bool + { + const file* ht (enter_header (a, bs, t, li, + move (hp), cache, + pfx_map, so_map).first); + if (ht == nullptr) + { + diag_record dr; + dr << fail << "header '" << hp + << "' not found and cannot be generated"; + + if (verb < 4) + dr << info << "re-run with --verbose=4 for more information"; + } + + if (optional<bool> u = inject_header (a, t, *ht, cache, mt)) + { + // Verify/add it to the dependency database. + // + if (!cache) + dd.expect (ht->path ()); + + skip_count++; + return *u; + } + + dd.write (); // Invalidate this line. + return true; + }; + + // As above but for a header unit. Note that currently it is only used + // for the cached case (the other case is handled by the mapper). + // + auto add_unit = [a, &bs, &t, li, + &pfx_map, &so_map, + &dd, &skip_count, &md, + this] (path hp, path bp, timestamp mt) -> bool + { + const file* ht (enter_header (a, bs, t, li, + move (hp), true /* cache */, + pfx_map, so_map).first); + if (ht == nullptr) + fail << "header '" << hp << "' not found and cannot be generated"; + + // Again, looks like we have to update the header explicitly since + // we want to restart rather than fail if it cannot be updated. + // + if (inject_header (a, t, *ht, true /* cache */, mt)) + { + const file& bt (make_header_sidebuild (a, bs, li, *ht)); + + // It doesn't look like we need the cache semantics here since given + // the header, we should be able to build its BMI. In other words, a + // restart is not going to change anything. + // + optional<bool> u (inject_header (a, t, + bt, false /* cache */, mt)); + assert (u); // Not from cache. + + if (bt.path () == bp) + { + md.headers++; + skip_count++; + return *u; + } + } + + dd.write (); // Invalidate this line. + return true; + }; + + // See init_args() above for details on generated header support. 
+ // + bool gen (false); + optional<bool> force_gen; + optional<size_t> force_gen_skip; // Skip count at last force_gen run. + + const path* drmp (nullptr); // Points to drm.path () if active. + + // If nothing so far has invalidated the dependency database, then try + // the cached data before running the compiler. + // + bool cache (!update); + + for (bool restart (true); restart; cache = false) + { + restart = false; + + if (cache) + { + // If any, this is always the first run. + // + assert (skip_count == 0); + + // We should always end with a blank line. + // + for (;;) + { + string* l (dd.read ()); + + // If the line is invalid, run the compiler. + // + if (l == nullptr) + { + restart = true; + break; + } + + if (l->empty ()) // Done, nothing changed. + { + // If modules are enabled, then we keep the preprocessed output + // around (see apply() for details). + // + return modules + ? make_pair (auto_rmfile (t.path () + x_pext, false), true) + : make_pair (auto_rmfile (), false); + } + + // This can be a header or a header unit (mapping). The latter + // is single-quoted. + // + // If this header (unit) came from the depdb, make sure it is no + // older than the target (if it has changed since the target was + // updated, then the cached data is stale). + // + if ((*l)[0] == '@') + { + size_t p (l->find ('\'', 3)); + + if (p != string::npos) + { + path h (*l, 3, p - 3); + path b (move (l->erase (0, p + 2))); + + restart = add_unit (move (h), move (b), mt); + } + else + restart = true; // Corrupt database? + } + else + restart = add (path (move (*l)), true, mt); + + if (restart) + { + update = true; + l6 ([&]{trace << "restarting (cache)";}); + break; + } + } + } + else + { + try + { + if (force_gen) + gen = *force_gen; + + if (args.empty () || gen != args_gen) + drmp = init_args (gen); + + if (verb >= 3) + print_process (args.data ()); // Disable pipe mode. 
+ + process pr; + + try + { + // Assume the preprocessed output (if produced) is usable + // until proven otherwise. + // + puse = true; + + // Save the timestamp just before we start preprocessing. If + // we depend on any header that has been updated since, then + // we should assume we've "seen" the old copy and re-process. + // + timestamp pmt (system_clock::now ()); + + // In some cases we may need to ignore the error return status. + // The good_error flag keeps track of that. Similarly, sometimes + // we expect the error return status based on the output that we + // see. The bad_error flag is for that. + // + bool good_error (false), bad_error (false); + + // If we have no generated header support, then suppress all + // diagnostics (if things go badly we will restart with this + // support). + // + if (drmp == nullptr) // Dependency info goes to stdout. + { + assert (!sense_diag); // Note: could support with fdselect(). + + // For VC with /P the dependency info and diagnostics all go + // to stderr so redirect it to stdout. + // + pr = process ( + cpath, + args.data (), + 0, + -1, + cclass == compiler_class::msvc ? 1 : gen ? 2 : -2, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + } + else // Dependency info goes to a temporary file. + { + pr = process (cpath, + args.data (), + mod_mapper ? -1 : 0, + mod_mapper ? -1 : 2, // Send stdout to stderr. + gen ? 2 : sense_diag ? -1 : -2, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + + // Monitor for module mapper requests and/or diagnostics. If + // diagnostics is detected, mark the preprocessed output as + // unusable for compilation. + // + if (mod_mapper || sense_diag) + { + module_mapper_state mm_state (skip_count); + + const char* w (nullptr); + try + { + // For now we don't need to do both so let's use a simpler + // blocking implementation. Note that the module mapper + // also needs to be adjusted when switching to the + // non-blocking version. 
+ // +#if 1 + assert (mod_mapper != sense_diag); + + if (mod_mapper) + { + w = "module mapper request"; + + // Note: the order is important (see the non-blocking + // verison for details). + // + ifdstream is (move (pr.in_ofd), + fdstream_mode::skip, + ifdstream::badbit); + ofdstream os (move (pr.out_fd)); + + do + { + gcc_module_mapper (mm_state, + a, bs, t, li, + is, os, + dd, update, bad_error, + pfx_map, so_map); + } while (!is.eof ()); + + os.close (); + is.close (); + } + + if (sense_diag) + { + w = "diagnostics"; + ifdstream is (move (pr.in_efd), fdstream_mode::skip); + puse = puse && (is.peek () == ifdstream::traits_type::eof ()); + is.close (); + } +#else + fdselect_set fds; + auto add = [&fds] (const auto_fd& afd) -> fdselect_state* + { + int fd (afd.get ()); + fdmode (fd, fdstream_mode::non_blocking); + fds.push_back (fd); + return &fds.back (); + }; + + // Note that while we read both streams until eof in + // normal circumstances, we cannot use fdstream_mode::skip + // for the exception case on both of them: we may end up + // being blocked trying to read one stream while the + // process may be blocked writing to the other. So in case + // of an exception we only skip the diagnostics and close + // the mapper stream hard. The latter should happen first + // so the order of the following variable is important. + // + ifdstream es; + ofdstream os; + ifdstream is; + + fdselect_state* ds (nullptr); + if (sense_diag) + { + w = "diagnostics"; + ds = add (pr.in_efd); + es.open (move (pr.in_efd), fdstream_mode::skip); + } + + fdselect_state* ms (nullptr); + if (mod_mapper) + { + w = "module mapper request"; + ms = add (pr.in_ofd); + is.open (move (pr.in_ofd)); + os.open (move (pr.out_fd)); // Note: blocking. + } + + // Set each state pointer to NULL when the respective + // stream reaches eof. 
+ // + while (ds != nullptr || ms != nullptr) + { + w = "output"; + ifdselect (fds); + + // First read out the diagnostics in case the mapper + // interaction produces more. To make sure we don't get + // blocked by full stderr, the mapper should only handle + // one request at a time. + // + if (ds != nullptr && ds->ready) + { + w = "diagnostics"; + + for (char buf[4096];;) + { + streamsize c (sizeof (buf)); + streamsize n (es.readsome (buf, c)); + + if (puse && n > 0) + puse = false; + + if (n < c) + break; + } + + if (es.eof ()) + { + es.close (); + ds->fd = nullfd; + ds = nullptr; + } + } + + if (ms != nullptr && ms->ready) + { + w = "module mapper request"; + + gcc_module_mapper (mm_state, + a, bs, t, li, + is, os, + dd, update, bad_error, + pfx_map, so_map); + if (is.eof ()) + { + os.close (); + is.close (); + ms->fd = nullfd; + ms = nullptr; + } + } + } +#endif + } + catch (const io_error& e) + { + if (pr.wait ()) + fail << "io error handling " << x_lang << " compiler " + << w << ": " << e; + + // Fall through. + } + + if (mod_mapper) + md.headers += mm_state.headers; + } + + // The idea is to reduce this to the stdout case. + // + pr.wait (); + + // With -MG we want to read dependency info even if there is + // an error (in case an outdated header file caused it). But + // with the GCC module mapper an error is non-negotiable, so + // to speak, and so we want to skip all of that. In fact, we + // now write directly to depdb without generating and then + // parsing an intermadiate dependency makefile. + // + pr.in_ofd = (ctype == compiler_type::gcc && mod_mapper) + ? auto_fd (nullfd) + : fdopen (*drmp, fdopen_mode::in); + } + + if (pr.in_ofd != nullfd) + { + // We may not read all the output (e.g., due to a restart). + // Before we used to just close the file descriptor to signal + // to the other end that we are not interested in the rest. + // This works fine with GCC but Clang (3.7.0) finds this + // impolite and complains, loudly (broken pipe). 
So now we are + // going to skip until the end. + // + ifdstream is (move (pr.in_ofd), + fdstream_mode::text | fdstream_mode::skip, + ifdstream::badbit); + + size_t skip (skip_count); + string l; // Reuse. + for (bool first (true), second (false); !restart; ) + { + if (eof (getline (is, l))) + break; + + l6 ([&]{trace << "header dependency line '" << l << "'";}); + + // Parse different dependency output formats. + // + switch (cclass) + { + case compiler_class::msvc: + { + if (first) + { + // The first line should be the file we are compiling. + // If it is not, then something went wrong even before + // we could compile anything (e.g., file does not + // exist). In this case the first line (and everything + // after it) is presumably diagnostics. + // + // It can, however, be a command line warning, for + // example: + // + // cl : Command line warning D9025 : overriding '/W3' with '/W4' + // + // So we try to detect and skip them assuming they + // will also show up during the compilation proper. + // + if (l != src.path ().leaf ().string ()) + { + // D8XXX are errors while D9XXX are warnings. + // + size_t p (msvc_sense_diag (l, 'D')); + if (p != string::npos && l[p] == '9') + continue; + + text << l; + bad_error = true; + break; + } + + first = false; + continue; + } + + string f (next_show (l, good_error)); + + if (f.empty ()) // Some other diagnostics. + { + text << l; + bad_error = true; + break; + } + + // Skip until where we left off. + // + if (skip != 0) + { + // We can't be skipping over a non-existent header. + // + assert (!good_error); + skip--; + } + else + { + restart = add (path (move (f)), false, pmt); + + // If the header does not exist (good_error), then + // restart must be true. Except that it is possible + // that someone running in parallel has already + // updated it. In this case we must force a restart + // since we haven't yet seen what's after this + // at-that-time-non-existent header. 
+ // + // We also need to force the target update (normally + // done by add()). + // + if (good_error) + restart = true; + // + // And if we have updated the header (restart is + // true), then we may end up in this situation: an old + // header got included which caused the preprocessor + // to fail down the line. So if we are restarting, set + // the good error flag in case the process fails + // because of something like this (and if it is for a + // valid reason, then we will pick it up on the next + // round). + // + else if (restart) + good_error = true; + + if (restart) + { + update = true; + l6 ([&]{trace << "restarting";}); + } + } + + break; + } + case compiler_class::gcc: + { + // Make dependency declaration. + // + size_t pos (0); + + if (first) + { + // Empty/invalid output should mean the wait() call + // below will return false. + // + if (l.empty () || + l[0] != '^' || l[1] != ':' || l[2] != ' ') + { + // @@ Hm, we don't seem to redirect stderr to stdout + // for this class of compilers so I wonder why + // we are doing this? + // + if (!l.empty ()) + text << l; + + bad_error = true; + break; + } + + first = false; + second = true; + + // While normally we would have the source file on the + // first line, if too long, it will be moved to the + // next line and all we will have on this line is: + // "^: \". + // + if (l.size () == 4 && l[3] == '\\') + continue; + else + pos = 3; // Skip "^: ". + + // Fall through to the 'second' block. + } + + if (second) + { + second = false; + next_make (l, pos); // Skip the source file. + } + + while (pos != l.size ()) + { + string f (next_make (l, pos)); + + // Skip until where we left off. + // + if (skip != 0) + { + skip--; + continue; + } + + restart = add (path (move (f)), false, pmt); + + if (restart) + { + // The same "preprocessor may fail down the line" + // logic as above. 
+ // + good_error = true; + + update = true; + l6 ([&]{trace << "restarting";}); + break; + } + } + + break; + } + } + + if (bad_error) + break; + } + + // In case of VC, we are parsing stderr and if things go + // south, we need to copy the diagnostics for the user to see. + // + if (bad_error && cclass == compiler_class::msvc) + { + // We used to just dump the whole rdbuf but it turns out VC + // may continue writing include notes interleaved with the + // diagnostics. So we have to filter them out. + // + for (; !eof (getline (is, l)); ) + { + size_t p (msvc_sense_diag (l, 'C')); + if (p != string::npos && l.compare (p, 4, "1083") != 0) + diag_stream_lock () << l << endl; + } + } + + is.close (); + + // This is tricky: it is possible that in parallel someone has + // generated all our missing headers and we wouldn't restart + // normally. + // + // In this case we also need to force the target update (which + // is normally done by add()). + // + if (force_gen && *force_gen) + { + restart = update = true; + force_gen = false; + } + } + + if (pr.wait ()) + { + if (!bad_error) // Ignore expected successes (we are done). + continue; + + fail << "expected error exit status from " << x_lang + << " compiler"; + } + else if (pr.exit->normal ()) + { + if (good_error) // Ignore expected errors (restart). + continue; + } + + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + fail << "unable to read " << x_lang << " compiler header " + << "dependency output: " << e; + + // Fall through. + } + + assert (pr.exit && !*pr.exit); + const process_exit& e (*pr.exit); + + // For normal exit we assume the child process issued some + // diagnostics. + // + if (e.normal ()) + { + // If this run was with the generated header support then we + // have issued diagnostics and it's time to give up. 
+ // + if (gen) + throw failed (); + + // Just to recap, being here means something is wrong with the + // source: it can be a missing generated header, it can be an + // outdated generated header (e.g., some check triggered #error + // which will go away if only we updated the generated header), + // or it can be a real error that is not going away. + // + // So this is what we are going to do here: if anything got + // updated on this run (i.e., the compiler has produced valid + // dependency information even though there were errors and we + // managed to find and update a header based on this + // informaion), then we restart in the same mode hoping that + // this fixes things. Otherwise, we force the generated header + // support which will either uncover a missing generated header + // or will issue diagnostics. + // + if (restart) + l6 ([&]{trace << "trying again without generated headers";}); + else + { + // In some pathological situations we may end up switching + // back and forth indefinitely without making any headway. So + // we use skip_count to track our progress. + // + // Examples that have been encountered so far: + // + // - Running out of disk space. + // + // - Using __COUNTER__ in #if which is incompatible with the + // GCC's -fdirectives-only mode. + // + // - A Clang bug: https://bugs.llvm.org/show_bug.cgi?id=35580 + // + // So let's show the yo-yo'ing command lines and ask the user + // to investigate. + // + // Note: we could restart one more time but this time without + // suppressing diagnostics. This could be useful since, say, + // running out of disk space may not reproduce on its own (for + // example, because we have removed all the partially + // preprocessed source files). 
+ // + if (force_gen_skip && *force_gen_skip == skip_count) + { + diag_record dr (fail); + + dr << "inconsistent " << x_lang << " compiler behavior" << + info << "run the following two commands to investigate"; + + dr << info; + print_process (dr, args.data ()); // No pipes. + + init_args ((gen = true)); + dr << info << ""; + print_process (dr, args.data ()); // No pipes. + } + + restart = true; + force_gen = true; + force_gen_skip = skip_count; + l6 ([&]{trace << "restarting with forced generated headers";}); + } + continue; + } + else + run_finish (args, pr); // Throws. + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + // In a multi-threaded program that fork()'ed but did not exec(), + // it is unwise to try to do any kind of cleanup (like unwinding + // the stack and running destructors). + // + if (e.child) + { + drm.cancel (); + exit (1); + } + + throw failed (); + } + } + } + + // Add the terminating blank line (we are updating depdb). + // + dd.expect (""); + + puse = puse && !reprocess && !psrc.path.empty (); + return make_pair (move (psrc), puse); + } + + // Return the translation unit information (first) and its checksum + // (second). If the checksum is empty, then it should not be used. + // + pair<unit, string> compile_rule:: + parse_unit (action a, + file& t, + linfo li, + const file& src, + auto_rmfile& psrc, + const match_data& md, + const path& dd) const + { + tracer trace (x, "compile_rule::parse_unit"); + + otype ot (li.type); + + // If things go wrong give the user a bit extra context. + // + auto df = make_diag_frame ( + [&src](const diag_record& dr) + { + if (verb != 0) + dr << info << "while parsing " << src; + }); + + // For some compilers (GCC, Clang) the preporcessed output is only + // partially preprocessed. For others (VC), it is already fully + // preprocessed (well, almost: it still has comments but we can handle + // that). 
Plus, the source file might already be (sufficiently) + // preprocessed. + // + // So the plan is to start the compiler process that writes the fully + // preprocessed output to stdout and reduce the already preprocessed + // case to it. + // + environment env; + cstrings args; + small_vector<string, 2> header_args; // Header unit options storage. + + const path* sp; // Source path. + + // @@ MODHDR: If we are reprocessing, then will need module mapper for + // include translation. Hairy... Can't we add support for + // include translation in file mapper? + // + bool reprocess (cast_false<bool> (t[c_reprocess])); + + bool ps; // True if extracting from psrc. + if (md.pp < preprocessed::modules) + { + // If we were instructed to reprocess the source during compilation, + // then also reprocess it here. While the preprocessed output may be + // usable for our needs, to be safe we assume it is not (and later we + // may extend cc.reprocess to allow specifying where reprocessing is + // needed). + // + ps = !psrc.path.empty () && !reprocess; + sp = &(ps ? psrc.path : src.path ()); + + // VC's preprocessed output, if present, is fully preprocessed. + // + if (cclass != compiler_class::msvc || !ps) + { + // This should match with how we setup preprocessing and is pretty + // similar to init_args() from extract_headers(). + // + args.push_back (cpath.recall_string ()); + + if (reprocess) + args.push_back ("-D__build2_preprocess"); + + append_options (args, t, c_poptions); + append_options (args, t, x_poptions); + + append_lib_options (t.base_scope (), args, a, t, li); + + assert (sys_inc_dirs_extra <= sys_inc_dirs.size ()); + append_option_values ( + args, "-I", + sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (), + [] (const dir_path& d) {return d.string ().c_str ();}); + + if (md.symexport) + append_symexport_options (args, t); + + // Make sure we don't fail because of warnings. + // + // @@ Can be both -WX and /WX. 
+ // + const char* werror (nullptr); + switch (cclass) + { + case compiler_class::gcc: werror = "-Werror"; break; + case compiler_class::msvc: werror = "/WX"; break; + } + + bool clang (ctype == compiler_type::clang); + + append_options (args, t, c_coptions, werror); + append_options (args, t, x_coptions, werror); + append_options (args, tstd, + tstd.size () - (modules && clang ? 1 : 0)); + + append_headers (env, args, header_args, a, t, md, dd); + + switch (cclass) + { + case compiler_class::msvc: + { + args.push_back ("/nologo"); + + if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + args.push_back ("/EHsc"); + + if (!find_option_prefixes ({"/MD", "/MT"}, args)) + args.push_back ("/MD"); + + args.push_back ("/E"); + // args.push_back ("/C"); // See above. + + msvc_sanitize_cl (args); + + append_lang_options (args, md); // Compile as. + + break; + } + case compiler_class::gcc: + { + if (ot == otype::s) + { + if (tclass == "linux" || tclass == "bsd") + args.push_back ("-fPIC"); + } + + args.push_back ("-E"); + append_lang_options (args, md); + + // Options that trigger preprocessing of partially preprocessed + // output are a bit of a compiler-specific voodoo. + // + if (ps) + { + if (ctype == compiler_type::gcc) + { + // Note that only these two *plus* -x do the trick. + // + args.push_back ("-fpreprocessed"); + args.push_back ("-fdirectives-only"); + } + } + + break; + } + } + + args.push_back (sp->string ().c_str ()); + args.push_back (nullptr); + } + + if (!env.empty ()) + env.push_back (nullptr); + } + else + { + // Extracting directly from source. + // + ps = false; + sp = &src.path (); + } + + // Preprocess and parse. + // + for (;;) // Breakout loop. + try + { + // Disarm the removal of the preprocessed file in case of an error. + // We re-arm it below. + // + if (ps) + psrc.active = false; + + process pr; + + try + { + if (args.empty ()) + { + pr = process (process_exit (0)); // Successfully exited. 
+ pr.in_ofd = fdopen (*sp, fdopen_mode::in); + } + else + { + if (verb >= 3) + print_process (args); + + // We don't want to see warnings multiple times so ignore all + // diagnostics. + // + pr = process (cpath, + args.data (), + 0, -1, -2, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + } + + // Use binary mode to obtain consistent positions. + // + ifdstream is (move (pr.in_ofd), + fdstream_mode::binary | fdstream_mode::skip); + + parser p; + unit tu (p.parse (is, *sp)); + + is.close (); + + if (pr.wait ()) + { + if (ps) + psrc.active = true; // Re-arm. + + unit_type& ut (tu.type); + module_info& mi (tu.module_info); + + if (!modules) + { + if (ut != unit_type::non_modular || !mi.imports.empty ()) + fail << "modules support required by " << src; + } + else + { + // Sanity checks. + // + // If we are compiling a module interface, make sure the + // translation unit has the necessary declarations. + // + if (ut != unit_type::module_iface && src.is_a (*x_mod)) + fail << src << " is not a module interface unit"; + + // A header unit should look like a non-modular translation unit. + // + if (md.type == unit_type::module_header) + { + if (ut != unit_type::non_modular) + fail << "module declaration in header unit " << src; + + ut = md.type; + mi.name = src.path ().string (); + } + + // Prior to 15.5 (19.12) VC was not using the 'export module M;' + // syntax so we use the preprequisite type to distinguish + // between interface and implementation units. + // + if (ctype == compiler_type::msvc && cmaj == 19 && cmin <= 11) + { + if (ut == unit_type::module_impl && src.is_a (*x_mod)) + ut = unit_type::module_iface; + } + } + + // If we were forced to reprocess, assume the checksum is not + // accurate (parts of the translation unit could have been + // #ifdef'ed out; see __build2_preprocess). + // + return pair<unit, string> ( + move (tu), + reprocess ? string () : move (p.checksum)); + } + + // Fall through. 
+ } + catch (const io_error& e) + { + if (pr.wait ()) + fail << "unable to read " << x_lang << " preprocessor output: " + << e; + + // Fall through. + } + + assert (pr.exit && !*pr.exit); + const process_exit& e (*pr.exit); + + // What should we do with a normal error exit? Remember we suppressed + // the compiler's diagnostics. We used to issue a warning and continue + // with the assumption that the compilation step will fail with + // diagnostics. The problem with this approach is that we may fail + // before that because the information we return (e.g., module name) + // is bogus. So looks like failing is the only option. + // + if (e.normal ()) + { + fail << "unable to preprocess " << src << + info << "re-run with -s -V to display failing command" << + info << "then run failing command to display compiler diagnostics"; + } + else + run_finish (args, pr); // Throws. + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + } + + throw failed (); + } + + // Extract and inject module dependencies. + // + void compile_rule:: + extract_modules (action a, + const scope& bs, + file& t, + linfo li, + const compile_target_types& tts, + const file& src, + match_data& md, + module_info&& mi, + depdb& dd, + bool& update) const + { + tracer trace (x, "compile_rule::extract_modules"); + + // If things go wrong, give the user a bit extra context. + // + auto df = make_diag_frame ( + [&src](const diag_record& dr) + { + if (verb != 0) + dr << info << "while extracting module dependencies from " << src; + }); + + unit_type ut (md.type); + module_imports& is (mi.imports); + + // Search and match all the modules we depend on. If this is a module + // implementation unit, then treat the module itself as if it was + // imported (we insert it first since for some compilers we have to + // differentiate between this special module and real imports). Note: + // move. 
+ // + if (ut == unit_type::module_impl) + is.insert ( + is.begin (), + module_import {unit_type::module_iface, move (mi.name), false, 0}); + + // The change to the set of imports would have required a change to + // source code (or options). Changes to the bmi{}s themselves will be + // detected via the normal prerequisite machinery. However, the same set + // of imports could be resolved to a different set of bmi{}s (in a sense + // similar to changing the source file). To detect this we calculate and + // store a hash of all (not just direct) bmi{}'s paths. + // + sha256 cs; + + if (!is.empty ()) + md.modules = search_modules (a, bs, t, li, tts.bmi, src, is, cs); + + if (dd.expect (cs.string ()) != nullptr) + update = true; + + // Save the module map for compilers that use it. + // + switch (ctype) + { + case compiler_type::gcc: + { + // We don't need to redo this if the above hash hasn't changed and + // the database is still valid. + // + if (dd.writing () || !dd.skip ()) + { + auto write = [&dd] (const string& name, const path& file, bool q) + { + dd.write ("@ ", false); + if (q) dd.write ('\'', false); + dd.write (name, false); + if (q) dd.write ('\'', false); + dd.write (' ', false); + dd.write (file); + }; + + // The output mapping is provided in the same way as input. + // + if (ut == unit_type::module_iface || + ut == unit_type::module_header) + write (mi.name, t.path (), ut == unit_type::module_header); + + if (size_t start = md.modules.start) + { + // Note that we map both direct and indirect imports to override + // any module paths that might be stored in the BMIs (or + // resolved relative to "repository path", whatever that is). + // + const auto& pts (t.prerequisite_targets[a]); + for (size_t i (start); i != pts.size (); ++i) + { + if (const target* m = pts[i]) + { + // Save a variable lookup by getting the module name from + // the import list (see search_modules()). + // + // Note: all real modules (not header units). 
+ // + write (is[i - start].name, m->as<file> ().path (), false); + } + } + } + } + break; + } + default: + break; + } + + // Set the cc.module_name rule-specific variable if this is an interface + // unit. Note that it may seem like a good idea to set it on the bmi{} + // group to avoid duplication. We, however, cannot do it MT-safely since + // we don't match the group. + // + // @@ MODHDR TODO: do we need this for header units? Currently we don't + // see header units here. + // + if (ut == unit_type::module_iface /*|| ut == unit_type::module_header*/) + { + if (value& v = t.state[a].assign (c_module_name)) + assert (cast<string> (v) == mi.name); + else + v = move (mi.name); // Note: move. + } + } + + inline bool + std_module (const string& m) + { + size_t n (m.size ()); + return (n >= 3 && + m[0] == 's' && m[1] == 't' && m[2] == 'd' && + (n == 3 || m[3] == '.')); + }; + + // Resolve imported modules to bmi*{} targets. + // + module_positions compile_rule:: + search_modules (action a, + const scope& bs, + file& t, + linfo li, + const target_type& btt, + const file& src, + module_imports& imports, + sha256& cs) const + { + tracer trace (x, "compile_rule::search_modules"); + + // NOTE: currently we don't see header unit imports (they are + // handled by extract_headers() and are not in imports). + + // So we have a list of imports and a list of "potential" module + // prerequisites. They are potential in the sense that they may or may + // not be required by this translation unit. In other words, they are + // the pool where we can resolve actual imports. + // + // Because we may not need all of these prerequisites, we cannot just go + // ahead and match all of them (and they can even have cycles; see rule + // synthesis). This poses a bit of a problem: the only way to discover + // the module's actual name (see cc.module_name) is by matching it. + // + // One way to solve this would be to make the user specify the module + // name for each mxx{} explicitly. 
This will be a major pain, however. + // Another would be to require encoding of the module name in the + // interface unit file name. For example, hello.core -> hello-core.mxx. + // This is better but still too restrictive: some will want to call it + // hello_core.mxx or HelloCore.mxx (because that's their file naming + // convention) or place it in a subdirectory, say, hello/core.mxx. + // + // In the above examples one common theme about all the file names is + // that they contain, in one form or another, the "tail" of the module + // name ('core'). So what we are going to do is require that the + // interface file names contain enough of the module name tail to + // unambiguously resolve all the module imports. On our side we are + // going to implement a "fuzzy" module name to file name match. This + // should be reliable enough since we will always verify our guesses + // once we match the target and extract the actual module name. Plus, + // the user will always have the option of resolving any impasses by + // specifying the module name explicitly. + // + // So, the fuzzy match: the idea is that each match gets a score, the + // number of characters in the module name that got matched. A match + // with the highest score is used. And we use the (length + 1) for a + // match against an actual module name. + // + // Actually, the scoring system is a bit more elaborate than that. + // Consider module name core.window and two files, window.mxx and + // abstract-window.mxx: which one is likely to define this module? + // Clearly the first, but in the above-described scheme they will get + // the same score. 
More generally, consider these "obvious" (to the + // human) situations: + // + // window.mxx vs abstract-window.mxx + // details/window.mxx vs abstract-window.mxx + // gtk-window.mxx vs gtk-abstract-window.mxx + // + // To handle such cases we are going to combine the above primary score + // with the following secondary scores (in that order): + // + // a) Strength of separation between matched and unmatched parts: + // + // '\0' > directory separator > other separator > unseparated + // + // Here '\0' signifies nothing to separate (unmatched part is empty). + // + // b) Shortness of the unmatched part. + // + // For std.* modules we only accept non-fuzzy matches (think std.core vs + // some core.mxx). And if such a module is unresolved, then we assume it + // is pre-built and will be found by some other means (e.g., VC's + // IFCPATH). + // + auto match_max = [] (const string& m) -> size_t + { + // The primary and sub-scores are packed in the following decimal + // representation: + // + // PPPPABBBB + // + // We use decimal instead of binary packing to make it easier to + // separate fields in the trace messages, during debugging, etc. + // + return m.size () * 100000 + 99999; // Maximum match score. + }; + + auto match = [] (const string& f, const string& m) -> size_t + { + auto file_sep = [] (char c) -> char + { + // Return the character (translating directory separator to '/') if + // it is a separator and '\0' otherwise (so can be used as bool). + // + return (c == '_' || c == '-' || c == '.' ? c : + path::traits_type::is_separator (c) ? '/' : '\0'); + }; + + auto case_sep = [] (char c1, char c2) + { + return (alpha (c1) && + alpha (c2) && + (ucase (c1) == c1) != (ucase (c2) == c2)); + }; + + size_t fn (f.size ()), fi (fn); + size_t mn (m.size ()), mi (mn); + + // True if the previous character was counted as a real (that is, + // non-case changing) separator. + // + bool fsep (false); + bool msep (false); + + // Scan backwards for as long as we match.
Keep track of the previous + // character for case change detection. + // + for (char fc, mc, fp ('\0'), mp ('\0'); + fi != 0 && mi != 0; + fp = fc, mp = mc, --fi, --mi) + { + fc = f[fi - 1]; + mc = m[mi - 1]; + + if (casecmp (fc, mc) == 0) + { + fsep = msep = false; + continue; + } + + // We consider all separators equal and character case change being + // a separator. Some examples of the latter: + // + // foo.bar + // fooBAR + // FOObar + // + bool fs (file_sep (fc)); + bool ms (mc == '_' || mc == '.'); + + if (fs && ms) + { + fsep = msep = true; + continue; + } + + // Only if one is a real separator do we consider case change. + // + if (fs || ms) + { + bool fa (false), ma (false); + if ((fs || (fa = case_sep (fp, fc))) && + (ms || (ma = case_sep (mp, mc)))) + { + // Stay on this character if imaginary punctuation (note: cannot + // be both true). + // + if (fa) {++fi; msep = true;} + if (ma) {++mi; fsep = true;} + + continue; + } + } + + break; // No match. + } + + // "Uncount" real separators. + // + if (fsep) fi++; + if (msep) mi++; + + // Use the number of characters matched in the module name and not + // in the file (this may not be the same because of the imaginary + // separators). + // + size_t ps (mn - mi); + + // The strength of separation sub-score. + // + // Check for case change between the last character that matched and + // the first character that did not. + // + size_t as (0); + if (fi == 0) as = 9; + else if (char c = file_sep (f[fi - 1])) as = c == '/' ? 8 : 7; + else if (fi != fn && case_sep (f[fi], f[fi - 1])) as = 7; + + // The length of the unmatched part sub-score. + // + size_t bs (9999 - fi); + + return ps * 100000 + as * 10000 + bs; + }; + + auto& pts (t.prerequisite_targets[a]); + size_t start (pts.size ()); // Index of the first to be added. + + // We have two parallel vectors: module names/scores in imports and + // targets in prerequisite_targets (offset with start). Pre-allocate + // NULL entries in the latter.
+ // + size_t n (imports.size ()); + pts.resize (start + n, nullptr); + + // Oh, yes, there is one "minor" complication. It's the last one, I + // promise. It has to do with module re-exporting (export import M;). + // In this case (currently) all implementations simply treat it as a + // shallow (from the BMI's point of view) reference to the module (or an + // implicit import, if you will). Do you see where it's going? Nowhere + // good, that's right. This shallow reference means that the compiler + // should be able to find BMIs for all the re-exported modules, + // recursively. The good news is we are actually in a pretty good shape to + // handle this: after match all our prerequisite BMIs will have their + // prerequisite BMIs known, recursively. The only bit that is missing is + // the re-export flag of some sorts. As well as deciding where to handle + // it: here or in append_modules(). After some meditation it became + // clear handling it here will be simpler: we need to weed out + // duplicates for which we can re-use the imports vector. And we may + // also need to save this "flattened" list of modules in depdb. + // + // Ok, so, here is the plan: + // + // 1. There is no good place in prerequisite_targets to store the + // exported flag (no, using the marking facility across match/execute + // is a bad idea). So what we are going to do is put re-exported + // bmi{}s at the back and store (in the target's data pad) the start + // position. One bad aspect about this part is that we assume those + // bmi{}s have been matched by the same rule. But let's not kid + // ourselves, there will be no other rule that matches bmi{}s. + // + // 2. Once we have matched all the bmi{}s we are importing directly + // (with all the re-exported by us at the back), we will go over them + // and copy all of their re-exported bmi{}s (using the position we + // saved on step #1).
The end result will be a recursively-explored + // list of imported bmi{}s that append_modules() can simply convert + // to the list of options. + // + // One issue with this approach is that these copied targets will be + // executed which means we need to adjust their dependent counts + // (which is normally done by match). While this seems conceptually + // correct (especially if you view re-exports as implicit imports), + // it's just extra overhead (we know they will be updated). So what + // we are going to do is save another position, that of the start of + // these copied-over targets, and will only execute up to this point. + // + // And after implementing this came the reality check: all the current + // implementations require access to all the imported BMIs, not only + // re-exported. Some (like Clang) store references to imported BMI files + // so we actually don't need to pass any extra options (unless things + // get moved) but they still need access to the BMIs (and things will + // most likely have to be done differently for distributed compilation). + // + // So the revised plan: on the off chance that some implementation will + // do it differently we will continue maintaining the imported/re-exported + // split and how much to copy-over can be made compiler specific. + // + // As a first sub-step of step #1, move all the re-exported imports to + // the end of the vector. This will make sure they end up at the end + // of prerequisite_targets. Note: the special first import, if any, + // should be unaffected. + // + sort (imports.begin (), imports.end (), + [] (const module_import& x, const module_import& y) + { + return !x.exported && y.exported; + }); + + // Go over the prerequisites once. + // + // For (direct) library prerequisites, check their prerequisite bmi{}s + // (which should be searched and matched with module names discovered; + // see the library meta-information protocol for details).
+ // + // For our own bmi{} prerequisites, check if each (better) matches + // any of the imports. + + // For fuzzy check if a file name (better) resolves any of our imports + // and if so make it the new selection. For exact the name is the actual + // module name and it can only resolve one import (there are no + // duplicates). + // + // Set done to true if all the imports have now been resolved to actual + // module names (which means we can stop searching). This will happen + // if all the modules come from libraries. Which will be fairly common + // (think of all the tests) so it's worth optimizing for. + // + bool done (false); + + auto check_fuzzy = [&trace, &imports, &pts, &match, &match_max, start, n] + (const target* pt, const string& name) + { + for (size_t i (0); i != n; ++i) + { + module_import& m (imports[i]); + + if (std_module (m.name)) // No fuzzy std.* matches. + continue; + + if (m.score > match_max (m.name)) // Resolved to module name. + continue; + + size_t s (match (name, m.name)); + + l5 ([&]{trace << name << " ~ " << m.name << ": " << s;}); + + if (s > m.score) + { + pts[start + i] = pt; + m.score = s; + } + } + }; + + // If resolved, return the "slot" in pts (we don't want to create a + // side build until we know we match; see below for details). + // + auto check_exact = [&trace, &imports, &pts, &match_max, start, n, &done] + (const string& name) -> const target** + { + const target** r (nullptr); + done = true; + + for (size_t i (0); i != n; ++i) + { + module_import& m (imports[i]); + + size_t ms (match_max (m.name)); + + if (m.score > ms) // Resolved to module name (no effect on done). + continue; + + if (r == nullptr) + { + size_t s (name == m.name ? ms + 1 : 0); + + l5 ([&]{trace << name << " ~ " << m.name << ": " << s;}); + + if (s > m.score) + { + r = &pts[start + i].target; + m.score = s; + continue; // Scan the rest to detect if all done.
+ } + } + + done = false; + } + + return r; + }; + + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + const target* pt (p.load ()); // Should be cached for libraries. + + if (pt != nullptr) + { + const target* lt (nullptr); + + if (const libx* l = pt->is_a<libx> ()) + lt = link_member (*l, a, li); + else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()) + lt = pt; + + // If this is a library, check its bmi{}s and mxx{}s. + // + if (lt != nullptr) + { + for (const target* bt: lt->prerequisite_targets[a]) + { + if (bt == nullptr) + continue; + + // Note that here we (try) to use whatever flavor of bmi*{} is + // available. + // + // @@ MOD: BMI compatibility check. + // @@ UTL: we need to (recursively) see through libu*{} (and + // also in pkgconfig_save()). + // + if (bt->is_a<bmix> ()) + { + const string& n ( + cast<string> (bt->state[a].vars[c_module_name])); + + if (const target** p = check_exact (n)) + *p = bt; + } + else if (bt->is_a (*x_mod)) + { + // This is an installed library with a list of module sources + // (the source are specified as prerequisites but the fallback + // file rule puts them into prerequisite_targets for us). + // + // The module names should be specified but if not assume + // something else is going on and ignore. + // + const string* n (cast_null<string> (bt->vars[c_module_name])); + + if (n == nullptr) + continue; + + if (const target** p = check_exact (*n)) + *p = &make_module_sidebuild (a, bs, *lt, *bt, *n); + } + else + continue; + + if (done) + break; + } + + if (done) + break; + + continue; + } + + // Fall through. + } + + // While it would have been even better not to search for a target, we + // need to get hold of the corresponding mxx{} (unlikely but possible + // for bmi{} to have a different name). 
+ // + // While we want to use group_prerequisite_members() below, we cannot + // call resolve_group() since we will be doing it "speculatively" for + // modules that we may use but also for modules that may use us. This + // quickly leads to deadlocks. So instead we are going to perform an + // ad hoc group resolution. + // + const target* pg; + if (p.is_a<bmi> ()) + { + pg = pt != nullptr ? pt : &p.search (t); + pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}. + } + else if (p.is_a (btt)) + { + pg = &search (t, bmi::static_type, p.key ()); + if (pt == nullptr) pt = &p.search (t); + } + else + continue; + + // Find the mxx{} prerequisite and extract its "file name" for the + // fuzzy match unless the user specified the module name explicitly. + // + for (prerequisite_member p: + prerequisite_members (a, t, group_prerequisites (*pt, pg))) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (p.is_a (*x_mod)) + { + // Check for an explicit module name. Only look for an existing + // target (which means the name can only be specified on the + // target itself, not target type/pattern-spec). + // + const target* t (p.search_existing ()); + const string* n (t != nullptr + ? cast_null<string> (t->vars[c_module_name]) + : nullptr); + if (n != nullptr) + { + if (const target** p = check_exact (*n)) + *p = pt; + } + else + { + // Fuzzy match. + // + string f; + + // Add the directory part if it is relative. The idea is to + // include it into the module match, say hello.core vs + // hello/mxx{core}. + // + // @@ MOD: Why not for absolute? Good question. What if it + // contains special components, say, ../mxx{core}? + // + const dir_path& d (p.dir ()); + + if (!d.empty () && d.relative ()) + f = d.representation (); // Includes trailing slash. + + f += p.name (); + check_fuzzy (pt, f); + } + break; + } + } + + if (done) + break; + } + + // Diagnose unresolved modules. 
+ // + if (!done) + { + for (size_t i (0); i != n; ++i) + { + if (pts[start + i] == nullptr && !std_module (imports[i].name)) + { + // It would have been nice to print the location of the import + // declaration. And we could save it during parsing at the expense + // of a few paths (that can be pooled). The question is what to do + // when we re-create this information from depdb? We could have + // saved the location information there but the relative paths + // (e.g., from the #line directives) could end up being wrong if + // we re-run from a different working directory. + // + // It seems the only workable approach is to extract full location + // info during parse, not save it in depdb, when re-creating, + // fall back to just src path without any line/column information. + // This will probably cover the majority of cases (most of the time + // it will be a misspelled module name, not a removal of module + // from buildfile). + // + // But at this stage this doesn't seem worth the trouble. + // + fail (relative (src)) << "unable to resolve module " + << imports[i].name; + } + } + } + + // Match in parallel and wait for completion. + // + match_members (a, t, pts, start); + + // Post-process the list of our (direct) imports. While at it, calculate + // the checksum of all (direct and indirect) bmi{} paths. + // + size_t exported (n); + size_t copied (pts.size ()); + + for (size_t i (0); i != n; ++i) + { + const module_import& m (imports[i]); + + // Determine the position of the first re-exported bmi{}. + // + if (m.exported && exported == n) + exported = i; + + const target* bt (pts[start + i]); + + if (bt == nullptr) + continue; // Unresolved (std.*). + + // Verify our guesses against extracted module names but don't waste + // time if it was a match against the actual module name.
+ // + const string& in (m.name); + + if (m.score <= match_max (in)) + { + const string& mn (cast<string> (bt->state[a].vars[c_module_name])); + + if (in != mn) + { + // Note: matched, so the group should be resolved. + // + for (prerequisite_member p: group_prerequisite_members (a, *bt)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (p.is_a (*x_mod)) // Got to be there. + { + fail (relative (src)) + << "failed to correctly guess module name from " << p << + info << "guessed: " << in << + info << "actual: " << mn << + info << "consider adjusting module interface file names or" << + info << "consider specifying module name with " << x + << ".module_name"; + } + } + } + } + + // Hash (we know it's a file). + // + cs.append (static_cast<const file&> (*bt).path ().string ()); + + // Copy over bmi{}s from our prerequisites weeding out duplicates. + // + if (size_t j = bt->data<match_data> ().modules.start) + { + // Hard to say whether we should reserve or not. We will probably + // get quite a bit of duplications. + // + auto& bpts (bt->prerequisite_targets[a]); + for (size_t m (bpts.size ()); j != m; ++j) + { + const target* et (bpts[j]); + + if (et == nullptr) + continue; // Unresolved (std.*). + + const string& mn (cast<string> (et->state[a].vars[c_module_name])); + + if (find_if (imports.begin (), imports.end (), + [&mn] (const module_import& i) + { + return i.name == mn; + }) == imports.end ()) + { + pts.push_back (et); + cs.append (static_cast<const file&> (*et).path ().string ()); + + // Add to the list of imports for further duplicate suppression. + // We could have stored reference to the name (e.g., in score) + // but it's probably not worth it if we have a small string + // optimization. + // + imports.push_back ( + module_import {unit_type::module_iface, mn, true, 0}); + } + } + } + } + + if (copied == pts.size ()) // No copied tail. + copied = 0; + + if (exported == n) // No (own) re-exported imports. 
+ exported = copied; + else + exported += start; // Rebase. + + return module_positions {start, exported, copied}; + } + + // Find or create a modules sidebuild subproject returning its root + // directory. + // + dir_path compile_rule:: + find_modules_sidebuild (const scope& rs) const + { + // First figure out where we are going to build. We want to avoid + // multiple sidebuilds so the outermost scope that has loaded the + // cc.config module and that is within our amalgamation seems like a + // good place. + // + const scope* as (&rs); + { + const scope* ws (as->weak_scope ()); + if (as != ws) + { + const scope* s (as); + do + { + s = s->parent_scope ()->root_scope (); + + // Use cc.core.vars as a proxy for {c,cxx}.config (a bit smelly). + // + // This is also the module that registers the scope operation + // callback that cleans up the subproject. + // + if (cast_false<bool> ((*s)["cc.core.vars.loaded"])) + as = s; + + } while (s != ws); + } + } + + // We build modules in a subproject (since there might be no full + // language support loaded in the amalgamation, only *.config). So the + // first step is to check if the project has already been created and/or + // loaded and if not, then to go ahead and do so. + // + dir_path pd (as->out_path () / + as->root_extra->build_dir / + modules_sidebuild_dir /= + x); + + const scope* ps (&rs.ctx.scopes.find (pd)); + + if (ps->out_path () != pd) + { + // Switch the phase to load then create and load the subproject. + // + phase_switch phs (rs.ctx, run_phase::load); + + // Re-test again now that we are in exclusive phase (another thread + // could have already created and loaded the subproject). + // + ps = &rs.ctx.scopes.find (pd); + + if (ps->out_path () != pd) + { + // The project might already be created in which case we just need + // to load it. + // + optional<bool> altn (false); // Standard naming scheme. + if (!is_src_root (pd, altn)) + { + // Copy our standard and force modules.
+ // + string extra; + + if (const string* std = cast_null<string> (rs[x_std])) + extra += string (x) + ".std = " + *std + '\n'; + + extra += string (x) + ".features.modules = true"; + + config::create_project ( + pd, + as->out_path ().relative (pd), /* amalgamation */ + {}, /* boot_modules */ + extra, /* root_pre */ + {string (x) + '.'}, /* root_modules */ + "", /* root_post */ + false, /* config */ + false, /* buildfile */ + "the cc module", + 2); /* verbosity */ + } + + ps = &load_project (as->rw () /* lock */, + pd, + pd, + false /* forwarded */); + } + } + + // Some sanity checks. + // +#ifndef NDEBUG + assert (ps->root ()); + const module* m (ps->lookup_module<module> (x)); + assert (m != nullptr && m->modules); +#endif + + return pd; + } + + // Synthesize a dependency for building a module binary interface on + // the side. + // + const file& compile_rule:: + make_module_sidebuild (action a, + const scope& bs, + const target& lt, + const target& mt, + const string& mn) const + { + tracer trace (x, "compile_rule::make_module_sidebuild"); + + // Note: see also make_header_sidebuild() below. + + dir_path pd (find_modules_sidebuild (*bs.root_scope ())); + + // We need to come up with a file/target name that will be unique enough + // not to conflict with other modules. If we assume that within an + // amalgamation there is only one "version" of each module, then the + // module name itself seems like a good fit. We just replace '.' with + // '-'. + // + string mf; + transform (mn.begin (), mn.end (), + back_inserter (mf), + [] (char c) {return c == '.' ? '-' : c;}); + + // It seems natural to build a BMI type that corresponds to the library + // type. After all, this is where the object file part of the BMI is + // going to come from (though things will probably be different for + // module-only libraries). + // + const target_type& tt (compile_types (link_type (lt).type).bmi); + + // Store the BMI target in the subproject root. 
If the target already + // exists then we assume all this is already done (otherwise why would + // someone have created such a target). + // + if (const file* bt = bs.ctx.targets.find<file> ( + tt, + pd, + dir_path (), // Always in the out tree. + mf, + nullopt, // Use default extension. + trace)) + return *bt; + + prerequisites ps; + ps.push_back (prerequisite (mt)); + + // We've added the mxx{} but it may import other modules from this + // library. Or from (direct) dependencies of this library. We add them + // all as prerequisites so that the standard module search logic can + // sort things out. This is pretty similar to what we do in link when + // synthesizing dependencies for bmi{}'s. + // + // Note: lt is matched and so the group is resolved. + // + ps.push_back (prerequisite (lt)); + for (prerequisite_member p: group_prerequisite_members (a, lt)) + { + if (include (a, lt, p) != include_type::normal) // Excluded/ad hoc. + continue; + + // @@ TODO: will probably need revision if using sidebuild for + // non-installed libraries (e.g., direct BMI dependencies + // will probably have to be translated to mxx{} or some such). + // + if (p.is_a<libx> () || + p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ()) + { + ps.push_back (p.as_prerequisite ()); + } + } + + auto p (bs.ctx.targets.insert_locked ( + tt, + move (pd), + dir_path (), // Always in the out tree. + move (mf), + nullopt, // Use default extension. + true, // Implied. + trace)); + file& bt (static_cast<file&> (p.first)); + + // Note that this is racy and someone might have created this target + // while we were preparing the prerequisite list. + // + if (p.second.owns_lock ()) + bt.prerequisites (move (ps)); + + return bt; + } + + // Synthesize a dependency for building a header unit binary interface on + // the side. 
+ // + const file& compile_rule:: + make_header_sidebuild (action, + const scope& bs, + linfo li, + const file& ht) const + { + tracer trace (x, "compile_rule::make_header_sidebuild"); + + // Note: similar to make_module_sidebuild() above. + + dir_path pd (find_modules_sidebuild (*bs.root_scope ())); + + // What should we use as a file/target name? On one hand we want it + // unique enough so that <stdio.h> and <custom/stdio.h> don't end up + // with the same BMI. On the other, we need the same headers resolving + // to the same target, regardless of how they were imported. So it feels + // like the name should be the absolute and normalized (actualized on + // case-insensitive filesystems) header path. We could try to come up + // with something by sanitizing certain characters, etc. But then the + // names will be very long and ugly, they will run into path length + // limits, etc. So instead we will use the file name plus an abbreviated + // hash of the whole path, something like stdio-211321fe6de7. + // + string mf; + { + // @@ MODHDR: Can we assume the path is actualized since the header + // target came from enter_header()? No, not anymore: it + // is now normally just normalized. + // + const path& hp (ht.path ()); + mf = hp.leaf ().make_base ().string (); + mf += '-'; + mf += sha256 (hp.string ()).abbreviated_string (12); + } + + const target_type& tt (compile_types (li.type).hbmi); + + if (const file* bt = bs.ctx.targets.find<file> ( + tt, + pd, + dir_path (), // Always in the out tree. + mf, + nullopt, // Use default extension. + trace)) + return *bt; + + prerequisites ps; + ps.push_back (prerequisite (ht)); + + auto p (bs.ctx.targets.insert_locked ( + tt, + move (pd), + dir_path (), // Always in the out tree. + move (mf), + nullopt, // Use default extension. + true, // Implied. + trace)); + file& bt (static_cast<file&> (p.first)); + + // Note that this is racy and someone might have created this target + // while we were preparing the prerequisite list. 
+ // + if (p.second.owns_lock ()) + bt.prerequisites (move (ps)); + + return bt; + } + + // Filter cl.exe noise (msvc.cxx). + // + void + msvc_filter_cl (ifdstream&, const path& src); + + // Append header unit-related options. + // + // Note that this function is called for both full preprocessing and + // compilation proper and in the latter case it is followed by a call + // to append_modules(). + // + void compile_rule:: + append_headers (environment&, + cstrings& args, + small_vector<string, 2>& stor, + action, + const file&, + const match_data& md, + const path& dd) const + { + switch (ctype) + { + case compiler_type::gcc: + { + if (md.headers != 0) + { + string s (relative (dd).string ()); + s.insert (0, "-fmodule-mapper="); + s += "?@"; // Cookie (aka line prefix). + stor.push_back (move (s)); + } + + break; + } + case compiler_type::clang: + case compiler_type::msvc: + case compiler_type::icc: + break; + } + + // Shallow-copy storage to args. Why not do it as we go along pushing + // into storage? Because of potential reallocations. + // + for (const string& a: stor) + args.push_back (a.c_str ()); + } + + // Append module-related options. + // + // Note that this function is only called for the compilation proper and + // after a call to append_headers() (so watch out for duplicate options). + // + void compile_rule:: + append_modules (environment& env, + cstrings& args, + small_vector<string, 2>& stor, + action a, + const file& t, + const match_data& md, + const path& dd) const + { + unit_type ut (md.type); + const module_positions& ms (md.modules); + + dir_path stdifc; // See the VC case below. + + switch (ctype) + { + case compiler_type::gcc: + { + // Use the module map stored in depdb. + // + // Note that it is also used to specify the output BMI file. + // + if (md.headers == 0 && // Done in append_headers()? 
+ (ms.start != 0 || + ut == unit_type::module_iface || + ut == unit_type::module_header)) + { + string s (relative (dd).string ()); + s.insert (0, "-fmodule-mapper="); + s += "?@"; // Cookie (aka line prefix). + stor.push_back (move (s)); + } + + break; + } + case compiler_type::clang: + { + if (ms.start == 0) + return; + + // Clang embeds module file references so we only need to specify + // our direct imports. + // + // If/when we get the ability to specify the mapping in a file, we + // will pass the whole list. + // +#if 0 + // In Clang the module implementation's unit .pcm is special and + // must be "loaded". + // + if (ut == unit_type::module_impl) + { + const file& f (pts[ms.start]->as<file> ()); + string s (relative (f.path ()).string ()); + s.insert (0, "-fmodule-file="); + stor.push_back (move (s)); + } + + // Use the module map stored in depdb for others. + // + string s (relative (dd).string ()); + s.insert (0, "-fmodule-file-map=@="); + stor.push_back (move (s)); +#else + auto& pts (t.prerequisite_targets[a]); + for (size_t i (ms.start), + n (ms.copied != 0 ? ms.copied : pts.size ()); + i != n; + ++i) + { + const target* pt (pts[i]); + + if (pt == nullptr) + continue; + + // Here we use whatever bmi type has been added. And we know all + // of these are bmi's. + // + const file& f (pt->as<file> ()); + string s (relative (f.path ()).string ()); + + // In Clang the module implementation's unit .pcm is special and + // must be "loaded". 
+ // + if (ut == unit_type::module_impl && i == ms.start) + s.insert (0, "-fmodule-file="); + else + { + s.insert (0, 1, '='); + s.insert (0, cast<string> (f.state[a].vars[c_module_name])); + s.insert (0, "-fmodule-file="); + } + + stor.push_back (move (s)); + } +#endif + break; + } + case compiler_type::msvc: + { + if (ms.start == 0) + return; + + auto& pts (t.prerequisite_targets[a]); + for (size_t i (ms.start), n (pts.size ()); + i != n; + ++i) + { + const target* pt (pts[i]); + + if (pt == nullptr) + continue; + + // Here we use whatever bmi type has been added. And we know all + // of these are bmi's. + // + const file& f (pt->as<file> ()); + + // In VC std.* modules can only come from a single directory + // specified with the IFCPATH environment variable or the + // /module:stdIfcDir option. + // + if (std_module (cast<string> (f.state[a].vars[c_module_name]))) + { + dir_path d (f.path ().directory ()); + + if (stdifc.empty ()) + { + // Go one directory up since /module:stdIfcDir will look in + // either Release or Debug subdirectories. Keeping the result + // absolute feels right. + // + stor.push_back ("/module:stdIfcDir"); + stor.push_back (d.directory ().string ()); + stdifc = move (d); + } + else if (d != stdifc) // Absolute and normalized. + fail << "multiple std.* modules in different directories"; + } + else + { + stor.push_back ("/module:reference"); + stor.push_back (relative (f.path ()).string ()); + } + } + break; + } + case compiler_type::icc: + break; + } + + // Shallow-copy storage to args. Why not do it as we go along pushing + // into storage? Because of potential reallocations. + // + for (const string& a: stor) + args.push_back (a.c_str ()); + + // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it + // if we are using our own std modules. 
+ // + if (!stdifc.empty ()) + env.push_back ("IFCPATH"); + } + + target_state compile_rule:: + perform_update (action a, const target& xt) const + { + const file& t (xt.as<file> ()); + const path& tp (t.path ()); + + match_data md (move (t.data<match_data> ())); + unit_type ut (md.type); + + context& ctx (t.ctx); + + // While all our prerequisites are already up-to-date, we still have to + // execute them to keep the dependency counts straight. Actually, no, we + // may also have to update the modules. + // + // Note that this also takes care of forcing update on any ad hoc + // prerequisite change. + // + auto pr ( + execute_prerequisites<file> ( + md.src.type (), + a, t, + md.mt, + [s = md.modules.start] (const target&, size_t i) + { + return s != 0 && i >= s; // Only compare timestamps for modules. + }, + md.modules.copied)); // See search_modules() for details. + + const file& s (pr.second); + const path* sp (&s.path ()); + + if (pr.first) + { + if (md.touch) + { + touch (ctx, tp, false, 2); + t.mtime (system_clock::now ()); + ctx.skip_count.fetch_add (1, memory_order_relaxed); + } + // Note: else mtime should be cached. + + return *pr.first; + } + + // Make sure depdb is no older than any of our prerequisites (see md.mt + // logic description above for details). Also save the sequence start + // time if doing mtime checks (see the depdb::check_mtime() call below). + // + timestamp start (depdb::mtime_check () + ? system_clock::now () + : timestamp_unknown); + + touch (ctx, md.dd, false, verb_never); + + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + + otype ot (compile_type (t, ut)); + linfo li (link_info (bs, ot)); + compile_target_types tts (compile_types (ot)); + + environment env; + cstrings args {cpath.recall_string ()}; + + // If we are building a module interface, then the target is bmi*{} and + // its ad hoc member is obj*{}. For header units there is no obj*{}. 
+ // + path relm; + path relo (ut == unit_type::module_header + ? path () + : relative (ut == unit_type::module_iface + ? find_adhoc_member<file> (t, tts.obj)->path () + : tp)); + + // Build the command line. + // + if (md.pp != preprocessed::all) + { + append_options (args, t, c_poptions); + append_options (args, t, x_poptions); + + // Add *.export.poptions from prerequisite libraries. + // + append_lib_options (bs, args, a, t, li); + + // Extra system header dirs (last). + // + assert (sys_inc_dirs_extra <= sys_inc_dirs.size ()); + append_option_values ( + args, "-I", + sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (), + [] (const dir_path& d) {return d.string ().c_str ();}); + + if (md.symexport) + append_symexport_options (args, t); + } + + append_options (args, t, c_coptions); + append_options (args, t, x_coptions); + append_options (args, tstd); + + string out, out1; // Output options storage. + small_vector<string, 2> header_args; // Header unit options storage. + small_vector<string, 2> module_args; // Module options storage. + + size_t out_i (0); // Index of the -o option. + size_t lang_n (0); // Number of lang options. + + if (cclass == compiler_class::msvc) + { + // The /F*: option variants with separate names only became available + // in VS2013/12.0. Why do we bother? Because the command line suddenly + // becomes readable. + // + uint64_t ver (cast<uint64_t> (rs[x_version_major])); + + args.push_back ("/nologo"); + + // While we want to keep the low-level build as "pure" as possible, + // the two misguided defaults, exceptions and runtime, just have to be + // fixed. Otherwise the default build is pretty much unusable. But we + // also make sure that the user can easily disable our defaults: if we + // see any relevant options explicitly specified, we take our hands + // off. + // + // For C looks like no /EH* (exceptions supported but no C++ objects + // destroyed) is a reasonable default. 
+ // + if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + args.push_back ("/EHsc"); + + // The runtime is a bit more interesting. At first it may seem like a + // good idea to be a bit clever and use the static runtime if we are + // building obja{}. And for obje{} we could decide which runtime to + // use based on the library link order: if it is static-only, then we + // could assume the static runtime. But it is indeed too clever: when + // building liba{} we have no idea who is going to use it. It could be + // an exe{} that links both static and shared libraries (and is + // therefore built with the shared runtime). And to safely use the + // static runtime, everything must be built with /MT and there should + // be no DLLs in the picture. So we are going to play it safe and + // always default to the shared runtime. + // + // In a similar vein, it would seem reasonable to use the debug runtime + // if we are compiling with debug. But, again, there will be fireworks + // if we have some projects built with debug and some without and then + // we try to link them together (which is not an unreasonable thing to + // do). So by default we will always use the release runtime. + // + if (!find_option_prefixes ({"/MD", "/MT"}, args)) + args.push_back ("/MD"); + + msvc_sanitize_cl (args); + + append_headers (env, args, header_args, a, t, md, md.dd); + append_modules (env, args, module_args, a, t, md, md.dd); + + // The presence of /Zi or /ZI causes the compiler to write debug info + // to the .pdb file. By default it is a shared file called vcNN.pdb + // (where NN is the VC version) created (wait for it) in the current + // working directory (and not the directory of the .obj file). Also, + // because it is shared, there is a special Windows service that + // serializes access. We, of course, want none of that so we will + // create a .pdb per object file. 
+ // + // Note that this also changes the name of the .idb file (used for + // minimal rebuild and incremental compilation): cl.exe takes the /Fd + // value and replaces the .pdb extension with .idb. + // + // Note also that what we are doing here appears to be incompatible + // with PCH (/Y* options) and /Gm (minimal rebuild). + // + if (find_options ({"/Zi", "/ZI"}, args)) + { + if (ver >= 18) + args.push_back ("/Fd:"); + else + out1 = "/Fd"; + + out1 += relo.string (); + out1 += ".pdb"; + + args.push_back (out1.c_str ()); + } + + if (ver >= 18) + { + args.push_back ("/Fo:"); + args.push_back (relo.string ().c_str ()); + } + else + { + out = "/Fo" + relo.string (); + args.push_back (out.c_str ()); + } + + // @@ MODHDR MSVC + // + if (ut == unit_type::module_iface) + { + relm = relative (tp); + + args.push_back ("/module:interface"); + args.push_back ("/module:output"); + args.push_back (relm.string ().c_str ()); + } + + // Note: no way to indicate that the source is already preprocessed. + + args.push_back ("/c"); // Compile only. + append_lang_options (args, md); // Compile as. + args.push_back (sp->string ().c_str ()); // Note: relied on being last. + } + else + { + if (ot == otype::s) + { + // On Darwin, Win32 -fPIC is the default. + // + if (tclass == "linux" || tclass == "bsd") + args.push_back ("-fPIC"); + } + + append_headers (env, args, header_args, a, t, md, md.dd); + append_modules (env, args, module_args, a, t, md, md.dd); + + // Note: the order of the following options is relied upon below. + // + out_i = args.size (); // Index of the -o option. + + if (ut == unit_type::module_iface || ut == unit_type::module_header) + { + switch (ctype) + { + case compiler_type::gcc: + { + // Output module file is specified in the mapping file, the + // same as input. + // + if (ut != unit_type::module_header) // No object file. 
+ { + args.push_back ("-o"); + args.push_back (relo.string ().c_str ()); + args.push_back ("-c"); + } + break; + } + case compiler_type::clang: + { + relm = relative (tp); + + args.push_back ("-o"); + args.push_back (relm.string ().c_str ()); + args.push_back ("--precompile"); + + // Without this option Clang's .pcm will reference source files. + // In our case this file may be transient (.ii). Plus, it won't + // play nice with distributed compilation. + // + args.push_back ("-Xclang"); + args.push_back ("-fmodules-embed-all-files"); + + break; + } + case compiler_type::msvc: + case compiler_type::icc: + assert (false); + } + } + else + { + args.push_back ("-o"); + args.push_back (relo.string ().c_str ()); + args.push_back ("-c"); + } + + lang_n = append_lang_options (args, md); + + if (md.pp == preprocessed::all) + { + // Note that the mode we select must still handle comments and line + // continuations. So some more compiler-specific voodoo. + // + switch (ctype) + { + case compiler_type::gcc: + { + // -fdirectives-only is available since GCC 4.3.0. + // + if (cmaj > 4 || (cmaj == 4 && cmin >= 3)) + { + args.push_back ("-fpreprocessed"); + args.push_back ("-fdirectives-only"); + } + break; + } + case compiler_type::clang: + { + // Clang handles comments and line continuations in the + // preprocessed source (it does not have -fpreprocessed). + // + break; + } + case compiler_type::icc: + break; // Compile as normal source for now. + case compiler_type::msvc: + assert (false); + } + } + + args.push_back (sp->string ().c_str ()); + } + + args.push_back (nullptr); + + if (!env.empty ()) + env.push_back (nullptr); + + // With verbosity level 2 print the command line as if we are compiling + // the source file, not its preprocessed version (so that it's easy to + // copy and re-run, etc). Only at level 3 and above print the real deal. 
+ // + if (verb == 1) + text << x_name << ' ' << s; + else if (verb == 2) + print_process (args); + + // If we have the (partially) preprocessed output, switch to that. + // + bool psrc (!md.psrc.path.empty ()); + bool pact (md.psrc.active); + if (psrc) + { + args.pop_back (); // nullptr + args.pop_back (); // sp + + sp = &md.psrc.path; + + // This should match with how we setup preprocessing. + // + switch (ctype) + { + case compiler_type::gcc: + { + // The -fpreprocessed is implied by .i/.ii. But not when compiling + // a header unit (there is no .hi/.hii). + // + if (ut == unit_type::module_header) + args.push_back ("-fpreprocessed"); + else + // Pop -x since it takes precedence over the extension. + // + // @@ I wonder why bother and not just add -fpreprocessed? Are + // we trying to save an option or does something break? + // + for (; lang_n != 0; --lang_n) + args.pop_back (); + + args.push_back ("-fdirectives-only"); + break; + } + case compiler_type::clang: + { + // Note that without -x Clang will treat .i/.ii as fully + // preprocessed. + // + break; + } + case compiler_type::msvc: + { + // Nothing to do (/TP or /TC already there). + // + break; + } + case compiler_type::icc: + assert (false); + } + + args.push_back (sp->string ().c_str ()); + args.push_back (nullptr); + + // Let's keep the preprocessed file in case of an error but only at + // verbosity level 3 and up (when one actually sees it mentioned on + // the command line). We also have to re-arm on success (see below). + // + if (pact && verb >= 3) + md.psrc.active = false; + } + + if (verb >= 3) + print_process (args); + + // @@ DRYRUN: Currently we discard the (partially) preprocessed file on + // dry-run which is a waste. Even if we keep the file around (like we do + // for the error case; see above), we currently have no support for + // re-using the previously preprocessed output. 
However, everything + // points towards us needing this in the near future since with modules + // we may be out of date but not needing to re-preprocess the + // translation unit (i.e., one of the imported modules has its BMI + // changed). + // + if (!ctx.dry_run) + { + try + { + // VC cl.exe sends diagnostics to stdout. It also prints the file + // name being compiled as the first line. So for cl.exe we redirect + // stdout to a pipe, filter that noise out, and send the rest to + // stderr. + // + // For other compilers redirect stdout to stderr, in case any of + // them tries to pull off something similar. For sane compilers this + // should be harmless. + // + bool filter (ctype == compiler_type::msvc); + + process pr (cpath, + args.data (), + 0, (filter ? -1 : 2), 2, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + + if (filter) + { + try + { + ifdstream is ( + move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit); + + msvc_filter_cl (is, *sp); + + // If anything remains in the stream, send it all to stderr. + // Note that the eof check is important: if the stream is at + // eof, this and all subsequent writes to the diagnostics stream + // will fail (and you won't see a thing). + // + if (is.peek () != ifdstream::traits_type::eof ()) + diag_stream_lock () << is.rdbuf (); + + is.close (); + } + catch (const io_error&) {} // Assume exits with error. + } + + run_finish (args, pr); + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + } + + // Remove preprocessed file (see above). + // + if (pact && verb >= 3) + md.psrc.active = true; + + // Clang's module compilation requires two separate compiler + // invocations. + // + if (ctype == compiler_type::clang && ut == unit_type::module_iface) + { + // Adjust the command line. First discard everything after -o then + // build the new "tail". 
+ // + args.resize (out_i + 1); + args.push_back (relo.string ().c_str ()); // Produce .o. + args.push_back ("-c"); // By compiling .pcm. + args.push_back ("-Wno-unused-command-line-argument"); + args.push_back (relm.string ().c_str ()); + args.push_back (nullptr); + + if (verb >= 2) + print_process (args); + + if (!ctx.dry_run) + { + // Remove the target file if this fails. If we don't do that, we + // will end up with a broken build that is up-to-date. + // + auto_rmfile rm (relm); + + try + { + process pr (cpath, + args.data (), + 0, 2, 2, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + + run_finish (args, pr); + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + + rm.cancel (); + } + } + + timestamp now (system_clock::now ()); + + if (!ctx.dry_run) + depdb::check_mtime (start, md.dd, tp, now); + + // Should we go to the filesystem and get the new mtime? We know the + // file has been modified, so instead just use the current clock time. + // It has the advantage of having the subseconds precision. Plus, in + // case of dry-run, the file won't be modified. 
+ // + t.mtime (now); + return target_state::changed; + } + + target_state compile_rule:: + perform_clean (action a, const target& xt) const + { + const file& t (xt.as<file> ()); + + clean_extras extras; + + switch (ctype) + { + case compiler_type::gcc: extras = {".d", x_pext, ".t"}; break; + case compiler_type::clang: extras = {".d", x_pext}; break; + case compiler_type::msvc: extras = {".d", x_pext, ".idb", ".pdb"};break; + case compiler_type::icc: extras = {".d"}; break; + } + + return perform_clean_extra (a, t, extras); + } + } +} diff --git a/libbuild2/cc/compile-rule.hxx b/libbuild2/cc/compile-rule.hxx new file mode 100644 index 0000000..93972a2 --- /dev/null +++ b/libbuild2/cc/compile-rule.hxx @@ -0,0 +1,189 @@ +// file : libbuild2/cc/compile-rule.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_COMPILE_RULE_HXX +#define LIBBUILD2_CC_COMPILE_RULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/rule.hxx> +#include <libbuild2/filesystem.hxx> // auto_rmfile + +#include <libbuild2/cc/types.hxx> +#include <libbuild2/cc/common.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + class depdb; + + namespace cc + { + // The order is arranged so that their integral values indicate whether + // one is a "stronger" than another. + // + enum class preprocessed: uint8_t {none, includes, modules, all}; + + // Positions of the re-exported bmi{}s. See search_modules() for + // details. + // + struct module_positions + { + size_t start; // First imported bmi*{}, 0 if none. + size_t exported; // First re-exported bmi*{}, 0 if none. + size_t copied; // First copied-over bmi*{}, 0 if none. 
+ }; + + class LIBBUILD2_CC_SYMEXPORT compile_rule: public rule, virtual common + { + public: + compile_rule (data&&); + + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + target_state + perform_update (action, const target&) const; + + target_state + perform_clean (action, const target&) const; + + private: + struct match_data; + using environment = small_vector<const char*, 2>; + + void + append_lib_options (const scope&, + cstrings&, + action, + const target&, + linfo) const; + + void + hash_lib_options (const scope&, + sha256&, + action, + const target&, + linfo) const; + + // Mapping of include prefixes (e.g., foo in <foo/bar>) for auto- + // generated headers to directories where they will be generated. + // + // We are using a prefix map of directories (dir_path_map) instead of + // just a map in order to also cover sub-paths (e.g., <foo/more/bar> if + // we continue with the example). Specifically, we need to make sure we + // don't treat foobar as a sub-directory of foo. + // + // The priority is used to decide who should override whom. Lesser + // values are considered higher priority. See append_prefixes() for + // details. + // + // @@ The keys should be normalized. + // + struct prefix_value + { + dir_path directory; + size_t priority; + }; + using prefix_map = dir_path_map<prefix_value>; + + void + append_prefixes (prefix_map&, const target&, const variable&) const; + + void + append_lib_prefixes (const scope&, + prefix_map&, + action, + target&, + linfo) const; + + prefix_map + build_prefix_map (const scope&, action, target&, linfo) const; + + small_vector<const target_type*, 2> + map_extension (const scope&, const string&, const string&) const; + + // Src-to-out re-mapping. See extract_headers() for details. 
+ // + using srcout_map = path_map<dir_path>; + + struct module_mapper_state; + + void + gcc_module_mapper (module_mapper_state&, + action, const scope&, file&, linfo, + ifdstream&, ofdstream&, + depdb&, bool&, bool&, + optional<prefix_map>&, srcout_map&) const; + + pair<const file*, bool> + enter_header (action, const scope&, file&, linfo, + path&&, bool, + optional<prefix_map>&, srcout_map&) const; + + optional<bool> + inject_header (action, file&, const file&, bool, timestamp) const; + + pair<auto_rmfile, bool> + extract_headers (action, const scope&, file&, linfo, + const file&, match_data&, + depdb&, bool&, timestamp) const; + + pair<unit, string> + parse_unit (action, file&, linfo, + const file&, auto_rmfile&, + const match_data&, const path&) const; + + void + extract_modules (action, const scope&, file&, linfo, + const compile_target_types&, + const file&, match_data&, + module_info&&, depdb&, bool&) const; + + module_positions + search_modules (action, const scope&, file&, linfo, + const target_type&, + const file&, module_imports&, sha256&) const; + + dir_path + find_modules_sidebuild (const scope&) const; + + const file& + make_module_sidebuild (action, const scope&, const target&, + const target&, const string&) const; + + const file& + make_header_sidebuild (action, const scope&, linfo, const file&) const; + + void + append_headers (environment&, cstrings&, small_vector<string, 2>&, + action, const file&, + const match_data&, const path&) const; + + void + append_modules (environment&, cstrings&, small_vector<string, 2>&, + action, const file&, + const match_data&, const path&) const; + + // Compiler-specific language selection option. Return the number of + // options (arguments, really) appended. 
+ // + size_t + append_lang_options (cstrings&, const match_data&) const; + + void + append_symexport_options (cstrings&, const target&) const; + + private: + const string rule_id; + }; + } +} + +#endif // LIBBUILD2_CC_COMPILE_RULE_HXX diff --git a/libbuild2/cc/export.hxx b/libbuild2/cc/export.hxx new file mode 100644 index 0000000..16118d6 --- /dev/null +++ b/libbuild2/cc/export.hxx @@ -0,0 +1,38 @@ +// file : libbuild2/cc/export.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#pragma once + +// Normally we don't export class templates (but do complete specializations), +// inline functions, and classes with only inline member functions. Exporting +// classes that inherit from non-exported/imported bases (e.g., std::string) +// will end up badly. The only known workarounds are to not inherit or to not +// export. Also, MinGW GCC doesn't like seeing non-exported functions being +// used before their inline definition. The workaround is to reorder code. In +// the end it's all trial and error. + +#if defined(LIBBUILD2_CC_STATIC) // Using static. +# define LIBBUILD2_CC_SYMEXPORT +#elif defined(LIBBUILD2_CC_STATIC_BUILD) // Building static. +# define LIBBUILD2_CC_SYMEXPORT +#elif defined(LIBBUILD2_CC_SHARED) // Using shared. +# ifdef _WIN32 +# define LIBBUILD2_CC_SYMEXPORT __declspec(dllimport) +# else +# define LIBBUILD2_CC_SYMEXPORT +# endif +#elif defined(LIBBUILD2_CC_SHARED_BUILD) // Building shared. +# ifdef _WIN32 +# define LIBBUILD2_CC_SYMEXPORT __declspec(dllexport) +# else +# define LIBBUILD2_CC_SYMEXPORT +# endif +#else +// If none of the above macros are defined, then we assume we are being used +// by some third-party build system that cannot/doesn't signal the library +// type. Note that this fallback works for both static and shared but in case +// of shared will be sub-optimal compared to having dllimport. +// +# define LIBBUILD2_CC_SYMEXPORT // Using static or shared. 
+#endif diff --git a/libbuild2/cc/gcc.cxx b/libbuild2/cc/gcc.cxx new file mode 100644 index 0000000..632805c --- /dev/null +++ b/libbuild2/cc/gcc.cxx @@ -0,0 +1,263 @@ +// file : libbuild2/cc/gcc.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/types.hxx> + +#include <libbuild2/cc/module.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + // Extract system header search paths from GCC (gcc/g++) or compatible + // (Clang, Intel) using the -v -E </dev/null method. + // + dir_paths config_module:: + gcc_header_search_paths (const process_path& xc, scope& rs) const + { + dir_paths r; + + cstrings args; + string std; // Storage. + + args.push_back (xc.recall_string ()); + append_options (args, rs, c_coptions); + append_options (args, rs, x_coptions); + append_options (args, tstd); + + // Compile as. + // + auto langopt = [this] () -> const char* + { + switch (x_lang) + { + case lang::c: return "c"; + case lang::cxx: return "c++"; + } + + assert (false); // Can't get here. + return nullptr; + }; + + args.push_back ("-x"); + args.push_back (langopt ()); + args.push_back ("-v"); + args.push_back ("-E"); + args.push_back ("-"); + args.push_back (nullptr); + + if (verb >= 3) + print_process (args); + + try + { + // Open pipe to stderr, redirect stdin and stdout to /dev/null. + // + process pr (xc, args.data (), -2, -2, -1); + + try + { + ifdstream is ( + move (pr.in_efd), fdstream_mode::skip, ifdstream::badbit); + + // Normally the system header paths appear between the following + // lines: + // + // #include <...> search starts here: + // End of search list. 
+ // + // The exact text depends on the current locale. What we can rely on + // is the presence of the "#include <...>" substring in the + // "opening" line and the fact that the paths are indented with a + // single space character, unlike the "closing" line. + // + // Note that on Mac OS we will also see some framework paths among + // system header paths, followed with a comment. For example: + // + // /Library/Frameworks (framework directory) + // + // For now we ignore framework paths and to filter them out we will + // only consider valid paths to existing directories, skipping those + // which we fail to normalize or stat. + // + string s; + for (bool found (false); getline (is, s); ) + { + if (!found) + found = s.find ("#include <...>") != string::npos; + else + { + if (s[0] != ' ') + break; + + try + { + dir_path d (s, 1, s.size () - 1); + + if (d.absolute () && exists (d, true) && + find (r.begin (), r.end (), d.normalize ()) == r.end ()) + r.emplace_back (move (d)); + } + catch (const invalid_path&) {} + } + } + + is.close (); // Don't block. + + if (!pr.wait ()) + { + // We have read stderr so better print some diagnostics. + // + diag_record dr (fail); + + dr << "failed to extract " << x_lang << " header search paths" << + info << "command line: "; + + print_process (dr, args); + } + } + catch (const io_error&) + { + pr.wait (); + fail << "error reading " << x_lang << " compiler -v -E output"; + } + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + + // It's highly unlikely not to have any system directories. More likely + // we misinterpreted the compiler output. + // + if (r.empty ()) + fail << "unable to extract " << x_lang << " compiler system header " + << "search paths"; + + return r; + } + + // Extract system library search paths from GCC (gcc/g++) or compatible + // (Clang, Intel) using the -print-search-dirs option. 
+ // + dir_paths config_module:: + gcc_library_search_paths (const process_path& xc, scope& rs) const + { + dir_paths r; + + cstrings args; + string std; // Storage. + + args.push_back (xc.recall_string ()); + append_options (args, rs, c_coptions); + append_options (args, rs, x_coptions); + append_options (args, tstd); + append_options (args, rs, c_loptions); + append_options (args, rs, x_loptions); + args.push_back ("-print-search-dirs"); + args.push_back (nullptr); + + if (verb >= 3) + print_process (args); + + // Open pipe to stdout. + // + process pr (run_start (xc, + args.data (), + 0, /* stdin */ + -1 /* stdout */)); + + string l; + try + { + ifdstream is ( + move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); + + // The output of -print-search-dirs is a bunch of lines that start + // with "<name>: =" where name can be "install", "programs", or + // "libraries". If you have English locale, that is. If you set your + // LC_ALL="tr_TR", then it becomes "kurulum", "programlar", and + // "kitapl?klar". Also, Clang omits "install" while GCC and Intel icc + // print all three. The "libraries" seem to be always last, however. + // + string s; + for (bool found (false); !found && getline (is, s); ) + { + found = (s.compare (0, 12, "libraries: =") == 0); + + size_t p (found ? 9 : s.find (": =")); + + if (p != string::npos) + l.assign (s, p + 3, string::npos); + } + + is.close (); // Don't block. + } + catch (const io_error&) + { + pr.wait (); + fail << "error reading " << x_lang << " compiler -print-search-dirs " + << "output"; + } + + run_finish (args, pr); + + if (l.empty ()) + fail << "unable to extract " << x_lang << " compiler system library " + << "search paths"; + + // Now the fun part: figuring out which delimiter is used. Normally it + // is ':' but on Windows it is ';' (or can be; who knows for sure). Also + // note that these paths are absolute (or should be). So here is what we + // are going to do: first look for ';'. 
If found, then that's the + // delimiter. If not found, then there are two cases: it is either a + // single Windows path or the delimiter is ':'. To distinguish these two + // cases we check if the path starts with a Windows drive. + // + char d (';'); + string::size_type e (l.find (d)); + + if (e == string::npos && + (l.size () < 2 || l[0] == '/' || l[1] != ':')) + { + d = ':'; + e = l.find (d); + } + + // Now chop it up. We already have the position of the first delimiter + // (if any). + // + for (string::size_type b (0);; e = l.find (d, (b = e + 1))) + { + dir_path d (l, b, (e != string::npos ? e - b : e)); + + if (find (r.begin (), r.end (), d.normalize ()) == r.end ()) + r.emplace_back (move (d)); + + if (e == string::npos) + break; + } + + return r; + } + } +} diff --git a/libbuild2/cc/guess.cxx b/libbuild2/cc/guess.cxx new file mode 100644 index 0000000..02a2f5a --- /dev/null +++ b/libbuild2/cc/guess.cxx @@ -0,0 +1,1892 @@ +// file : libbuild2/cc/guess.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/guess.hxx> + +#include <map> +#include <cstring> // strlen(), strchr() + +#include <libbuild2/diagnostics.hxx> + +using namespace std; + +namespace build2 +{ + namespace cc + { + string + to_string (compiler_type t) + { + string r; + + switch (t) + { + case compiler_type::clang: r = "clang"; break; + case compiler_type::gcc: r = "gcc"; break; + case compiler_type::msvc: r = "msvc"; break; + case compiler_type::icc: r = "icc"; break; + } + + return r; + } + + compiler_id:: + compiler_id (const std::string& id) + { + using std::string; + + size_t p (id.find ('-')); + + if (id.compare (0, p, "gcc" ) == 0) type = compiler_type::gcc; + else if (id.compare (0, p, "clang") == 0) type = compiler_type::clang; + else if (id.compare (0, p, "msvc" ) == 0) type = compiler_type::msvc; + else if (id.compare (0, p, "icc" ) == 0) type = compiler_type::icc; + else + throw 
invalid_argument ( + "invalid compiler type '" + string (id, 0, p) + "'"); + + if (p != string::npos) + { + variant.assign (id, p + 1, string::npos); + + if (variant.empty ()) + throw invalid_argument ("empty compiler variant"); + } + } + + string compiler_id:: + string () const + { + std::string r (to_string (type)); + + if (!variant.empty ()) + { + r += '-'; + r += variant; + } + + return r; + } + + string + to_string (compiler_class c) + { + string r; + + switch (c) + { + case compiler_class::gcc: r = "gcc"; break; + case compiler_class::msvc: r = "msvc"; break; + } + + return r; + } + + // Standard library detection for GCC-class compilers. + // + // The src argument should detect the standard library based on the + // preprocessor macros and output the result in the stdlib:="XXX" form. + // + static string + stdlib (lang xl, + const process_path& xp, + const strings* c_po, const strings* x_po, + const strings* c_co, const strings* x_co, + const char* src) + { + cstrings args {xp.recall_string ()}; + if (c_po != nullptr) append_options (args, *c_po); + if (x_po != nullptr) append_options (args, *x_po); + if (c_co != nullptr) append_options (args, *c_co); + if (x_co != nullptr) append_options (args, *x_co); + args.push_back ("-x"); + switch (xl) + { + case lang::c: args.push_back ("c"); break; + case lang::cxx: args.push_back ("c++"); break; + } + args.push_back ("-E"); + args.push_back ("-"); // Read stdin. + args.push_back (nullptr); + + // The source we are going to preprocess may contain #include's which + // may fail to resolve if, for example, there is no standard library + // (-nostdinc/-nostdinc++). So we are going to suppress diagnostics and + // assume the error exit code means no standard library (of course it + // could also be because there is something wrong with the compiler or + // options but that we simply leave to blow up later). 
+ // + process pr (run_start (3 /* verbosity */, + xp, + args.data (), + -1 /* stdin */, + -1 /* stdout */, + false /* error */)); + string l, r; + try + { + // Here we have to simultaneously write to stdin and read from stdout + // with both operations having the potential to block. For now we + // assume that src fits into the pipe's buffer. + // + ofdstream os (move (pr.out_fd)); + ifdstream is (move (pr.in_ofd), + fdstream_mode::skip, + ifdstream::badbit); + + os << src << endl; + os.close (); + + while (!eof (getline (is, l))) + { + size_t p (l.find_first_not_of (' ')); + + if (p != string::npos && l.compare (p, 9, "stdlib:=\"") == 0) + { + p += 9; + r = string (l, p, l.size () - p - 1); // One for closing \". + break; + } + } + + is.close (); + } + catch (const io_error&) + { + // Presumably the child process failed. Let run_finish() deal with + // that. + } + + if (!run_finish (args.data (), pr, false /* error */, l)) + r = "none"; + + if (r.empty ()) + fail << "unable to determine " << xl << " standard library"; + + return r; + } + + // C standard library detection on POSIX (i.e., non-Windows) systems. + // Notes: + // + // - We place platform macro-based checks (__FreeBSD__, __APPLE__, etc) + // after library macro-based ones in case a non-default libc is used. + // + static const char* c_stdlib_src = +"#if !defined(__STDC_HOSTED__) || __STDC_HOSTED__ == 1 \n" +"# include <stddef.h> /* Forces defining __KLIBC__ for klibc. */ \n" +"# include <limits.h> /* Includes features.h for glibc. */ \n" +"# include <sys/types.h> /* Includes sys/cdefs.h for bionic. */ \n" +" /* Includes sys/features.h for newlib. */ \n" +" /* Includes features.h for uclibc. 
*/ \n" +"# if defined(__KLIBC__) \n" +" stdlib:=\"klibc\" \n" +"# elif defined(__BIONIC__) \n" +" stdlib:=\"bionic\" \n" +"# elif defined(__NEWLIB__) \n" +" stdlib:=\"newlib\" \n" +"# elif defined(__UCLIBC__) \n" +" stdlib:=\"uclibc\" \n" +"# elif defined(__dietlibc__) /* Also has to be defined manually by */ \n" +" stdlib:=\"dietlibc\" /* or some wrapper. */ \n" +"# elif defined(__MUSL__) /* This libc refuses to define __MUSL__ */ \n" +" stdlib:=\"musl\" /* so it has to be defined by user. */ \n" +"# elif defined(__GLIBC__) /* Check for glibc last since some libc's */ \n" +" stdlib:=\"glibc\" /* pretend to be it. */ \n" +"# elif defined(__FreeBSD__) \n" +" stdlib:=\"freebsd\" \n" +"# elif defined(__APPLE__) \n" +" stdlib:=\"apple\" \n" +"# else \n" +" stdlib:=\"other\" \n" +"# endif \n" +"#else \n" +" stdlib:=\"none\" \n" +"#endif \n"; + + // Pre-guess the compiler type based on the compiler executable name and + // also return the start of that name in the path (used to derive the + // toolchain pattern). Return empty string/npos if can't make a guess (for + // example, because the compiler name is a generic 'c++'). Note that it + // only guesses the type, not the variant. + // + static pair<compiler_type, size_t> + pre_guess (lang xl, const path& xc, const optional<compiler_id>& xi) + { + tracer trace ("cc::pre_guess"); + + // Analyze the last path component only. + // + const string& s (xc.string ()); + size_t s_p (path::traits_type::find_leaf (s)); + size_t s_n (s.size ()); + + // Name separator characters (e.g., '-' in 'g++-4.8'). + // + auto sep = [] (char c) -> bool + { + return c == '-' || c == '_' || c == '.'; + }; + + auto stem = [&sep, &s, s_p, s_n] (const char* x) -> size_t + { + size_t m (strlen (x)); + size_t p (s.find (x, s_p, m)); + + return (p != string::npos && + ( p == s_p || sep (s[p - 1])) && // Separated beginning. + ((p + m) == s_n || sep (s[p + m]))) // Separated end. + ? 
p + : string::npos; + }; + + using type = compiler_type; + using pair = std::pair<type, size_t>; + + // If the user specified the compiler id, then only check the stem for + // that compiler. + // + auto check = [&xi, &stem] (type t, const char* s) -> optional<pair> + { + if (!xi || xi->type == t) + { + size_t p (stem (s)); + + if (p != string::npos) + return pair (t, p); + } + + return nullopt; + }; + + // Warn if the user specified a C compiler instead of C++ or vice versa. + // + lang o; // Other language. + const char* as (nullptr); // Actual stem. + const char* es (nullptr); // Expected stem. + + switch (xl) + { + case lang::c: + { + // Keep msvc last since 'cl' is very generic. + // + if (auto r = check (type::gcc, "gcc") ) return *r; + if (auto r = check (type::clang, "clang")) return *r; + if (auto r = check (type::icc, "icc") ) return *r; + if (auto r = check (type::msvc, "cl") ) return *r; + + if (check (type::gcc, as = "g++") ) es = "gcc"; + else if (check (type::clang, as = "clang++")) es = "clang"; + else if (check (type::icc, as = "icpc") ) es = "icc"; + else if (check (type::msvc, as = "c++") ) es = "cc"; + + o = lang::cxx; + break; + } + case lang::cxx: + { + // Keep msvc last since 'cl' is very generic. + // + if (auto r = check (type::gcc, "g++") ) return *r; + if (auto r = check (type::clang, "clang++")) return *r; + if (auto r = check (type::icc, "icpc") ) return *r; + if (auto r = check (type::msvc, "cl") ) return *r; + + if (check (type::gcc, as = "gcc") ) es = "g++"; + else if (check (type::clang, as = "clang")) es = "clang++"; + else if (check (type::icc, as = "icc") ) es = "icpc"; + else if (check (type::msvc, as = "cc") ) es = "c++"; + + o = lang::c; + break; + } + } + + if (es != nullptr) + warn << xc << " looks like a " << o << " compiler" << + info << "should it be '" << es << "' instead of '" << as << "'?"; + + // If the user specified the id, then continue as if we pre-guessed. 
+ // + if (xi) + return pair (xi->type, string::npos); + + l4 ([&]{trace << "unable to guess compiler type of " << xc;}); + + return pair (invalid_compiler_type, string::npos); + } + + // Guess the compiler type and variant by running it. If the pre argument + // is not empty, then only "confirm" the pre-guess. Return empty result if + // unable to guess. + // + struct guess_result + { + compiler_id id; + string signature; + string checksum; + process_path path; + + guess_result () = default; + guess_result (compiler_id i, string&& s) + : id (move (i)), signature (move (s)) {} + + bool + empty () const {return id.empty ();} + }; + + // Allowed to change pre if succeeds. + // + static guess_result + guess (const char* xm, + lang, + const path& xc, + const optional<compiler_id>& xi, + compiler_type& pre) + { + tracer trace ("cc::guess"); + + assert (!xi || xi->type == pre); + + guess_result r; + + process_path xp; + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << " to override"; + }); + + // Only search in PATH (specifically, omitting the current + // executable's directory on Windows). + // + xp = run_search (xc, + false /* init */, // Note: result is cached. + dir_path () /* fallback */, + true /* path_only */); + } + + using type = compiler_type; + const type invalid = invalid_compiler_type; + + // Start with -v. This will cover gcc and clang. + // + // While icc also writes what may seem like something we can use to + // detect it: + // + // icpc version 16.0.2 (gcc version 4.9.0 compatibility) + // + // That first word is actually the executable name. So if we rename + // icpc to foocpc, we will get: + // + // foocpc version 16.0.2 (gcc version 4.9.0 compatibility) + // + // In fact, if someone renames icpc to g++, there will be no way for + // us to detect this. Oh, well, their problem. 
+ // + if (r.empty () && (pre == invalid || + pre == type::gcc || + pre == type::clang)) + { + auto f = [&xi] (string& l, bool last) -> guess_result + { + if (xi) + { + // The signature line is first in Clang and last in GCC. + // + if (xi->type != type::gcc || last) + return guess_result (*xi, move (l)); + } + + // The gcc/g++ -v output will have a last line in the form: + // + // "gcc version X.Y.Z ..." + // + // The "version" word can probably be translated. For example: + // + // gcc version 3.4.4 + // gcc version 4.2.1 + // gcc version 4.8.2 (GCC) + // gcc version 4.8.5 (Ubuntu 4.8.5-2ubuntu1~14.04.1) + // gcc version 4.9.2 (Ubuntu 4.9.2-0ubuntu1~14.04) + // gcc version 5.1.0 (Ubuntu 5.1.0-0ubuntu11~14.04.1) + // gcc version 6.0.0 20160131 (experimental) (GCC) + // + if (last && l.compare (0, 4, "gcc ") == 0) + return guess_result (compiler_id {type::gcc, ""}, move (l)); + + // The Apple clang/clang++ -v output will have a line (currently + // first) in the form: + // + // "Apple (LLVM|clang) version X.Y.Z ..." + // + // Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn) + // Apple clang version 4.0 (tags/Apple/clang-421.0.60) (based on LLVM 3.1svn) + // Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn) + // Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn) + // Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn) + // Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn) + // Apple LLVM version 6.0 (clang-600.0.57) (based on LLVM 3.5svn) + // Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn) + // Apple LLVM version 7.0.0 (clang-700.0.53) + // Apple LLVM version 7.0.0 (clang-700.1.76) + // Apple LLVM version 7.0.2 (clang-700.1.81) + // Apple LLVM version 7.3.0 (clang-703.0.16.1) + // + // Note that the gcc/g++ "aliases" for clang/clang++ also include + // this line but it is (currently) preceded by "Configured with: + // ...". 
+ // + // Check for Apple clang before the vanilla one since the above line + // also includes "clang". + // + if (l.compare (0, 6, "Apple ") == 0 && + (l.compare (6, 5, "LLVM ") == 0 || + l.compare (6, 6, "clang ") == 0)) + return guess_result (compiler_id {type::clang, "apple"}, move (l)); + + // The vanilla clang/clang++ -v output will have a first line in the + // form: + // + // "[... ]clang version X.Y.Z[-...] ..." + // + // The "version" word can probably be translated. For example: + // + // FreeBSD clang version 3.4.1 (tags/RELEASE_34/dot1-final 208032) 20140512 + // Ubuntu clang version 3.5.0-4ubuntu2~trusty2 (tags/RELEASE_350/final) (based on LLVM 3.5.0) + // Ubuntu clang version 3.6.0-2ubuntu1~trusty1 (tags/RELEASE_360/final) (based on LLVM 3.6.0) + // clang version 3.7.0 (tags/RELEASE_370/final) + // + if (l.find ("clang ") != string::npos) + return guess_result (compiler_id {type::clang, ""}, move (l)); + + return guess_result (); + }; + + // The -v output contains other information (such as the compiler + // build configuration for gcc or the selected gcc installation for + // clang) which makes sense to include into the compiler checksum. So + // ask run() to calculate it for every line of the -v ouput. + // + // One notable consequence of this is that if the locale changes + // (e.g., via LC_ALL), then the compiler signature will most likely + // change as well because of the translated text. + // + sha256 cs; + + // Suppress all the compiler errors because we may be trying an + // unsupported option (but still consider the exit code). + // + r = run<guess_result> (3, xp, "-v", f, false, false, &cs); + + if (r.empty ()) + { + if (xi) + { + // Fallback to --version below in case this GCC/Clang-like + // compiler doesn't support -v. + // + //fail << "unable to obtain " << xc << " signature with -v"; + } + } + else + { + // If this is clang-apple and pre-guess was gcc then change it so + // that we don't issue any warnings. 
+ // + if (r.id.type == type::clang && + r.id.variant == "apple" && + pre == type::gcc) + pre = type::clang; + + r.checksum = cs.string (); + } + } + + // Next try --version to detect icc. As well as obtain signature for + // GCC/Clang-like compilers in case -v above didn't work. + // + if (r.empty () && (pre == invalid || + pre == type::icc || + pre == type::gcc || + pre == type::clang)) + { + auto f = [&xi] (string& l, bool) -> guess_result + { + // Assume the first line is the signature. + // + if (xi) + return guess_result (*xi, move (l)); + + // The first line has the " (ICC) " in it, for example: + // + // icpc (ICC) 9.0 20060120 + // icpc (ICC) 11.1 20100414 + // icpc (ICC) 12.1.0 20110811 + // icpc (ICC) 14.0.0 20130728 + // icpc (ICC) 15.0.2 20150121 + // icpc (ICC) 16.0.2 20160204 + // icc (ICC) 16.0.2 20160204 + // + if (l.find (" (ICC) ") != string::npos) + return guess_result (compiler_id {type::icc, ""}, move (l)); + + return guess_result (); + }; + + r = run<guess_result> (3, xp, "--version", f, false); + + if (r.empty ()) + { + if (xi) + fail << "unable to obtain " << xc << " signature with --version"; + } + } + + // Finally try to run it without any options to detect msvc. + // + if (r.empty () && (pre == invalid || pre == type::msvc)) + { + auto f = [&xi] (string& l, bool) -> guess_result + { + // Assume the first line is the signature. + // + if (xi) + return guess_result (*xi, move (l)); + + // Check for "Microsoft (R)" and "C/C++" in the first line as a + // signature since all other words/positions can be translated. 
For + // example: + // + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.6030 for 80x86 + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 14.00.50727.762 for 80x86 + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86 + // Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64 + // Microsoft (R) C/C++ Optimizing Compiler Version 17.00.50727.1 for x86 + // Microsoft (R) C/C++ Optimizing Compiler Version 18.00.21005.1 for x86 + // Microsoft (R) C/C++ Optimizing Compiler Version 19.00.23026 for x86 + // Microsoft (R) C/C++ Optimizing Compiler Version 19.10.24629 for x86 + // + // In the recent versions the architecture is either "x86", "x64", + // or "ARM". + // + if (l.find ("Microsoft (R)") != string::npos && + l.find ("C/C++") != string::npos) + return guess_result (compiler_id {type::msvc, ""}, move (l)); + + return guess_result (); + }; + + // One can pass extra options/arguments to cl.exe with the CL and _CL_ + // environment variables. However, if such extra options are passed + // without anything to compile, then cl.exe no longer prints usage and + // exits successfully but instead issues an error and fails. So we are + // going to unset these variables for our test (interestingly, only CL + // seem to cause the problem but let's unset both, for good measure). 
+ // + const char* env[] = {"CL=", "_CL_=", nullptr}; + + r = run<guess_result> (3, process_env (xp, env), f, false); + + if (r.empty ()) + { + if (xi) + fail << "unable to obtain " << xc << " signature"; + } + } + + if (!r.empty ()) + { + if (pre != invalid && r.id.type != pre) + { + l4 ([&]{trace << "compiler type guess mismatch" + << ", pre-guessed " << pre + << ", determined " << r.id.type;}); + + r = guess_result (); + } + else + { + l5 ([&]{trace << xc << " is " << r.id << ": '" + << r.signature << "'";}); + + r.path = move (xp); + } + } + else + l4 ([&]{trace << "unable to determine compiler type of " << xc;}); + + return r; + } + + // Try to derive the toolchain pattern. + // + // The s argument is the stem to look for in the leaf of the path. The ls + // and rs arguments are the left/right separator characters. If either is + // NULL, then the stem should be the prefix/suffix of the leaf, + // respectively. Note that a path that is equal to stem is not considered + // a pattern. + // + // Note that the default right separator includes digits to handle cases + // like clang++37 (FreeBSD). + // + static string + pattern (const path& xc, + const char* s, + const char* ls = "-_.", + const char* rs = "-_.0123456789") + { + string r; + size_t sn (strlen (s)); + + if (xc.size () > sn) + { + string l (xc.leaf ().string ()); + size_t ln (l.size ()); + + size_t b; + if (ln >= sn && (b = l.find (s)) != string::npos) + { + // Check left separators. + // + if (b == 0 || (ls != nullptr && strchr (ls, l[b - 1]) != nullptr)) + { + // Check right separators. 
+ // + size_t e (b + sn); + if (e == ln || (rs != nullptr && strchr (rs, l[e]) != nullptr)) + { + l.replace (b, sn, "*", 1); + path p (xc.directory ()); + p /= l; + r = move (p).string (); + } + } + } + } + + return r; + } + + + static compiler_info + guess_gcc (const char* xm, + lang xl, + const path& xc, + const string* xv, + const string* xt, + const strings* c_po, const strings* x_po, + const strings* c_co, const strings* x_co, + const strings*, const strings*, + guess_result&& gr) + { + tracer trace ("cc::guess_gcc"); + + const process_path& xp (gr.path); + + // Extract the version. The signature line has the following format + // though language words can be translated and even rearranged (see + // examples above). + // + // "gcc version A.B.C[ ...]" + // + compiler_version v; + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".version to override"; + }); + + // Treat the custom version as just a tail of the signature. + // + const string& s (xv == nullptr ? gr.signature : *xv); + + // Scan the string as words and look for one that looks like a + // version. + // + size_t b (0), e (0); + while (next_word (s, b, e)) + { + // The third argument to find_first_not_of() is the length of the + // first argument, not the length of the interval to check. So to + // limit it to [b, e) we are also going to compare the result to the + // end of the word position (first space). In fact, we can just + // check if it is >= e. + // + if (s.find_first_not_of ("1234567890.", b, 11) >= e) + break; + } + + if (b == e) + fail << "unable to extract gcc version from '" << s << "'"; + + v.string.assign (s, b, string::npos); + + // Split the version into components. 
+ // + size_t vb (b), ve (b); + auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t + { + try + { + if (next_word (s, e, vb, ve, '.')) + return stoull (string (s, vb, ve - vb)); + } + catch (const invalid_argument&) {} + catch (const out_of_range&) {} + + fail << "unable to extract gcc " << m << " version from '" + << string (s, b, e - b) << "'" << endf; + }; + + v.major = next ("major"); + v.minor = next ("minor"); + v.patch = next ("patch"); + + if (e != s.size ()) + v.build.assign (s, e + 1, string::npos); + } + + // Figure out the target architecture. This is actually a lot trickier + // than one would have hoped. + // + // There is the -dumpmachine option but gcc doesn't adjust it per the + // compile options (e.g., -m32). However, starting with 4.6 it has the + // -print-multiarch option which gives (almost) the right answer. The + // "almost" part has to do with it not honoring the -arch option (which + // is really what this compiler is building for). To get to that, we + // would have to resort to a hack like this: + // + // gcc -v -E - 2>&1 | grep cc1 + // .../cc1 ... -mtune=generic -march=x86-64 + // + // Also, -print-multiarch will print am empty line if the compiler + // actually wasn't built with multi-arch support. + // + // So for now this is what we are going to do for the time being: First + // try -print-multiarch. If that works out (recent gcc configure with + // multi-arch support), then use the result. Otherwise, fallback to + // -dumpmachine (older gcc or not multi-arch). + // + string t, ot; + + if (xt == nullptr) + { + cstrings args {xp.recall_string (), "-print-multiarch"}; + if (c_co != nullptr) append_options (args, *c_co); + if (x_co != nullptr) append_options (args, *x_co); + args.push_back (nullptr); + + // The output of both -print-multiarch and -dumpmachine is a single + // line containing just the target triplet. 
+ // + auto f = [] (string& l, bool) {return move (l);}; + + t = run<string> (3, xp, args.data (), f, false); + + if (t.empty ()) + { + l5 ([&]{trace << xc << " doesn's support -print-multiarch, " + << "falling back to -dumpmachine";}); + + args[1] = "-dumpmachine"; + t = run<string> (3, xp, args.data (), f, false); + } + + if (t.empty ()) + fail << "unable to extract target architecture from " << xc + << " using -print-multiarch or -dumpmachine output" << + info << "use config." << xm << ".target to override"; + + ot = t; + } + else + ot = t = *xt; + + // Parse the target into triplet (for further tests) ignoring any + // failures. + // + target_triplet tt; + try {tt = target_triplet (t);} catch (const invalid_argument&) {} + + // Derive the toolchain pattern. Try cc/c++ as a fallback. + // + string pat (pattern (xc, xl == lang::c ? "gcc" : "g++")); + + if (pat.empty ()) + pat = pattern (xc, xl == lang::c ? "cc" : "c++"); + + // Runtime and standard library. + // + // GCC always uses libgcc (even on MinGW). Even with -nostdlib GCC's + // documentation says that you should usually specify -lgcc. + // + string rt ("libgcc"); + string csl (tt.system == "mingw32" + ? "msvc" + : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src)); + string xsl; + switch (xl) + { + case lang::c: xsl = csl; break; + case lang::cxx: + { + // While GCC only supports it's own C++ standard library (libstdc++) + // we still run the test to detect the "none" case (-nostdinc++). + // + const char* src = + "#include <bits/c++config.h> \n" + "stdlib:=\"libstdc++\" \n"; + + xsl = stdlib (xl, xp, c_po, x_po, c_co, x_co, src); + break; + } + } + + return compiler_info { + move (gr.path), + move (gr.id), + compiler_class::gcc, + move (v), + move (gr.signature), + move (gr.checksum), // Calculated on whole -v output. 
+ move (t), + move (ot), + move (pat), + "", + move (rt), + move (csl), + move (xsl)}; + } + + static compiler_info + guess_clang (const char* xm, + lang xl, + const path& xc, + const string* xv, + const string* xt, + const strings* c_po, const strings* x_po, + const strings* c_co, const strings* x_co, + const strings* c_lo, const strings* x_lo, + guess_result&& gr) + { + const process_path& xp (gr.path); + + // Extract the version. Here we will try to handle both vanilla and + // Apple clang since the signature lines are fairly similar. They have + // the following format though language words can probably be translated + // and even rearranged (see examples above). + // + // "[... ]clang version A.B.C[( |-)...]" + // "Apple (clang|LLVM) version A.B[.C] ..." + // + compiler_version v; + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".version to override"; + }); + + // Treat the custom version as just a tail of the signature. + // + const string& s (xv == nullptr ? gr.signature : *xv); + + // Some overrides for testing. + // + //s = "clang version 3.7.0 (tags/RELEASE_370/final)"; + // + //gr.id.variant = "apple"; + //s = "Apple LLVM version 7.3.0 (clang-703.0.16.1)"; + //s = "Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)"; + + // Scan the string as words and look for one that looks like a + // version. Use '-' as a second delimiter to handle versions like + // "3.6.0-2ubuntu1~trusty1". + // + size_t b (0), e (0); + while (next_word (s, b, e, ' ', '-')) + { + // The third argument to find_first_not_of() is the length of the + // first argument, not the length of the interval to check. So to + // limit it to [b, e) we are also going to compare the result to the + // end of the word position (first space). In fact, we can just + // check if it is >= e. 
+ // + if (s.find_first_not_of ("1234567890.", b, 11) >= e) + break; + } + + if (b == e) + fail << "unable to extract clang version from '" << s << "'"; + + v.string.assign (s, b, string::npos); + + // Split the version into components. + // + size_t vb (b), ve (b); + auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t + { + try + { + if (next_word (s, e, vb, ve, '.')) + return stoull (string (s, vb, ve - vb)); + + if (opt) + return 0; + } + catch (const invalid_argument&) {} + catch (const out_of_range&) {} + + fail << "unable to extract clang " << m << " version from '" + << string (s, b, e - b) << "'" << endf; + }; + + v.major = next ("major", false); + v.minor = next ("minor", false); + v.patch = next ("patch", gr.id.variant == "apple"); + + if (e != s.size ()) + v.build.assign (s, e + 1, string::npos); + } + + // Figure out the target architecture. + // + // Unlike gcc, clang doesn't have -print-multiarch. Its -dumpmachine, + // however, respects the compile options (e.g., -m32). + // + string t, ot; + + if (xt == nullptr) + { + cstrings args {xp.recall_string (), "-dumpmachine"}; + if (c_co != nullptr) append_options (args, *c_co); + if (x_co != nullptr) append_options (args, *x_co); + args.push_back (nullptr); + + // The output of -dumpmachine is a single line containing just the + // target triplet. + // + auto f = [] (string& l, bool) {return move (l);}; + t = run<string> (3, xp, args.data (), f, false); + + if (t.empty ()) + fail << "unable to extract target architecture from " << xc + << " using -dumpmachine output" << + info << "use config." << xm << ".target to override"; + + ot = t; + } + else + ot = t = *xt; + + // Parse the target into triplet (for further tests) ignoring any + // failures. + // + target_triplet tt; + try {tt = target_triplet (t);} catch (const invalid_argument&) {} + + // For Clang on Windows targeting MSVC we remap the target to match + // MSVC's. 
+ // + if (tt.system == "windows-msvc") + { + // Keep the CPU and replace the rest. + // + // @@ Note that currently there is no straightforward way to determine + // the VC version Clang is using. See: + // + // http://lists.llvm.org/pipermail/cfe-dev/2017-December/056240.html + // + tt.vendor = "microsoft"; + tt.system = "win32-msvc"; + tt.version = "14.1"; + t = tt.string (); + } + + // Derive the toolchain pattern. Try clang/clang++, the gcc/g++ alias, + // as well as cc/c++. + // + string pat (pattern (xc, xl == lang::c ? "clang" : "clang++")); + + if (pat.empty ()) + pat = pattern (xc, xl == lang::c ? "gcc" : "g++"); + + if (pat.empty ()) + pat = pattern (xc, xl == lang::c ? "cc" : "c++"); + + // Runtime and standard library. + // + // Clang can use libgcc, its own compiler-rt, or, on Windows targeting + // MSVC, the VC's runtime. As usual, there is no straightforward way + // to query this and silence on the mailing list. See: + // + // http://lists.llvm.org/pipermail/cfe-dev/2018-January/056494.html + // + // So for now we will just look for --rtlib (note: linker option) and if + // none specified, assume some platform-specific defaults. + // + string rt; + { + auto find_rtlib = [] (const strings* ops) -> const string* + { + return ops != nullptr + ? find_option_prefix ("--rtlib=", *ops, false) + : nullptr; + }; + + const string* o; + if ((o = find_rtlib (x_lo)) != nullptr || + (o = find_rtlib (c_lo)) != nullptr) + { + rt = string (*o, 8); + } + else if (tt.system == "win32-msvc") rt = "msvc"; + else if (tt.system == "linux-gnu" || + tt.system == "freebsd") rt = "libgcc"; + else /* Mac OS, etc. */ rt = "compiler-rt"; + } + + string csl (tt.system == "win32-msvc" || tt.system == "mingw32" + ? 
"msvc" + : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src)); + + string xsl; + switch (xl) + { + case lang::c: xsl = csl; break; + case lang::cxx: + { + // All Clang versions that we care to support have __has_include() + // so we use it to determine which standard library is available. + // + // Note that we still include the corresponding headers to verify + // things are usable. For the "other" case we include some + // standard header to detect the "none" case (e.g, -nostdinc++). + // + const char* src = + "#if __has_include(<__config>) \n" + " #include <__config> \n" + " stdlib:=\"libc++\" \n" + "#elif __has_include(<bits/c++config.h>) \n" + " #include <bits/c++config.h> \n" + " stdlib:=\"libstdc++\" \n" + "#else \n" + " #include <cstddef> \n" + " stdlib:=\"other\" \n" + "#endif \n"; + + xsl = tt.system == "win32-msvc" + ? "msvcp" + : stdlib (xl, xp, c_po, x_po, c_co, x_co, src); + break; + } + } + + return compiler_info { + move (gr.path), + move (gr.id), + compiler_class::gcc, + move (v), + move (gr.signature), + move (gr.checksum), // Calculated on whole -v output. + move (t), + move (ot), + move (pat), + "", + move (rt), + move (csl), + move (xsl)}; + } + + static compiler_info + guess_icc (const char* xm, + lang xl, + const path& xc, + const string* xv, + const string* xt, + const strings* c_po, const strings* x_po, + const strings* c_co, const strings* x_co, + const strings*, const strings*, + guess_result&& gr) + { + const process_path& xp (gr.path); + + // Extract the version. If the version has the fourth component, then + // the signature line (extracted with --version) won't include it. So we + // will have to get a more elaborate line with -V. We will also have to + // do it to get the compiler target that respects the -m option: icc + // doesn't support -print-multiarch like gcc and its -dumpmachine + // doesn't respect -m like clang. 
In fact, its -dumpmachine is + // completely broken as it appears to print the compiler's host and not + // the target (e.g., .../bin/ia32/icpc prints x86_64-linux-gnu). + // + // Some examples of the signature lines from -V output: + // + // Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047 + // Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116 + // Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010 + // Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074 + // Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064 + // Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427 + // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.2.181 Build 20160204 + // Intel(R) C++ Intel(R) 64 Compiler for applications running on IA-32, Version 16.0.2.181 Build 20160204 + // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204 + // Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204 + // + // We should probably also assume the language words can be translated + // and even rearranged. + // + auto f = [] (string& l, bool) + { + return l.compare (0, 5, "Intel") == 0 && (l[5] == '(' || l[5] == ' ') + ? move (l) + : string (); + }; + + if (xv == nullptr) + { + string& s (gr.signature); + s.clear (); + + // The -V output is sent to STDERR. + // + s = run<string> (3, xp, "-V", f, false); + + if (s.empty ()) + fail << "unable to extract signature from " << xc << " -V output"; + + if (s.find (xl == lang::c ? 
" C " : " C++ ") == string::npos) + fail << xc << " does not appear to be the Intel " << xl + << " compiler" << + info << "extracted signature: '" << s << "'"; + } + + // Scan the string as words and look for the version. It consist of only + // digits and periods and contains at least one period. + // + compiler_version v; + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".version to override"; + }); + + // Treat the custom version as just a tail of the signature. + // + const string& s (xv == nullptr ? gr.signature : *xv); + + // Some overrides for testing. + // + //s = "Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047"; + //s = "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116"; + //s = "Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010"; + //s = "Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074"; + //s = "Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064"; + //s = "Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427"; + + size_t b (0), e (0); + while (next_word (s, b, e, ' ', ',') != 0) + { + // The third argument to find_first_not_of() is the length of the + // first argument, not the length of the interval to check. So to + // limit it to [b, e) we are also going to compare the result to the + // end of the word position (first space). In fact, we can just + // check if it is >= e. Similar logic for find_first_of() except + // that we add space to the list of character to make sure we don't + // go too far. + // + if (s.find_first_not_of ("1234567890.", b, 11) >= e && + s.find_first_of (". 
", b, 2) < e) + break; + } + + if (b == e) + fail << "unable to extract icc version from '" << s << "'"; + + v.string.assign (s, b, string::npos); + + // Split the version into components. + // + size_t vb (b), ve (b); + auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t + { + try + { + if (next_word (s, e, vb, ve, '.')) + return stoull (string (s, vb, ve - vb)); + + if (opt) + return 0; + } + catch (const invalid_argument&) {} + catch (const out_of_range&) {} + + fail << "unable to extract icc " << m << " version from '" + << string (s, b, e - b) << "'" << endf; + }; + + v.major = next ("major", false); + v.minor = next ("minor", false); + v.patch = next ("patch", true); + + if (vb != ve && next_word (s, e, vb, ve, '.')) + v.build.assign (s, vb, ve - vb); + + if (e != s.size ()) + { + if (!v.build.empty ()) + v.build += ' '; + + v.build.append (s, e + 1, string::npos); + } + } + + // Figure out the target CPU by re-running the compiler with -V and + // compile options (which may include, e.g., -m32). The output will + // contain two CPU keywords: the first is the host and the second is the + // target (hopefully this won't get rearranged by the translation). + // + // The CPU keywords (based on the above samples) appear to be: + // + // "32-bit" + // "IA-32" + // "Intel" "64" + // "Intel(R)" "64" + // "Intel(R)" "MIC" (-dumpmachine says: x86_64-k1om-linux) + // + string t, ot; + + if (xt == nullptr) + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".target to override"; + }); + + cstrings args {xp.recall_string (), "-V"}; + if (c_co != nullptr) append_options (args, *c_co); + if (x_co != nullptr) append_options (args, *x_co); + args.push_back (nullptr); + + // The -V output is sent to STDERR. 
+ // + t = run<string> (3, xp, args.data (), f, false); + + if (t.empty ()) + fail << "unable to extract target architecture from " << xc + << " -V output"; + + string arch; + for (size_t b (0), e (0), n; + (n = next_word (t, b, e, ' ', ',')) != 0; ) + { + if (t.compare (b, n, "Intel(R)", 8) == 0 || + t.compare (b, n, "Intel", 5) == 0) + { + if ((n = next_word (t, b, e, ' ', ',')) != 0) + { + if (t.compare (b, n, "64", 2) == 0) + { + arch = "x86_64"; + } + else if (t.compare (b, n, "MIC", 3) == 0) + { + arch = "x86_64"; // Plus "-k1om-linux" from -dumpmachine below. + } + } + else + break; + } + else if (t.compare (b, n, "IA-32", 5) == 0 || + t.compare (b, n, "32-bit", 6) == 0) + { + arch = "i386"; + } + } + + if (arch.empty ()) + fail << "unable to extract icc target architecture from '" + << t << "'"; + + // So we have the CPU but we still need the rest of the triplet. While + // icc currently doesn't support cross-compilation (at least on Linux) + // and we could have just used the build triplet (i.e., the + // architecture on which we are running), who knows what will happen + // in the future. So instead we are going to use -dumpmachine and + // substitute the CPU. + // + { + auto f = [] (string& l, bool) {return move (l);}; + t = run<string> (3, xp, "-dumpmachine", f); + } + + if (t.empty ()) + fail << "unable to extract target architecture from " << xc + << " using -dumpmachine output"; + + // The first component in the triplet is always CPU. + // + size_t p (t.find ('-')); + + if (p == string::npos) + fail << "unable to parse icc target architecture '" << t << "'"; + + t.swap (arch); + t.append (arch, p, string::npos); + + ot = t; + } + else + ot = t = *xt; + + // Parse the target into triplet (for further tests) ignoring any + // failures. + // + target_triplet tt; + try {tt = target_triplet (t);} catch (const invalid_argument&) {} + + // Derive the toolchain pattern. + // + string pat (pattern (xc, xl == lang::c ? 
"icc" : "icpc")); + + // Runtime and standard library. + // + // For now we assume that unless it is Windows, we are targeting + // Linux/GCC. + // + string rt (tt.system == "win32-msvc" ? "msvc" : "libgcc"); + string csl (tt.system == "win32-msvc" + ? "msvc" + : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src)); + string xsl; + switch (xl) + { + case lang::c: xsl = csl; break; + case lang::cxx: + { + xsl = tt.system == "win32-msvc" ? "msvcp" : "libstdc++"; + break; + } + } + + return compiler_info { + move (gr.path), + move (gr.id), + compiler_class::gcc, //@@ TODO: msvc on Windows? + move (v), + move (gr.signature), + "", + move (t), + move (ot), + move (pat), + "", + move (rt), + move (csl), + move (xsl)}; + } + + static compiler_info + guess_msvc (const char* xm, + lang xl, + const path& xc, + const string* xv, + const string* xt, + const strings*, const strings*, + const strings*, const strings*, + const strings*, const strings*, + guess_result&& gr) + { + // Extract the version. The signature line has the following format + // though language words can be translated and even rearranged (see + // examples above). + // + // "Microsoft (R) C/C++ Optimizing Compiler Version A.B.C[.D] for CPU" + // + // The CPU keywords (based on the above samples) appear to be: + // + // "80x86" + // "x86" + // "x64" + // "ARM" + // + compiler_version v; + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".version to override"; + }); + + // Treat the custom version as just a tail of the signature. + // + const string& s (xv == nullptr ? gr.signature : *xv); + + // Some overrides for testing. 
+ // + //string s; + //s = "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86"; + //s = "Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64"; + //s = "Compilateur d'optimisation Microsoft (R) C/C++ version 19.16.27026.1 pour x64"; + + // Scan the string as words and look for the version. + // + size_t b (0), e (0); + while (next_word (s, b, e, ' ', ',')) + { + // The third argument to find_first_not_of() is the length of the + // first argument, not the length of the interval to check. So to + // limit it to [b, e) we are also going to compare the result to the + // end of the word position (first space). In fact, we can just + // check if it is >= e. + // + if (s.find_first_not_of ("1234567890.", b, 11) >= e) + break; + } + + if (b == e) + fail << "unable to extract msvc version from '" << s << "'"; + + v.string.assign (s, b, e - b); + + // Split the version into components. + // + size_t vb (b), ve (b); + auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t + { + try + { + if (next_word (s, e, vb, ve, '.')) + return stoull (string (s, vb, ve - vb)); + } + catch (const invalid_argument&) {} + catch (const out_of_range&) {} + + fail << "unable to extract msvc " << m << " version from '" + << string (s, b, e - b) << "'" << endf; + }; + + v.major = next ("major"); + v.minor = next ("minor"); + v.patch = next ("patch"); + + if (next_word (s, e, vb, ve, '.')) + v.build.assign (s, vb, ve - vb); + } + + + // Figure out the target architecture. + // + string t, ot; + + if (xt == nullptr) + { + auto df = make_diag_frame ( + [&xm](const diag_record& dr) + { + dr << info << "use config." << xm << ".target to override"; + }); + + const string& s (gr.signature); + + // Scan the string as words and look for the CPU. 
+ // + string arch; + + for (size_t b (0), e (0), n; + (n = next_word (s, b, e, ' ', ',')) != 0; ) + { + if (s.compare (b, n, "x64", 3) == 0 || + s.compare (b, n, "x86", 3) == 0 || + s.compare (b, n, "ARM", 3) == 0 || + s.compare (b, n, "80x86", 5) == 0) + { + arch.assign (s, b, n); + break; + } + } + + if (arch.empty ()) + fail << "unable to extract msvc target architecture from " + << "'" << s << "'"; + + // Now we need to map x86, x64, and ARM to the target triplets. The + // problem is, there aren't any established ones so we have to invent + // them ourselves. Based on the discussion in + // <libbutl/target-triplet.mxx>, we need something in the + // CPU-VENDOR-OS-ABI form. + // + // The CPU part is fairly straightforward with x86 mapped to 'i386' + // (or maybe 'i686'), x64 to 'x86_64', and ARM to 'arm' (it could also + // include the version, e.g., 'armv8'). + // + // The (toolchain) VENDOR is also straightforward: 'microsoft'. Why + // not omit it? Two reasons: firstly, there are other compilers with + // the otherwise same target, for example Intel C/C++, and it could be + // useful to distinguish between them. Secondly, by having all four + // components we remove any parsing ambiguity. + // + // OS-ABI is where things are not as clear cut. The OS part probably + // shouldn't be just 'windows' since we have Win32 and WinCE. And + // WinRT. And Universal Windows Platform (UWP). So perhaps the + // following values for OS: 'win32', 'wince', 'winrt', 'winup'. + // + // For 'win32' the ABI part could signal the Microsoft C/C++ runtime + // by calling it 'msvc'. And seeing that the runtimes are incompatible + // from version to version, we should probably add the 'X.Y' version + // at the end (so we essentially mimic the DLL name, for example, + // msvcr120.dll). Some suggested we also encode the runtime type + // (those pesky /M* options) though I am not sure: the only + // "redistributable" runtime is multi-threaded release DLL. 
+ // + // The ABI part for the other OS values needs thinking. For 'winrt' + // and 'winup' it probably makes sense to encode the WINAPI_FAMILY + // macro value (perhaps also with the version). Some of its values: + // + // WINAPI_FAMILY_APP Windows 10 + // WINAPI_FAMILY_PC_APP Windows 8.1 + // WINAPI_FAMILY_PHONE_APP Windows Phone 8.1 + // + // For 'wince' we may also want to add the OS version, for example, + // 'wince4.2'. + // + // Putting it all together, Visual Studio 2015 will then have the + // following target triplets: + // + // x86 i386-microsoft-win32-msvc14.0 + // x64 x86_64-microsoft-win32-msvc14.0 + // ARM arm-microsoft-winup-??? + // + if (arch == "ARM") + fail << "cl.exe ARM/WinRT/UWP target is not yet supported"; + else + { + if (arch == "x64") + t = "x86_64-microsoft-win32-msvc"; + else if (arch == "x86" || arch == "80x86") + t = "i386-microsoft-win32-msvc"; + else + assert (false); + + // Mapping of compiler versions to runtime versions: + // + // Note that VC 15 has runtime version 14.1 but the DLLs are still + // called *140.dll (they are said to be backwards-compatible). + // + // And VC 16 seems to have the runtime version 14.1 (and not 14.2, + // as one might expect; DLLs are still *140.dll but there are now _1 + // and _2 variants for, say, msvcp140.dll). We will, however, call + // it 14.2 (which is the version of the "toolset") in our target + // triplet. + // + // year ver cl crt/dll toolset + // + // 2019 16.1 19.21 14.2/140 14.21 + // 2019 16.0 19.20 14.2/140 + // 2017 15.9 19.16 14.1/140 + // 2017 15.8 19.15 14.1/140 + // 2017 15.7 19.14 14.1/140 + // 2017 15.6 19.13 14.1/140 + // 2017 15.5 19.12 14.1/140 + // 2017 15.3 19.11 14.1/140 + // 2017 15 19.10 14.1/140 + // 2015 14 19.00 14.0/140 + // 2013 12 18.00 12.0/120 + // 2012 11 17.00 11.0/110 + // 2010 10 16.00 10.0/100 + // 2008 9 15.00 9.0/90 + // 2005 8 14.00 8.0/80 + // 2003 7.1 13.10 7.1/71 + // + // _MSC_VER is the numeric cl version, e.g., 1921 for 19.21. 
+ // + /**/ if (v.major == 19 && v.minor >= 20) t += "14.2"; + else if (v.major == 19 && v.minor >= 10) t += "14.1"; + else if (v.major == 19 && v.minor == 0) t += "14.0"; + else if (v.major == 18 && v.minor == 0) t += "12.0"; + else if (v.major == 17 && v.minor == 0) t += "11.0"; + else if (v.major == 16 && v.minor == 0) t += "10.0"; + else if (v.major == 15 && v.minor == 0) t += "9.0"; + else if (v.major == 14 && v.minor == 0) t += "8.0"; + else if (v.major == 13 && v.minor == 10) t += "7.1"; + else fail << "unable to map msvc compiler version '" << v.string + << "' to runtime version"; + } + + ot = t; + } + else + ot = t = *xt; + + // Derive the toolchain pattern. + // + // If the compiler name is/starts with 'cl' (e.g., cl.exe, cl-14), + // then replace it with '*' and use it as a pattern for lib, link, + // etc. + // + string cpat (pattern (xc, "cl", nullptr, ".-")); + string bpat (cpat); // Binutils pattern is the same as toolchain. + + // Runtime and standard library. + // + string rt ("msvc"); + string csl ("msvc"); + string xsl; + switch (xl) + { + case lang::c: xsl = csl; break; + case lang::cxx: xsl = "msvcp"; break; + } + + return compiler_info { + move (gr.path), + move (gr.id), + compiler_class::msvc, + move (v), + move (gr.signature), + "", + move (t), + move (ot), + move (cpat), + move (bpat), + move (rt), + move (csl), + move (xsl)}; + } + + // Compiler checks can be expensive (we often need to run the compiler + // several times) so we cache the result. + // + static map<string, compiler_info> cache; + + const compiler_info& + guess (const char* xm, + lang xl, + const path& xc, + const string* xis, + const string* xv, + const string* xt, + const strings* c_po, const strings* x_po, + const strings* c_co, const strings* x_co, + const strings* c_lo, const strings* x_lo) + { + // First check the cache. 
+ // + string key; + { + sha256 cs; + cs.append (static_cast<size_t> (xl)); + cs.append (xc.string ()); + if (xis != nullptr) cs.append (*xis); + if (c_po != nullptr) hash_options (cs, *c_po); + if (x_po != nullptr) hash_options (cs, *x_po); + if (c_co != nullptr) hash_options (cs, *c_co); + if (x_co != nullptr) hash_options (cs, *x_co); + if (c_lo != nullptr) hash_options (cs, *c_lo); + if (x_lo != nullptr) hash_options (cs, *x_lo); + key = cs.string (); + + auto i (cache.find (key)); + if (i != cache.end ()) + return i->second; + } + + // Parse the user-specified compiler id (config.x.id). + // + optional<compiler_id> xi; + if (xis != nullptr) + { + try + { + xi = compiler_id (*xis); + } + catch (const invalid_argument& e) + { + fail << "invalid compiler id '" << *xis << "' " + << "specified in variable config." << xm << ".id: " << e; + } + } + + pair<compiler_type, size_t> pre (pre_guess (xl, xc, xi)); + compiler_type& type (pre.first); + + // If we could pre-guess the type based on the executable name, then + // try the test just for that compiler. + // + guess_result gr; + + if (type != invalid_compiler_type) + { + gr = guess (xm, xl, xc, xi, type); + + if (gr.empty ()) + { + warn << xc << " looks like " << type << " but it is not" << + info << "use config." << xm << " to override"; + + type = invalid_compiler_type; // Clear pre-guess. + } + } + + if (gr.empty ()) + gr = guess (xm, xl, xc, xi, type); + + if (gr.empty ()) + fail << "unable to guess " << xl << " compiler type of " << xc << + info << "use config." 
<< xm << ".id to specify explicitly"; + + compiler_info r; + const compiler_id& id (gr.id); + + switch (id.type) + { + case compiler_type::gcc: + { + r = guess_gcc (xm, xl, xc, xv, xt, + c_po, x_po, c_co, x_co, c_lo, x_lo, + move (gr)); + break; + } + case compiler_type::clang: + { + r = guess_clang (xm, xl, xc, xv, xt, + c_po, x_po, c_co, x_co, c_lo, x_lo, + move (gr)); + break; + } + case compiler_type::msvc: + { + r = guess_msvc (xm, xl, xc, xv, xt, + c_po, x_po, c_co, x_co, c_lo, x_lo, + move (gr)); + break; + } + case compiler_type::icc: + { + r = guess_icc (xm, xl, xc, xv, xt, + c_po, x_po, c_co, x_co, c_lo, x_lo, + move (gr)); + break; + } + } + + // By default use the signature line to generate the checksum. + // + if (r.checksum.empty ()) + r.checksum = sha256 (r.signature).string (); + + // Derive binutils pattern unless this has already been done by the + // compiler-specific code. + // + + // When cross-compiling the whole toolchain is normally prefixed with + // the target triplet, e.g., x86_64-w64-mingw32-{gcc,g++,ar,ld}. But + // oftentimes it is not quite canonical (and sometimes -- outright + // bogus). So instead we are going to first try to derive the prefix + // using the pre-guessed position of the compiler name. Note that we + // still want to try the target in case we could not pre-guess (think + // x86_64-w64-mingw32-c++). + // + // BTW, for GCC we also get gcc-{ar,ranlib} (but not -ld) which add + // support for the LTO plugin though it seems more recent GNU binutils + // (2.25) are able to load the plugin when needed automatically. So it + // doesn't seem we should bother trying to support this on our end (one + // way we could do it is by passing config.bin.{ar,ranlib} as hints). + // + // It's also normal for native (i.e., non-cross-compiler) builds of GCC + // and Clang to not have binutils installed in the same directory and + // instead relying on the system ones. 
In this case, if the compiler is + // specified with the absolute path, the pattern will be the fallback + // search directory (though it feels like it should be checked first + // rather than last). + // + if (r.bin_pattern.empty ()) + { + if (pre.second != 0 && + pre.second != string::npos && + !path::traits_type::is_separator (xc.string ()[pre.second - 1])) + { + r.bin_pattern.assign (xc.string (), 0, pre.second); + r.bin_pattern += '*'; // '-' or similar is already there. + } + } + + if (r.bin_pattern.empty ()) + { + const string& t (r.target); + size_t n (t.size ()); + + if (xc.size () > n + 1) + { + const string& l (xc.leaf ().string ()); + + if (l.size () > n + 1 && l.compare (0, n, t) == 0 && l[n] == '-') + { + path p (xc.directory ()); + p /= t; + p += "-*"; + r.bin_pattern = move (p).string (); + } + } + } + + // If we could not derive the pattern, then see if we can come up with a + // fallback search directory. + // + if (r.bin_pattern.empty ()) + { + const path& p (r.path.recall.empty () ? xc : r.path.recall); + + if (!p.simple ()) + r.bin_pattern = p.directory ().representation (); // Trailing slash. 
+ } + + return (cache[key] = move (r)); + } + + path + guess_default (lang xl, const string& cid, const string& pat) + { + compiler_id id (cid); + const char* s (nullptr); + + using type = compiler_type; + + switch (xl) + { + case lang::c: + { + switch (id.type) + { + case type::gcc: s = "gcc"; break; + case type::clang: s = "clang"; break; + case type::icc: s = "icc"; break; + case type::msvc: s = "cl"; break; + } + + break; + } + case lang::cxx: + { + switch (id.type) + { + case type::gcc: s = "g++"; break; + case type::clang: s = "clang++"; break; + case type::icc: s = "icpc"; break; + case type::msvc: s = "cl"; break; + } + + break; + } + } + + return path (apply_pattern (s, &pat)); + } + } +} diff --git a/libbuild2/cc/guess.hxx b/libbuild2/cc/guess.hxx new file mode 100644 index 0000000..3677cc7 --- /dev/null +++ b/libbuild2/cc/guess.hxx @@ -0,0 +1,246 @@ +// file : libbuild2/cc/guess.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_GUESS_HXX +#define LIBBUILD2_CC_GUESS_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/cc/types.hxx> + +namespace build2 +{ + namespace cc + { + // Compiler id consisting of a type and optional variant. If the variant + // is not empty, then the id is spelled out as 'type-variant', similar to + // target triplets (this also means that the type cannot contain '-'). + // + // Currently recognized compilers and their ids: + // + // gcc GCC gcc/g++ + // clang Vanilla Clang clang/clang++ + // clang-apple Apple Clang clang/clang++ and the gcc/g++ "alias" + // msvc Microsoft cl.exe + // icc Intel icc/icpc + // + // Note that the user can provide a custom id with one of the predefined + // types and a custom variant (say 'gcc-tasking'). + // + enum class compiler_type + { + gcc = 1, // 0 value represents invalid type. 
+ clang, + msvc, + icc + // Update compiler_id(string) and to_string() if adding a new type. + }; + + const compiler_type invalid_compiler_type = static_cast<compiler_type> (0); + + string + to_string (compiler_type); + + inline ostream& + operator<< (ostream& o, const compiler_type& t) + { + return o << to_string (t); + } + + struct compiler_id + { + compiler_type type = invalid_compiler_type; + std::string variant; + + bool + empty () const {return type == invalid_compiler_type;} + + std::string + string () const; + + compiler_id () + : type (invalid_compiler_type) {} + + compiler_id (compiler_type t, std::string v) + : type (t), variant (move (v)) {} + + explicit + compiler_id (const std::string&); + }; + + inline ostream& + operator<< (ostream& o, const compiler_id& id) + { + return o << id.string (); + } + + // Compiler class describes a set of compilers that follow more or less + // the same command line interface. Compilers that don't belong to any of + // the existing classes are in classes of their own (say, Sun CC would be + // on its own if we were to support it). + // + // Currently defined compiler classes: + // + // gcc gcc, clang, clang-apple, icc (on non-Windows) + // msvc msvc, clang-cl, icc (Windows) + // + enum class compiler_class + { + gcc, + msvc + }; + + string + to_string (compiler_class); + + inline ostream& + operator<< (ostream& o, compiler_class c) + { + return o << to_string (c); + } + + // Compiler version. Here we map the various compiler version formats to + // something that resembles the MAJOR.MINOR.PATCH-BUILD form of the + // Semantic Versioning. While the MAJOR.MINOR part is relatively + // straightforward, PATCH may be empty and BUILD can contain pretty much + // anything (including spaces). + // + // gcc A.B.C[ ...] {A, B, C, ...} + // clang A.B.C[( |-)...] {A, B, C, ...} + // clang-apple A.B[.C] ... {A, B, C, ...} + // icc A.B[.C.D] ... 
{A, B, C, D ...} + // msvc A.B.C[.D] {A, B, C, D} + // + // Note that the clang-apple version is a custom Apple version and does + // not correspond to the vanilla clang version. + // + struct compiler_version + { + std::string string; + + // Currently all the compilers that we support have numeric MAJOR, + // MINOR, and PATCH components and it makes sense to represent them as + // integers for easy comparison. If we meet a compiler for which this + // doesn't hold, then we will probably just set these to 0 and let the + // user deal with the string representation. + // + uint64_t major; + uint64_t minor; + uint64_t patch; + std::string build; + }; + + // Compiler information. + // + // The signature is normally the -v/--version line that was used to guess + // the compiler id and its version. + // + // The checksum is used to detect compiler changes. It is calculated in a + // compiler-specific manner (usually the output of -v/--version) and is + // not bulletproof (e.g., it most likely won't detect that the underlying + // assembler or linker has changed). However, it should detect most + // common cases, such as an upgrade to a new version or a configuration + // change. + // + // Note that we assume the checksum incorporates the (default) target so + // that if the compiler changes but only in what it targets, then the + // checksum will still change. This is currently the case for all the + // compilers that we support. + // + // The target is the compiler's target architecture triplet. Note that + // unlike all the preceding fields, this one takes into account the + // compile options (e.g., -m32). + // + // The pattern is the toolchain program pattern that could sometimes be + // derived for some toolchains. For example, i686-w64-mingw32-*-4.9. + // + // The bin_pattern is the binutils program pattern that could sometimes be + // derived for some toolchains. For example, i686-w64-mingw32-*. 
If the + // pattern could not be derived, then it could contain a fallback search + // directory, in which case it will end with a directory separator but + // will not contain '*'. + // + struct compiler_info + { + process_path path; + compiler_id id; + compiler_class class_; + compiler_version version; + string signature; + string checksum; + string target; + string original_target; // As reported by the compiler. + string pattern; + string bin_pattern; + + // Compiler runtime, C standard library, and language (e.g., C++) + // standard library. + // + // The runtime is the low-level compiler runtime library and its name is + // the library/project name. Current values are (but can also be some + // custom name specified with Clang's --rtlib): + // + // libgcc + // compiler-rt (clang) + // msvc + // + // The C standard library is normally the library/project name (e.g., + // glibc, klibc, newlib, etc.) but if there is none, then we fall back to + // the vendor name (e.g., freebsd, apple). Current values are: + // + // glibc + // msvc (msvcrt.lib/msvcrNNN.dll) + // freebsd + // apple + // newlib (also used by Cygwin) + // klibc + // bionic + // uclibc + // musl + // dietlibc + // other + // none + // + // The C++ standard library is normally the library/project name. + // Current values are: + // + // libstdc++ + // libc++ + // msvcp (msvcprt.lib/msvcpNNN.dll) + // other + // none + // + string runtime; + string c_stdlib; + string x_stdlib; + }; + + // In a sense this is analogous to the language standard which we handle + // via a virtual function in common. However, duplicating this hairy ball + // of fur in multiple places doesn't seem wise, especially considering + // that most of it will be the same, at least for C and C++. + // + const compiler_info& + guess (const char* xm, // Module (for variable names in diagnostics). + lang xl, // Language. + const path& xc, // Compiler path. + const string* xi, // Compiler id (optional). 
+ const string* xv, // Compiler version (optional). + const string* xt, // Compiler target (optional). + const strings* c_poptions, const strings* x_poptions, + const strings* c_coptions, const strings* x_coptions, + const strings* c_loptions, const strings* x_loptions); + + // Given a language, compiler id, and optionally an (empty) pattern, + // return an appropriate default compiler path. + // + // For example, for (lang::cxx, gcc, *-4.9) we will get g++-4.9. + // + path + guess_default (lang, const string& cid, const string& pattern); + } +} + +#endif // LIBBUILD2_CC_GUESS_HXX diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx new file mode 100644 index 0000000..f45a1bf --- /dev/null +++ b/libbuild2/cc/init.cxx @@ -0,0 +1,493 @@ +// file : libbuild2/cc/init.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/init.hxx> + +#include <libbuild2/file.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/config/utility.hxx> + +#include <libbuild2/cc/target.hxx> +#include <libbuild2/cc/utility.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + // Scope operation callback that cleans up module sidebuilds. + // + static target_state + clean_module_sidebuilds (action, const scope& rs, const dir&) + { + context& ctx (rs.ctx); + + const dir_path& out_root (rs.out_path ()); + + dir_path d (out_root / rs.root_extra->build_dir / modules_sidebuild_dir); + + if (exists (d)) + { + if (rmdir_r (ctx, d)) + { + // Clean up cc/ if it became empty. + // + d = out_root / rs.root_extra->build_dir / module_dir; + if (empty (d)) + { + rmdir (ctx, d); + + // And build/ if it also became empty (e.g., in case of a build + // with a transient configuration). 
+ // + d = out_root / rs.root_extra->build_dir; + if (empty (d)) + rmdir (ctx, d); + } + + return target_state::changed; + } + } + + return target_state::unchanged; + } + + bool + core_vars_init (scope& rs, + scope&, + const location& loc, + unique_ptr<module_base>&, + bool first, + bool, + const variable_map&) + { + tracer trace ("cc::core_vars_init"); + l5 ([&]{trace << "for " << rs;}); + + assert (first); + + // Load bin.vars (we need its config.bin.target/pattern for hints). + // + if (!cast_false<bool> (rs["bin.vars.loaded"])) + load_module (rs, rs, "bin.vars", loc); + + // Enter variables. Note: some overridable, some not. + // + auto& v (rs.ctx.var_pool.rw (rs)); + + auto v_t (variable_visibility::target); + + v.insert<strings> ("config.cc.poptions", true); + v.insert<strings> ("config.cc.coptions", true); + v.insert<strings> ("config.cc.loptions", true); + v.insert<strings> ("config.cc.aoptions", true); + v.insert<strings> ("config.cc.libs", true); + + v.insert<strings> ("cc.poptions"); + v.insert<strings> ("cc.coptions"); + v.insert<strings> ("cc.loptions"); + v.insert<strings> ("cc.aoptions"); + v.insert<strings> ("cc.libs"); + + v.insert<strings> ("cc.export.poptions"); + v.insert<strings> ("cc.export.coptions"); + v.insert<strings> ("cc.export.loptions"); + v.insert<vector<name>> ("cc.export.libs"); + + // Hint variables (not overridable). + // + v.insert<string> ("config.cc.id"); + v.insert<string> ("config.cc.hinter"); // Hinting module. + v.insert<string> ("config.cc.pattern"); + v.insert<target_triplet> ("config.cc.target"); + + // Compiler runtime and C standard library. + // + v.insert<string> ("cc.runtime"); + v.insert<string> ("cc.stdlib"); + + // Target type, for example, "C library" or "C++ library". Should be set + // on the target as a rule-specific variable by the matching rule to the + // name of the module (e.g., "c", "cxx"). Currently only set for + // libraries and is used to decide which *.libs to use during static + // linking. 
+ // + // It can also be the special "cc" value which means a C-common library + // but specific language is not known. Used in the import installed + // logic. + // + v.insert<string> ("cc.type", v_t); + + // If set and is true, then this (imported) library has been found in a + // system library search directory. + // + v.insert<bool> ("cc.system", v_t); + + // C++ module name. Set on the bmi*{} target as a rule-specific variable + // by the matching rule. Can also be set by the user (normally via the + // x.module_name alias) on the x_mod{} source. + // + v.insert<string> ("cc.module_name", v_t); + + // Ability to disable using preprocessed output for compilation. + // + v.insert<bool> ("config.cc.reprocess", true); + v.insert<bool> ("cc.reprocess"); + + // Register scope operation callback. + // + // It feels natural to clean up sidebuilds as a post operation but + // that prevents the (otherwise-empty) out root directory from being + // cleaned up (via the standard fsdir{} chain). + // + rs.operation_callbacks.emplace ( + perform_clean_id, + scope::operation_callback {&clean_module_sidebuilds, nullptr /*post*/}); + + return true; + } + + bool + core_guess_init (scope& rs, + scope&, + const location& loc, + unique_ptr<module_base>&, + bool first, + bool, + const variable_map& h) + { + tracer trace ("cc::core_guess_init"); + l5 ([&]{trace << "for " << rs;}); + + assert (first); + + // Load cc.core.vars. + // + if (!cast_false<bool> (rs["cc.core.vars.loaded"])) + load_module (rs, rs, "cc.core.vars", loc); + + // config.cc.{id,hinter} + // + { + // These values must be hinted. + // + rs.assign<string> ("cc.id") = cast<string> (h["config.cc.id"]); + rs.assign<string> ("cc.hinter") = cast<string> (h["config.cc.hinter"]); + } + + // config.cc.target + // + { + // This value must be hinted. + // + const auto& t (cast<target_triplet> (h["config.cc.target"])); + + // Also enter as cc.target.{cpu,vendor,system,version,class} for + // convenience of access. 
+ // + rs.assign<string> ("cc.target.cpu") = t.cpu; + rs.assign<string> ("cc.target.vendor") = t.vendor; + rs.assign<string> ("cc.target.system") = t.system; + rs.assign<string> ("cc.target.version") = t.version; + rs.assign<string> ("cc.target.class") = t.class_; + + rs.assign<target_triplet> ("cc.target") = t; + } + + // config.cc.pattern + // + { + // This value could be hinted. + // + rs.assign<string> ("cc.pattern") = + cast_empty<string> (h["config.cc.pattern"]); + } + + // cc.runtime + // cc.stdlib + // + rs.assign ("cc.runtime") = cast<string> (h["cc.runtime"]); + rs.assign ("cc.stdlib") = cast<string> (h["cc.stdlib"]); + + return true; + } + + bool + core_config_init (scope& rs, + scope&, + const location& loc, + unique_ptr<module_base>&, + bool first, + bool, + const variable_map& hints) + { + tracer trace ("cc::core_config_init"); + l5 ([&]{trace << "for " << rs;}); + + assert (first); + + // Load cc.core.guess. + // + if (!cast_false<bool> (rs["cc.core.guess.loaded"])) + load_module (rs, rs, "cc.core.guess", loc); + + // Configure. + // + + // Adjust module priority (compiler). + // + config::save_module (rs, "cc", 250); + + // Note that we are not having a config report since it will just + // duplicate what has already been printed by the hinting module. + + // config.cc.{p,c,l}options + // config.cc.libs + // + // @@ Same nonsense as in module. 
+ // + // + rs.assign ("cc.poptions") += cast_null<strings> ( + config::optional (rs, "config.cc.poptions")); + + rs.assign ("cc.coptions") += cast_null<strings> ( + config::optional (rs, "config.cc.coptions")); + + rs.assign ("cc.loptions") += cast_null<strings> ( + config::optional (rs, "config.cc.loptions")); + + rs.assign ("cc.aoptions") += cast_null<strings> ( + config::optional (rs, "config.cc.aoptions")); + + rs.assign ("cc.libs") += cast_null<strings> ( + config::optional (rs, "config.cc.libs")); + + if (lookup l = config::omitted (rs, "config.cc.reprocess").first) + rs.assign ("cc.reprocess") = *l; + + // Load the bin.config module. + // + if (!cast_false<bool> (rs["bin.config.loaded"])) + { + // Prepare configuration hints. They are only used on the first load + // of bin.config so we only populate them on our first load. + // + variable_map h (rs.ctx); + + if (first) + { + // Note that all these variables have already been registered. + // + h.assign ("config.bin.target") = + cast<target_triplet> (rs["cc.target"]).string (); + + if (auto l = hints["config.bin.pattern"]) + h.assign ("config.bin.pattern") = cast<string> (l); + } + + load_module (rs, rs, "bin.config", loc, false, h); + } + + // Verify bin's target matches ours (we do it even if we loaded it + // ourselves since the target can come from the configuration and not + // our hint). + // + if (first) + { + const auto& ct (cast<target_triplet> (rs["cc.target"])); + const auto& bt (cast<target_triplet> (rs["bin.target"])); + + if (bt != ct) + { + const auto& h (cast<string> (rs["cc.hinter"])); + + fail (loc) << h << " and bin module target mismatch" << + info << h << " target is " << ct << + info << "bin target is " << bt; + } + } + + // Load bin.*.config for bin.* modules we may need (see core_init() + // below). 
+ // + const string& tsys (cast<string> (rs["cc.target.system"])); + + if (!cast_false<bool> (rs["bin.ar.config.loaded"])) + load_module (rs, rs, "bin.ar.config", loc); + + if (tsys == "win32-msvc") + { + if (!cast_false<bool> (rs["bin.ld.config.loaded"])) + load_module (rs, rs, "bin.ld.config", loc); + } + + if (tsys == "mingw32") + { + if (!cast_false<bool> (rs["bin.rc.config.loaded"])) + load_module (rs, rs, "bin.rc.config", loc); + } + + return true; + } + + bool + core_init (scope& rs, + scope&, + const location& loc, + unique_ptr<module_base>&, + bool first, + bool, + const variable_map& hints) + { + tracer trace ("cc::core_init"); + l5 ([&]{trace << "for " << rs;}); + + assert (first); + + const string& tsys (cast<string> (rs["cc.target.system"])); + + // Load cc.core.config. + // + if (!cast_false<bool> (rs["cc.core.config.loaded"])) + load_module (rs, rs, "cc.core.config", loc, false, hints); + + // Load the bin module. + // + if (!cast_false<bool> (rs["bin.loaded"])) + load_module (rs, rs, "bin", loc); + + // Load the bin.ar module. + // + if (!cast_false<bool> (rs["bin.ar.loaded"])) + load_module (rs, rs, "bin.ar", loc); + + // For this target we link things directly with link.exe so load the + // bin.ld module. + // + if (tsys == "win32-msvc") + { + if (!cast_false<bool> (rs["bin.ld.loaded"])) + load_module (rs, rs, "bin.ld", loc); + } + + // If our target is MinGW, then we will need the resource compiler + // (windres) in order to embed manifests into executables. + // + if (tsys == "mingw32") + { + if (!cast_false<bool> (rs["bin.rc.loaded"])) + load_module (rs, rs, "bin.rc", loc); + } + + return true; + } + + // The cc module is an "alias" for c and cxx. Its intended use is to make + // sure that the C/C++ configuration is captured in an amalgamation rather + // than subprojects. 
+ // + static inline bool + init_alias (tracer& trace, + scope& rs, + scope& bs, + const char* m, + const char* c, + const char* c_loaded, + const char* cxx, + const char* cxx_loaded, + const location& loc, + const variable_map& hints) + { + l5 ([&]{trace << "for " << bs;}); + + // We only support root loading (which means there can only be one). + // + if (&rs != &bs) + fail (loc) << m << " module must be loaded in project root"; + + // We want to order the loading to match what user specified on the + // command line (config.c or config.cxx). This way the first loaded + // module (with user-specified config.*) will hint the compiler to the + // second. + // + bool lc (!cast_false<bool> (rs[c_loaded])); + bool lp (!cast_false<bool> (rs[cxx_loaded])); + + // If none of them are already loaded, load c first only if config.c + // is specified. + // + if (lc && lp && rs["config.c"]) + { + load_module (rs, rs, c, loc, false, hints); + load_module (rs, rs, cxx, loc, false, hints); + } + else + { + if (lp) load_module (rs, rs, cxx, loc, false, hints); + if (lc) load_module (rs, rs, c, loc, false, hints); + } + + return true; + } + + bool + config_init (scope& rs, + scope& bs, + const location& loc, + unique_ptr<module_base>&, + bool, + bool, + const variable_map& hints) + { + tracer trace ("cc::config_init"); + return init_alias (trace, rs, bs, + "cc.config", + "c.config", "c.config.loaded", + "cxx.config", "cxx.config.loaded", + loc, hints); + } + + bool + init (scope& rs, + scope& bs, + const location& loc, + unique_ptr<module_base>&, + bool, + bool, + const variable_map& hints) + { + tracer trace ("cc::init"); + return init_alias (trace, rs, bs, + "cc", + "c", "c.loaded", + "cxx", "cxx.loaded", + loc, hints); + } + + static const module_functions mod_functions[] = + { + // NOTE: don't forget to also update the documentation in init.hxx if + // changing anything here. 
+ + {"cc.core.vars", nullptr, core_vars_init}, + {"cc.core.guess", nullptr, core_guess_init}, + {"cc.core.config", nullptr, core_config_init}, + {"cc.core", nullptr, core_init}, + {"cc.config", nullptr, config_init}, + {"cc", nullptr, init}, + {nullptr, nullptr, nullptr} + }; + + const module_functions* + build2_cc_load () + { + return mod_functions; + } + } +} diff --git a/libbuild2/cc/init.hxx b/libbuild2/cc/init.hxx new file mode 100644 index 0000000..b98e816 --- /dev/null +++ b/libbuild2/cc/init.hxx @@ -0,0 +1,36 @@ +// file : libbuild2/cc/init.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_INIT_HXX +#define LIBBUILD2_CC_INIT_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/module.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + // Module `cc` does not require bootstrapping. + // + // Submodules: + // + // `cc.core.vars` -- registers some variables. + // `cc.core.guess` -- loads cc.core.vars and sets some variables. + // `cc.core.config` -- loads cc.core.guess and sets more variables. + // `cc.core` -- loads cc.core.config and registers target types and + // rules. + // `cc.config` -- loads {c,cxx}.config. + // `cc` -- loads c and cxx. 
+ // + extern "C" LIBBUILD2_CC_SYMEXPORT const module_functions* + build2_cc_load (); + } +} + +#endif // LIBBUILD2_CC_INIT_HXX diff --git a/libbuild2/cc/install-rule.cxx b/libbuild2/cc/install-rule.cxx new file mode 100644 index 0000000..670757e --- /dev/null +++ b/libbuild2/cc/install-rule.cxx @@ -0,0 +1,355 @@ +// file : libbuild2/cc/install-rule.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/install-rule.hxx> + +#include <libbuild2/algorithm.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/utility.hxx> +#include <libbuild2/cc/link-rule.hxx> // match() + +using namespace std; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + // install_rule + // + install_rule:: + install_rule (data&& d, const link_rule& l) + : common (move (d)), link_ (l) {} + + const target* install_rule:: + filter (action a, const target& t, prerequisite_iterator& i) const + { + // NOTE: see libux_install_rule::filter() if changing anything here. + + const prerequisite& p (i->prerequisite); + + // If this is a shared library prerequisite, install it as long as it + // is in the same amalgamation as we are. + // + // Less obvious: we also want to install a static library prerequisite + // of a library (since it could be referenced from its .pc file, etc). + // + // Note: for now we assume these prerequisites never come from see- + // through groups. + // + // Note: we install ad hoc prerequisites by default. + // + otype ot (link_type (t).type); + + bool st (t.is_a<exe> () || t.is_a<libs> ()); // Target needs shared. + bool at (t.is_a<liba> () || t.is_a<libs> ()); // Target needs static. + + if ((st && (p.is_a<libx> () || p.is_a<libs> ())) || + (at && (p.is_a<libx> () || p.is_a<liba> ()))) + { + const target* pt (&search (t, p)); + + // If this is the lib{}/libu*{} group, pick a member which we would + // link. 
For libu*{} we want the "see through" logic. + // + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, link_info (t.base_scope (), ot)); + + // Note: not redundant since we are returning a member. + // + if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ())) + return pt->in (t.weak_scope ()) ? pt : nullptr; + + // See through to libu*{} members. Note that we are always in the same + // project (and thus amalgamation). + // + if (pt->is_a<libux> ()) + return pt; + } + + // The rest of the tests only succeed if the base filter() succeeds. + // + const target* pt (file_rule::filter (a, t, p)); + if (pt == nullptr) + return pt; + + // Don't install executable's prerequisite headers and module + // interfaces. + // + // Note that if they come from a group, then we assume the entire + // group is not to be installed. + // + if (t.is_a<exe> ()) + { + if (x_header (p)) + pt = nullptr; + else if (p.type.see_through) + { + for (i.enter_group (); i.group (); ) + { + if (x_header (*++i)) + pt = nullptr; + } + } + + if (pt == nullptr) + return pt; + } + + // Here is a problem: if the user spells the obj*/bmi*{} targets + // explicitly, then the source files, including headers/modules may be + // specified as prerequisites of those targets and not of this target. + // While this can be worked around for headers by also listing them as + // prerequisites of this target, this won't work for modules (since they + // are compiled). So what we are going to do here is detect bmi*{} and + // translate them to their mxx{} (this doesn't quite work for headers + // since there would normally be many of them). + // + // Note: for now we assume bmi*{} never come from see-through groups.
+ // + bool g (false); + if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi))) + { + if (g) + resolve_group (a, *pt); + + for (prerequisite_member pm: + group_prerequisite_members (a, *pt, members_mode::maybe)) + { + // This is tricky: we need to "look" inside groups for mxx{} but if + // found, remap to the group, not member. + // + if (pm.is_a (*x_mod)) + { + pt = t.is_a<exe> () + ? nullptr + : file_rule::filter (a, *pt, pm.prerequisite); + break; + } + } + + if (pt == nullptr) + return pt; + } + + return pt; + } + + bool install_rule:: + match (action a, target& t, const string& hint) const + { + // @@ How do we split the hint between the two? + // + + // We only want to handle installation if we are also the ones building + // this target. So first run link's match(). + // + return link_.match (a, t, hint) && file_rule::match (a, t, ""); + } + + recipe install_rule:: + apply (action a, target& t) const + { + recipe r (file_rule::apply (a, t)); + + if (a.operation () == update_id) + { + // Signal to the link rule that this is update for install. And if the + // update has already been executed, verify it was done for install. + // + auto& md (t.data<link_rule::match_data> ()); + + if (md.for_install) + { + if (!*md.for_install) + fail << "target " << t << " already updated but not for install"; + } + else + md.for_install = true; + } + else // install or uninstall + { + // Derive shared library paths and cache them in the target's aux + // storage if we are un/installing (used in the *_extra() functions + // below). + // + static_assert (sizeof (link_rule::libs_paths) <= target::data_size, + "insufficient space"); + + if (file* f = t.is_a<libs> ()) + { + if (!f->path ().empty ()) // Not binless. + { + const string* p (cast_null<string> (t["bin.lib.prefix"])); + const string* s (cast_null<string> (t["bin.lib.suffix"])); + t.data ( + link_.derive_libs_paths (*f, + p != nullptr ? p->c_str (): nullptr, + s != nullptr ? 
s->c_str (): nullptr)); + } + } + } + + return r; + } + + bool install_rule:: + install_extra (const file& t, const install_dir& id) const + { + bool r (false); + + if (t.is_a<libs> ()) + { + // Here we may have a bunch of symlinks that we need to install. + // + const scope& rs (t.root_scope ()); + auto& lp (t.data<link_rule::libs_paths> ()); + + auto ln = [&rs, &id] (const path& f, const path& l) + { + install_l (rs, id, f.leaf (), l.leaf (), 2 /* verbosity */); + return true; + }; + + const path& lk (lp.link); + const path& ld (lp.load); + const path& so (lp.soname); + const path& in (lp.interm); + + const path* f (lp.real); + + if (!in.empty ()) {r = ln (*f, in) || r; f = &in;} + if (!so.empty ()) {r = ln (*f, so) || r; f = &so;} + if (!ld.empty ()) {r = ln (*f, ld) || r; f = &ld;} + if (!lk.empty ()) {r = ln (*f, lk) || r; } + } + + return r; + } + + bool install_rule:: + uninstall_extra (const file& t, const install_dir& id) const + { + bool r (false); + + if (t.is_a<libs> ()) + { + // Here we may have a bunch of symlinks that we need to uninstall. + // + const scope& rs (t.root_scope ()); + auto& lp (t.data<link_rule::libs_paths> ()); + + auto rm = [&rs, &id] (const path& l) + { + return uninstall_f (rs, id, nullptr, l.leaf (), 2 /* verbosity */); + }; + + const path& lk (lp.link); + const path& ld (lp.load); + const path& so (lp.soname); + const path& in (lp.interm); + + if (!lk.empty ()) r = rm (lk) || r; + if (!ld.empty ()) r = rm (ld) || r; + if (!so.empty ()) r = rm (so) || r; + if (!in.empty ()) r = rm (in) || r; + } + + return r; + } + + // libux_install_rule + // + libux_install_rule:: + libux_install_rule (data&& d, const link_rule& l) + : common (move (d)), link_ (l) {} + + const target* libux_install_rule:: + filter (action a, const target& t, prerequisite_iterator& i) const + { + const prerequisite& p (i->prerequisite); + + // The "see through" semantics that should be parallel to install_rule + // above.
In particular, here we use libue/libua/libus{} as proxies for + // exe/liba/libs{} there. + // + otype ot (link_type (t).type); + + bool st (t.is_a<libue> () || t.is_a<libus> ()); // Target needs shared. + bool at (t.is_a<libua> () || t.is_a<libus> ()); // Target needs static. + + if ((st && (p.is_a<libx> () || p.is_a<libs> ())) || + (at && (p.is_a<libx> () || p.is_a<liba> ()))) + { + const target* pt (&search (t, p)); + + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, link_info (t.base_scope (), ot)); + + if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ())) + return pt->in (t.weak_scope ()) ? pt : nullptr; + + if (pt->is_a<libux> ()) + return pt; + } + + const target* pt (install::file_rule::instance.filter (a, t, p)); + if (pt == nullptr) + return pt; + + if (t.is_a<libue> ()) + { + if (x_header (p)) + pt = nullptr; + else if (p.type.see_through) + { + for (i.enter_group (); i.group (); ) + { + if (x_header (*++i)) + pt = nullptr; + } + } + + if (pt == nullptr) + return pt; + } + + bool g (false); + if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi))) + { + if (g) + resolve_group (a, *pt); + + for (prerequisite_member pm: + group_prerequisite_members (a, *pt, members_mode::maybe)) + { + if (pm.is_a (*x_mod)) + { + pt = t.is_a<libue> () + ? nullptr + : install::file_rule::instance.filter (a, *pt, pm.prerequisite); + break; + } + } + + if (pt == nullptr) + return pt; + } + + return pt; + } + + bool libux_install_rule:: + match (action a, target& t, const string& hint) const + { + // We only want to handle installation if we are also the ones building + // this target. So first run link's match(). 
+ // + return link_.match (a, t, hint) && alias_rule::match (a, t, ""); + } + } +} diff --git a/libbuild2/cc/install-rule.hxx b/libbuild2/cc/install-rule.hxx new file mode 100644 index 0000000..6d7ceb8 --- /dev/null +++ b/libbuild2/cc/install-rule.hxx @@ -0,0 +1,82 @@ +// file : libbuild2/cc/install-rule.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_INSTALL_RULE_HXX +#define LIBBUILD2_CC_INSTALL_RULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/install/rule.hxx> + +#include <libbuild2/cc/types.hxx> +#include <libbuild2/cc/common.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + class link_rule; + + // Installation rule for exe{} and lib*{}. Here we do: + // + // 1. Signal to the link rule that this is update for install. + // + // 2. Custom filtering of prerequisites (e.g., headers of an exe{}). + // + // 3. Extra un/installation (e.g., libs{} symlinks). + // + class LIBBUILD2_CC_SYMEXPORT install_rule: public install::file_rule, + virtual common + { + public: + install_rule (data&&, const link_rule&); + + virtual const target* + filter (action, const target&, prerequisite_iterator&) const override; + + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + virtual bool + install_extra (const file&, const install_dir&) const override; + + virtual bool + uninstall_extra (const file&, const install_dir&) const override; + + private: + const link_rule& link_; + }; + + // Installation rule for libu*{}. + // + // While libu*{} members themselves are not installable, we need to see + // through them in case they depend on stuff that we need to install + // (e.g., headers). Note that we use the alias_rule as a base. 
+ // + class LIBBUILD2_CC_SYMEXPORT libux_install_rule: + public install::alias_rule, + virtual common + { + public: + libux_install_rule (data&&, const link_rule&); + + virtual const target* + filter (action, const target&, prerequisite_iterator&) const override; + + virtual bool + match (action, target&, const string&) const override; + + private: + const link_rule& link_; + }; + } +} + +#endif // LIBBUILD2_CC_INSTALL_RULE_HXX diff --git a/libbuild2/cc/lexer+char-literal.test.testscript b/libbuild2/cc/lexer+char-literal.test.testscript new file mode 100644 index 0000000..afd16dd --- /dev/null +++ b/libbuild2/cc/lexer+char-literal.test.testscript @@ -0,0 +1,67 @@ +# file : libbuild2/cc/lexer+char-literal.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test character literals. +# + +: normal +: +$* <<EOI >>EOO +'a' +'aa' +'"' +EOI +<char literal> +<char literal> +<char literal> +EOO + +: prefix +: +$* <<EOI >>EOO +L'a' +U'a' +u'a' +u8'a' +u8R'a' +EOI +<char literal> +<char literal> +<char literal> +<char literal> +'u8R' +<char literal> +EOO + +: suffix +: +$* <<EOI >>EOO +'a'x +'a'_X123 +EOI +<char literal> +<char literal> +EOO + +: escape +: +$* <<EOI >>EOO +'\'' +'\\' +'\\\'' +'\n' +U'\U0001f34c' +EOI +<char literal> +<char literal> +<char literal> +<char literal> +<char literal> +EOO + +: unterminated +: +$* <"'a" 2>>EOE != 0 +stdin:1:1: error: unterminated character literal +EOE diff --git a/libbuild2/cc/lexer+comment.test.testscript b/libbuild2/cc/lexer+comment.test.testscript new file mode 100644 index 0000000..bfcc440 --- /dev/null +++ b/libbuild2/cc/lexer+comment.test.testscript @@ -0,0 +1,88 @@ +# file : libbuild2/cc/lexer+comment.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test C and C++ comments. 
+# + +: c-comment +: +$* <<EOI +/* 'one' */ +/* "two" // three +*/ +/** +four +// five */ +/** +six /* +*/ +EOI + +: cxx-comment +: +$* <<EOI +// 'one' +// "two" // three +// four /* five */ +EOI + +: commented-out +: +$* <<EOI >"';'" +// /* +; +// */ +EOI + +: c-unterminated +: +$* <<EOI 2>>EOE != 0 +/* +comment +EOI +stdin:1:2: error: unterminated comment +EOE + +: cxx-unterminated +: +$* <<:EOI +// comment +EOI + +: in-char-literal +: +$* <<EOI >>EOO +'//' +'/*'*/ +EOI +<char literal> +<char literal> +<punctuation> +<punctuation> +EOO + +: in-string-literal +: +$* <<EOI >>EOO +"//foo" +"/*"*/ +EOI +<string literal> +<string literal> +<punctuation> +<punctuation> +EOO + +: in-raw-string-literal +: +$* <<EOI >>EOO +R"X( +// foo +/* bar +)X"*/ +EOI +<string literal> +<punctuation> +<punctuation> +EOO diff --git a/libbuild2/cc/lexer+line.test.testscript b/libbuild2/cc/lexer+line.test.testscript new file mode 100644 index 0000000..560c092 --- /dev/null +++ b/libbuild2/cc/lexer+line.test.testscript @@ -0,0 +1,67 @@ +# file : libbuild2/cc/lexer+line.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test line continuations. +# + +: identifier +: +$* <<EOI >"'foo123'" +fo\ +o\ +1\ +2\ +3 +EOI + +: punctuation +: +$* <<EOI >'<punctuation>' +.\ +.\ +. 
+EOI + +: c-comment +: +$* <<EOI +/\ +* +comment +*\ +/\ + +EOI + +: cxx-comment +: +$* <<EOI +/\ +/ comment\ +more\ +more +EOI + +: other +: +$* <<EOI >>EOO +\abc +EOI +<punctuation> +'abc' +EOO + +: multiple +: +$* <<EOI >>EOO +\\ +EOI +<punctuation> +EOO + +: unterminated +: +$* <<:EOI >'<punctuation>' +\ +EOI diff --git a/libbuild2/cc/lexer+number.test.testscript b/libbuild2/cc/lexer+number.test.testscript new file mode 100644 index 0000000..f361245 --- /dev/null +++ b/libbuild2/cc/lexer+number.test.testscript @@ -0,0 +1,48 @@ +# file : libbuild2/cc/lexer+number.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test numbers. +# + +$* <'1' >'<number literal>' +$* <'.1' >'<number literal>' +$* <'1.' >'<number literal>' + +$* <'0b101' >'<number literal>' +$* <'0123' >'<number literal>' +$* <'0X12AB' >'<number literal>' + +$* <'1e10' >'<number literal>' +$* <'1E+10' >'<number literal>' +$* <'0x1.p10' >'<number literal>' +$* <'0x1.P-10' >'<number literal>' + +$* <"123'456" >'<number literal>' +$* <"0xff00'00ff" >'<number literal>' + +$* <'123f' >'<number literal>' +$* <'123UL' >'<number literal>' +$* <'123_X' >'<number literal>' + +: separate-punctuation +: +$* <'123;' >>EOO +<number literal> +';' +EOO + +: separate-plus-minus +: +$* <'1.0_a+2.0' >>EOO +<number literal> +<punctuation> +<number literal> +EOO + +: separate-whitespace +: +$* <'123 abc' >>EOO +<number literal> +'abc' +EOO diff --git a/libbuild2/cc/lexer+preprocessor.test.testscript b/libbuild2/cc/lexer+preprocessor.test.testscript new file mode 100644 index 0000000..e33eb90 --- /dev/null +++ b/libbuild2/cc/lexer+preprocessor.test.testscript @@ -0,0 +1,73 @@ +# file : libbuild2/cc/lexer+preprocessor.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test preprocessor lines. 
+# + +: normal +: +$* <<EOI +#pragma message("abc") +EOI + +: multiline +: +$* <<EOI +#pragma message \ +( \ +"abc" \ +) +EOI + +: comment +: +$* <<EOI +#pragma foo /* +bar +baz +*/ +#pragma foo // bar baz +EOI + +: line +: +$* -l <<EOI >>EOO +; +# 1 "test.cxx" 2 +; + ; +# 4 +; +#line 8 "z:\\tmp\\test.hxx" +; +#line 10 +; +# 5 "test.cxx" +; +EOI +';' stdin:1:1 +';' test.cxx:1:1 +';' test.cxx:2:3 +';' test.cxx:4:1 +';' z:\tmp\test.hxx:8:1 +';' z:\tmp\test.hxx:10:1 +';' test.cxx:5:1 +EOO + +: include +: +$* <<EOI 2>>EOE != 0 +#include <foo/bar> +EOI +stdin:1:1: error: unexpected #include directive +EOE + +: nested +: +$* <<EOI >>EOO +#define FOO(x) #y +; +EOI +';' +EOO diff --git a/libbuild2/cc/lexer+raw-string-literal.test.testscript b/libbuild2/cc/lexer+raw-string-literal.test.testscript new file mode 100644 index 0000000..93cddc1 --- /dev/null +++ b/libbuild2/cc/lexer+raw-string-literal.test.testscript @@ -0,0 +1,90 @@ +# file : libbuild2/cc/lexer+raw-string-literal.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test raw string literals. 
+# + +: normal +: +$* <<EOI >>EOO +R"()" +R"(ab)" +R"(a"b)" +R"(a)b)" +R"%(a%)b)%" +R"X(a + b)X" +R"X(a\ + b)X" +EOI +<string literal> +<string literal> +<string literal> +<string literal> +<string literal> +<string literal> +<string literal> +EOO + +: prefix +: +$* <<EOI >>EOO +LR"(ab)" +UR"(ab)" +uR"(ab)" +u8R"(ab)" +EOI +<string literal> +<string literal> +<string literal> +<string literal> +EOO + +: suffix +: +$* <<EOI >>EOO +R"(ab)"x +R"(ab)"_X123 +EOI +<string literal> +<string literal> +EOO + +: escape +: +$* <<EOI >>EOO +R"(\)" +EOI +<string literal> +EOO + +: invalid-no-paren +: +$* <'R"a"' 2>>EOE != 0 +stdin:1:2: error: invalid raw string literal +EOE + +: invalid-paren +: +$* <'R")()("' 2>>EOE != 0 +stdin:1:2: error: invalid raw string literal +EOE + +: invalid-unterminated-paren +: +$* <'R"(abc"' 2>>EOE != 0 +stdin:1:2: error: invalid raw string literal +EOE + +: invalid-unterminated-delimiter +: +$* <'R"X(abc)"' 2>>EOE != 0 +stdin:1:2: error: invalid raw string literal +EOE + +: invalid-unterminated-quote +: +$* <'R"X(abc)X' 2>>EOE != 0 +stdin:1:2: error: invalid raw string literal +EOE diff --git a/libbuild2/cc/lexer+string-literal.test.testscript b/libbuild2/cc/lexer+string-literal.test.testscript new file mode 100644 index 0000000..a2509c9 --- /dev/null +++ b/libbuild2/cc/lexer+string-literal.test.testscript @@ -0,0 +1,65 @@ +# file : libbuild2/cc/lexer+string-literal.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test string literals (except raw). 
+ # + +: normal +: +$* <<EOI >>EOO +"aa" +"'" +"a""b" +EOI +<string literal> +<string literal> +<string literal> +<string literal> +EOO + +: prefix +: +$* <<EOI >>EOO +L"ab" +U"ab" +u"ab" +u8"ab" +EOI +<string literal> +<string literal> +<string literal> +<string literal> +EOO + +: suffix +: +$* <<EOI >>EOO +"ab"x +"ab"_X123 +EOI +<string literal> +<string literal> +EOO + +: escape +: +$* <<EOI >>EOO +"\"\"" +"\\\\" +"\\\"\\" +"\n\t" +U"a\U0001f34c" +EOI +<string literal> +<string literal> +<string literal> +<string literal> +<string literal> +EOO + +: unterminated +: +$* <'"ab' 2>>EOE != 0 +stdin:1:1: error: unterminated string literal +EOE diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx new file mode 100644 index 0000000..6eba57e --- /dev/null +++ b/libbuild2/cc/lexer.cxx @@ -0,0 +1,1129 @@ +// file : libbuild2/cc/lexer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/lexer.hxx> + +using namespace std; +using namespace butl; + +// bit 0 - identifier character (_0-9A-Za-z). +// +static const uint8_t char_flags[256] = +//0 1 2 3 4 5 6 7 8 9 A B C D E F +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 3 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 5 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 7 + + // 128-255 + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 +}; + +// Diagnostics plumbing.
+// +namespace butl // ADL +{ + inline build2::location + get_location (const butl::char_scanner::xchar& c, const void* data) + { + using namespace build2; + + assert (data != nullptr); // E.g., must be &lexer::name_. + return location (static_cast<const path*> (data), c.line, c.column); + } +} + +namespace build2 +{ + namespace cc + { + auto lexer:: + peek (bool e) -> xchar + { + if (unget_) + return ungetc_; + + if (unpeek_) + return unpeekc_; + + xchar c (base::peek ()); + + if (e && c == '\\') + { + get (c); + xchar p (base::peek ()); + + // Handle Windows CRLF sequence. Similar to char_scanner, we treat a + // single CR as if it was followed by LF and also collapse multiple + // CRs. + // + while (p == '\r') + { + get (p); + p = base::peek (); + + if (p == '\n') + break; + + // Pretend '\n' was there and recurse. + // + if (p != '\r') + return peek (e); + } + + if (p == '\n') + { + get (p); + return peek (e); // Recurse. + } + + // Save in the unpeek buffer so that it is returned on the subsequent + // calls to peek() (until get()). + // + unpeek_ = true; + unpeekc_ = c; + } + + return c; + } + + inline auto lexer:: + get (bool e) -> xchar + { + if (unget_) + { + unget_ = false; + return ungetc_; + } + else + { + xchar c (peek (e)); + get (c); + return c; + } + } + + inline void lexer:: + get (const xchar& c) + { + // Increment the logical line similar to how base will increment the + // physical (the column counts are the same). + // + if (log_line_ && c == '\n' && !unget_) + ++*log_line_; + + base::get (c); + } + + inline auto lexer:: + geth (bool e) -> xchar + { + xchar c (get (e)); + cs_.append (c); + return c; + } + + inline void lexer:: + geth (const xchar& c) + { + get (c); + cs_.append (c); + } + + using type = token_type; + + void lexer:: + next (token& t, xchar c, bool ignore_pp) + { + for (;; c = skip_spaces ()) + { + t.file = log_file_; + t.line = log_line_ ? 
*log_line_ : c.line; + t.column = c.column; + + if (eos (c)) + { + t.type = type::eos; + return; + } + + const location l (&name_, c.line, c.column); + + // Hash the token's line. The reason is debug info. In fact, doing + // this will make quite a few "noop" changes (like adding a newline + // anywhere in the source) cause the checksum to change. But there + // doesn't seem to be any way around it: the case where we benefit + // from the precise change detection the most (development) is also + // where we will most likely have debug info enabled. + // + // Note that in order not to make this completely useless we don't + // hash the column. Even if it is part of the debug info, having it a + // bit off shouldn't cause any significant mis-positioning. We also + // don't hash the file path for each token instead only hashing it + // when changed with the #line directive (as well as in the + // constructor for the initial path). + // + cs_.append (t.line); + cs_.append (c); + + switch (c) + { + // Preprocessor lines. + // + case '#': + { + // It is tempting to simply scan until the newline ignoring + // anything in between. However, these lines can start a + // multi-line C-style comment. So we have to tokenize them (and + // hash the data for each token). + // + // Note that this may not work for things like #error that can + // contain pretty much anything. Also note that lines that start + // with '#' can contain '#' further down. In this case we need to + // be careful not to recurse (and consume multiple newlines). Thus + // the ignore_pp flag. + // + // Finally, to support diagnostics properly we need to recognize + // #line directives. + // + if (ignore_pp) + { + for (bool first (true);;) + { + // Note that we keep using the passed token for buffers. + // + c = skip_spaces (false); // Stop at newline.
+ + if (eos (c) || c == '\n') + break; + + if (first) + { + first = false; + + // Recognize #line and its shorthand version: + // + // #line <integer> [<string literal>] ... + // # <integer> [<string literal>] ... + // + // Also diagnose #include while at it. + // + if (!(c >= '0' && c <= '9')) + { + next (t, c, false); + + if (t.type == type::identifier) + { + if (t.value == "include") + fail (l) << "unexpected #include directive"; + else if (t.value != "line") + continue; + } + else + continue; + + if (t.type != type::identifier || t.value != "line") + continue; + + c = skip_spaces (false); + + if (!(c >= '0' && c <= '9')) + fail (c) << "line number expected after #line directive"; + } + + // Ok, this is #line and next comes the line number. + // + line_directive (t, c); + continue; // Parse the tail, if any. + } + + next (t, c, false); + } + break; + } + else + { + t.type = type::punctuation; + return; + } + } + // Single-letter punctuation. + // + case ';': t.type = type::semi; return; + case '{': t.type = type::lcbrace; return; + case '}': t.type = type::rcbrace; return; + // Other single-letter punctuation. + // + case '(': + case ')': + case '[': + case ']': + case ',': + case '?': + case '~': + case '\\': t.type = type::punctuation; return; + // Potentially multi-letter punctuation. + // + case '.': // . .* .<N> ... + { + xchar p (peek ()); + + if (p == '*') + { + geth (p); + t.type = type::punctuation; + return; + } + else if (p >= '0' && p <= '9') + { + number_literal (t, c); + return; + } + else if (p == '.') + { + get (p); + + xchar q (peek ()); + if (q == '.') + { + cs_.append (p); + + geth (q); + t.type = type::punctuation; + return; + } + unget (p); + // Fall through. + } + + t.type = type::dot; + return; + } + case '=': // = == + case '!': // ! 
!= + case '*': // * *= + case '/': // / /= (/* and // handled by skip_spaces() above) + case '%': // % %= + case '^': // ^ ^= + { + xchar p (peek ()); + + if (p == '=') + geth (p); + + t.type = type::punctuation; + return; + } + case '<': // < <= << <<= + case '>': // > >= >> >>= + { + xchar p (peek ()); + + if (p == c) + { + geth (p); + if ((p = peek ()) == '=') + geth (p); + t.type = type::punctuation; + } + else if (p == '=') + { + geth (p); + t.type = type::punctuation; + } + else + t.type = (c == '<' ? type::less : type::greater); + + return; + } + case '+': // + ++ += + case '-': // - -- -= -> ->* + { + xchar p (peek ()); + + if (p == c || p == '=') + geth (p); + else if (c == '-' && p == '>') + { + geth (p); + if ((p = peek ()) == '*') + geth (p); + } + + t.type = type::punctuation; + return; + } + case '&': // & && &= + case '|': // | || |= + { + xchar p (peek ()); + + if (p == c || p == '=') + geth (p); + + t.type = type::punctuation; + return; + } + case ':': // : :: + { + xchar p (peek ()); + + if (p == ':') + geth (p); + + t.type = type::punctuation; + return; + } + // Number (and also .<N> above). + // + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + number_literal (t, c); + return; + } + // Char/string literal, identifier, or other (\, $, @, `). + // + default: + { + bool raw (false); // Raw string literal. + + // Note: known not to be a digit (see above). + // + if (char_flags[static_cast<uint8_t> (c)] & 0x01) + { + // This smells a little: we know skip_spaces() did not peek at + // the next character because this is not '/'. Which means the + // position in the stream must be of this character + 1. + // + t.position = buf_->tellg () - 1; + + string& id (t.value); + id = c; + + while (char_flags[static_cast<uint8_t> (c = peek ())] & 0x01) + { + geth (c); + id += c; + + // Direct buffer scan.
Note that we always follow up with the + // normal peek() call which may load the next chunk, handle + // line continuations, etc. In other words, the end of the + // "raw" scan doesn't necessarily mean the end. + // + const char* b (gptr_); + const char* p (b); + + for (const char* e (egptr_); + p != e && char_flags[static_cast<uint8_t> (*p)] & 0x01; + ++p) ; + + // Unrolling this loop doesn't make a difference. + // + // for (const char* e (egptr_ - 4); p < e; p += 4) + // { + // uint8_t c; + // + // c = static_cast<uint8_t> (p[0]); + // if (!(char_flags[c] & 0x01)) break; + // + // c = static_cast<uint8_t> (p[1]); + // if (!(char_flags[c] & 0x01)) {p += 1; break;} + // + // c = static_cast<uint8_t> (p[2]); + // if (!(char_flags[c] & 0x01)) {p += 2; break;} + // + // c = static_cast<uint8_t> (p[3]); + // if (!(char_flags[c] & 0x01)) {p += 3; break;} + // } + + size_t n (p - b); + id.append (b, n); cs_.append (b, n); + gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n; + } + + // If the following character is a quote, see if the identifier + // is one of the literal prefixes. + // + if (c == '\'' || c == '\"') + { + size_t n (id.size ()), i (0); + switch (id[0]) + { + case 'u': + { + if (n > 1 && id[1] == '8') + ++i; + } + // Fall through. + case 'L': + case 'U': + { + ++i; + + if (c == '\"' && n > i && id[i] == 'R') + { + ++i; + raw = true; + } + break; + } + case 'R': + { + if (c == '\"') + { + ++i; + raw = true; + } + break; + } + } + + if (i == n) // All characters "consumed". 
+ { + geth (c); + id.clear (); + } + } + + if (!id.empty ()) + { + t.type = type::identifier; + return; + } + } + + switch (c) + { + case '\'': + { + char_literal (t, c); + return; + } + case '\"': + { + if (raw) + raw_string_literal (t, c); + else + string_literal (t, c); + return; + } + default: + { + t.type = type::other; + return; + } + } + } + } + } + } + + void lexer:: + number_literal (token& t, xchar c) + { + // note: c is hashed + + // A number (integer or floating point literal) can: + // + // 1. Start with a dot (which must be followed by a digit, e.g., .123). + // + // 2. Can have a radix prefix (0b101, 0123, 0X12AB). + // + // 3. Can have an exponent (1e10, 0x1.p-10, 1.). + // + // 4. Digits can be separated with ' (123'456, 0xff00'00ff). + // + // 5. End with a built-in or user defined literal (123f, 123UL, 123_X) + // + // Quoting from GCC's preprocessor documentation: + // + // "Formally preprocessing numbers begin with an optional period, a + // required decimal digit, and then continue with any sequence of + // letters, digits, underscores, periods, and exponents. Exponents are + // the two-character sequences 'e+', 'e-', 'E+', 'E-', 'p+', 'p-', 'P+', + // and 'P-'." + // + // So it looks like a "C++ number" is then any unseparated (with + // whitespace or punctuation) sequence of those plus '. The only mildly + // tricky part is then to recognize +/- as being part of the exponent. + // + while (!eos ((c = peek ()))) + { + switch (c) + { + // All the whitespace, punctuation, and other characters that end + // the number. + // + case ' ': + case '\n': + case '\t': + case '\r': + case '\f': + case '\v': + + case '#': + case ';': + case '{': + case '}': + case '(': + case ')': + case '[': + case ']': + case ',': + case '?': + case '~': + case '=': + case '!': + case '*': + case '/': + case '%': + case '^': + case '>': + case '<': + case '&': + case '|': + case ':': + case '+': // The exponent case is handled below. 
+ case '-': // The exponent case is handled below. + case '"': + case '\\': + + case '@': + case '$': + case '`': + break; + + // Recognize +/- after the exponent. + // + case 'e': + case 'E': + case 'p': + case 'P': + { + geth (c); + c = peek (); + if (c == '+' || c == '-') + geth (c); + continue; + } + + case '_': + case '.': + case '\'': + default: // Digits and letters. + { + geth (c); + continue; + } + } + + break; + } + + t.type = type::number; + } + + void lexer:: + char_literal (token& t, xchar c) + { + // note: c is hashed + + const location l (&name_, c.line, c.column); + + for (char p (c);;) // Previous character (see below). + { + c = geth (); + + if (eos (c) || c == '\n') + fail (l) << "unterminated character literal"; + + if (c == '\'' && p != '\\') + break; + + // Keep track of \\-escapings so we don't confuse them with \', as in + // '\\'. + // + p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c); + } + + // See if we have a user-defined suffix (which is an identifier). + // + if ((c = peek ()) == '_' || alpha (c)) + literal_suffix (c); + + t.type = type::character; + } + + void lexer:: + string_literal (token& t, xchar c) + { + // note: c is hashed + + const location l (&name_, c.line, c.column); + + for (char p (c);;) // Previous character (see below). + { + c = geth (); + + if (eos (c) || c == '\n') + fail (l) << "unterminated string literal"; + + if (c == '\"' && p != '\\') + break; + + // Keep track of \\-escapings so we don't confuse them with \", as in + // "\\". + // + p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c); + + // Direct buffer scan. + // + if (p != '\\') + { + const char* b (gptr_); + const char* e (egptr_); + const char* p (b); + + for (char c; + p != e && (c = *p) != '\"' && c != '\\' && c != '\n'; + ++p) ; + + size_t n (p - b); + cs_.append (b, n); + gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n; + } + } + + // See if we have a user-defined suffix (which is an identifier). 
+ // + if ((c = peek ()) == '_' || alpha (c)) + literal_suffix (c); + + t.type = type::string; + } + + void lexer:: + raw_string_literal (token& t, xchar c) + { + // note: c is hashed + + // The overall form is: + // + // R"<delimiter>(<raw_characters>)<delimiter>" + // + // Where <delimiter> is a potentially-empty character sequence made of + // any source character but parentheses, backslash and spaces. It can be + // at most 16 characters long. + // + // Note that the <raw_characters> are not processed in any way, not even + // for line continuations. + // + const location l (&name_, c.line, c.column); + + // As a first step, parse the delimiter (including the openning paren). + // + string d (1, ')'); + + for (;;) + { + c = geth (); + + if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ') + fail (l) << "invalid raw string literal"; + + if (c == '(') + break; + + d += c; + } + + d += '"'; + + // Now parse the raw characters while trying to match the closing + // delimiter. + // + for (size_t i (0);;) // Position to match in d. + { + c = geth (false); // No newline escaping. + + if (eos (c)) // Note: newline is ok. + fail (l) << "invalid raw string literal"; + + if (c != d[i] && i != 0) // Restart from the beginning. + i = 0; + + if (c == d[i]) + { + if (++i == d.size ()) + break; + } + } + + // See if we have a user-defined suffix (which is an identifier). + // + if ((c = peek ()) == '_' || alpha (c)) + literal_suffix (c); + + t.type = type::string; + } + + void lexer:: + literal_suffix (xchar c) + { + // note: c is unhashed + + // Parse a user-defined literal suffix identifier. + // + for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ; + } + + void lexer:: + line_directive (token& t, xchar c) + { + // enter: first digit of the line number + // leave: last character of the line number or file string + // note: c is unhashed + + // If our number and string tokens contained the literal values, then we + // could have used that. 
However, we ignore the value (along with escape + // processing, etc), for performance. Let's keep it that way and instead + // handle it ourselves. + // + // Note also that we are not hashing these at the character level + // instead hashing the switch to a new file path below and leaving the + // line number to the token line hashing. + // + { + string& s (t.value); + + for (s = c; (c = peek ()) >= '0' && c <= '9'; get (c)) + s += c; + + // The newline that ends the directive will increment the logical line + // so subtract one to compensate. Note: can't be 0 and shouldn't throw + // for valid lines. + // + log_line_ = stoull (s.c_str ()) - 1; + } + + // See if we have the file. + // + c = skip_spaces (false); + + if (c == '\"') + { + const location l (&name_, c.line, c.column); + + // It is common to have a large number of #line directives that don't + // change the file (they seem to be used to track macro locations or + // some such). So we are going to optimize for this by comparing the + // current path to what's in #line. + // + string& s (tmp_file_); + s.clear (); + + for (char p ('\0'); p != '\"'; ) // Previous character. + { + c = get (); + + if (eos (c) || c == '\n') + fail (l) << "unterminated string literal"; + + // Handle escapes. + // + if (p == '\\') + { + p = '\0'; // Clear so we don't confuse \" and \\". + + // We only handle what can reasonably be expected in a file name. + // + switch (c) + { + case '\\': + case '\'': + case '\"': break; // Add as is. + default: + fail (c) << "unsupported escape sequence in #line directive"; + } + } + else + { + p = c; + + switch (c) + { + case '\\': + case '\"': continue; + } + } + + s += c; + + // Direct buffer scan. 
+ // + if (p != '\\') + { + const char* b (gptr_); + const char* e (egptr_); + const char* p (b); + + for (char c; + p != e && (c = *p) != '\"' && c != '\\' && c != '\n'; + ++p) ; + + size_t n (p - b); + s.append (b, n); + gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n; + } + } + + if (log_file_.string () == s) + return; + + // Swap the two string buffers. + // + { + string r (move (log_file_).string ()); // Move string rep out. + r.swap (s); + log_file_ = path (move (r)); // Move back in. + } + + // If the path is relative, then prefix it with the current working + // directory. Failed that, we will end up with different checksums for + // invocations from different directories. + // + // While this should work fine for normal cross-compilation, it's an + // entirely different story for the emulated case (e.g., msvc-linux + // where the preprocessed output contains absolute Windows paths). So + // we try to sense if things look fishy and leave the path alone. + // + // Also detect special names like <built-in> and <command-line>. Plus + // GCC sometimes adds what looks like working directory (has trailing + // slash). So ignore that as well. + // + // We now switched to using absolute translation unit paths (because + // of __FILE__/assert(); see compile.cxx for details). But we might + // still need this logic when we try to calculate location-independent + // hash for distributed compilation/caching. The idea is to only hash + // the part starting from the project root which is immutable. Plus + // we will need -ffile-prefix-map to deal with __FILE__. + // + if (!log_file_.to_directory ()) + cs_.append (log_file_.string ()); +#if 0 + { + using tr = path::traits; + const string& f (log_file_.string ()); + + if (f.find (':') != string::npos || + (f.front () == '<' && f.back () == '>') || + log_file_.absolute ()) + cs_.append (f); + else + { + // This gets complicated and slow: the path may contain '..' and + // '.' 
    // Skip whitespaces and comments and return the first character that is
    // neither (already extracted from the stream) or eos.
    //
    // If nl is false, then a newline is not skipped but returned instead. In
    // this mode a C++ comment also ends the skipping, with the character that
    // ended the comment (newline or eos) being returned.
    //
    // Note that the characters are extracted with get() rather than geth().
    //
    auto lexer::
    skip_spaces (bool nl) -> xchar
    {
      xchar c (get ());

      for (; !eos (c); c = get ())
      {
        switch (c)
        {
        case '\n':
          if (!nl) break;
          // Fall through.
        case ' ':
        case '\t':
        case '\r':
        case '\f':
        case '\v':
          {
            // Direct buffer scan.
            //
            // Only spaces and tabs are skipped here; anything else (including
            // newlines) goes through get() on the next iteration.
            //
            const char* b (gptr_);
            const char* e (egptr_);
            const char* p (b);

            for (char c;
                 p != e && ((c = *p) == ' ' || c == '\t');
                 ++p) ;

            size_t n (p - b);
            gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;

            continue;
          }
        case '/':
          {
            xchar p (peek ());

            // C++ comment.
            //
            if (p == '/')
            {
              get (p);

              for (;;)
              {
                c = get ();
                if (c == '\n' || eos (c))
                  break;

                // Direct buffer scan.
                //
                // Stop at a newline or a backslash, both of which are left to
                // get() above.
                //
                const char* b (gptr_);
                const char* e (egptr_);
                const char* p (b);

                for (char c;
                     p != e && (c = *p) != '\n' && c != '\\';
                     ++p) ;

                size_t n (p - b);
                gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
              }

              // If newlines are not to be skipped, return the character that
              // ended the comment.
              //
              if (!nl)
                break;

              continue;
            }

            // C comment.
            //
            if (p == '*')
            {
              get (p);

              for (;;)
              {
                c = get ();

                // Note: p here is still the peeked '*' that opened the
                // comment (the scan pointer below is declared later).
                //
                if (eos (c))
                  fail (p) << "unterminated comment";

                if (c == '*' && (c = peek ()) == '/')
                {
                  get (c);
                  break;
                }

                // Direct buffer scan.
                //
                // Stop at '*' or a backslash. Since newlines are skipped here
                // without going through get(), we have to maintain the line
                // and column counts (as well as the logical line, if set)
                // ourselves.
                //
                const char* b (gptr_);
                const char* e (egptr_);
                const char* p (b);

                for (char c;
                     p != e && (c = *p) != '*' && c != '\\';
                     ++p)
                {
                  if (c == '\n')
                  {
                    if (log_line_) ++*log_line_;
                    ++line;
                    column = 1;
                  }
                  else
                    ++column;
                }

                gptr_ = p; buf_->gbump (static_cast<int> (p - b));
              }
              continue;
            }

            // A '/' that does not start a comment: return it.
            //
            break;
          }
        }

        // We only get here if the switch was left with break, which means c
        // is the character to return.
        //
        break;
      }

      return c;
    }

    // Print the token in a form suitable for diagnostics: the quoted spelling
    // for the individually recognized punctuation, the quoted value for
    // identifiers, and a generic <...> description for everything else.
    //
    ostream&
    operator<< (ostream& o, const token& t)
    {
      switch (t.type)
      {
      case type::dot:         o << "'.'";                  break;
      case type::semi:        o << "';'";                  break;
      case type::less:        o << "'<'";                  break;
      case type::greater:     o << "'>'";                  break;
      case type::lcbrace:     o << "'{'";                  break;
      case type::rcbrace:     o << "'}'";                  break;
      case type::punctuation: o << "<punctuation>";        break;

      case type::identifier:  o << '\'' << t.value << '\''; break;

      case type::number:      o << "<number literal>";     break;
      case type::character:   o << "<char literal>";       break;
      case type::string:      o << "<string literal>";     break;

      case type::other:       o << "<other>";              break;
      case type::eos:         o << "<end of file>";        break;
      }

      return o;
    }
    // In other words, the
    // sequence of tokens returned is similar to what a real C/C++ compiler
    // would see from its preprocessor.
    //
    // The input is a (partially-)preprocessed translation unit that may still
    // contain comments, line continuations, and preprocessor directives such
    // as #line, #pragma, but not #include (which is diagnosed). Currently,
    // all preprocessor directives except #line are ignored and no values are
    // saved from literals. The #line directive (and its shorthand notation)
    // is recognized to provide the logical token location.
    //
    // While at it we also calculate the checksum of the input ignoring
    // comments, whitespaces, etc. This is used to detect changes that do not
    // alter the resulting token stream.
    //
    enum class token_type
    {
      // NOTE: remember to update operator<<() if changing anything here!
      //
      eos,

      dot,         // .
      semi,        // ;
      less,        // <
      greater,     // >
      lcbrace,     // {
      rcbrace,     // }

      punctuation, // Other punctuation.

      identifier,

      number,      // Number literal.
      character,   // Char literal.
      string,      // String literal.

      other        // Other token.
    };

    // A single token as returned by lexer::next().
    //
    struct token
    {
      token_type type = token_type::eos;

      // Token value. Note that operator<<() only prints it for identifiers
      // (no values are saved from literals; see above).
      //
      string value;

      // Logical position.
      //
      path file;
      uint64_t line = 0;
      uint64_t column = 0;

      // Physical position in the stream, currently only for identifiers.
      //
      uint64_t position = 0;
    };

    // Output the token value in a format suitable for diagnostics.
    //
    ostream&
    operator<< (ostream&, const token&);

    class lexer: protected butl::char_scanner
    {
    public:
      // The name is used both as the physical file name in the lexer's own
      // diagnostics and as the initial logical file (which can later be
      // changed by the #line directives).
      //
      lexer (ifdstream& is, const path& name)
          : char_scanner (is, false),
            name_ (name),
            fail ("error", &name_),
            log_file_ (name) {}

      const path&
      name () const {return name_;}

      // Return the string representation of the checksum calculated so far.
      //
      string
      checksum () const {return cs_.string ();}

      // Note that it is ok to call next() again after getting eos.
      //
      token
      next ()
      {
        token t;
        next (t, skip_spaces (), true);
        return t;
      }

      // As above but reuse the token to avoid a (potential) memory
      // allocation. Typical usage:
      //
      // for (token t; l.next (t) != token_type::eos; )
      //   ...
      //
      token_type
      next (token& t)
      {
        next (t, skip_spaces (), true);
        return t.type;
      }

    private:
      // The implementation of the public next() versions. The second argument
      // is the first character of the token as returned by skip_spaces().
      //
      void
      next (token&, xchar, bool);

      // Scanners for the individual token kinds. Each sets the token type
      // and consumes the rest of the token.
      //
      void
      number_literal (token&, xchar);

      void
      char_literal (token&, xchar);

      void
      string_literal (token&, xchar);

      void
      raw_string_literal (token&, xchar);

      // Consume a user-defined literal suffix starting with the (peeked)
      // character.
      //
      void
      literal_suffix (xchar);

      // Process the #line directive updating the logical file and line.
      //
      void
      line_directive (token&, xchar);

      // Skip whitespaces and comments returning the first character that is
      // neither. If newline is false, then stop at (and return) the newline.
      //
      xchar
      skip_spaces (bool newline = true);

      // The char_scanner adaptation for newline escape sequence processing.
      // Enabled by default and is only disabled in the raw string literals.
      //
    private:
      using base = char_scanner;

      xchar
      peek (bool escape = true);

      xchar
      get (bool escape = true);

      void
      get (const xchar& peeked);

      // Hashing versions.
      //
      xchar
      geth (bool escape = true);

      void
      geth (const xchar& peeked);

    private:
      const path name_;
      const fail_mark fail;

      // Logical file and line as set by the #line directives. Note that the
      // lexer diagnostics still uses the physical file/lines.
      //
      path log_file_;
      optional<uint64_t> log_line_;

      string tmp_file_; // Scratch buffer for the file name in #line.
      sha256 cs_;       // Checksum (see checksum()).
    };

    // Diagnostics plumbing.
+ // + inline location + get_location (const token& t, const void* = nullptr) + { + return location (&t.file, t.line, t.column); + } + } +} + +#endif // LIBBUILD2_CC_LEXER_HXX diff --git a/libbuild2/cc/lexer.test.cxx b/libbuild2/cc/lexer.test.cxx new file mode 100644 index 0000000..0aeadba --- /dev/null +++ b/libbuild2/cc/lexer.test.cxx @@ -0,0 +1,80 @@ +// file : libbuild2/cc/lexer.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/cc/lexer.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + // Usage: argv[0] [-l] [<file>] + // + int + main (int argc, char* argv[]) + { + bool loc (false); + const char* file (nullptr); + + for (int i (1); i != argc; ++i) + { + string a (argv[i]); + + if (a == "-l") + loc = true; + else + { + file = argv[i]; + break; + } + } + + try + { + ifdstream is; + if (file != nullptr) + is.open (file); + else + { + file = "stdin"; + is.open (fddup (stdin_fd ())); + } + + lexer l (is, path (file)); + + // No use printing eos since we will either get it or loop forever. 
+ // + for (token t; l.next (t) != token_type::eos; ) + { + cout << t; + + if (loc) + cout << ' ' << t.file << ':' << t.line << ':' << t.column; + + cout << endl; + } + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::cc::main (argc, argv); +} diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx new file mode 100644 index 0000000..110a992 --- /dev/null +++ b/libbuild2/cc/link-rule.cxx @@ -0,0 +1,3043 @@ +// file : libbuild2/cc/link-rule.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/link-rule.hxx> + +#include <map> +#include <cstdlib> // exit() +#include <cstring> // strlen() + +#include <libbutl/filesystem.mxx> // file_exists() + +#include <libbuild2/depdb.hxx> +#include <libbuild2/scope.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/target.hxx> // c, pc* +#include <libbuild2/cc/utility.hxx> + +using std::map; +using std::exit; + +using namespace butl; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + link_rule:: + link_rule (data&& d) + : common (move (d)), + rule_id (string (x) += ".link 1") + { + static_assert (sizeof (match_data) <= target::data_size, + "insufficient space"); + } + + link_rule::match_result link_rule:: + match (action a, + const target& t, + const target* g, + otype ot, + bool library) const + { + // NOTE: the target may be a group (see utility library logic below). + + match_result r; + + // Scan prerequisites and see if we can work with what we've got. Note + // that X could be C (as in language). We handle this by always checking + // for X first. + // + // Note also that we treat bmi{} as obj{}. @@ MODHDR hbmi{}? 
+ // + for (prerequisite_member p: + prerequisite_members (a, t, group_prerequisites (t, g))) + { + // If excluded or ad hoc, then don't factor it into our tests. + // + if (include (a, t, p) != include_type::normal) + continue; + + if (p.is_a (x_src) || + (x_mod != nullptr && p.is_a (*x_mod)) || + // Header-only X library (or library with C source and X header). + (library && x_header (p, false /* c_hdr */))) + { + r.seen_x = r.seen_x || true; + } + else if (p.is_a<c> () || + // Header-only C library. + (library && p.is_a<h> ())) + { + r.seen_c = r.seen_c || true; + } + else if (p.is_a<obj> () || p.is_a<bmi> ()) + { + r.seen_obj = r.seen_obj || true; + } + else if (p.is_a<obje> () || p.is_a<bmie> ()) + { + // We can make these "no-match" if/when there is a valid use case. + // + if (ot != otype::e) + fail << p.type ().name << "{} as prerequisite of " << t; + + r.seen_obj = r.seen_obj || true; + } + else if (p.is_a<obja> () || p.is_a<bmia> ()) + { + if (ot != otype::a) + fail << p.type ().name << "{} as prerequisite of " << t; + + r.seen_obj = r.seen_obj || true; + } + else if (p.is_a<objs> () || p.is_a<bmis> ()) + { + if (ot != otype::s) + fail << p.type ().name << "{} as prerequisite of " << t; + + r.seen_obj = r.seen_obj || true; + } + else if (p.is_a<libul> () || p.is_a<libux> ()) + { + // For a unility library we look at its prerequisites, recursively. + // Since these checks are not exactly light-weight, only do them if + // we haven't already seen any X prerequisites. + // + if (!r.seen_x) + { + // This is a bit iffy: in our model a rule can only search a + // target's prerequisites if it matches. But we don't yet know + // whether we match. However, it seems correct to assume that any + // rule-specific search will always resolve to an existing target + // if there is one. So perhaps it's time to relax this restriction + // a little? 
Note that this fits particularly well with what we + // doing here since if there is no existing target, then there can + // be no prerequisites. + // + // Note, however, that we cannot linkup a prerequisite target + // member to its group since we are not matching this target. As + // result we have to do all the steps except for setting t.group + // and pass both member and group (we also cannot query t.group + // since it's racy). + // + const target* pg (nullptr); + const target* pt (p.search_existing ()); + + if (p.is_a<libul> ()) + { + if (pt != nullptr) + { + // If this is a group then try to pick (again, if exists) a + // suitable member. If it doesn't exist, then we will only be + // considering the group's prerequisites. + // + if (const target* pm = + link_member (pt->as<libul> (), + a, + linfo {ot, lorder::a /* unused */}, + true /* existing */)) + { + pg = pt; + pt = pm; + } + } + else + { + // It's possible we have no group but have a member so try + // that. + // + const target_type& tt (ot == otype::a ? libua::static_type : + ot == otype::s ? libus::static_type : + libue::static_type); + + // We know this prerequisite member is a prerequisite since + // otherwise the above search would have returned the member + // target. + // + pt = search_existing (t.ctx, p.prerequisite.key (tt)); + } + } + else if (!p.is_a<libue> ()) + { + // See if we also/instead have a group. + // + pg = search_existing (t.ctx, + p.prerequisite.key (libul::static_type)); + + if (pt == nullptr) + swap (pt, pg); + } + + if (pt != nullptr) + { + // If we are matching a target, use the original output type + // since that would be the member that we pick. + // + otype pot (pt->is_a<libul> () ? ot : link_type (*pt).type); + match_result pr (match (a, *pt, pg, pot, true /* lib */)); + + // Do we need to propagate any other seen_* values? Hm, that + // would in fact match with the "see-through" semantics of + // utility libraries we have in other places. 
+ // + r.seen_x = pr.seen_x; + } + else + r.seen_lib = r.seen_lib || true; // Consider as just a library. + } + } + else if (p.is_a<lib> () || + p.is_a<liba> () || + p.is_a<libs> ()) + { + r.seen_lib = r.seen_lib || true; + } + // Some other c-common header/source (say C++ in a C rule) other than + // a C header (we assume everyone can hanle that). + // + else if (p.is_a<cc> () && !(x_header (p, true /* c_hdr */))) + { + r.seen_cc = true; + break; + } + } + + return r; + } + + bool link_rule:: + match (action a, target& t, const string& hint) const + { + // NOTE: may be called multiple times and for both inner and outer + // operations (see the install rules). + + tracer trace (x, "link_rule::match"); + + ltype lt (link_type (t)); + + // If this is a group member library, link-up to our group (this is the + // target group protocol which means this can be done whether we match + // or not). + // + // If we are called for the outer operation (see install rules), then + // use resolve_group() to delegate to inner. + // + if (lt.member_library ()) + { + if (a.outer ()) + resolve_group (a, t); + else if (t.group == nullptr) + t.group = &search (t, + lt.utility ? libul::static_type : lib::static_type, + t.dir, t.out, t.name); + } + + match_result r (match (a, t, t.group, lt.type, lt.library ())); + + // If this is some other c-common header/source (say C++ in a C rule), + // then we shouldn't try to handle that (it may need to be compiled, + // etc). + // + if (r.seen_cc) + { + l4 ([&]{trace << "non-" << x_lang << " prerequisite " + << "for target " << t;}); + return false; + } + + if (!(r.seen_x || r.seen_c || r.seen_obj || r.seen_lib)) + { + l4 ([&]{trace << "no " << x_lang << ", C, or obj/lib prerequisite " + << "for target " << t;}); + return false; + } + + // We will only chain a C source if there is also an X source or we were + // explicitly told to. 
+ // + if (r.seen_c && !r.seen_x && hint < x) + { + l4 ([&]{trace << "C prerequisite without " << x_lang << " or hint " + << "for target " << t;}); + return false; + } + + return true; + } + + auto link_rule:: + derive_libs_paths (file& t, + const char* pfx, + const char* sfx) const -> libs_paths + { + bool win (tclass == "windows"); + + // Get default prefix and extension. + // + const char* ext (nullptr); + if (win) + { + if (tsys == "mingw32") + { + if (pfx == nullptr) + pfx = "lib"; + } + + ext = "dll"; + } + else + { + if (pfx == nullptr) + pfx = "lib"; + + if (tclass == "macos") + ext = "dylib"; + else + ext = "so"; + } + + // First sort out which extension we are using. + // + const string& e (t.derive_extension (ext)); + + auto append_ext = [&e] (path& p) + { + if (!e.empty ()) + { + p += '.'; + p += e; + } + }; + + // See if we have the load suffix. + // + const string& ls (cast_empty<string> (t["bin.lib.load_suffix"])); + + // Figure out the version. + // + string ver; + using verion_map = map<string, string>; + if (const verion_map* m = cast_null<verion_map> (t["bin.lib.version"])) + { + // First look for the target system. + // + auto i (m->find (tsys)); + + // Then look for the target class. + // + if (i == m->end ()) + i = m->find (tclass); + + // Then look for the wildcard. Since it is higly unlikely one can have + // a version that will work across platforms, this is only useful to + // say "all others -- no version". + // + if (i == m->end ()) + i = m->find ("*"); + + // At this stage the only platform-specific version we support is the + // "no version" override. + // + if (i != m->end () && !i->second.empty ()) + fail << i->first << "-specific bin.lib.version not yet supported"; + + // Finally look for the platform-independent version. + // + if (i == m->end ()) + i = m->find (""); + + // If we didn't find anything, fail. If the bin.lib.version was + // specified, then it should explicitly handle all the targets. 
+ // + if (i == m->end ()) + fail << "no version for " << ctgt << " in bin.lib.version" << + info << "considere adding " << tsys << "@<ver> or " << tclass + << "@<ver>"; + + ver = i->second; + } + + // Now determine the paths. + // + path lk, ld, so, in; + + // We start with the basic path. + // + path b (t.dir); + + if (pfx != nullptr && pfx[0] != '\0') + { + b /= pfx; + b += t.name; + } + else + b /= t.name; + + if (sfx != nullptr && sfx[0] != '\0') + b += sfx; + + // Clean pattern. + // + path cp (b); + cp += "?*"; // Don't match empty (like the libfoo.so symlink). + append_ext (cp); + + // On Windows the real path is to libs{} and the link path is empty. + // Note that we still need to derive the import library path. + // + if (win) + { + // Usually on Windows with MSVC the import library is called the same + // as the DLL but with the .lib extension. Which means it clashes with + // the static library. Instead of decorating the static library name + // with ugly suffixes (as is customary), let's use the MinGW approach + // (one must admit it's quite elegant) and call it .dll.lib. + // + libi& i (*find_adhoc_member<libi> (t)); + + if (i.path ().empty ()) + { + path ip (b); + append_ext (ip); + i.derive_path (move (ip), tsys == "mingw32" ? "a" : "lib"); + } + } + // We will only need the link name if the following name differs. + // + else if (!ver.empty () || !ls.empty ()) + { + lk = b; + append_ext (lk); + } + + // See if we have the load suffix. + // + if (!ls.empty ()) + { + b += ls; + + // We will only need the load name if the following name differs. + // + if (!ver.empty ()) + { + ld = b; + append_ext (ld); + } + } + + if (!ver.empty ()) + b += ver; + + const path& re (t.derive_path (move (b))); + + return libs_paths { + move (lk), move (ld), move (so), move (in), &re, move (cp)}; + } + + // Look for binary-full utility library recursively until we hit a + // non-utility "barier". 
+ // + static bool + find_binfull (action a, const target& t, linfo li) + { + for (const target* pt: t.prerequisite_targets[a]) + { + if (pt == nullptr || unmark (pt) != 0) // Called after pass 1 below. + continue; + + const file* pf; + + // If this is the libu*{} group, then pick the appropriate member. + // + if (const libul* ul = pt->is_a<libul> ()) + { + pf = &link_member (*ul, a, li)->as<file> (); + } + else if ((pf = pt->is_a<libue> ()) || + (pf = pt->is_a<libus> ()) || + (pf = pt->is_a<libua> ())) + ; + else + continue; + + if (!pf->path ().empty () || find_binfull (a, *pf, li)) + return true; + } + + return false; + }; + + recipe link_rule:: + apply (action a, target& xt) const + { + tracer trace (x, "link_rule::apply"); + + file& t (xt.as<file> ()); + context& ctx (t.ctx); + + // Note that for_install is signalled by install_rule and therefore + // can only be relied upon during execute. + // + match_data& md (t.data (match_data ())); + + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + + ltype lt (link_type (t)); + otype ot (lt.type); + linfo li (link_info (bs, ot)); + + // Set the library type (C, C++, etc) as rule-specific variable. + // + if (lt.library ()) + t.state[a].assign (c_type) = string (x); + + bool binless (lt.library ()); // Binary-less until proven otherwise. + + // Inject dependency on the output directory. Note that we do it even + // for binless libraries since there could be other output (e.g., .pc + // files). + // + inject_fsdir (a, t); + + // Process prerequisites, pass 1: search and match prerequisite + // libraries, search obj/bmi{} targets, and search targets we do rule + // chaining for. + // + // Also clear the binless flag if we see any source or object files. + // Note that if we don't see any this still doesn't mean the library is + // binless since it can depend on a binfull utility library. This we + // check below, after matching the libraries. 
+ // + // We do libraries first in order to indicate that we will execute these + // targets before matching any of the obj/bmi{}. This makes it safe for + // compile::apply() to unmatch them and therefore not to hinder + // parallelism. + // + // We also create obj/bmi{} chain targets because we need to add + // (similar to lib{}) all the bmi{} as prerequisites to all the other + // obj/bmi{} that we are creating. Note that this doesn't mean that the + // compile rule will actually treat them all as prerequisite targets. + // Rather, they are used to resolve actual module imports. We don't + // really have to search obj{} targets here but it's the same code so we + // do it here to avoid duplication. + // + // Also, when cleaning, we ignore prerequisites that are not in the same + // or a subdirectory of our project root. Except for libraries: if we + // ignore them, then they won't be added to synthesized dependencies and + // this will break things if we do, say, update after clean in the same + // invocation. So for libraries we ignore them later, on pass 3. + // + optional<dir_paths> usr_lib_dirs; // Extract lazily. + compile_target_types tts (compile_types (ot)); + + auto skip = [&a, &rs] (const target* pt) -> bool + { + return a.operation () == clean_id && !pt->dir.sub (rs.out_path ()); + }; + + auto& pts (t.prerequisite_targets[a]); + size_t start (pts.size ()); + + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + include_type pi (include (a, t, p)); + + // We pre-allocate a NULL slot for each (potential; see clean) + // prerequisite target. + // + pts.push_back (prerequisite_target (nullptr, pi)); + const target*& pt (pts.back ()); + + if (pi != include_type::normal) // Skip excluded and ad hoc. 
+ continue; + + // Mark: + // 0 - lib + // 1 - src + // 2 - mod + // 3 - obj/bmi and also lib not to be cleaned + // + uint8_t m (0); + + bool mod (x_mod != nullptr && p.is_a (*x_mod)); + + if (mod || p.is_a (x_src) || p.is_a<c> ()) + { + binless = binless && false; + + // Rule chaining, part 1. + // + + // Which scope shall we use to resolve the root? Unlikely, but + // possible, the prerequisite is from a different project + // altogether. So we are going to use the target's project. + // + + // If the source came from the lib{} group, then create the obj{} + // group and add the source as a prerequisite of the obj{} group, + // not the obj*{} member. This way we only need one prerequisite + // for, say, both liba{} and libs{}. The same goes for bmi{}. + // + bool group (!p.prerequisite.belongs (t)); // Group's prerequisite. + + const target_type& rtt (mod + ? (group ? bmi::static_type : tts.bmi) + : (group ? obj::static_type : tts.obj)); + + const prerequisite_key& cp (p.key ()); // Source key. + + // Come up with the obj*/bmi*{} target. The source prerequisite + // directory can be relative (to the scope) or absolute. If it is + // relative, then use it as is. If absolute, then translate it to + // the corresponding directory under out_root. While the source + // directory is most likely under src_root, it is also possible it + // is under out_root (e.g., generated source). + // + dir_path d; + { + const dir_path& cpd (*cp.tk.dir); + + if (cpd.relative () || cpd.sub (rs.out_path ())) + d = cpd; + else + { + if (!cpd.sub (rs.src_path ())) + fail << "out of project prerequisite " << cp << + info << "specify corresponding " << rtt.name << "{} " + << "target explicitly"; + + d = rs.out_path () / cpd.leaf (rs.src_path ()); + } + } + + // obj/bmi{} is always in the out tree. Note that currently it could + // be the group -- we will pick a member in part 2 below. 
+ // + pt = &search (t, rtt, d, dir_path (), *cp.tk.name, nullptr, cp.scope); + + // If we shouldn't clean obj{}, then it is fair to assume we + // shouldn't clean the source either (generated source will be in + // the same directory as obj{} and if not, well, go find yourself + // another build system ;-)). + // + if (skip (pt)) + { + pt = nullptr; + continue; + } + + m = mod ? 2 : 1; + } + else if (p.is_a<libx> () || + p.is_a<liba> () || + p.is_a<libs> () || + p.is_a<libux> ()) + { + // Handle imported libraries. + // + // Note that since the search is rule-specific, we don't cache the + // target in the prerequisite. + // + if (p.proj ()) + pt = search_library ( + a, sys_lib_dirs, usr_lib_dirs, p.prerequisite); + + // The rest is the same basic logic as in search_and_match(). + // + if (pt == nullptr) + pt = &p.search (t); + + if (skip (pt)) + m = 3; // Mark so it is not matched. + + // If this is the lib{}/libu{} group, then pick the appropriate + // member. + // + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + } + else + { + // If this is the obj{} or bmi{} target group, then pick the + // appropriate member. + // + if (p.is_a<obj> ()) pt = &search (t, tts.obj, p.key ()); + else if (p.is_a<bmi> ()) pt = &search (t, tts.bmi, p.key ()); + // + // Windows module definition (.def). For other platforms (and for + // static libraries) treat it as an ordinary prerequisite. + // + else if (p.is_a<def> () && tclass == "windows" && ot != otype::a) + { + pt = &p.search (t); + } + // + // Something else. This could be something unrelated that the user + // tacked on (e.g., a doc{}). Or it could be some ad hoc input to + // the linker (say a linker script or some such). + // + else + { + if (!p.is_a<objx> () && !p.is_a<bmix> ()) + { + // @@ Temporary hack until we get the default outer operation + // for update. This allows operations like test and install to + // skip such tacked on stuff. 
+ // + // Note that ad hoc inputs have to be explicitly marked with the + // include=adhoc prerequisite-specific variable. + // + if (ctx.current_outer_oif != nullptr) + continue; + } + + pt = &p.search (t); + } + + if (skip (pt)) + { + pt = nullptr; + continue; + } + + // @@ MODHDR: hbmix{} has no objx{} + // + binless = binless && !(pt->is_a<objx> () || pt->is_a<bmix> ()); + + m = 3; + } + + mark (pt, m); + } + + // Match lib{} (the only unmarked) in parallel and wait for completion. + // + match_members (a, t, pts, start); + + // Check if we have any binfull utility libraries. + // + binless = binless && !find_binfull (a, t, li); + + // Now that we know for sure whether we are binless, derive file name(s) + // and add ad hoc group members. Note that for binless we still need the + // .pc member (whose name depends on the libray prefix) so we take care + // to not derive the path for the library target itself inside. + // + { + const char* e (nullptr); // Extension. + const char* p (nullptr); // Prefix. + const char* s (nullptr); // Suffix. + + if (lt.utility) + { + // These are all static libraries with names indicating the kind of + // object files they contain (similar to how we name object files + // themselves). We add the 'u' extension to avoid clashes with + // real libraries/import stubs. + // + // libue libhello.u.a hello.exe.u.lib + // libua libhello.a.u.a hello.lib.u.lib + // libus libhello.so.u.a hello.dll.u.lib hello.dylib.u.lib + // + // Note that we currently don't add bin.lib.{prefix,suffix} since + // these are not installed. 
+ // + if (tsys == "win32-msvc") + { + switch (ot) + { + case otype::e: e = "exe.u.lib"; break; + case otype::a: e = "lib.u.lib"; break; + case otype::s: e = "dll.u.lib"; break; + } + } + else + { + p = "lib"; + + if (tsys == "mingw32") + { + switch (ot) + { + case otype::e: e = "exe.u.a"; break; + case otype::a: e = "a.u.a"; break; + case otype::s: e = "dll.u.a"; break; + } + + } + else if (tsys == "darwin") + { + switch (ot) + { + case otype::e: e = "u.a"; break; + case otype::a: e = "a.u.a"; break; + case otype::s: e = "dylib.u.a"; break; + } + } + else + { + switch (ot) + { + case otype::e: e = "u.a"; break; + case otype::a: e = "a.u.a"; break; + case otype::s: e = "so.u.a"; break; + } + } + } + + if (binless) + t.path (empty_path); + else + t.derive_path (e, p, s); + } + else + { + if (auto l = t[ot == otype::e ? "bin.exe.prefix" : "bin.lib.prefix"]) + p = cast<string> (l).c_str (); + if (auto l = t[ot == otype::e ? "bin.exe.suffix" : "bin.lib.suffix"]) + s = cast<string> (l).c_str (); + + switch (ot) + { + case otype::e: + { + if (tclass == "windows") + e = "exe"; + else + e = ""; + + t.derive_path (e, p, s); + break; + } + case otype::a: + { + if (tsys == "win32-msvc") + e = "lib"; + else + { + if (p == nullptr) p = "lib"; + e = "a"; + } + + if (binless) + t.path (empty_path); + else + t.derive_path (e, p, s); + + break; + } + case otype::s: + { + if (binless) + t.path (empty_path); + else + { + // On Windows libs{} is an ad hoc group. The libs{} itself is + // the DLL and we add libi{} import library as its member. + // + if (tclass == "windows") + { + e = "dll"; + add_adhoc_member<libi> (t); + } + + md.libs_paths = derive_libs_paths (t, p, s); + } + + break; + } + } + + // Add VC's .pdb. Note that we are looking for the link.exe /DEBUG + // option. 
+ // + if (!binless && ot != otype::a && tsys == "win32-msvc") + { + if (find_option ("/DEBUG", t, c_loptions, true) || + find_option ("/DEBUG", t, x_loptions, true)) + { + const target_type& tt (*bs.find_target_type ("pdb")); + + // We call the target foo.{exe,dll}.pdb rather than just foo.pdb + // because we can have both foo.exe and foo.dll in the same + // directory. + // + file& pdb (add_adhoc_member<file> (t, tt, e)); + + // Note that the path is derived from the exe/dll path (so it + // will include the version in case of a dll). + // + if (pdb.path ().empty ()) + pdb.derive_path (t.path (), "pdb"); + } + } + + // Add pkg-config's .pc file. + // + // Note that we do it regardless of whether we are installing or not + // for two reasons. Firstly, it is not easy to detect this situation + // here since the for_install hasn't yet been communicated by + // install_rule. Secondly, always having this member takes care of + // cleanup automagically. The actual generation happens in + // perform_update() below. + // + if (ot != otype::e) + { + file& pc (add_adhoc_member<file> (t, + (ot == otype::a + ? pca::static_type + : pcs::static_type))); + + // Note that here we always use the lib name prefix, even on + // Windows with VC. The reason is the user needs a consistent name + // across platforms by which they can refer to the library. This + // is also the reason why we use the .static and .shared second- + // level extensions rather that a./.lib and .so/.dylib/.dll. + // + if (pc.path ().empty ()) + pc.derive_path (nullptr, (p == nullptr ? "lib" : p), s); + } + + // Add the Windows rpath emulating assembly directory as fsdir{}. + // + // Currently this is used in the backlinking logic and in the future + // could also be used for clean (though there we may want to clean + // old assemblies). 
+ // + if (ot == otype::e && tclass == "windows") + { + // Note that here we cannot determine whether we will actually + // need one (for_install, library timestamps are not available at + // this point to call windows_rpath_timestamp()). So we may add + // the ad hoc target but actually not produce the assembly. So + // whomever relies on this must check if the directory actually + // exists (windows_rpath_assembly() does take care to clean it up + // if not used). + // +#ifdef _WIN32 + target& dir = +#endif + add_adhoc_member (t, + fsdir::static_type, + path_cast<dir_path> (t.path () + ".dlls"), + t.out, + string () /* name */); + + // By default our backlinking logic will try to symlink the + // directory and it can even be done on Windows using junctions. + // The problem is the Windows DLL assembly "logic" refuses to + // recognize a junction as a valid assembly for some reason. So we + // are going to resort to copy-link (i.e., a real directory with a + // bunch of links). + // + // Interestingly, the directory symlink works just fine under + // Wine. So we only resort to copy-link'ing if we are running on + // Windows. + // +#ifdef _WIN32 + dir.state[a].assign (ctx.var_backlink) = "copy"; +#endif + } + } + } + + // Process prerequisites, pass 2: finish rule chaining but don't start + // matching anything yet since that may trigger recursive matching of + // bmi{} targets we haven't completed yet. Hairy, I know. + // + + // Parallel prerequisites/prerequisite_targets loop. + // + size_t i (start); + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + const target*& pt (pts[i].target); + uintptr_t& pd (pts[i++].data); + + if (pt == nullptr) + continue; + + // New mark: + // 1 - completion + // 2 - verification + // + uint8_t m (unmark (pt)); + + if (m == 3) // obj/bmi or lib not to be cleaned + { + m = 1; // Just completion. + + // Note that if this is a library not to be cleaned, we keep it + // marked for completion (see the next phase). 
+ } + else if (m == 1 || m == 2) // Source/module chain. + { + bool mod (m == 2); + + m = 1; + + const target& rt (*pt); + bool group (!p.prerequisite.belongs (t)); // Group's prerequisite. + + // If we have created a obj/bmi{} target group, pick one of its + // members; the rest would be primarily concerned with it. + // + pt = + group + ? &search (t, (mod ? tts.bmi : tts.obj), rt.dir, rt.out, rt.name) + : &rt; + + const target_type& rtt (mod + ? (group ? bmi::static_type : tts.bmi) + : (group ? obj::static_type : tts.obj)); + + // If this obj*{} already has prerequisites, then verify they are + // "compatible" with what we are doing here. Otherwise, synthesize + // the dependency. Note that we may also end up synthesizing with + // someone beating us to it. In this case also verify. + // + bool verify (true); + + // Note that we cannot use has_group_prerequisites() since the + // target is not yet matched. So we check the group directly. Of + // course, all of this is racy (see below). + // + if (!pt->has_prerequisites () && + (!group || !rt.has_prerequisites ())) + { + prerequisites ps {p.as_prerequisite ()}; // Source. + + // Add our lib*{} (see the export.* machinery for details) and + // bmi*{} (both original and chained; see module search logic) + // prerequisites. + // + // Note that we don't resolve lib{} to liba{}/libs{} here + // instead leaving it to whomever (e.g., the compile rule) will + // be needing *.export.*. One reason for doing it there is that + // the object target might be specified explicitly by the user + // in which case they will have to specify the set of lib{} + // prerequisites and it's much cleaner to do as lib{} rather + // than liba{}/libs{}. + // + // Initially, we were only adding imported libraries, but there + // is a problem with this approach: the non-imported library + // might depend on the imported one(s) which we will never "see" + // unless we start with this library. 
+ // + // Note: have similar logic in make_module_sidebuild(). + // + size_t j (start); + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + const target* pt (pts[j++]); + + if (pt == nullptr) // Note: ad hoc is taken care of. + continue; + + // NOTE: pt may be marked (even for a library -- see clean + // above). So watch out for a faux pax in this careful dance. + // + if (p.is_a<libx> () || + p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> () || + p.is_a<bmi> () || p.is_a (tts.bmi)) + { + ps.push_back (p.as_prerequisite ()); + } + else if (x_mod != nullptr && p.is_a (*x_mod)) // Chained module. + { + // Searched during pass 1 but can be NULL or marked. + // + if (pt != nullptr && i != j) // Don't add self (note: both +1). + { + // This is sticky: pt might have come before us and if it + // was a group, then we would have picked up a member. So + // here we may have to "unpick" it. + // + bool group (j < i && !p.prerequisite.belongs (t)); + + unmark (pt); + ps.push_back (prerequisite (group ? *pt->group : *pt)); + } + } + } + + // Note: adding to the group, not the member. + // + verify = !rt.prerequisites (move (ps)); + + // Recheck that the target still has no prerequisites. If that's + // no longer the case, then verify the result is compatible with + // what we need. + // + // Note that there are scenarios where we will not detect this or + // the detection will be racy. For example, thread 1 adds the + // prerequisite to the group and then thread 2, which doesn't use + // the group, adds the prerequisite to the member. This could be + // triggered by something like this (undetectable): + // + // lib{foo}: cxx{foo} + // exe{foo}: cxx{foo} + // + // Or this (detection is racy): + // + // lib{bar}: cxx{foo} + // liba{baz}: cxx{foo} + // + // The current feeling, however, is that in non-contrived cases + // (i.e., the source file is the same) this should be harmless. 
+ // + if (!verify && group) + verify = pt->has_prerequisites (); + } + + if (verify) + { + // This gets a bit tricky. We need to make sure the source files + // are the same which we can only do by comparing the targets to + // which they resolve. But we cannot search ot's prerequisites -- + // only the rule that matches can. Note, however, that if all this + // works out, then our next step is to match the obj*{} target. If + // things don't work out, then we fail, in which case searching + // and matching speculatively doesn't really hurt. So we start the + // async match here and finish this verification in the "harvest" + // loop below. + // + resolve_group (a, *pt); // Not matched yet so resolve group. + + bool src (false); + for (prerequisite_member p1: group_prerequisite_members (a, *pt)) + { + // Most of the time we will have just a single source so fast- + // path that case. + // + if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ()) + { + src = true; + continue; // Check the rest of the prerequisites. + } + + // Ignore some known target types (fsdir, headers, libraries, + // modules). + // + if (p1.is_a<fsdir> () || + p1.is_a<libx> () || + p1.is_a<liba> () || p1.is_a<libs> () || p1.is_a<libux> () || + p1.is_a<bmi> () || p1.is_a<bmix> () || + (p.is_a (mod ? *x_mod : x_src) && x_header (p1)) || + (p.is_a<c> () && p1.is_a<h> ())) + continue; + + fail << "synthesized dependency for prerequisite " << p + << " would be incompatible with existing target " << *pt << + info << "unexpected existing prerequisite type " << p1 << + info << "specify corresponding " << rtt.name << "{} " + << "dependency explicitly"; + } + + if (!src) + fail << "synthesized dependency for prerequisite " << p + << " would be incompatible with existing target " << *pt << + info << "no existing c/" << x_name << " source prerequisite" << + info << "specify corresponding " << rtt.name << "{} " + << "dependency explicitly"; + + m = 2; // Needs verification. 
+ } + } + else // lib*{} + { + // If this is a static library, see if we need to link it whole. + // Note that we have to do it after match since we rely on the + // group link-up. + // + bool u; + if ((u = pt->is_a<libux> ()) || pt->is_a<liba> ()) + { + const variable& var (ctx.var_pool["bin.whole"]); // @@ Cache. + + // See the bin module for the lookup semantics discussion. Note + // that the variable is not overridable so we omit find_override() + // calls. + // + lookup l (p.prerequisite.vars[var]); + + if (!l.defined ()) + l = pt->find_original (var, true).first; + + if (!l.defined ()) + { + bool g (pt->group != nullptr); + l = bs.find_original (var, + &pt->type (), + &pt->name, + (g ? &pt->group->type () : nullptr), + (g ? &pt->group->name : nullptr)).first; + } + + if (l ? cast<bool> (*l) : u) + pd |= lflag_whole; + } + } + + mark (pt, m); + } + + // Process prerequisites, pass 3: match everything and verify chains. + // + + // Wait with unlocked phase to allow phase switching. + // + wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true); + + i = start; + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + bool adhoc (pts[i].adhoc); + const target*& pt (pts[i++]); + + uint8_t m; + + if (pt == nullptr) + { + // Handle ad hoc prerequisities. + // + if (!adhoc) + continue; + + pt = &p.search (t); + m = 1; // Mark for completion. + } + else if ((m = unmark (pt)) != 0) + { + // If this is a library not to be cleaned, we can finally blank it + // out. + // + if (skip (pt)) + { + pt = nullptr; + continue; + } + } + + match_async (a, *pt, ctx.count_busy (), t[a].task_count); + mark (pt, m); + } + + wg.wait (); + + // The "harvest" loop: finish matching the targets we have started. Note + // that we may have bailed out early (thus the parallel i/n for-loop). + // + i = start; + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + const target*& pt (pts[i++]); + + // Skipped or not marked for completion. 
+ // + uint8_t m; + if (pt == nullptr || (m = unmark (pt)) == 0) + continue; + + build2::match (a, *pt); + + // Nothing else to do if not marked for verification. + // + if (m == 1) + continue; + + // Finish verifying the existing dependency (which is now matched) + // compared to what we would have synthesized. + // + bool mod (x_mod != nullptr && p.is_a (*x_mod)); + + // Note: group already resolved in the previous loop. + + for (prerequisite_member p1: group_prerequisite_members (a, *pt)) + { + if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ()) + { + // Searching our own prerequisite is ok, p1 must already be + // resolved. + // + const target& tp (p.search (t)); + const target& tp1 (p1.search (*pt)); + + if (&tp != &tp1) + { + bool group (!p.prerequisite.belongs (t)); + + const target_type& rtt (mod + ? (group ? bmi::static_type : tts.bmi) + : (group ? obj::static_type : tts.obj)); + + fail << "synthesized dependency for prerequisite " << p << " " + << "would be incompatible with existing target " << *pt << + info << "existing prerequisite " << p1 << " does not match " + << p << + info << p1 << " resolves to target " << tp1 << + info << p << " resolves to target " << tp << + info << "specify corresponding " << rtt.name << "{} " + << "dependency explicitly"; + } + + break; + } + } + } + + md.binless = binless; + md.start = start; + + switch (a) + { + case perform_update_id: return [this] (action a, const target& t) + { + return perform_update (a, t); + }; + case perform_clean_id: return [this] (action a, const target& t) + { + return perform_clean (a, t); + }; + default: return noop_recipe; // Configure update. 
+ } + } + + void link_rule:: + append_libraries (strings& args, + const file& l, bool la, lflags lf, + const scope& bs, action a, linfo li) const + { + struct data + { + strings& args; + const file& l; + action a; + linfo li; + compile_target_types tts; + } d {args, l, a, li, compile_types (li.type)}; + + auto imp = [] (const file&, bool la) + { + return la; + }; + + auto lib = [&d, this] (const file* const* lc, + const string& p, + lflags f, + bool) + { + const file* l (lc != nullptr ? *lc : nullptr); + + if (l == nullptr) + { + // Don't try to link a library (whether -lfoo or foo.lib) to a + // static library. + // + if (d.li.type != otype::a) + d.args.push_back (p); + } + else + { + bool lu (l->is_a<libux> ()); + + // The utility/non-utility case is tricky. Consider these two + // scenarios: + // + // exe -> (libu1-e -> libu1-e) -> (liba) -> libu-a -> (liba1) + // exe -> (liba) -> libu1-a -> libu1-a -> (liba1) -> libu-a1 + // + // Libraries that should be linked are in '()'. That is, we need to + // link the initial sequence of utility libraries and then, after + // encountering a first non-utility, only link non-utilities + // (because they already contain their utility's object files). + // + if (lu) + { + for (ptrdiff_t i (-1); lc[i] != nullptr; --i) + if (!lc[i]->is_a<libux> ()) + return; + } + + if (d.li.type == otype::a) + { + // Linking a utility library to a static library. + // + // Note that utility library prerequisites of utility libraries + // are automatically handled by process_libraries(). So all we + // have to do is implement the "thin archive" logic. + // + // We may also end up trying to link a non-utility library to a + // static library via a utility library (direct linking is taken + // care of by perform_update()). So we cut it off here. + // + if (!lu) + return; + + if (l->mtime () == timestamp_unreal) // Binless. 
+ return; + + for (const target* pt: l->prerequisite_targets[d.a]) + { + if (pt == nullptr) + continue; + + if (modules) + { + if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{} + pt = find_adhoc_member (*pt, d.tts.obj); + } + + // We could have dependency diamonds with utility libraries. + // Repeats will be handled by the linker (in fact, it could be + // required to repeat them to satisfy all the symbols) but here + // we have to suppress duplicates ourselves. + // + if (const file* f = pt->is_a<objx> ()) + { + string p (relative (f->path ()).string ()); + if (find (d.args.begin (), d.args.end (), p) == d.args.end ()) + d.args.push_back (move (p)); + } + } + } + else + { + // Linking a library to a shared library or executable. + // + + if (l->mtime () == timestamp_unreal) // Binless. + return; + + // On Windows a shared library is a DLL with the import library as + // an ad hoc group member. MinGW though can link directly to DLLs + // (see search_library() for details). + // + if (tclass == "windows" && l->is_a<libs> ()) + { + if (const libi* li = find_adhoc_member<libi> (*l)) + l = li; + } + + string p (relative (l->path ()).string ()); + + if (f & lflag_whole) + { + if (tsys == "win32-msvc") + { + p.insert (0, "/WHOLEARCHIVE:"); // Only available from VC14U2. + } + else if (tsys == "darwin") + { + p.insert (0, "-Wl,-force_load,"); + } + else + { + d.args.push_back ("-Wl,--whole-archive"); + d.args.push_back (move (p)); + d.args.push_back ("-Wl,--no-whole-archive"); + return; + } + } + + d.args.push_back (move (p)); + } + } + }; + + auto opt = [&d, this] (const file& l, + const string& t, + bool com, + bool exp) + { + // Don't try to pass any loptions when linking a static library. + // + if (d.li.type == otype::a) + return; + + // If we need an interface value, then use the group (lib{}). + // + if (const target* g = exp && l.is_a<libs> () ? l.group : &l) + { + const variable& var ( + com + ? (exp ? c_export_loptions : c_loptions) + : (t == x + ? 
(exp ? x_export_loptions : x_loptions) + : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")])); + + append_options (d.args, *g, var); + } + }; + + process_libraries ( + a, bs, li, sys_lib_dirs, l, la, lf, imp, lib, opt, true); + } + + void link_rule:: + hash_libraries (sha256& cs, + bool& update, timestamp mt, + const file& l, bool la, lflags lf, + const scope& bs, action a, linfo li) const + { + struct data + { + sha256& cs; + const dir_path& out_root; + bool& update; + timestamp mt; + linfo li; + } d {cs, bs.root_scope ()->out_path (), update, mt, li}; + + auto imp = [] (const file&, bool la) + { + return la; + }; + + auto lib = [&d, this] (const file* const* lc, + const string& p, + lflags f, + bool) + { + const file* l (lc != nullptr ? *lc : nullptr); + + if (l == nullptr) + { + if (d.li.type != otype::a) + d.cs.append (p); + } + else + { + bool lu (l->is_a<libux> ()); + + if (lu) + { + for (ptrdiff_t i (-1); lc[i] != nullptr; --i) + if (!lc[i]->is_a<libux> ()) + return; + } + + // We also don't need to do anything special for linking a utility + // library to a static library. If any of its object files (or the + // set of its object files) changes, then the library will have to + // be updated as well. In other words, we use the library timestamp + // as a proxy for all of its member's timestamps. + // + // We do need to cut of the static to static linking, just as in + // append_libraries(). + // + if (d.li.type == otype::a && !lu) + return; + + if (l->mtime () == timestamp_unreal) // Binless. + return; + + // Check if this library renders us out of date. + // + d.update = d.update || l->newer (d.mt); + + // On Windows a shared library is a DLL with the import library as + // an ad hoc group member. MinGW though can link directly to DLLs + // (see search_library() for details). 
+ // + if (tclass == "windows" && l->is_a<libs> ()) + { + if (const libi* li = find_adhoc_member<libi> (*l)) + l = li; + } + + d.cs.append (f); + hash_path (d.cs, l->path (), d.out_root); + } + }; + + auto opt = [&d, this] (const file& l, + const string& t, + bool com, + bool exp) + { + if (d.li.type == otype::a) + return; + + if (const target* g = exp && l.is_a<libs> () ? l.group : &l) + { + const variable& var ( + com + ? (exp ? c_export_loptions : c_loptions) + : (t == x + ? (exp ? x_export_loptions : x_loptions) + : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")])); + + hash_options (d.cs, *g, var); + } + }; + + process_libraries ( + a, bs, li, sys_lib_dirs, l, la, lf, imp, lib, opt, true); + } + + void link_rule:: + rpath_libraries (strings& args, + const target& t, + const scope& bs, + action a, + linfo li, + bool link) const + { + // Use -rpath-link only on targets that support it (Linux, *BSD). Note + // that we don't really need it for top-level libraries. + // + if (link) + { + if (tclass != "linux" && tclass != "bsd") + return; + } + + auto imp = [link] (const file& l, bool la) + { + // If we are not rpath-link'ing, then we only need to rpath interface + // libraries (they will include rpath's for their implementations) + // Otherwise, we have to do this recursively. In both cases we also + // want to see through utility libraries. + // + // The rpath-link part is tricky: ideally we would like to get only + // implementations and only of shared libraries. We are not interested + // in interfaces because we are linking their libraries explicitly. + // However, in our model there is no such thing as "implementation + // only"; it is either interface or interface and implementation. So + // we are going to rpath-link all of them which should be harmless + // except for some noise on the command line. + // + // + return (link ? 
!la : false) || l.is_a<libux> (); + }; + + // Package the data to keep within the 2-pointer small std::function + // optimization limit. + // + struct + { + strings& args; + bool link; + } d {args, link}; + + auto lib = [&d, this] (const file* const* lc, + const string& f, + lflags, + bool sys) + { + const file* l (lc != nullptr ? *lc : nullptr); + + // We don't rpath system libraries. Why, you may ask? There are many + // good reasons and I have them written on a napkin somewhere... + // + if (sys) + return; + + if (l != nullptr) + { + if (!l->is_a<libs> ()) + return; + + if (l->mtime () == timestamp_unreal) // Binless. + return; + } + else + { + // This is an absolute path and we need to decide whether it is + // a shared or static library. Doesn't seem there is anything + // better than checking for a platform-specific extension (maybe + // we should cache it somewhere). + // + size_t p (path::traits_type::find_extension (f)); + + if (p == string::npos) + return; + + ++p; // Skip dot. + + bool c (true); + const char* e; + + if (tclass == "windows") {e = "dll"; c = false;} + else if (tsys == "darwin") e = "dylib"; + else e = "so"; + + if ((c + ? f.compare (p, string::npos, e) + : casecmp (f.c_str () + p, e)) != 0) + return; + } + + // Ok, if we are here then it means we have a non-system, shared + // library and its absolute path is in f. + // + string o (d.link ? "-Wl,-rpath-link," : "-Wl,-rpath,"); + + size_t p (path::traits_type::rfind_separator (f)); + assert (p != string::npos); + + o.append (f, 0, (p != 0 ? p : 1)); // Don't include trailing slash. + d.args.push_back (move (o)); + }; + + // In case we don't have the "small function object" optimization. 
+ // + const function<bool (const file&, bool)> impf (imp); + const function< + void (const file* const*, const string&, lflags, bool)> libf (lib); + + for (const prerequisite_target& pt: t.prerequisite_targets[a]) + { + if (pt == nullptr) + continue; + + bool la; + const file* f; + + if ((la = (f = pt->is_a<liba> ())) || + (la = (f = pt->is_a<libux> ())) || + ( f = pt->is_a<libs> ())) + { + if (!link && !la) + { + // Top-level shared library dependency. + // + if (!f->path ().empty ()) // Not binless. + { + // It is either matched or imported so should be a cc library. + // + if (!cast_false<bool> (f->vars[c_system])) + args.push_back ( + "-Wl,-rpath," + f->path ().directory ().string ()); + } + } + + process_libraries (a, bs, li, sys_lib_dirs, + *f, la, pt.data, + impf, libf, nullptr); + } + } + } + + // Filter link.exe noise (msvc.cxx). + // + void + msvc_filter_link (ifdstream&, const file&, otype); + + // Translate target CPU to the link.exe/lib.exe /MACHINE option. + // + const char* + msvc_machine (const string& cpu); // msvc.cxx + + target_state link_rule:: + perform_update (action a, const target& xt) const + { + tracer trace (x, "link_rule::perform_update"); + + const file& t (xt.as<file> ()); + const path& tp (t.path ()); + + context& ctx (t.ctx); + + const scope& bs (t.base_scope ()); + const scope& rs (*bs.root_scope ()); + + match_data& md (t.data<match_data> ()); + + // Unless the outer install rule signalled that this is update for + // install, signal back that we've performed plain update. + // + if (!md.for_install) + md.for_install = false; + + bool for_install (*md.for_install); + + ltype lt (link_type (t)); + otype ot (lt.type); + linfo li (link_info (bs, ot)); + compile_target_types tts (compile_types (ot)); + + bool binless (md.binless); + assert (ot != otype::e || !binless); // Sanity check. + + // Determine if we are out-of-date. + // + bool update (false); + bool scratch (false); + timestamp mt (binless ? 
timestamp_unreal : t.load_mtime ()); + + // Update prerequisites. We determine if any relevant non-ad hoc ones + // render us out-of-date manually below. + // + // Note that execute_prerequisites() blanks out all the ad hoc + // prerequisites so we don't need to worry about them from now on. + // + target_state ts; + + if (optional<target_state> s = + execute_prerequisites (a, + t, + mt, + [] (const target&, size_t) {return false;})) + ts = *s; + else + { + // An ad hoc prerequisite renders us out-of-date. Let's update from + // scratch for good measure. + // + scratch = update = true; + ts = target_state::changed; + } + + // Check for the for_install variable on each prerequisite and blank out + // those that don't match. Note that we have to do it after updating + // prerequisites to keep the dependency counts straight. + // + if (const variable* var_fi = ctx.var_pool.find ("for_install")) + { + // Parallel prerequisites/prerequisite_targets loop. + // + size_t i (md.start); + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + const target*& pt (t.prerequisite_targets[a][i++]); + + if (pt == nullptr) + continue; + + if (lookup l = p.prerequisite.vars[var_fi]) + { + if (cast<bool> (l) != for_install) + { + l5 ([&]{trace << "excluding " << *pt << " due to for_install";}); + pt = nullptr; + } + } + } + } + + // (Re)generate pkg-config's .pc file. While the target itself might be + // up-to-date from a previous run, there is no guarantee that .pc exists + // or also up-to-date. So to keep things simple we just regenerate it + // unconditionally. + // + // Also, if you are wondering why don't we just always produce this .pc, + // install or no install, the reason is unless and until we are updating + // for install, we have no idea where-to things will be installed. + // + if (for_install && lt.library () && !lt.utility) + pkgconfig_save (a, t, lt.static_library (), binless); + + // If we have no binary to build then we are done. 
+ // + if (binless) + { + t.mtime (timestamp_unreal); + return ts; + } + + // Open the dependency database (do it before messing with Windows + // manifests to diagnose missing output directory). + // + depdb dd (tp + ".d"); + + // If targeting Windows, take care of the manifest. + // + path manifest; // Manifest itself (msvc) or compiled object file. + timestamp rpath_timestamp = timestamp_nonexistent; // DLLs timestamp. + + if (lt.executable () && tclass == "windows") + { + // First determine if we need to add our rpath emulating assembly. The + // assembly itself is generated later, after updating the target. Omit + // it if we are updating for install. + // + if (!for_install && cast_true<bool> (t["bin.rpath.auto"])) + rpath_timestamp = windows_rpath_timestamp (t, bs, a, li); + + auto p (windows_manifest (t, rpath_timestamp != timestamp_nonexistent)); + path& mf (p.first); + timestamp mf_mt (p.second); + + if (tsys == "mingw32") + { + // Compile the manifest into the object file with windres. While we + // are going to synthesize an .rc file to pipe to windres' stdin, we + // will still use .manifest to check if everything is up-to-date. + // + manifest = mf + ".o"; + + if (mf_mt == timestamp_nonexistent || mf_mt > mtime (manifest)) + { + path of (relative (manifest)); + + const process_path& rc (cast<process_path> (rs["bin.rc.path"])); + + // @@ Would be good to add this to depdb (e.g,, rc changes). + // + const char* args[] = { + rc.recall_string (), + "--input-format=rc", + "--output-format=coff", + "-o", of.string ().c_str (), + nullptr}; + + if (verb >= 3) + print_process (args); + + if (!ctx.dry_run) + { + auto_rmfile rm (of); + + try + { + process pr (rc, args, -1); + + try + { + ofdstream os (move (pr.out_fd)); + + // 1 is resource ID, 24 is RT_MANIFEST. We also need to + // escape Windows path backslashes. 
+ // + os << "1 24 \""; + + const string& s (mf.string ()); + for (size_t i (0), j;; i = j + 1) + { + j = s.find ('\\', i); + os.write (s.c_str () + i, + (j == string::npos ? s.size () : j) - i); + + if (j == string::npos) + break; + + os.write ("\\\\", 2); + } + + os << "\"" << endl; + + os.close (); + rm.cancel (); + } + catch (const io_error& e) + { + if (pr.wait ()) // Ignore if child failed. + fail << "unable to pipe resource file to " << args[0] + << ": " << e; + } + + run_finish (args, pr); + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + if (e.child) + exit (1); + + throw failed (); + } + } + + update = true; // Manifest changed, force update. + } + } + else + { + manifest = move (mf); // Save for link.exe's /MANIFESTINPUT. + + if (mf_mt == timestamp_nonexistent || mf_mt > mt) + update = true; // Manifest changed, force update. + } + } + + // Check/update the dependency database. + // + // First should come the rule name/version. + // + if (dd.expect (rule_id) != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); + + lookup ranlib; + + // Then the linker checksum (ar/ranlib or the compiler). + // + if (lt.static_library ()) + { + ranlib = rs["bin.ranlib.path"]; + + const char* rl ( + ranlib + ? cast<string> (rs["bin.ranlib.checksum"]).c_str () + : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + + if (dd.expect (cast<string> (rs["bin.ar.checksum"])) != nullptr) + l4 ([&]{trace << "ar mismatch forcing update of " << t;}); + + if (dd.expect (rl) != nullptr) + l4 ([&]{trace << "ranlib mismatch forcing update of " << t;}); + } + else + { + // For VC we use link.exe directly. + // + const string& cs ( + cast<string> ( + rs[tsys == "win32-msvc" + ? ctx.var_pool["bin.ld.checksum"] + : x_checksum])); + + if (dd.expect (cs) != nullptr) + l4 ([&]{trace << "linker mismatch forcing update of " << t;}); + } + + // Next check the target. 
While it might be incorporated into the linker + // checksum, it also might not (e.g., VC link.exe). + // + if (dd.expect (ctgt.string ()) != nullptr) + l4 ([&]{trace << "target mismatch forcing update of " << t;}); + + // Start building the command line. While we don't yet know whether we + // will really need it, we need to hash it to find out. So the options + // are to either replicate the exact process twice, first for hashing + // then for building or to go ahead and start building and hash the + // result. The first approach is probably more efficient while the + // second is simpler. Let's go with the simpler for now (actually it's + // kind of a hybrid). + // + cstrings args {nullptr}; // Reserve one for config.bin.ar/config.x. + + // Storage. + // + string arg1, arg2; + strings sargs; + + if (lt.static_library ()) + { + if (tsys == "win32-msvc") + { + // lib.exe has /LIBPATH but it's not clear/documented what it's used + // for. Perhaps for link-time code generation (/LTCG)? If that's the + // case, then we may need to pass *.loptions. + // + args.push_back ("/NOLOGO"); + + // Add /MACHINE. + // + args.push_back (msvc_machine (cast<string> (rs[x_target_cpu]))); + } + else + { + // If the user asked for ranlib, don't try to do its function with + // -s. Some ar implementations (e.g., the LLVM one) don't support + // leading '-'. + // + arg1 = ranlib ? "rc" : "rcs"; + + // For utility libraries use thin archives if possible. + // + // Thin archives are supported by GNU ar since binutils 2.19.1 and + // LLVM ar since LLVM 3.8.0. Note that strictly speaking thin + // archives also have to be supported by the linker but it is + // probably safe to assume that the two came from the same version + // of binutils/LLVM. + // + if (lt.utility) + { + const string& id (cast<string> (rs["bin.ar.id"])); + + for (bool g (id == "gnu"); g || id == "llvm"; ) // Breakout loop. + { + auto mj (cast<uint64_t> (rs["bin.ar.version.major"])); + if (mj < (g ? 
2 : 3)) break; + if (mj == (g ? 2 : 3)) + { + auto mi (cast<uint64_t> (rs["bin.ar.version.minor"])); + if (mi < (g ? 18 : 8)) break; + if (mi == 18 && g) + { + auto pa (cast<uint64_t> (rs["bin.ar.version.patch"])); + if (pa < 1) break; + } + } + + arg1 += 'T'; + break; + } + } + + args.push_back (arg1.c_str ()); + } + + append_options (args, t, c_aoptions); + append_options (args, t, x_aoptions); + } + else + { + if (tsys == "win32-msvc") + { + // We are using link.exe directly so don't pass the compiler + // options. + } + else + { + append_options (args, t, c_coptions); + append_options (args, t, x_coptions); + append_options (args, tstd); + } + + append_options (args, t, c_loptions); + append_options (args, t, x_loptions); + + // Extra system library dirs (last). + // + // @@ /LIBPATH:<path>, not /LIBPATH <path> + // + assert (sys_lib_dirs_extra <= sys_lib_dirs.size ()); + append_option_values ( + args, + cclass == compiler_class::msvc ? "/LIBPATH:" : "-L", + sys_lib_dirs.begin () + sys_lib_dirs_extra, sys_lib_dirs.end (), + [] (const dir_path& d) {return d.string ().c_str ();}); + + // Handle soname/rpath. + // + if (tclass == "windows") + { + // Limited emulation for Windows with no support for user-defined + // rpath/rpath-link. + // + lookup l; + + if ((l = t["bin.rpath"]) && !l->empty ()) + fail << ctgt << " does not support rpath"; + + if ((l = t["bin.rpath_link"]) && !l->empty ()) + fail << ctgt << " does not support rpath-link"; + } + else + { + // Set soname. + // + if (lt.shared_library ()) + { + const libs_paths& paths (md.libs_paths); + const string& leaf (paths.effect_soname ().leaf ().string ()); + + if (tclass == "macos") + { + // With Mac OS 10.5 (Leopard) Apple finally caved in and gave us + // a way to emulate vanilla -rpath. + // + // It may seem natural to do something different on update for + // install. However, if we don't make it @rpath, then the user + // won't be able to use config.bin.rpath for installed libraries. 
+ // + arg1 = "-install_name"; + arg2 = "@rpath/" + leaf; + } + else + arg1 = "-Wl,-soname," + leaf; + + if (!arg1.empty ()) + args.push_back (arg1.c_str ()); + + if (!arg2.empty ()) + args.push_back (arg2.c_str ()); + } + + // Add rpaths. We used to first add the ones specified by the user + // so that they take precedence. But that caused problems if we have + // old versions of the libraries sitting in the rpath location + // (e.g., installed libraries). And if you think about this, it's + // probably correct to prefer libraries that we explicitly imported + // to the ones found via rpath. + // + // Note also that if this is update for install, then we don't add + // rpath of the imported libraries (i.e., we assume they are also + // installed). But we add -rpath-link for some platforms. + // + if (cast_true<bool> (t[for_install + ? "bin.rpath_link.auto" + : "bin.rpath.auto"])) + rpath_libraries (sargs, t, bs, a, li, for_install /* link */); + + lookup l; + + if ((l = t["bin.rpath"]) && !l->empty ()) + for (const dir_path& p: cast<dir_paths> (l)) + sargs.push_back ("-Wl,-rpath," + p.string ()); + + if ((l = t["bin.rpath_link"]) && !l->empty ()) + { + // Only certain targets support -rpath-link (Linux, *BSD). + // + if (tclass != "linux" && tclass != "bsd") + fail << ctgt << " does not support rpath-link"; + + for (const dir_path& p: cast<dir_paths> (l)) + sargs.push_back ("-Wl,-rpath-link," + p.string ()); + } + } + } + + // All the options should now be in. Hash them and compare with the db. + // + { + sha256 cs; + + for (size_t i (1); i != args.size (); ++i) + cs.append (args[i]); + + for (size_t i (0); i != sargs.size (); ++i) + cs.append (sargs[i]); + + // @@ Note that we don't hash output options so if one of the ad hoc + // members that we manage gets renamed, we will miss a rebuild. + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "options mismatch forcing update of " << t;}); + } + + // Finally, hash and compare the list of input files. 
+ // + // Should we capture actual file names or their checksum? The only good + // reason for capturing actual files is diagnostics: we will be able to + // pinpoint exactly what is causing the update. On the other hand, the + // checksum is faster and simpler. And we like simple. + // + const file* def (nullptr); // Cached if present. + { + sha256 cs; + + for (const prerequisite_target& p: t.prerequisite_targets[a]) + { + const target* pt (p.target); + + if (pt == nullptr) + continue; + + // If this is bmi*{}, then obj*{} is its ad hoc member. + // + if (modules) + { + if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{} + pt = find_adhoc_member (*pt, tts.obj); + } + + const file* f; + bool la (false), ls (false); + + // We link utility libraries to everything except other utility + // libraries. In case of linking to liba{} we follow the "thin + // archive" lead and "see through" to their object file + // prerequisites (recursively, until we encounter a non-utility). + // + if ((f = pt->is_a<objx> ()) || + (!lt.utility && + (la = (f = pt->is_a<libux> ()))) || + (!lt.static_library () && + ((la = (f = pt->is_a<liba> ())) || + (ls = (f = pt->is_a<libs> ()))))) + { + // Link all the dependent interface libraries (shared) or interface + // and implementation (static), recursively. + // + // Also check if any of them render us out of date. The tricky + // case is, say, a utility library (static) that depends on a + // shared library. When the shared library is updated, there is no + // reason to re-archive the utility but those who link the utility + // have to "see through" the changes in the shared library. + // + if (la || ls) + { + hash_libraries (cs, update, mt, *f, la, p.data, bs, a, li); + f = nullptr; // Timestamp checked by hash_libraries(). 
+ } + else + hash_path (cs, f->path (), rs.out_path ()); + } + else if ((f = pt->is_a<bin::def> ())) + { + if (tclass == "windows" && !lt.static_library ()) + { + // At least link.exe only allows a single .def file. + // + if (def != nullptr) + fail << "multiple module definition files specified for " << t; + + hash_path (cs, f->path (), rs.out_path ()); + def = f; + } + else + f = nullptr; // Not an input. + } + else + f = pt->is_a<exe> (); // Consider executable mtime (e.g., linker). + + // Check if this input renders us out of date. + // + if (f != nullptr) + update = update || f->newer (mt); + } + + // Treat it as input for both MinGW and VC (mtime checked above). + // + if (!manifest.empty ()) + hash_path (cs, manifest, rs.out_path ()); + + // Treat *.libs variable values as inputs, not options. + // + if (!lt.static_library ()) + { + hash_options (cs, t, c_libs); + hash_options (cs, t, x_libs); + } + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "file set mismatch forcing update of " << t;}); + } + + // If any of the above checks resulted in a mismatch (different linker, + // options or input file set), or if the database is newer than the + // target (interrupted update) then force the target update. Also note + // this situation in the "from scratch" flag. + // + if (dd.writing () || dd.mtime > mt) + scratch = update = true; + + dd.close (); + + // If nothing changed, then we are done. + // + if (!update) + return ts; + + // Ok, so we are updating. Finish building the command line. + // + string in, out, out1, out2, out3; // Storage. + + // Translate paths to relative (to working directory) ones. This results + // in easier to read diagnostics. 
+ // + path relt (relative (tp)); + + const process_path* ld (nullptr); + if (lt.static_library ()) + { + ld = &cast<process_path> (rs["bin.ar.path"]); + + if (tsys == "win32-msvc") + { + out = "/OUT:" + relt.string (); + args.push_back (out.c_str ()); + } + else + args.push_back (relt.string ().c_str ()); + } + else + { + // The options are usually similar enough to handle executables + // and shared libraries together. + // + if (tsys == "win32-msvc") + { + // Using link.exe directly. + // + ld = &cast<process_path> (rs["bin.ld.path"]); + args.push_back ("/NOLOGO"); + + if (ot == otype::s) + args.push_back ("/DLL"); + + // Add /MACHINE. + // + args.push_back (msvc_machine (cast<string> (rs[x_target_cpu]))); + + // Unless explicitly enabled with /INCREMENTAL, disable incremental + // linking (it is implicitly enabled if /DEBUG is specified). The + // reason is the .ilk file: its name cannot be changed and if we + // have, say, foo.exe and foo.dll, then they will end up stomping on + // each other's .ilk's. + // + // So the idea is to disable it by default but let the user request + // it explicitly if they are sure their project doesn't suffer from + // the above issue. We can also have something like 'incremental' + // config initializer keyword for this. + // + // It might also be a good idea to ask Microsoft to add an option. + // + if (!find_option ("/INCREMENTAL", args, true)) + args.push_back ("/INCREMENTAL:NO"); + + if (ctype == compiler_type::clang) + { + // According to Clang's MSVC.cpp, we shall link libcmt.lib (static + // multi-threaded runtime) unless -nostdlib or -nostartfiles is + // specified. 
+ // + if (!find_options ({"-nostdlib", "-nostartfiles"}, t, c_coptions) && + !find_options ({"-nostdlib", "-nostartfiles"}, t, x_coptions)) + args.push_back ("/DEFAULTLIB:libcmt.lib"); + } + + // If you look at the list of libraries Visual Studio links by + // default, it includes everything and a couple of kitchen sinks + // (winspool32.lib, ole32.lib, odbc32.lib, etc) while we want to + // keep our low-level build as pure as possible. However, there seem + // to be fairly essential libraries that are not linked by link.exe + // by default (use /VERBOSE:LIB to see the list). For example, MinGW + // by default links advapi32, shell32, user32, and kernel32. And so + // we follow suit and make sure those are linked. advapi32 and + // kernel32 are already on the default list and we only need to add + // the other two. + // + // The way we are going to do it is via the /DEFAULTLIB option + // rather than specifying the libraries as normal inputs (as VS + // does). This way the user can override our actions with the + // /NODEFAULTLIB option. + // + args.push_back ("/DEFAULTLIB:shell32.lib"); + args.push_back ("/DEFAULTLIB:user32.lib"); + + // Take care of the manifest (will be empty for the DLL). + // + if (!manifest.empty ()) + { + out3 = "/MANIFESTINPUT:"; + out3 += relative (manifest).string (); + args.push_back ("/MANIFEST:EMBED"); + args.push_back (out3.c_str ()); + } + + if (def != nullptr) + { + in = "/DEF:" + relative (def->path ()).string (); + args.push_back (in.c_str ()); + } + + if (ot == otype::s) + { + // On Windows libs{} is the DLL and an ad hoc group member is the + // import library. + // + // This will also create the .exp export file. Its name will be + // derived from the import library by changing the extension. + // Lucky for us -- there is no option to name it. 
+ // + const file& imp (*find_adhoc_member<libi> (t)); + + out2 = "/IMPLIB:"; + out2 += relative (imp.path ()).string (); + args.push_back (out2.c_str ()); + } + + // If we have /DEBUG then name the .pdb file. It is an ad hoc group + // member. + // + if (find_option ("/DEBUG", args, true)) + { + const file& pdb ( + *find_adhoc_member<file> (t, *bs.find_target_type ("pdb"))); + + out1 = "/PDB:"; + out1 += relative (pdb.path ()).string (); + args.push_back (out1.c_str ()); + } + + // @@ An executable can have an import library and VS seems to + // always name it. I wonder what would trigger its generation? + // Could it be the presence of export symbols? Yes, link.exe will + // generate the import library iff there are exported symbols. + // Which means there could be a DLL without an import library + // (which we currently don't handle very well). + // + out = "/OUT:" + relt.string (); + args.push_back (out.c_str ()); + } + else + { + switch (cclass) + { + case compiler_class::gcc: + { + ld = &cpath; + + // Add the option that triggers building a shared library and + // take care of any extras (e.g., import library). + // + if (ot == otype::s) + { + if (tclass == "macos") + args.push_back ("-dynamiclib"); + else + args.push_back ("-shared"); + + if (tsys == "mingw32") + { + // On Windows libs{} is the DLL and an ad hoc group member + // is the import library. + // + const file& imp (*find_adhoc_member<libi> (t)); + out = "-Wl,--out-implib=" + relative (imp.path ()).string (); + args.push_back (out.c_str ()); + } + } + + args.push_back ("-o"); + args.push_back (relt.string ().c_str ()); + + // For MinGW the .def file is just another input. + // + if (def != nullptr) + { + in = relative (def->path ()).string (); + args.push_back (in.c_str ()); + } + + break; + } + case compiler_class::msvc: assert (false); + } + } + } + + args[0] = ld->recall_string (); + + // Append input files noticing the position of the first. 
+ // +#ifdef _WIN32 + size_t args_input (args.size ()); +#endif + + // The same logic as during hashing above. See also a similar loop + // inside append_libraries(). + // + for (const prerequisite_target& p: t.prerequisite_targets[a]) + { + const target* pt (p.target); + + if (pt == nullptr) + continue; + + if (modules) + { + if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{} + pt = find_adhoc_member (*pt, tts.obj); + } + + const file* f; + bool la (false), ls (false); + + if ((f = pt->is_a<objx> ()) || + (!lt.utility && + (la = (f = pt->is_a<libux> ()))) || + (!lt.static_library () && + ((la = (f = pt->is_a<liba> ())) || + (ls = (f = pt->is_a<libs> ()))))) + { + if (la || ls) + append_libraries (sargs, *f, la, p.data, bs, a, li); + else + sargs.push_back (relative (f->path ()).string ()); // string()&& + } + } + + // For MinGW manifest is an object file. + // + if (!manifest.empty () && tsys == "mingw32") + sargs.push_back (relative (manifest).string ()); + + // Shallow-copy sargs to args. Why not do it as we go along pushing into + // sargs? Because of potential reallocations in sargs. + // + for (const string& a: sargs) + args.push_back (a.c_str ()); + + if (!lt.static_library ()) + { + append_options (args, t, c_libs); + append_options (args, t, x_libs); + } + + args.push_back (nullptr); + + // Cleanup old (versioned) libraries. Let's do it even for dry-run to + // keep things simple. + // + if (lt.shared_library ()) + { + const libs_paths& paths (md.libs_paths); + const path& p (paths.clean); + + if (!p.empty ()) + try + { + if (verb >= 4) // Seeing this with -V doesn't really add any value. + text << "rm " << p; + + auto rm = [&paths, this] (path&& m, const string&, bool interm) + { + if (!interm) + { + // Filter out paths that have one of the current paths as a + // prefix. 
+ // + auto test = [&m] (const path& p) + { + const string& s (p.string ()); + return s.empty () || m.string ().compare (0, s.size (), s) != 0; + }; + + if (test (*paths.real) && + test ( paths.interm) && + test ( paths.soname) && + test ( paths.load) && + test ( paths.link)) + { + try_rmfile (m); + try_rmfile (m + ".d"); + + if (tsys == "win32-msvc") + { + try_rmfile (m.base () += ".ilk"); + try_rmfile (m += ".pdb"); + } + } + } + return true; + }; + + // Note: doesn't follow symlinks. + // + path_search (p, rm, dir_path () /* start */, path_match_flags::none); + } + catch (const system_error&) {} // Ignore errors. + } + else if (lt.static_library ()) + { + // We use relative paths to the object files which means we may end + // up with different ones depending on CWD and some implementations + // treat them as different archive members. So remove the file to + // be sure. Note that we ignore errors leaving it to the archiver + // to complain. + // + if (mt != timestamp_nonexistent) + try_rmfile (relt, true); + } + + if (verb == 1) + text << (lt.static_library () ? "ar " : "ld ") << t; + else if (verb == 2) + print_process (args); + + // Do any necessary fixups to the command line to make it runnable. + // + // Notice the split in the diagnostics: at verbosity level 1 we print + // the "logical" command line while at level 2 and above -- what we are + // actually executing. + // + // On Windows we need to deal with the command line length limit. The + // best workaround seems to be passing (part of) the command line in an + // "options file" ("response file" in Microsoft's terminology). Both + // Microsoft's link.exe/lib.exe as well as GNU g??.exe/ar.exe support + // the same @<file> notation (and with a compatible subset of the + // content format; see below). Note also that GCC is smart enough to use + // an options file to call the underlying linker if we called it with + // @<file>. 
We will also assume that any other linker that we might be + // using supports this notation. + // + // Note that this is a limitation of the host platform, not the target + // (and Wine, where these lines are a bit blurred, does not have this + // length limitation). + // +#ifdef _WIN32 + auto_rmfile trm; + string targ; + { + // Calculate the would-be command line length similar to how process' + // implementation does it. + // + auto quote = [s = string ()] (const char* a) mutable -> const char* + { + return process::quote_argument (a, s); + }; + + size_t n (0); + for (const char* a: args) + { + if (a != nullptr) + { + if (n != 0) + n++; // For the space separator. + + n += strlen (quote (a)); + } + } + + if (n > 32766) // 32768 - "Unicode terminating null character". + { + // Use the .t extension (for "temporary"). + // + const path& f ((trm = auto_rmfile (relt + ".t")).path); + + try + { + ofdstream ofs (f); + + // Both Microsoft and GNU support a space-separated list of + // potentially-quoted arguments. GNU also supports backslash- + // escaping (whether Microsoft supports it is unclear; but it + // definitely doesn't need it for backslashes themselves, for + // example, in paths). + // + bool e (tsys != "win32-msvc"); // Assume GNU if not MSVC. + string b; + + for (size_t i (args_input), n (args.size () - 1); i != n; ++i) + { + const char* a (args[i]); + + if (e) // We will most likely have backslashes so just do it. + { + for (b.clear (); *a != '\0'; ++a) + { + if (*a != '\\') + b += *a; + else + b += "\\\\"; + } + + a = b.c_str (); + } + + ofs << (i != args_input ? " " : "") << quote (a); + } + + ofs << '\n'; + ofs.close (); + } + catch (const io_error& e) + { + fail << "unable to write " << f << ": " << e; + } + + // Replace input arguments with @file. + // + targ = '@' + f.string (); + args.resize (args_input); + args.push_back (targ.c_str()); + args.push_back (nullptr); + + //@@ TODO: leave .t file if linker failed and verb > 2? 
+ } + } +#endif + + if (verb > 2) + print_process (args); + + // Remove the target file if any of the subsequent (after the linker) + // actions fail or if the linker fails but does not clean up its mess + // (like link.exe). If we don't do that, then we will end up with a + // broken build that is up-to-date. + // + auto_rmfile rm; + + if (!ctx.dry_run) + { + rm = auto_rmfile (relt); + + try + { + // VC tools (both lib.exe and link.exe) send diagnostics to stdout. + // Also, link.exe likes to print various gratuitous messages. So for + // link.exe we redirect stdout to a pipe, filter that noise out, and + // send the rest to stderr. + // + // For lib.exe (and any other insane linker that may try to pull off + // something like this) we are going to redirect stdout to stderr. + // For sane compilers this should be harmless. + // + bool filter (tsys == "win32-msvc" && !lt.static_library ()); + + process pr (*ld, args.data (), 0, (filter ? -1 : 2)); + + if (filter) + { + try + { + ifdstream is ( + move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit); + + msvc_filter_link (is, t, ot); + + // If anything remains in the stream, send it all to stderr. + // Note that the eof check is important: if the stream is at + // eof, this and all subsequent writes to the diagnostics stream + // will fail (and you won't see a thing). + // + if (is.peek () != ifdstream::traits_type::eof ()) + diag_stream_lock () << is.rdbuf (); + + is.close (); + } + catch (const io_error&) {} // Assume exits with error. + } + + run_finish (args, pr); + } + catch (const process_error& e) + { + error << "unable to execute " << args[0] << ": " << e; + + // In a multi-threaded program that fork()'ed but did not exec(), it + // is unwise to try to do any kind of cleanup (like unwinding the + // stack and running destructors). 
+ // + if (e.child) + { + rm.cancel (); +#ifdef _WIN32 + trm.cancel (); +#endif + exit (1); + } + + throw failed (); + } + + // VC link.exe creates an import library and .exp file for an + // executable if any of its object files export any symbols (think a + // unit test linking libus{}). And, no, there is no way to suppress + // it. Well, there is a way: create a .def file with an empty EXPORTS + // section, pass it to lib.exe to create a dummy .exp (and .lib), and + // then pass this empty .exp to link.exe. Wanna go this way? Didn't + // think so. Having no way to disable this, the next simplest thing + // seems to be just cleaning the mess up. + // + // Note also that if at some point we decide to support such "shared + // executables" (-rdynamic, etc), then it will probably have to be a + // different target type (exes{}?) since it will need a different set + // of object files (-fPIC so probably objs{}), etc. + // + if (lt.executable () && tsys == "win32-msvc") + { + path b (relt.base ()); + try_rmfile (b + ".lib", true /* ignore_errors */); + try_rmfile (b + ".exp", true /* ignore_errors */); + } + } + + if (ranlib) + { + const process_path& rl (cast<process_path> (ranlib)); + + const char* args[] = { + rl.recall_string (), + relt.string ().c_str (), + nullptr}; + + if (verb >= 2) + print_process (args); + + if (!ctx.dry_run) + run (rl, args); + } + + // For Windows generate (or clean up) rpath-emulating assembly. + // + if (tclass == "windows") + { + if (lt.executable ()) + windows_rpath_assembly (t, bs, a, li, + cast<string> (rs[x_target_cpu]), + rpath_timestamp, + scratch); + } + + if (lt.shared_library ()) + { + // For shared libraries we may need to create a bunch of symlinks (or + // fallback to hardlinks/copies on Windows). + // + auto ln = [&ctx] (const path& f, const path& l) + { + if (verb >= 3) + text << "ln -sf " << f << ' ' << l; + + if (ctx.dry_run) + return; + + try + { + try + { + // The -f part. 
+ // + if (file_exists (l, false /* follow_symlinks */)) + try_rmfile (l); + + mkanylink (f, l, true /* copy */, true /* relative */); + } + catch (system_error& e) + { + throw pair<entry_type, system_error> (entry_type::symlink, + move (e)); + } + } + catch (const pair<entry_type, system_error>& e) + { + const char* w (e.first == entry_type::regular ? "copy" : + e.first == entry_type::symlink ? "symlink" : + e.first == entry_type::other ? "hardlink" : + nullptr); + + fail << "unable to make " << w << ' ' << l << ": " << e.second; + } + }; + + const libs_paths& paths (md.libs_paths); + + const path& lk (paths.link); + const path& ld (paths.load); + const path& so (paths.soname); + const path& in (paths.interm); + + const path* f (paths.real); + + if (!in.empty ()) {ln (*f, in); f = &in;} + if (!so.empty ()) {ln (*f, so); f = &so;} + if (!ld.empty ()) {ln (*f, ld); f = &ld;} + if (!lk.empty ()) {ln (*f, lk);} + } + else if (lt.static_library ()) + { + // Apple ar (from cctools) for some reason truncates fractional + // seconds when running on APFS (HFS has a second resolution so it's + // not an issue there). This can lead to object files being newer than + // the archive, which is naturally bad news. Filed as bug 49604334, + // reportedly fixed in Xcode 11 beta 5. + // + // Note that this block is not inside #ifdef __APPLE__ because we + // could be cross-compiling, theoretically. We also make sure we use + // Apple's ar (which is (un)recognized as 'generic') instead of, say, + // llvm-ar. + // + if (tsys == "darwin" && cast<string> (rs["bin.ar.id"]) == "generic") + { + if (!ctx.dry_run) + touch (ctx, tp, false /* create */, verb_never); + } + } + + if (!ctx.dry_run) + { + rm.cancel (); + dd.check_mtime (tp); + } + + // Should we go to the filesystem and get the new mtime? We know the + // file has been modified, so instead just use the current clock time. + // It has the advantage of having the subseconds precision. 
Plus, in + // case of dry-run, the file won't be modified. + // + t.mtime (system_clock::now ()); + return target_state::changed; + } + + target_state link_rule:: + perform_clean (action a, const target& xt) const + { + const file& t (xt.as<file> ()); + + ltype lt (link_type (t)); + const match_data& md (t.data<match_data> ()); + + clean_extras extras; + clean_adhoc_extras adhoc_extras; + + if (md.binless) + ; // Clean prerequisites/members. + else + { + if (tclass != "windows") + ; // Everything is the default. + else if (tsys == "mingw32") + { + if (lt.executable ()) + { + extras = {".d", ".dlls/", ".manifest.o", ".manifest"}; + } + + // For shared and static library it's the default. + } + else + { + // Assuming MSVC or alike. + // + if (lt.executable ()) + { + // Clean up .ilk in case the user enabled incremental linking + // (notice that the .ilk extension replaces .exe). + // + extras = {".d", ".dlls/", ".manifest", "-.ilk"}; + } + else if (lt.shared_library ()) + { + // Clean up .ilk and .exp. + // + // Note that .exp is based on the .lib, not .dll name. And with + // versioning their bases may not be the same. + // + extras = {".d", "-.ilk"}; + adhoc_extras.push_back ({libi::static_type, {"-.exp"}}); + } + + // For static library it's the default. + } + + if (extras.empty ()) + extras = {".d"}; // Default. + +#ifdef _WIN32 + extras.push_back (".t"); // Options file. +#endif + // For shared libraries we may have a bunch of symlinks that we need + // to remove. 
+ // + if (lt.shared_library ()) + { + const libs_paths& lp (md.libs_paths); + + auto add = [&extras] (const path& p) + { + if (!p.empty ()) + extras.push_back (p.string ().c_str ()); + }; + + add (lp.link); + add (lp.load); + add (lp.soname); + add (lp.interm); + } + } + + return perform_clean_extra (a, t, extras, adhoc_extras); + } + } +} diff --git a/libbuild2/cc/link-rule.hxx b/libbuild2/cc/link-rule.hxx new file mode 100644 index 0000000..2a296a7 --- /dev/null +++ b/libbuild2/cc/link-rule.hxx @@ -0,0 +1,188 @@ +// file : libbuild2/cc/link-rule.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_LINK_RULE_HXX +#define LIBBUILD2_CC_LINK_RULE_HXX + +#include <set> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/rule.hxx> + +#include <libbuild2/cc/types.hxx> +#include <libbuild2/cc/common.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + class LIBBUILD2_CC_SYMEXPORT link_rule: public rule, virtual common + { + public: + link_rule (data&&); + + struct match_result + { + bool seen_x = false; + bool seen_c = false; + bool seen_cc = false; + bool seen_obj = false; + bool seen_lib = false; + }; + + match_result + match (action, const target&, const target*, otype, bool) const; + + virtual bool + match (action, target&, const string&) const override; + + virtual recipe + apply (action, target&) const override; + + target_state + perform_update (action, const target&) const; + + target_state + perform_clean (action, const target&) const; + + private: + friend class install_rule; + friend class libux_install_rule; + + // Shared library paths. + // + struct libs_paths + { + // If any (except real) is empty, then it is the same as the next + // one. Except for load and intermediate, for which empty indicates + // that it is not used. 
+ // + // Note that the paths must form a "hierarchy" with subsequent paths + // adding extra information as suffixes. This is relied upon by the + // clean pattern (see below). + // + // The libs{} path is always the real path. On Windows what we link + // to is the import library and the link path is empty. + // + path link; // What we link: libfoo.so + path load; // What we load (with dlopen() or similar) + path soname; // SONAME: libfoo-1.so, libfoo.so.1 + path interm; // Intermediate: libfoo.so.1.2 + const path* real; // Real: libfoo.so.1.2.3 + + inline const path& + effect_link () const {return link.empty () ? effect_soname () : link;} + + inline const path& + effect_soname () const {return soname.empty () ? *real : soname;} + + // Cleanup pattern used to remove previous versions. If empty, no + // cleanup is performed. The above (current) names are automatically + // filtered out. + // + path clean; + }; + + libs_paths + derive_libs_paths (file&, const char*, const char*) const; + + struct match_data + { + // The "for install" condition is signalled to us by install_rule when + // it is matched for the update operation. It also verifies that if we + // have already been executed, then it was for install. + // + // This has an interesting implication: it means that this rule cannot + // be used to update targets during match. Specifically, we cannot be + // executed for group resolution purposes (not a problem) nor as part + // of the generated source update. The latter case can be a problem: + // imagine a code generator that itself may need to be updated before + // it can be used to re-generate some out-of-date source code. As an + // aside, note that even if we were somehow able to communicate the + // "for install" in this case, the result of such an update may not + // actually be "usable" (e.g., not runnable because of the missing + // rpaths). There is another prominent case where the result may not + // be usable: cross-compilation. 
+ // + // So the current (admittedly fuzzy) thinking is that a project shall + // not try to use its own build for update since it may not be usable + // (because of cross-compilations, being "for install", etc). Instead, + // it should rely on another, "usable" build of itself (this, BTW, is + // related to bpkg's build-time vs run-time dependencies). + // + optional<bool> for_install; + + bool binless; // Binary-less library. + size_t start; // Parallel prerequisites/prerequisite_targets start. + + link_rule::libs_paths libs_paths; + }; + + // Library handling. + // + void + append_libraries (strings&, + const file&, bool, lflags, + const scope&, action, linfo) const; + + void + hash_libraries (sha256&, + bool&, timestamp, + const file&, bool, lflags, + const scope&, action, linfo) const; + + void + rpath_libraries (strings&, + const target&, + const scope&, action, linfo, + bool) const; + + // Windows rpath emulation (windows-rpath.cxx). + // + struct windows_dll + { + const string& dll; + const string* pdb; // NULL if none. + string pdb_storage; + + bool operator< (const windows_dll& y) const {return dll < y.dll;} + }; + + using windows_dlls = std::set<windows_dll>; + + timestamp + windows_rpath_timestamp (const file&, + const scope&, + action, linfo) const; + + windows_dlls + windows_rpath_dlls (const file&, const scope&, action, linfo) const; + + void + windows_rpath_assembly (const file&, const scope&, action, linfo, + const string&, + timestamp, + bool) const; + + // Windows-specific (windows-manifest.cxx). + // + pair<path, timestamp> + windows_manifest (const file&, bool rpath_assembly) const; + + // pkg-config's .pc file generation (pkgconfig.cxx). 
+ // + void + pkgconfig_save (action, const file&, bool, bool) const; + + private: + const string rule_id; + }; + } +} + +#endif // LIBBUILD2_CC_LINK_RULE_HXX diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx new file mode 100644 index 0000000..3113b5c --- /dev/null +++ b/libbuild2/cc/module.cxx @@ -0,0 +1,781 @@ +// file : libbuild2/cc/module.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/module.hxx> + +#include <iomanip> // left, setw() + +#include <libbuild2/scope.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/target.hxx> // pc* + +#include <libbuild2/config/utility.hxx> +#include <libbuild2/install/utility.hxx> + +#include <libbuild2/cc/guess.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + void config_module:: + guess (scope& rs, const location& loc, const variable_map&) + { + tracer trace (x, "guess_init"); + + bool cc_loaded (cast_false<bool> (rs["cc.core.guess.loaded"])); + + // Adjust module priority (compiler). Also order cc module before us + // (we don't want to use priorities for that in case someone manages + // to slot in-between). + // + if (!cc_loaded) + config::save_module (rs, "cc", 250); + + config::save_module (rs, x, 250); + + auto& vp (rs.ctx.var_pool.rw (rs)); + + // Must already exist. + // + const variable& config_c_poptions (vp["config.cc.poptions"]); + const variable& config_c_coptions (vp["config.cc.coptions"]); + const variable& config_c_loptions (vp["config.cc.loptions"]); + + // config.x + // + + // Normally we will have a persistent configuration and computing the + // default value every time will be a waste. So try without a default + // first. 
+ // + auto p (config::omitted (rs, config_x)); + + if (!p.first) + { + // If there is a config.x value for one of the modules that can hint + // us the toolchain, load its .guess module. This makes sure that the + // order in which we load the modules is unimportant and that the user + // can specify the toolchain using any of the config.x values. + // + if (!cc_loaded) + { + for (const char* const* pm (x_hinters); *pm != nullptr; ++pm) + { + string m (*pm); + + // Must be the same as in module's init(). + // + const variable& v (vp.insert<path> ("config." + m, true)); + + if (rs[v].defined ()) + { + load_module (rs, rs, m + ".guess", loc); + cc_loaded = true; + break; + } + } + } + + // If cc.core.config is already loaded then use its toolchain id and + // (optional) pattern to guess an appropriate default (e.g., for {gcc, + // *-4.9} we will get g++-4.9). + // + path d; + + if (cc_loaded) + d = guess_default (x_lang, + cast<string> (rs["cc.id"]), + cast<string> (rs["cc.pattern"])); + else + { + d = path (x_default); + + if (d.empty ()) + fail << "not built with default " << x_lang << " compiler" << + info << "use config." << x << " to specify"; + } + + // If this value was hinted, save it as commented out so that if the + // user changes the source of the pattern, this one will get updated + // as well. + // + p = config::required (rs, + config_x, + d, + false, + cc_loaded ? config::save_commented : 0); + } + + // Figure out which compiler we are dealing with, its target, etc. 
+ // + ci_ = &build2::cc::guess ( + x, + x_lang, + cast<path> (*p.first), + cast_null<string> (config::omitted (rs, config_x_id).first), + cast_null<string> (config::omitted (rs, config_x_version).first), + cast_null<string> (config::omitted (rs, config_x_target).first), + cast_null<strings> (rs[config_c_poptions]), + cast_null<strings> (rs[config_x_poptions]), + cast_null<strings> (rs[config_c_coptions]), + cast_null<strings> (rs[config_x_coptions]), + cast_null<strings> (rs[config_c_loptions]), + cast_null<strings> (rs[config_x_loptions])); + + const compiler_info& ci (*ci_); + + // Split/canonicalize the target. First see if the user asked us to + // use config.sub. + // + target_triplet tt; + { + string ct; + + if (config_sub) + { + ct = run<string> (3, + *config_sub, + ci.target.c_str (), + [] (string& l, bool) {return move (l);}); + l5 ([&]{trace << "config.sub target: '" << ct << "'";}); + } + + try + { + tt = target_triplet (ct.empty () ? ci.target : ct); + l5 ([&]{trace << "canonical target: '" << tt.string () << "'; " + << "class: " << tt.class_;}); + } + catch (const invalid_argument& e) + { + // This is where we suggest that the user specifies --config-sub to + // help us out. + // + fail << "unable to parse " << x_lang << " compiler target '" + << ci.target << "': " << e << + info << "consider using the --config-sub option"; + } + } + + // Assign values to variables that describe the compiler. + // + rs.assign (x_id) = ci.id.string (); + rs.assign (x_id_type) = to_string (ci.id.type); + rs.assign (x_id_variant) = ci.id.variant; + + rs.assign (x_class) = to_string (ci.class_); + + rs.assign (x_version) = ci.version.string; + rs.assign (x_version_major) = ci.version.major; + rs.assign (x_version_minor) = ci.version.minor; + rs.assign (x_version_patch) = ci.version.patch; + rs.assign (x_version_build) = ci.version.build; + + // Also enter as x.target.{cpu,vendor,system,version,class} for + // convenience of access. 
+ // + rs.assign (x_target_cpu) = tt.cpu; + rs.assign (x_target_vendor) = tt.vendor; + rs.assign (x_target_system) = tt.system; + rs.assign (x_target_version) = tt.version; + rs.assign (x_target_class) = tt.class_; + + rs.assign (x_target) = move (tt); + + rs.assign (x_pattern) = ci.pattern; + + if (!x_stdlib.alias (c_stdlib)) + rs.assign (x_stdlib) = ci.x_stdlib; + + new_ = p.second; + + // Load cc.core.guess. + // + if (!cc_loaded) + { + // Prepare configuration hints. + // + variable_map h (rs.ctx); + + // Note that all these variables have already been registered. + // + h.assign ("config.cc.id") = cast<string> (rs[x_id]); + h.assign ("config.cc.hinter") = string (x); + h.assign ("config.cc.target") = cast<target_triplet> (rs[x_target]); + + if (!ci.pattern.empty ()) + h.assign ("config.cc.pattern") = ci.pattern; + + h.assign (c_runtime) = ci.runtime; + h.assign (c_stdlib) = ci.c_stdlib; + + load_module (rs, rs, "cc.core.guess", loc, false, h); + } + else + { + // If cc.core.guess is already loaded, verify its configuration + // matched ours since it could have been loaded by another c-family + // module. + // + const auto& h (cast<string> (rs["cc.hinter"])); + + auto check = [&loc, &h, this] (const auto& cv, + const auto& xv, + const char* what, + bool error = true) + { + if (cv != xv) + { + diag_record dr (error ? fail (loc) : warn (loc)); + + dr << h << " and " << x << " module " << what << " mismatch" << + info << h << " is '" << cv << "'" << + info << x << " is '" << xv << "'" << + info << "consider explicitly specifying config." << h + << " and config." << x; + } + }; + + check (cast<string> (rs["cc.id"]), + cast<string> (rs[x_id]), + "toolchain"); + + // We used to not require that patterns match assuming that if the + // toolchain id and target are the same, then where exactly the tools + // come from doesn't really matter. But in most cases it will be the + // g++-7 vs gcc kind of mistakes. 
So now we warn since even if + // intentional, it is still probably a bad idea. + // + check (cast<string> (rs["cc.pattern"]), + cast<string> (rs[x_pattern]), + "toolchain pattern", + false); + + check (cast<target_triplet> (rs["cc.target"]), + cast<target_triplet> (rs[x_target]), + "target"); + + check (cast<string> (rs["cc.runtime"]), + ci.runtime, + "runtime"); + + check (cast<string> (rs["cc.stdlib"]), + ci.c_stdlib, + "c standard library"); + } + } + +#ifndef _WIN32 + static const dir_path usr_inc ("/usr/include"); + static const dir_path usr_loc_lib ("/usr/local/lib"); + static const dir_path usr_loc_inc ("/usr/local/include"); +# ifdef __APPLE__ + static const dir_path a_usr_inc ( + "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include"); +# endif +#endif + + void config_module:: + init (scope& rs, const location& loc, const variable_map&) + { + tracer trace (x, "config_init"); + + const compiler_info& ci (*ci_); + const target_triplet& tt (cast<target_triplet> (rs[x_target])); + + // config.x.std overrides x.std + // + { + lookup l (config::omitted (rs, config_x_std).first); + + const string* v; + if (l.defined ()) + { + v = cast_null<string> (l); + rs.assign (x_std) = v; + } + else + v = cast_null<string> (rs[x_std]); + + // Translate x_std value (if any) to the compiler option(s) (if any). + // + tstd = translate_std (ci, rs, v); + } + + // Extract system header/library search paths from the compiler and + // determine if we need any additional search paths. 
+ // + dir_paths lib_dirs; + dir_paths inc_dirs; + + switch (ci.class_) + { + case compiler_class::gcc: + { + lib_dirs = gcc_library_search_paths (ci.path, rs); + inc_dirs = gcc_header_search_paths (ci.path, rs); + break; + } + case compiler_class::msvc: + { + lib_dirs = msvc_library_search_paths (ci.path, rs); + inc_dirs = msvc_header_search_paths (ci.path, rs); + break; + } + } + + sys_lib_dirs_extra = lib_dirs.size (); + sys_inc_dirs_extra = inc_dirs.size (); + +#ifndef _WIN32 + // Add /usr/local/{include,lib}. We definitely shouldn't do this if we + // are cross-compiling. But even if the build and target are the same, + // it's possible the compiler uses some carefully crafted sysroot and by + // adding /usr/local/* we will just mess things up. So the heuristics + // that we will use is this: if the compiler's system include directories + // contain /usr[/local]/include then we add /usr/local/*. + // + // Note that similar to GCC we also check for the directory existence. + // Failed that, we can end up with some bizarre yo-yo'ing cases where + // uninstall removes the directories which in turn triggers a rebuild + // on the next invocation. + // + { + auto& is (inc_dirs); + auto& ls (lib_dirs); + + bool ui (find (is.begin (), is.end (), usr_inc) != is.end ()); + bool uli (find (is.begin (), is.end (), usr_loc_inc) != is.end ()); + +#ifdef __APPLE__ + // On Mac OS starting from 10.14 there is no longer /usr/include. + // Instead we get the following: + // + // Homebrew GCC 9: + // + // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include + // + // Apple Clang 10.0.1: + // + // /Library/Developer/CommandLineTools/usr/include + // /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include + // + // What exactly all this means is anyone's guess, of course. 
So for + // now we will assume that anything that is or resolves (like that + // MacOSX10.14.sdk symlink) to: + // + // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include + // + // Is Apple's /usr/include. + // + if (!ui && !uli) + { + for (const dir_path& d: inc_dirs) + { + // Both Clang and GCC skip non-existent paths but let's handle + // (and ignore) directories that cause any errors, for good + // measure. + // + try + { + if (d == a_usr_inc || dir_path (d).realize () == a_usr_inc) + { + ui = true; + break; + } + } + catch (...) {} + } + } +#endif + if (ui || uli) + { + bool ull (find (ls.begin (), ls.end (), usr_loc_lib) != ls.end ()); + + // Many platforms don't search in /usr/local/lib by default (but do + // for headers in /usr/local/include). So add it as the last option. + // + if (!ull && exists (usr_loc_lib, true /* ignore_error */)) + ls.push_back (usr_loc_lib); + + // FreeBSD is at least consistent: it searches in neither. Quoting + // its wiki: "FreeBSD can't even find libraries that it installed." + // So let's help it a bit. + // + if (!uli && exists (usr_loc_inc, true /* ignore_error */)) + is.push_back (usr_loc_inc); + } + } +#endif + + // If this is a new value (e.g., we are configuring), then print the + // report at verbosity level 2 and up (-v). + // + if (verb >= (new_ ? 2 : 3)) + { + diag_record dr (text); + + { + dr << x << ' ' << project (rs) << '@' << rs << '\n' + << " " << left << setw (11) << x << ci.path << '\n' + << " id " << ci.id << '\n' + << " version " << ci.version.string << '\n' + << " major " << ci.version.major << '\n' + << " minor " << ci.version.minor << '\n' + << " patch " << ci.version.patch << '\n'; + } + + if (!ci.version.build.empty ()) + { + dr << " build " << ci.version.build << '\n'; + } + + { + const string& ct (tt.string ()); // Canonical target. 
+ + dr << " signature " << ci.signature << '\n' + << " checksum " << ci.checksum << '\n' + << " target " << ct; + + if (ct != ci.original_target) + dr << " (" << ci.original_target << ")"; + + dr << "\n runtime " << ci.runtime + << "\n stdlib " << ci.x_stdlib; + + if (!x_stdlib.alias (c_stdlib)) + dr << "\n c stdlib " << ci.c_stdlib; + } + + if (!tstd.empty ()) + { + dr << "\n std "; // One less space. + for (const string& o: tstd) dr << ' ' << o; + } + + if (!ci.pattern.empty ()) // Note: bin_pattern printed by bin + { + dr << "\n pattern " << ci.pattern; + } + + if (verb >= 3 && !inc_dirs.empty ()) + { + dr << "\n inc dirs"; + for (size_t i (0); i != inc_dirs.size (); ++i) + { + if (i == sys_inc_dirs_extra) + dr << "\n --"; + dr << "\n " << inc_dirs[i]; + } + } + + if (verb >= 3 && !lib_dirs.empty ()) + { + dr << "\n lib dirs"; + for (size_t i (0); i != lib_dirs.size (); ++i) + { + if (i == sys_lib_dirs_extra) + dr << "\n --"; + dr << "\n " << lib_dirs[i]; + } + } + } + + rs.assign (x_path) = process_path (ci.path, false /* init */); + rs.assign (x_sys_lib_dirs) = move (lib_dirs); + rs.assign (x_sys_inc_dirs) = move (inc_dirs); + + rs.assign (x_signature) = ci.signature; + rs.assign (x_checksum) = ci.checksum; + + // config.x.{p,c,l}options + // config.x.libs + // + // These are optional. We also merge them into the corresponding + // x.* variables. + // + // The merging part gets a bit tricky if this module has already + // been loaded in one of the outer scopes. By doing the straight + // append we would just be repeating the same options over and + // over. So what we are going to do is only append to a value if + // it came from this scope. Then the usage for merging becomes: + // + // x.coptions = <overridable options> # Note: '='. + // using x + // x.coptions += <overriding options> # Note: '+='. 
+ // + rs.assign (x_poptions) += cast_null<strings> ( + config::optional (rs, config_x_poptions)); + + rs.assign (x_coptions) += cast_null<strings> ( + config::optional (rs, config_x_coptions)); + + rs.assign (x_loptions) += cast_null<strings> ( + config::optional (rs, config_x_loptions)); + + rs.assign (x_aoptions) += cast_null<strings> ( + config::optional (rs, config_x_aoptions)); + + rs.assign (x_libs) += cast_null<strings> ( + config::optional (rs, config_x_libs)); + + // config.x.importable_header + // + // It's still fuzzy whether specifying (or maybe tweaking) this list in + // the configuration will be a common thing to do so for now we use + // omitted. It's also probably too early to think whether we should have + // the cc.* version and what the semantics should be. + // + if (x_importable_headers != nullptr) + { + lookup l (config::omitted (rs, *config_x_importable_headers).first); + + // @@ MODHDR: if(modules) ? + // + rs.assign (x_importable_headers) += cast_null<strings> (l); + } + + // Load cc.core.config. + // + if (!cast_false<bool> (rs["cc.core.config.loaded"])) + { + variable_map h (rs.ctx); + + if (!ci.bin_pattern.empty ()) + h.assign ("config.bin.pattern") = ci.bin_pattern; + + load_module (rs, rs, "cc.core.config", loc, false, h); + } + } + + void module:: + init (scope& rs, const location& loc, const variable_map&) + { + tracer trace (x, "init"); + + // Load cc.core. Besides other things, this will load bin (core) plus + // extra bin.* modules we may need. + // + if (!cast_false<bool> (rs["cc.core.loaded"])) + load_module (rs, rs, "cc.core", loc); + + // Process, sort, and cache (in this->import_hdr) importable headers. + // Keep the cache NULL if unused or empty. + // + // @@ MODHDR TODO: support exclusions entries (e.g., -<stdio.h>)? 
+ // + if (modules && x_importable_headers != nullptr) + { + strings* ih (cast_null<strings> (rs.assign (x_importable_headers))); + + if (ih != nullptr && !ih->empty ()) + { + // Translate <>-style header names to absolute paths using the + // compiler's include search paths. Otherwise complete and normalize + // since when searching in this list we always use the absolute and + // normalized header target path. + // + for (string& h: *ih) + { + if (h.empty ()) + continue; + + path f; + if (h.front () == '<' && h.back () == '>') + { + h.pop_back (); + h.erase (0, 1); + + for (const dir_path& d: sys_inc_dirs) + { + if (file_exists ((f = d, f /= h), + true /* follow_symlinks */, + true /* ignore_errors */)) + goto found; + } + + // What should we do if not found? While we can fail, this could + // be too drastic if, for example, the header is "optional" and + // may or may not be present/used. So for now let's restore the + // original form to aid debugging (it can't possibly match any + // absolute path). + // + h.insert (0, 1, '<'); + h.push_back ('>'); + continue; + + found: + ; // Fall through. + } + else + { + f = path (move (h)); + + if (f.relative ()) + f.complete (); + } + + // @@ MODHDR: should we use the more elaborate but robust + // normalize/realize scheme so that we get the same + // path? Feels right. + f.normalize (); + h = move (f).string (); + } + + sort (ih->begin (), ih->end ()); + import_hdr = ih; + } + } + + // Register target types and configure their "installability". + // + bool install_loaded (cast_false<bool> (rs["install.loaded"])); + + { + using namespace install; + + rs.insert_target_type (x_src); + + auto insert_hdr = [&rs, install_loaded] (const target_type& tt) + { + rs.insert_target_type (tt); + + // Install headers into install.include. + // + if (install_loaded) + install_path (rs, tt, dir_path ("include")); + }; + + // Note: module (x_mod) is in x_hdr. 
+ // + for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht) + insert_hdr (**ht); + + // Also register the C header for C-derived languages. + // + if (*x_hdr != &h::static_type) + insert_hdr (h::static_type); + + rs.insert_target_type<pca> (); + rs.insert_target_type<pcs> (); + + if (install_loaded) + install_path<pc> (rs, dir_path ("pkgconfig")); + } + + // Register rules. + // + { + using namespace bin; + + auto& r (rs.rules); + + // We register for configure so that we detect unresolved imports + // during configuration rather than later, e.g., during update. + // + const compile_rule& cr (*this); + const link_rule& lr (*this); + + r.insert<obje> (perform_update_id, x_compile, cr); + r.insert<obje> (perform_clean_id, x_compile, cr); + r.insert<obje> (configure_update_id, x_compile, cr); + + r.insert<obja> (perform_update_id, x_compile, cr); + r.insert<obja> (perform_clean_id, x_compile, cr); + r.insert<obja> (configure_update_id, x_compile, cr); + + r.insert<objs> (perform_update_id, x_compile, cr); + r.insert<objs> (perform_clean_id, x_compile, cr); + r.insert<objs> (configure_update_id, x_compile, cr); + + if (modules) + { + r.insert<bmie> (perform_update_id, x_compile, cr); + r.insert<bmie> (perform_clean_id, x_compile, cr); + r.insert<bmie> (configure_update_id, x_compile, cr); + + r.insert<hbmie> (perform_update_id, x_compile, cr); + r.insert<hbmie> (perform_clean_id, x_compile, cr); + r.insert<hbmie> (configure_update_id, x_compile, cr); + + r.insert<bmia> (perform_update_id, x_compile, cr); + r.insert<bmia> (perform_clean_id, x_compile, cr); + r.insert<bmia> (configure_update_id, x_compile, cr); + + r.insert<hbmia> (perform_update_id, x_compile, cr); + r.insert<hbmia> (perform_clean_id, x_compile, cr); + r.insert<hbmia> (configure_update_id, x_compile, cr); + + r.insert<bmis> (perform_update_id, x_compile, cr); + r.insert<bmis> (perform_clean_id, x_compile, cr); + r.insert<bmis> (configure_update_id, x_compile, cr); + + r.insert<hbmis> 
(perform_update_id, x_compile, cr); + r.insert<hbmis> (perform_clean_id, x_compile, cr); + r.insert<hbmis> (configure_update_id, x_compile, cr); + } + + r.insert<libue> (perform_update_id, x_link, lr); + r.insert<libue> (perform_clean_id, x_link, lr); + r.insert<libue> (configure_update_id, x_link, lr); + + r.insert<libua> (perform_update_id, x_link, lr); + r.insert<libua> (perform_clean_id, x_link, lr); + r.insert<libua> (configure_update_id, x_link, lr); + + r.insert<libus> (perform_update_id, x_link, lr); + r.insert<libus> (perform_clean_id, x_link, lr); + r.insert<libus> (configure_update_id, x_link, lr); + + r.insert<exe> (perform_update_id, x_link, lr); + r.insert<exe> (perform_clean_id, x_link, lr); + r.insert<exe> (configure_update_id, x_link, lr); + + r.insert<liba> (perform_update_id, x_link, lr); + r.insert<liba> (perform_clean_id, x_link, lr); + r.insert<liba> (configure_update_id, x_link, lr); + + r.insert<libs> (perform_update_id, x_link, lr); + r.insert<libs> (perform_clean_id, x_link, lr); + r.insert<libs> (configure_update_id, x_link, lr); + + // Note that while libu*{} are not installable, we need to see through + // them in case they depend on stuff that we need to install (see the + // install rule implementations for details). 
+ // + if (install_loaded) + { + const install_rule& ir (*this); + + r.insert<exe> (perform_install_id, x_install, ir); + r.insert<exe> (perform_uninstall_id, x_uninstall, ir); + + r.insert<liba> (perform_install_id, x_install, ir); + r.insert<liba> (perform_uninstall_id, x_uninstall, ir); + + r.insert<libs> (perform_install_id, x_install, ir); + r.insert<libs> (perform_uninstall_id, x_uninstall, ir); + + const libux_install_rule& lr (*this); + + r.insert<libue> (perform_install_id, x_install, lr); + r.insert<libue> (perform_uninstall_id, x_uninstall, lr); + + r.insert<libua> (perform_install_id, x_install, lr); + r.insert<libua> (perform_uninstall_id, x_uninstall, lr); + + r.insert<libus> (perform_install_id, x_install, lr); + r.insert<libus> (perform_uninstall_id, x_uninstall, lr); + } + } + } + } +} diff --git a/libbuild2/cc/module.hxx b/libbuild2/cc/module.hxx new file mode 100644 index 0000000..43670c3 --- /dev/null +++ b/libbuild2/cc/module.hxx @@ -0,0 +1,103 @@ +// file : libbuild2/cc/module.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_MODULE_HXX +#define LIBBUILD2_CC_MODULE_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/module.hxx> +#include <libbuild2/variable.hxx> + +#include <libbuild2/cc/common.hxx> + +#include <libbuild2/cc/compile-rule.hxx> +#include <libbuild2/cc/link-rule.hxx> +#include <libbuild2/cc/install-rule.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + struct compiler_info; + + class LIBBUILD2_CC_SYMEXPORT config_module: public module_base, + public virtual config_data + { + public: + explicit + config_module (config_data&& d) : config_data (move (d)) {} + + // We split the configuration process into two parts: guessing the + // compiler information and the actual configuration. 
This allows one to + // adjust configuration (say the standard or enabled experimental + // features) based on the compiler information by first loading the + // guess module. + // + void + guess (scope&, const location&, const variable_map&); + + void + init (scope&, const location&, const variable_map&); + + // Translate the x.std value (if any) to the standard-selecting + // option(s) (if any). May also check/set x.features.* variables on the + // root scope. + // + virtual strings + translate_std (const compiler_info&, scope&, const string*) const = 0; + + strings tstd; + size_t sys_lib_dirs_extra; // First extra path (size if none). + size_t sys_inc_dirs_extra; // First extra path (size if none). + + const compiler_info* ci_; + + private: + // Defined in gcc.cxx. + // + dir_paths + gcc_header_search_paths (const process_path&, scope&) const; + + dir_paths + gcc_library_search_paths (const process_path&, scope&) const; + + // Defined in msvc.cxx. + // + dir_paths + msvc_header_search_paths (const process_path&, scope&) const; + + dir_paths + msvc_library_search_paths (const process_path&, scope&) const; + + private: + bool new_; // See guess() and init() for details. 
+ }; + + class LIBBUILD2_CC_SYMEXPORT module: public module_base, + public virtual common, + link_rule, + compile_rule, + install_rule, + libux_install_rule + { + public: + explicit + module (data&& d) + : common (move (d)), + link_rule (move (d)), + compile_rule (move (d)), + install_rule (move (d), *this), + libux_install_rule (move (d), *this) {} + + void + init (scope&, const location&, const variable_map&); + }; + } +} + +#endif // LIBBUILD2_CC_MODULE_HXX diff --git a/libbuild2/cc/msvc.cxx b/libbuild2/cc/msvc.cxx new file mode 100644 index 0000000..d802b98 --- /dev/null +++ b/libbuild2/cc/msvc.cxx @@ -0,0 +1,502 @@ +// file : libbuild2/cc/msvc.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cstring> // strcmp() + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/types.hxx> + +#include <libbuild2/cc/common.hxx> +#include <libbuild2/cc/module.hxx> + +using std::strcmp; + +using namespace butl; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + // Translate the target triplet CPU to lib.exe/link.exe /MACHINE option. + // + const char* + msvc_machine (const string& cpu) + { + const char* m (cpu == "i386" || cpu == "i686" ? "/MACHINE:x86" : + cpu == "x86_64" ? "/MACHINE:x64" : + cpu == "arm" ? "/MACHINE:ARM" : + cpu == "arm64" ? "/MACHINE:ARM64" : + nullptr); + + if (m == nullptr) + fail << "unable to translate CPU " << cpu << " to /MACHINE"; + + return m; + } + + // Sanitize cl.exe options. + // + void + msvc_sanitize_cl (cstrings& args) + { + // VC is trying to be "helpful" and warn about one command line option + // overriding another. 
For example: + // + // cl : Command line warning D9025 : overriding '/W1' with '/W2' + // + // So we have to sanitize the command line and suppress duplicates of + // certain options. + // + // Note also that it is theoretically possible we will treat an option's + // argument as an option. Oh, well, nobody is perfect in the Microsoft + // land. + + // We want to keep the last option seen at the position (relative to + // other options) that it was encountered. If we were to iterate forward + // and keep positions of the encountered options, then we would have had + // to adjust some of them once we remove a duplicate. So instead we are + // going to iterate backwards, in which case we don't even need to keep + // positions, just flags. Note that args[0] is cl.exe itself in which we + // are conveniently not interested. + // + bool W (false); // /WN /Wall /w + + for (size_t i (args.size () - 1); i != 0; --i) + { + auto erase = [&args, &i] () + { + args.erase (args.begin () + i); + }; + + const char* a (args[i]); + + if (*a != '/' && *a != '-') // Not an option. + continue; + + ++a; + + // /WN /Wall /w + // + if ((a[0] == 'W' && digit (a[1]) && a[2] == '\0') || // WN + (a[0] == 'W' && strcmp (a + 1, "all") == 0) || // Wall + (a[0] == 'w' && a[1] == '\0')) // w + { + if (W) + erase (); + else + W = true; + } + } + } + + // Sense whether this is a diagnostics line returning the position of the + // NNNN code in XNNNN and npos otherwise. + // + size_t + msvc_sense_diag (const string& l, char f) + { + size_t p (l.find (':')); + + // Note that while the C-numbers seem to all be in the ' CNNNN:' form, + // the D ones can be ' DNNNN :', for example: + // + // cl : Command line warning D9025 : overriding '/W3' with '/W4' + // + for (size_t n (l.size ()); + p != string::npos; + p = ++p != n ? 
l.find_first_of (": ", p) : string::npos) + { + if (p > 5 && + l[p - 6] == ' ' && + l[p - 5] == f && + digit (l[p - 4]) && + digit (l[p - 3]) && + digit (l[p - 2]) && + digit (l[p - 1])) + { + p -= 4; // Start of the error code. + break; + } + } + + return p; + } + + // Filter cl.exe and link.exe noise. + // + void + msvc_filter_cl (ifdstream& is, const path& src) + { + // While it appears VC always prints the source name (even if the + // file does not exist), let's do a sanity check. Also handle the + // command line errors/warnings which come before the file name. + // + for (string l; !eof (getline (is, l)); ) + { + if (l != src.leaf ().string ()) + { + diag_stream_lock () << l << endl; + + if (msvc_sense_diag (l, 'D') != string::npos) + continue; + } + + break; + } + } + + void + msvc_filter_link (ifdstream& is, const file& t, otype lt) + { + // Filter lines until we encounter something we don't recognize. We also + // have to assume the messages can be translated. + // + for (string l; getline (is, l); ) + { + // " Creating library foo\foo.dll.lib and object foo\foo.dll.exp" + // + // This can also appear when linking executables if any of the object + // files export any symbols. + // + if (l.compare (0, 3, " ") == 0) + { + // Use the actual import library name if this is a library (since we + // override this name) and the executable name otherwise (by default + // .lib/.exp are named by replacing the .exe extension). + // + path i ( + lt == otype::s + ? find_adhoc_member<libi> (t)->path ().leaf () + : t.path ().leaf ().base () + ".lib"); + + if (l.find (i.string ()) != string::npos && + l.find (i.base ().string () + ".exp") != string::npos) + continue; + } + + // /INCREMENTAL causes linker to sometimes issue messages but now I + // can't quite reproduce it. + // + + diag_stream_lock () << l << endl; + break; + } + } + + // Extract system header search paths from MSVC. 
+ // + dir_paths config_module:: + msvc_header_search_paths (const process_path&, scope&) const + { + // The compiler doesn't seem to have any built-in paths and all of them + // come from the INCLUDE environment variable. + + // @@ VC: how are we going to do this? E.g., cl-14 does this internally. + // cl.exe /Be prints INCLUDE. + // + // Should we actually bother? INCLUDE is normally used for system + // headers and it's highly unlikely we will see an imported library + // that lists one of those directories in pkg-config Cflags value. + // Let's wait and see. + // + return dir_paths (); + } + + // Extract system library search paths from MSVC. + // + dir_paths config_module:: + msvc_library_search_paths (const process_path&, scope&) const + { + // The linker doesn't seem to have any built-in paths and all of them + // come from the LIB environment variable. + + // @@ VC: how are we going to do this? E.g., cl-14 does this internally. + // cl.exe /Be prints LIB. + // + // Should we actually bother? LIB is normally used for system + // libraries and it's highly unlikely we will see an explicit import + // for a library from one of those directories. Let's wait and see. + // + return dir_paths (); + } + + // Inspect the file and determine if it is static or import library. + // Return otype::e if it is neither (which we quietly ignore). + // + static otype + library_type (const process_path& ld, const path& l) + { + // There are several reasonably reliable methods to tell whether it is a + // static or import library. One is lib.exe /LIST -- if there aren't any + // .obj members, then it is most likely an import library (it can also + // be an empty static library in which case there won't be any members). + // For an import library /LIST will print a bunch of .dll members. + // + // Another approach is dumpbin.exe (link.exe /DUMP) with /ARCHIVEMEMBERS + // (similar to /LIST) and /LINKERMEMBER (looking for __impl__ symbols or + // _IMPORT_DESCRIPTOR_). 
+ // + // Note also, that apparently it is possible to have a hybrid library. + // + // While the lib.exe approach is probably the simplest, the problem is + // it will require us loading the bin.ar module even if we are not + // building any static libraries. On the other hand, if we are searching + // for libraries then we have bin.ld. So we will use the link.exe /DUMP + // /ARCHIVEMEMBERS. + // + const char* args[] = {ld.recall_string (), + "/DUMP", // Must come first. + "/NOLOGO", + "/ARCHIVEMEMBERS", + l.string ().c_str (), + nullptr}; + + if (verb >= 3) + print_process (args); + + // Link.exe seem to always dump everything to stdout but just in case + // redirect stderr to stdout. + // + process pr (run_start (ld, + args, + 0 /* stdin */, + -1 /* stdout */, + false /* error */)); + + bool obj (false), dll (false); + string s; + + try + { + ifdstream is ( + move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); + + while (getline (is, s)) + { + // Detect the one error we should let through. + // + if (s.compare (0, 18, "unable to execute ") == 0) + break; + + // The lines we are interested in seem to have this form (though + // presumably the "Archive member name at" part can be translated): + // + // Archive member name at 746: [...]hello.dll[/][ ]* + // Archive member name at 8C70: [...]hello.lib.obj[/][ ]* + // + size_t n (s.size ()); + + for (; n != 0 && s[n - 1] == ' '; --n) ; // Skip trailing spaces. + + if (n >= 7) // At least ": X.obj" or ": X.dll". + { + --n; + + if (s[n] == '/') // Skip trailing slash if one is there. + --n; + + n -= 3; // Beginning of extension. + + if (s[n] == '.') + { + // Make sure there is ": ". + // + size_t p (s.rfind (':', n - 1)); + + if (p != string::npos && s[p + 1] == ' ') + { + const char* e (s.c_str () + n + 1); + + if (casecmp (e, "obj", 3) == 0) + obj = true; + + if (casecmp (e, "dll", 3) == 0) + dll = true; + } + } + } + } + } + catch (const io_error&) + { + // Presumably the child process failed. 
Let run_finish() deal with + // that. + } + + if (!run_finish (args, pr, false, s)) + return otype::e; + + if (obj && dll) + { + warn << l << " looks like hybrid static/import library, ignoring"; + return otype::e; + } + + if (!obj && !dll) + { + warn << l << " looks like empty static or import library, ignoring"; + return otype::e; + } + + return obj ? otype::a : otype::s; + } + + template <typename T> + static T* + msvc_search_library (const process_path& ld, + const dir_path& d, + const prerequisite_key& p, + otype lt, + const char* pfx, + const char* sfx, + bool exist, + tracer& trace) + { + // Pretty similar logic to search_library(). + // + assert (p.scope != nullptr); + + const optional<string>& ext (p.tk.ext); + const string& name (*p.tk.name); + + // Assemble the file path. + // + path f (d); + + if (*pfx != '\0') + { + f /= pfx; + f += name; + } + else + f /= name; + + if (*sfx != '\0') + f += sfx; + + const string& e (!ext || p.is_a<lib> () // Only for liba/libs. + ? string ("lib") + : *ext); + + if (!e.empty ()) + { + f += '.'; + f += e; + } + + // Check if the file exists and is of the expected type. + // + timestamp mt (mtime (f)); + + if (mt != timestamp_nonexistent && library_type (ld, f) == lt) + { + // Enter the target. 
+ // + T* t; + common::insert_library (p.scope->ctx, t, name, d, e, exist, trace); + + t->mtime (mt); + t->path (move (f)); + + return t; + } + + return nullptr; + } + + liba* common:: + msvc_search_static (const process_path& ld, + const dir_path& d, + const prerequisite_key& p, + bool exist) const + { + tracer trace (x, "msvc_search_static"); + + liba* r (nullptr); + + auto search = [&r, &ld, &d, &p, exist, &trace] ( + const char* pf, const char* sf) -> bool + { + r = msvc_search_library<liba> ( + ld, d, p, otype::a, pf, sf, exist, trace); + return r != nullptr; + }; + + // Try: + // foo.lib + // libfoo.lib + // foolib.lib + // foo_static.lib + // + return + search ("", "") || + search ("lib", "") || + search ("", "lib") || + search ("", "_static") ? r : nullptr; + } + + libs* common:: + msvc_search_shared (const process_path& ld, + const dir_path& d, + const prerequisite_key& pk, + bool exist) const + { + tracer trace (x, "msvc_search_shared"); + + assert (pk.scope != nullptr); + + libs* s (nullptr); + + auto search = [&s, &ld, &d, &pk, exist, &trace] ( + const char* pf, const char* sf) -> bool + { + if (libi* i = msvc_search_library<libi> ( + ld, d, pk, otype::s, pf, sf, exist, trace)) + { + ulock l ( + insert_library ( + pk.scope->ctx, s, *pk.tk.name, d, nullopt, exist, trace)); + + if (!exist) + { + if (l.owns_lock ()) + { + s->member = i; // We are first. + l.unlock (); + } + else + assert (find_adhoc_member<libi> (*s) == i); + + // Presumably there is a DLL somewhere, we just don't know where. + // + s->mtime (i->mtime ()); + s->path (path ()); + } + } + + return s != nullptr; + }; + + // Try: + // foo.lib + // libfoo.lib + // foodll.lib + // + return + search ("", "") || + search ("lib", "") || + search ("", "dll") ? 
s : nullptr; + } + } +} diff --git a/libbuild2/cc/parser+module.test.testscript b/libbuild2/cc/parser+module.test.testscript new file mode 100644 index 0000000..d51ac0a --- /dev/null +++ b/libbuild2/cc/parser+module.test.testscript @@ -0,0 +1,147 @@ +# file : libbuild2/cc/parser+module.test.testscript +# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test C++ module constructs. +# + +# NOTE: currently header unit imports don't produce anything. +# + +: import +: +$* <<EOI >>EOI +import foo; +import foo.bar; +import foo.bar.baz; +EOI + +: import-header +: +$* <<EOI +import "foo.h"; +import <sys/foo.h>; +__import "/usr/include/stdio.h"; +EOI + +: module-implementation +: +$* <<EOI >>EOI +module foo; +EOI + +: module-interface +: +$* <<EOI >>EOI +export module foo; +EOI + +: export-imported +: +$* <<EOI >>EOO +export import foo; +export import "foo.h"; +export import <sys/foo.h>; +EOI +export import foo; +EOO + +: non-module +: +$* <<EOI +#pragma import module foo; +#pragma export module foo; +#pragma module foo; +export namespace bar {int fox ();} +EOI + +: attribute +: +$* <<EOI >>EOO +import foo [[export({import})]]; +import "foo.h" [[export({import})]]; +module bar [[module({module})]]; +EOI +import foo; +module bar; +EOO + +: import-duplicate +: +$* <<EOI >>EOO +import foo; +import bar.baz; +import foo; +import bar . 
baz; +EOI +import foo; +import bar.baz; +EOO + +: brace-missing +: +$* <<EOI 2>>EOE != 0 +export +{ + class foo + { + //}; + module foo; +} +EOI +stdin:8:1: error: {}-imbalance detected +EOE + +: brace-stray +: +$* <<EOI 2>>EOE != 0 +export +{ + class foo + { + };} +} +module foo; +EOI +stdin:6:1: error: {}-imbalance detected +EOE + +: import-missing-name +: +$* <<EOI 2>>EOE != 0 +import ; +EOI +stdin:1:8: error: module or header name expected instead of ';' +EOE + +: module-missing-name +: +$* <<EOI 2>>EOE != 0 +module ; +EOI +stdin:1:1: error: module declaration expected after leading module marker +EOE + +: import-missing-semi +: +$* <<EOI 2>>EOE != 0 +import foo +EOI +stdin:2:1: error: ';' expected instead of <end of file> +EOE + +: module-missing-semi +: +$* <<EOI 2>>EOE != 0 +export module foo +EOI +stdin:2:1: error: ';' expected instead of <end of file> +EOE + +: import-missing-header +: +$* <<EOI 2>>EOE != 0 +import <foo.h; +EOI +stdin:2:1: error: closing '>' expected after header name +EOE diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx new file mode 100644 index 0000000..179043e --- /dev/null +++ b/libbuild2/cc/parser.cxx @@ -0,0 +1,263 @@ +// file : libbuild2/cc/parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/parser.hxx> + +#include <libbuild2/cc/lexer.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + using type = token_type; + + unit parser:: + parse (ifdstream& is, const path& name) + { + lexer l (is, name); + l_ = &l; + + unit u; + u_ = &u; + + // If the source has errors then we want the compiler to issue the + // diagnostics. However, the errors could as likely be because we are + // mis-parsing things. Initially, as a middle ground, we were going to + // issue warnings. But the problem with this approach is that they are + // easy to miss. So for now we fail. 
And it turns out we don't mis- + // parse much. + // + size_t bb (0); // {}-balance. + + token t; + for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) + { + // Break to stop, continue to continue, set n to false if the + // next token already extracted. + // + n = true; + + switch (t.type) + { + case type::lcbrace: + { + ++bb; + continue; + } + case type::rcbrace: + { + if (bb-- == 0) + break; // Imbalance. + + continue; + } + case type::identifier: + { + // Constructs we need to recognize: + // + // module ; + // [export] import <module-name> [<attributes>] ; + // [export] import <header-name> [<attributes>] ; + // [export] module <module-name> [<attributes>] ; + // + // Additionally, when include is translated to an import, it's + // normally replaced with the special __import keyword since it + // may appear in C context. + // + const string& id (t.value); + + if (bb == 0) + { + if (id == "import" || id == "__import") + { + parse_import (t, false); + } + else if (id == "module") + { + parse_module (t, false); + } + else if (id == "export") + { + if (l_->next (t) == type::identifier) + { + if (id == "module") parse_module (t, true); + else if (id == "import") parse_import (t, true); + else n = false; // Something else (e.g., export namespace). + } + else + n = false; + } + } + continue; + } + default: continue; + } + + break; + } + + if (bb != 0) + /*warn*/ fail (t) << "{}-imbalance detected"; + + if (module_marker_ && u.module_info.name.empty ()) + fail (*module_marker_) << "module declaration expected after " + << "leading module marker"; + + checksum = l.checksum (); + return u; + } + + void parser:: + parse_import (token& t, bool ex) + { + // enter: import keyword + // leave: semi + + string un; + unit_type ut; + switch (l_->next (t)) // Start of module/header name. 
+ { + case type::less: + case type::string: + { + un = parse_header_name (t); + ut = unit_type::module_header; + break; + } + case type::identifier: + { + un = parse_module_name (t); + ut = unit_type::module_iface; + break; + } + default: + fail (t) << "module or header name expected instead of " << t << endf; + } + + // Should be {}-balanced. + // + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + // For now we skip header units (see a comment on module type/info + // string serialization in compile rule for details). Note that + // currently parse_header_name() always returns empty name. + // + if (ut == unit_type::module_header) + return; + + // Ignore duplicates. We don't expect a large numbers of (direct) + // imports so vector/linear search is probably more efficient than a + // set. + // + auto& is (u_->module_info.imports); + + auto i (find_if (is.begin (), is.end (), + [&un] (const module_import& i) + { + return i.name == un; + })); + + if (i == is.end ()) + is.push_back (module_import {ut, move (un), ex, 0}); + else + i->exported = i->exported || ex; + } + + void parser:: + parse_module (token& t, bool ex) + { + // enter: module keyword + // leave: semi + + location l (get_location (t)); + + l_->next (t); + + // Handle the leading 'module;' marker (p0713). + // + // Note that we don't bother diagnosing invalid/duplicate markers + // leaving that to the compiler. + // + if (!ex && t.type == type::semi) + { + module_marker_ = move (l); + return; + } + + // Otherwise it should be the start of the module name. + // + string n (parse_module_name (t)); + + // Should be {}-balanced. + // + for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; + + if (t.type != type::semi) + fail (t) << "';' expected instead of " << t; + + if (!u_->module_info.name.empty ()) + fail (l) << "multiple module declarations"; + + u_->type = ex ? 
unit_type::module_iface : unit_type::module_impl; + u_->module_info.name = move (n); + } + + string parser:: + parse_module_name (token& t) + { + // enter: first token of module name + // leave: token after module name + + string n; + + // <identifier>[ . <identifier>]* + // + for (;; l_->next (t)) + { + if (t.type != type::identifier) + fail (t) << "module name expected instead of " << t; + + n += t.value; + + if (l_->next (t) != type::dot) + break; + + n += '.'; + } + + return n; + } + + string parser:: + parse_header_name (token& t) + { + // enter: first token of module name, either string or less + // leave: token after module name + + string n; + + // NOTE: actual name is a TODO if/when we need it. + // + if (t.type == type::string) + /*n = move (t.value)*/; + else + { + while (l_->next (t) != type::greater) + { + if (t.type == type::eos) + fail (t) << "closing '>' expected after header name" << endf; + } + } + + l_->next (t); + return n; + } + } +} diff --git a/libbuild2/cc/parser.hxx b/libbuild2/cc/parser.hxx new file mode 100644 index 0000000..324b62a --- /dev/null +++ b/libbuild2/cc/parser.hxx @@ -0,0 +1,55 @@ +// file : libbuild2/cc/parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_PARSER_HXX +#define LIBBUILD2_CC_PARSER_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/cc/types.hxx> + +namespace build2 +{ + namespace cc + { + // Extract translation unit information from a preprocessed C/C++ source. + // + struct token; + class lexer; + + class parser + { + public: + unit + parse (ifdstream&, const path& name); + + private: + void + parse_import (token&, bool); + + void + parse_module (token&, bool); + + string + parse_module_name (token&); + + string + parse_header_name (token&); + + public: + string checksum; // Translation unit checksum. 
+ + private: + lexer* l_; + unit* u_; + + optional<location> module_marker_; + }; + } +} + +#endif // LIBBUILD2_CC_PARSER_HXX diff --git a/libbuild2/cc/parser.test.cxx b/libbuild2/cc/parser.test.cxx new file mode 100644 index 0000000..82c68d1 --- /dev/null +++ b/libbuild2/cc/parser.test.cxx @@ -0,0 +1,67 @@ +// file : libbuild2/cc/parser.test.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <cassert> +#include <iostream> + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/cc/parser.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + // Usage: argv[0] [<file>] + // + int + main (int argc, char* argv[]) + { + try + { + const char* file; + + ifdstream is; + if (argc > 1) + { + file = argv[1]; + is.open (file); + } + else + { + file = "stdin"; + is.open (fddup (stdin_fd ())); + } + + parser p; + unit u (p.parse (is, path (file))); + unit_type ut (u.type); + + for (const module_import& m: u.module_info.imports) + cout << (m.exported ? "export " : "") + << "import " << m.name << ';' << endl; + + if (ut == unit_type::module_iface || ut == unit_type::module_impl) + cout << (ut == unit_type::module_iface ? "export " : "") + << "module " << u.module_info.name << ';' << endl; + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::cc::main (argc, argv); +} diff --git a/libbuild2/cc/pkgconfig.cxx b/libbuild2/cc/pkgconfig.cxx new file mode 100644 index 0000000..0669b02 --- /dev/null +++ b/libbuild2/cc/pkgconfig.cxx @@ -0,0 +1,1550 @@ +// file : libbuild2/cc/pkgconfig.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +// In order not to complicate the bootstrap procedure with libpkgconf building +// exclude functionality that involves reading of .pc files. 
+// +#ifndef BUILD2_BOOTSTRAP +# include <libpkgconf/libpkgconf.h> +#endif + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/install/utility.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/types.hxx> +#include <libbuild2/cc/target.hxx> // pc +#include <libbuild2/cc/utility.hxx> + +#include <libbuild2/cc/common.hxx> +#include <libbuild2/cc/compile-rule.hxx> +#include <libbuild2/cc/link-rule.hxx> + +#ifndef BUILD2_BOOTSTRAP + +// Note that the libpkgconf library doesn't provide the version macro that we +// could use to compile the code conditionally against different API versions. +// Thus, we need to sense the pkgconf_client_new() function signature +// ourselves to call it properly. +// +namespace details +{ + void* + pkgconf_cross_personality_default (); // Never called. +} + +using namespace details; + +template <typename H> +static inline pkgconf_client_t* +call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*), + H error_handler, + void* error_handler_data) +{ + return f (error_handler, error_handler_data); +} + +template <typename H, typename P> +static inline pkgconf_client_t* +call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*, P), + H error_handler, + void* error_handler_data) +{ + return f (error_handler, + error_handler_data, + ::pkgconf_cross_personality_default ()); +} + +#endif + +using namespace std; +using namespace butl; + +namespace build2 +{ +#ifndef BUILD2_BOOTSTRAP + + // Load package information from a .pc file. Filter out the -I/-L options + // that refer to system directories. 
+ // + // Note that the prerequisite package .pc files search order is as follows: + // + // - in directory of the specified file + // - in pc_dirs directories (in the natural order) + // + class pkgconf + { + public: + using path_type = build2::path; + + path_type path; + + public: + explicit + pkgconf (path_type, + const dir_paths& pc_dirs, + const dir_paths& sys_inc_dirs, + const dir_paths& sys_lib_dirs); + + // Create a special empty object. Querying package information on such + // an object is illegal. + // + pkgconf () = default; + + ~pkgconf (); + + // Movable-only type. + // + pkgconf (pkgconf&& p) + : path (move (p.path)), + client_ (p.client_), + pkg_ (p.pkg_) + { + p.client_ = nullptr; + p.pkg_ = nullptr; + } + + pkgconf& + operator= (pkgconf&& p) + { + if (this != &p) + { + this->~pkgconf (); + new (this) pkgconf (move (p)); // Assume noexcept move-construction. + } + return *this; + } + + pkgconf (const pkgconf&) = delete; + pkgconf& operator= (const pkgconf&) = delete; + + strings + cflags (bool stat) const; + + strings + libs (bool stat) const; + + string + variable (const char*) const; + + string + variable (const string& s) const {return variable (s.c_str ());} + + private: + // Keep them as raw pointers not to deal with API thread-unsafety in + // deleters and introducing additional mutex locks. + // + pkgconf_client_t* client_ = nullptr; + pkgconf_pkg_t* pkg_ = nullptr; + }; + + // Currently the library is not thread-safe, even on the pkgconf_client_t + // level (see issue #128 for details). + // + // @@ An update: seems that the obvious thread-safety issues are fixed. + // However, let's keep mutex locking for now not to introduce potential + // issues before we make sure that there are no other ones. + // + static mutex pkgconf_mutex; + + // The package dependency traversal depth limit. 
+ // + static const int pkgconf_max_depth = 100; + + // Normally the error_handler() callback can be called multiple times to + // report a single error (once per message line), to produce a multi-line + // message like this: + // + // Package foo was not found in the pkg-config search path.\n + // Perhaps you should add the directory containing `foo.pc'\n + // to the PKG_CONFIG_PATH environment variable\n + // Package 'foo', required by 'bar', not found\n + // + // For the above example callback will be called 4 times. To suppress all the + // junk we will use PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS to get just: + // + // Package 'foo', required by 'bar', not found\n + // + static const int pkgconf_flags = PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS; + + static bool + pkgconf_error_handler (const char* msg, const pkgconf_client_t*, const void*) + { + error << runtime_error (msg); // Sanitize the message. + return true; + } + + // Deleters. Note that they are thread-safe. + // + struct fragments_deleter + { + void operator() (pkgconf_list_t* f) const {pkgconf_fragment_free (f);} + }; + + // Convert fragments to strings. Skip the -I/-L options that refer to system + // directories. + // + static strings + to_strings (const pkgconf_list_t& frags, + char type, + const pkgconf_list_t& sysdirs) + { + assert (type == 'I' || type == 'L'); + + strings r; + + auto add = [&r] (const pkgconf_fragment_t* frag) + { + string s; + if (frag->type != '\0') + { + s += '-'; + s += frag->type; + } + + s += frag->data; + r.push_back (move (s)); + }; + + // Option that is separated from its value, for example: + // + // -I /usr/lib + // + const pkgconf_fragment_t* opt (nullptr); + + pkgconf_node_t *node; + PKGCONF_FOREACH_LIST_ENTRY(frags.head, node) + { + auto frag (static_cast<const pkgconf_fragment_t*> (node->data)); + + // Add the separated option and directory, unless the latest is a system + // one. 
+ // + if (opt != nullptr) + { + // Note that we should restore the directory path that was + // (mis)interpreted as an option, for example: + // + // -I -Ifoo + // + // In the above example option '-I' is followed by directory '-Ifoo', + // which is represented by libpkgconf library as fragment 'foo' with + // type 'I'. + // + if (!pkgconf_path_match_list ( + frag->type == '\0' + ? frag->data + : (string ({'-', frag->type}) + frag->data).c_str (), + &sysdirs)) + { + add (opt); + add (frag); + } + + opt = nullptr; + continue; + } + + // Skip the -I/-L option if it refers to a system directory. + // + if (frag->type == type) + { + // The option is separated from a value, that will (presumably) follow. + // + if (*frag->data == '\0') + { + opt = frag; + continue; + } + + if (pkgconf_path_match_list (frag->data, &sysdirs)) + continue; + } + + add (frag); + } + + if (opt != nullptr) // Add the dangling option. + add (opt); + + return r; + } + + // Note that some libpkgconf functions can potentially return NULL, failing + // to allocate the required memory block. However, we will not check the + // returned value for NULL as the library doesn't do so, prior to filling the + // allocated structures. So such a code complication on our side would be + // useless. Also, for some functions the NULL result has a special semantics, + // for example "not found". + // + pkgconf:: + pkgconf (path_type p, + const dir_paths& pc_dirs, + const dir_paths& sys_lib_dirs, + const dir_paths& sys_inc_dirs) + : path (move (p)) + { + auto add_dirs = [] (pkgconf_list_t& dir_list, + const dir_paths& dirs, + bool suppress_dups, + bool cleanup = false) + { + if (cleanup) + { + pkgconf_path_free (&dir_list); + dir_list = PKGCONF_LIST_INITIALIZER; + } + + for (const auto& d: dirs) + pkgconf_path_add (d.string ().c_str (), &dir_list, suppress_dups); + }; + + mlock l (pkgconf_mutex); + + // Initialize the client handle. 
+ // + unique_ptr<pkgconf_client_t, void (*) (pkgconf_client_t*)> c ( + call_pkgconf_client_new (&pkgconf_client_new, + pkgconf_error_handler, + nullptr /* handler_data */), + [] (pkgconf_client_t* c) {pkgconf_client_free (c);}); + + pkgconf_client_set_flags (c.get (), pkgconf_flags); + + // Note that the system header and library directory lists are + // automatically pre-filled by the pkgconf_client_new() call (see above). + // We will re-create these lists from scratch. + // + add_dirs (c->filter_libdirs, + sys_lib_dirs, + false /* suppress_dups */, + true /* cleanup */); + + add_dirs (c->filter_includedirs, + sys_inc_dirs, + false /* suppress_dups */, + true /* cleanup */); + + // Note that the loaded file directory is added to the (yet empty) search + // list. Also note that loading of the prerequisite packages is delayed + // until flags retrieval, and their file directories are not added to the + // search list. + // + pkg_ = pkgconf_pkg_find (c.get (), path.string ().c_str ()); + + if (pkg_ == nullptr) + fail << "package '" << path << "' not found or invalid"; + + // Add the .pc file search directories. + // + assert (c->dir_list.length == 1); // Package file directory (see above). + add_dirs (c->dir_list, pc_dirs, true /* suppress_dups */); + + client_ = c.release (); + } + + pkgconf:: + ~pkgconf () + { + if (client_ != nullptr) // Not empty. + { + assert (pkg_ != nullptr); + + mlock l (pkgconf_mutex); + pkgconf_pkg_unref (client_, pkg_); + pkgconf_client_free (client_); + } + } + + strings pkgconf:: + cflags (bool stat) const + { + assert (client_ != nullptr); // Must not be empty. + + mlock l (pkgconf_mutex); + + pkgconf_client_set_flags ( + client_, + pkgconf_flags | + + // Walk through the private package dependencies (Requires.private) + // besides the public ones while collecting the flags. Note that we do + // this for both static and shared linking. 
+ // + PKGCONF_PKG_PKGF_SEARCH_PRIVATE | + + // Collect flags from Cflags.private besides those from Cflags for the + // static linking. + // + (stat + ? PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS + : 0)); + + pkgconf_list_t f = PKGCONF_LIST_INITIALIZER; // Aggregate initialization. + int e (pkgconf_pkg_cflags (client_, pkg_, &f, pkgconf_max_depth)); + + if (e != PKGCONF_PKG_ERRF_OK) + throw failed (); // Assume the diagnostics is issued. + + unique_ptr<pkgconf_list_t, fragments_deleter> fd (&f); // Auto-deleter. + return to_strings (f, 'I', client_->filter_includedirs); + } + + strings pkgconf:: + libs (bool stat) const + { + assert (client_ != nullptr); // Must not be empty. + + mlock l (pkgconf_mutex); + + pkgconf_client_set_flags ( + client_, + pkgconf_flags | + + // Additionally collect flags from the private dependency packages + // (see above) and from the Libs.private value for the static linking. + // + (stat + ? PKGCONF_PKG_PKGF_SEARCH_PRIVATE | + PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS + : 0)); + + pkgconf_list_t f = PKGCONF_LIST_INITIALIZER; // Aggregate initialization. + int e (pkgconf_pkg_libs (client_, pkg_, &f, pkgconf_max_depth)); + + if (e != PKGCONF_PKG_ERRF_OK) + throw failed (); // Assume the diagnostics is issued. + + unique_ptr<pkgconf_list_t, fragments_deleter> fd (&f); // Auto-deleter. + return to_strings (f, 'L', client_->filter_libdirs); + } + + string pkgconf:: + variable (const char* name) const + { + assert (client_ != nullptr); // Must not be empty. + + mlock l (pkgconf_mutex); + const char* r (pkgconf_tuple_find (client_, &pkg_->vars, name)); + return r != nullptr ? string (r) : string (); + } + +#endif + + namespace cc + { + using namespace bin; + + // In pkg-config backslashes, spaces, etc are escaped with a backslash. 
+ // + static string + escape (const string& s) + { + string r; + + for (size_t p (0);;) + { + size_t sp (s.find_first_of ("\\ ", p)); + + if (sp != string::npos) + { + r.append (s, p, sp - p); + r += '\\'; + r += s[sp]; + p = sp + 1; + } + else + { + r.append (s, p, sp); + break; + } + } + + return r; + } + + // Try to find a .pc file in the pkgconfig/ subdirectory of libd, trying + // several names derived from stem. If not found, return false. If found, + // load poptions, loptions, libs, and modules, set the corresponding + // *.export.* variables and add prerequisites on targets, and return true. + // Note that we assume the targets are locked so that all of this is + // MT-safe. + // + // System library search paths (those extracted from the compiler) are + // passed in top_sysd while the user-provided (via -L) in top_usrd. + // + // Note that scope and link order should be "top-level" from the + // search_library() POV. + // + // Also note that the bootstrapped version of build2 will not search for + // .pc files, always returning false (see above for the reasoning). + // +#ifndef BUILD2_BOOTSTRAP + + // Iterate over pkgconf directories that correspond to the specified + // library directory, passing them to the callback function for as long as + // it returns false (not found). Return true if the callback returned + // true. + // + bool common:: + pkgconfig_search (const dir_path& d, const pkgconfig_callback& f) const + { + dir_path pd (d); + + // First always check the pkgconfig/ subdirectory in this library + // directory. Even on platforms where this is not the canonical place, + // .pc files of autotools-based packages installed by the user often + // still end up there. + // + if (exists (pd /= "pkgconfig") && f (move (pd))) + return true; + + // Platform-specific locations. + // + if (tsys == "freebsd") + { + // On FreeBSD .pc files go to libdata/pkgconfig/, not lib/pkgconfig/. 
+ // + (((pd = d) /= "..") /= "libdata") /= "pkgconfig"; + + if (exists (pd) && f (move (pd))) + return true; + } + + return false; + } + + // Search for the .pc files in the pkgconf directories that correspond to + // the specified library directory. If found, return static (first) and + // shared (second) library .pc files. If common is false, then only + // consider our .static/.shared files. + // + pair<path, path> common:: + pkgconfig_search (const dir_path& libd, + const optional<project_name>& proj, + const string& stem, + bool common) const + { + // When it comes to looking for .pc files we have to decide where to + // search (which directory(ies)) as well as what to search for (which + // names). Suffix is our ".shared" or ".static" extension. + // + auto search_dir = [&proj, &stem] (const dir_path& dir, + const string& sfx) -> path + { + path f; + + // See if there is a corresponding .pc file. About half of them are + // called foo.pc and half libfoo.pc (and one of the pkg-config's + // authors suggests that some of you should call yours foolib.pc, just + // to keep things interesting, you know). + // + // Given the (general) import in the form <proj>%lib{<stem>}, we will + // first try lib<stem>.pc, then <stem>.pc. Maybe it also makes sense + // to try <proj>.pc, just in case. Though, according to pkg-config + // docs, the .pc file should correspond to a library, not project. But + // then you get something like zlib which calls it zlib.pc. So let's + // just do it. + // + f = dir; + f /= "lib"; + f += stem; + f += sfx; + f += ".pc"; + if (exists (f)) + return f; + + f = dir; + f /= stem; + f += sfx; + f += ".pc"; + if (exists (f)) + return f; + + if (proj) + { + f = dir; + f /= proj->string (); + f += sfx; + f += ".pc"; + if (exists (f)) + return f; + } + + return path (); + }; + + // Return true (and so stop the iteration) if a .pc file is found. + // + // Note that we rely on the "small function object" optimization here. 
+ // + struct data + { + path a; + path s; + bool common; + } d {path (), path (), common}; + + auto check = [&d, &search_dir] (dir_path&& p) -> bool + { + // First look for static/shared-specific files. + // + d.a = search_dir (p, ".static"); + d.s = search_dir (p, ".shared"); + + if (!d.a.empty () || !d.s.empty ()) + return true; + + // Then the common. + // + if (d.common) + d.a = d.s = search_dir (p, ""); + + return !d.a.empty (); + }; + + pair<path, path> r; + + if (pkgconfig_search (libd, check)) + { + r.first = move (d.a); + r.second = move (d.s); + } + + return r; + }; + + bool common:: + pkgconfig_load (action a, + const scope& s, + lib& lt, + liba* at, + libs* st, + const optional<project_name>& proj, + const string& stem, + const dir_path& libd, + const dir_paths& top_sysd, + const dir_paths& top_usrd) const + { + assert (at != nullptr || st != nullptr); + + pair<path, path> p ( + pkgconfig_search (libd, proj, stem, true /* common */)); + + if (p.first.empty () && p.second.empty ()) + return false; + + pkgconfig_load (a, s, lt, at, st, p, libd, top_sysd, top_usrd); + return true; + } + + void common:: + pkgconfig_load (action a, + const scope& s, + lib& lt, + liba* at, + libs* st, + const pair<path, path>& paths, + const dir_path& libd, + const dir_paths& top_sysd, + const dir_paths& top_usrd) const + { + tracer trace (x, "pkgconfig_load"); + + assert (at != nullptr || st != nullptr); + + const path& ap (paths.first); + const path& sp (paths.second); + + assert (!ap.empty () || !sp.empty ()); + + // Extract --cflags and set them as lib?{}:export.poptions. Note that we + // still pass --static in case this is pkgconf which has Cflags.private. + // + auto parse_cflags = [&trace, this] (target& t, + const pkgconf& pc, + bool la) + { + strings pops; + + bool arg (false); + for (auto& o: pc.cflags (la)) + { + if (arg) + { + // Can only be an argument for -I, -D, -U options. 
+ // + pops.push_back (move (o)); + arg = false; + continue; + } + + size_t n (o.size ()); + + // We only keep -I, -D and -U. + // + if (n >= 2 && + o[0] == '-' && + (o[1] == 'I' || o[1] == 'D' || o[1] == 'U')) + { + pops.push_back (move (o)); + arg = (n == 2); + continue; + } + + l4 ([&]{trace << "ignoring " << pc.path << " --cflags option " + << o;}); + } + + if (arg) + fail << "argument expected after " << pops.back () << + info << "while parsing pkg-config --cflags " << pc.path; + + if (!pops.empty ()) + { + auto p (t.vars.insert (c_export_poptions)); + + // The only way we could already have this value is if this same + // library was also imported as a project (as opposed to installed). + // Unlikely but possible. In this case the values were set by the + // export stub and we shouldn't touch them. + // + if (p.second) + p.first.get () = move (pops); + } + }; + + // Parse --libs into loptions/libs (interface and implementation). If + // ps is not NULL, add each resolved library target as a prerequisite. + // + auto parse_libs = [a, &s, top_sysd, this] (target& t, + bool binless, + const pkgconf& pc, + bool la, + prerequisites* ps) + { + strings lops; + vector<name> libs; + + // Normally we will have zero or more -L's followed by one or more + // -l's, with the first one being the library itself, unless the + // library is binless. But sometimes we may have other linker options, + // for example, -Wl,... or -pthread. It's probably a bad idea to + // ignore them. Also, theoretically, we could have just the library + // name/path. + // + // The tricky part, of course, is to know whether what follows after + // an option we don't recognize is its argument or another option or + // library. What we do at the moment is stop recognizing just library + // names (without -l) after seeing an unknown option. + // + // Note that have_L is only ever set to true (when we see -L) so it must + // start out false. + // + bool arg (false), first (true), known (true), have_L (false); + for (auto& o: pc.libs (la)) + { + if (arg) + { + // Can only be an argument for an loption. 
+ // + lops.push_back (move (o)); + arg = false; + continue; + } + + size_t n (o.size ()); + + // See if this is -L. + // + if (n >= 2 && o[0] == '-' && o[1] == 'L') + { + have_L = true; + lops.push_back (move (o)); + arg = (n == 2); + continue; + } + + // See if that's -l or just the library name/path. + // + if ((known && o[0] != '-') || + (n > 2 && o[0] == '-' && o[1] == 'l')) + { + // Unless binless, the first one is the library itself, which we + // skip. Note that we don't verify this and theoretically it could + // be some other library, but we haven't encountered such a beast + // yet. + // + if (first) + { + first = false; + + if (!binless) + continue; + } + + // @@ If for some reason this is the library itself (doesn't go + // first or libpkgconf parsed libs in some bizarre way) we will + // hang trying to lock its target inside search_library() (or + // fail an assertion if run serially) as by now it is already + // locked. To be safe we probably shouldn't rely on the position + // and filter out all occurrences of the library itself (by + // name?) and complain if none were encountered. + // + libs.push_back (name (move (o))); + continue; + } + + // Otherwise we assume it is some other loption. + // + known = false; + lops.push_back (move (o)); + } + + if (arg) + fail << "argument expected after " << lops.back () << + info << "while parsing pkg-config --libs " << pc.path; + + // Space-separated list of escaped library flags. + // + auto lflags = [&pc, la] () -> string + { + string r; + for (const auto& o: pc.libs (la)) + { + if (!r.empty ()) + r += ' '; + r += escape (o); + } + return r; + }; + + if (first && !binless) + fail << "library expected in '" << lflags () << "'" << + info << "while parsing pkg-config --libs " << pc.path; + + // Resolve -lfoo into the library file path using our import installed + // machinery (i.e., we are going to call search_library() that will + // probably call us again, and so on). 
+ // + // The reason we do it is the link order. For general libraries it + // shouldn't matter if we imported them via an export stub, direct + // import installed, or via a .pc file (which we could have generated + // from the export stub). The exception is "runtime libraries" (which + // are really the extension of libc) such as -lm, -ldl, -lpthread, + // etc. Those we will detect and leave as -l*. + // + // If we managed to resolve all the -l's (sans runtime), then we can + // omit -L's for nice and tidy command line. + // + bool all (true); + optional<dir_paths> usrd; // Populate lazily. + + for (name& n: libs) + { + string& l (n.value); + + // These ones are common/standard/POSIX. + // + if (l[0] != '-' || // e.g., shell32.lib + l == "-lm" || + l == "-ldl" || + l == "-lrt" || + l == "-lpthread") + continue; + + // Note: these list are most likely incomplete. + // + if (tclass == "linux") + { + // Some extras from libc (see libc6-dev) and other places. + // + if (l == "-lanl" || + l == "-lcrypt" || + l == "-lnsl" || + l == "-lresolv" || + l == "-lgcc") + continue; + } + else if (tclass == "macos") + { + if (l == "-lSystem") + continue; + } + + // Prepare user search paths by entering the -L paths from the .pc + // file. + // + if (have_L && !usrd) + { + usrd = dir_paths (); + + for (auto i (lops.begin ()); i != lops.end (); ++i) + { + const string& o (*i); + + if (o.size () >= 2 && o[0] == '-' && o[1] == 'L') + { + string p; + + if (o.size () == 2) + p = *++i; // We've verified it's there. + else + p = string (o, 2); + + dir_path d (move (p)); + + if (d.relative ()) + fail << "relative -L directory in '" << lflags () << "'" << + info << "while parsing pkg-config --libs " << pc.path; + + usrd->push_back (move (d)); + } + } + } + + // @@ OUT: for now we assume out is undetermined, just like in + // resolve_library(). + // + dir_path out; + string name (l, 2); // Sans -l. 
+ + prerequisite_key pk { + nullopt, {&lib::static_type, &out, &out, &name, nullopt}, &s}; + + if (const target* lt = search_library (a, top_sysd, usrd, pk)) + { + // We used to pick a member but that doesn't seem right since the + // same target could be used with different link orders. + // + n.dir = lt->dir; + n.type = lib::static_type.name; + n.value = lt->name; + + if (ps != nullptr) + ps->push_back (prerequisite (*lt)); + } + else + // If we couldn't find the library, then leave it as -l. + // + all = false; + } + + // If all the -l's resolved and there were no other options, then drop + // all the -L's. If we have unknown options, then leave them in to be + // safe. + // + if (all && known) + lops.clear (); + + if (!lops.empty ()) + { + if (cclass == compiler_class::msvc) + { + // Translate -L to /LIBPATH. + // + for (auto i (lops.begin ()); i != lops.end (); ) + { + string& o (*i); + size_t n (o.size ()); + + if (n >= 2 && o[0] == '-' && o[1] == 'L') + { + o.replace (0, 2, "/LIBPATH:"); + + if (n == 2) + { + o += *++i; // We've verified it's there. + i = lops.erase (i); + continue; + } + } + + ++i; + } + } + + auto p (t.vars.insert (c_export_loptions)); + + if (p.second) + p.first.get () = move (lops); + } + + // Set even if empty (export override). + // + { + auto p (t.vars.insert (c_export_libs)); + + if (p.second) + p.first.get () = move (libs); + } + }; + + // On Windows pkg-config will escape backslashes in paths. In fact, it + // may escape things even on non-Windows platforms, for example, + // spaces. So we use a slightly modified version of next_word(). + // + auto next = [] (const string& s, size_t& b, size_t& e) -> string + { + string r; + size_t n (s.size ()); + + if (b != e) + b = e; + + // Skip leading delimiters. + // + for (; b != n && s[b] == ' '; ++b) ; + + if (b == n) + { + e = n; + return r; + } + + // Find first trailing delimiter while taking care of escapes. 
+ // + r = s[b]; + for (e = b + 1; e != n && s[e] != ' '; ++e) + { + if (s[e] == '\\') + { + if (++e == n) + fail << "dangling escape in pkg-config output '" << s << "'"; + } + + r += s[e]; + } + + return r; + }; + + // Parse modules and add them to the prerequisites. + // + auto parse_modules = [&trace, &next, &s, this] + (const pkgconf& pc, prerequisites& ps) + { + string mstr (pc.variable ("cxx_modules")); + + string m; + for (size_t b (0), e (0); !(m = next (mstr, b, e)).empty (); ) + { + // The format is <name>=<path>. + // + size_t p (m.find ('=')); + if (p == string::npos || + p == 0 || // Empty name. + p == m.size () - 1) // Empty path. + fail << "invalid module information in '" << mstr << "'" << + info << "while parsing pkg-config --variable=cxx_modules " + << pc.path; + + string mn (m, 0, p); + path mp (m, p + 1, string::npos); + path mf (mp.leaf ()); + + // Extract module properties, if any. + // + string pp (pc.variable ("cxx_module_preprocessed." + mn)); + string se (pc.variable ("cxx_module_symexport." + mn)); + + // For now there are only C++ modules. + // + auto tl ( + s.ctx.targets.insert_locked ( + *x_mod, + mp.directory (), + dir_path (), + mf.base ().string (), + mf.extension (), + true, // Implied. + trace)); + + target& mt (tl.first); + + // If the target already exists, then setting its variables is not + // MT-safe. So currently we only do it if we have the lock (and thus + // nobody can see this target yet) assuming that this has already + // been done otherwise. + // + // @@ This is not quite correct, though: this target could already + // exist but for a "different purpose" (e.g., it could be used as + // a header). + // + // @@ Could setting it in the rule-specific vars help? (But we + // are not matching a rule for it.) Note that we are setting + // it on the module source, not bmi*{}! So rule-specific vars + // don't seem to the answer here. 
+ // + if (tl.second.owns_lock ()) + { + mt.vars.assign (c_module_name) = move (mn); + + // Set module properties. Note that if unspecified we should still + // set them to their default values since the hosting project may + // have them set to incompatible value. + // + { + value& v (mt.vars.assign (x_preprocessed)); // NULL + if (!pp.empty ()) v = move (pp); + } + + { + mt.vars.assign (x_symexport) = (se == "true"); + } + + tl.second.unlock (); + } + + ps.push_back (prerequisite (mt)); + } + }; + + // For now we only populate prerequisites for lib{}. To do it for + // liba{} would require weeding out duplicates that are already in + // lib{}. + // + prerequisites prs; + + pkgconf apc; + pkgconf spc; + + // Create the .pc files search directory list. + // + dir_paths pc_dirs; + + // Note that we rely on the "small function object" optimization here. + // + auto add_pc_dir = [&pc_dirs] (dir_path&& d) -> bool + { + pc_dirs.emplace_back (move (d)); + return false; + }; + + pkgconfig_search (libd, add_pc_dir); + for (const dir_path& d: top_usrd) pkgconfig_search (d, add_pc_dir); + for (const dir_path& d: top_sysd) pkgconfig_search (d, add_pc_dir); + + bool pa (at != nullptr && !ap.empty ()); + if (pa || sp.empty ()) + apc = pkgconf (ap, pc_dirs, sys_lib_dirs, sys_inc_dirs); + + bool ps (st != nullptr && !sp.empty ()); + if (ps || ap.empty ()) + spc = pkgconf (sp, pc_dirs, sys_lib_dirs, sys_inc_dirs); + + // Sort out the interface dependencies (which we are setting on lib{}). + // If we have the shared .pc variant, then we use that. Otherwise -- + // static but extract without the --static option (see also the saving + // logic). + // + pkgconf& ipc (ps ? spc : apc); // Interface package info. + + parse_libs ( + lt, + (ps ? 
st->mtime () : at->mtime ()) == timestamp_unreal /* binless */, + ipc, + false, + &prs); + + if (pa) + { + parse_cflags (*at, apc, true); + parse_libs (*at, at->path ().empty (), apc, true, nullptr); + } + + if (ps) + parse_cflags (*st, spc, false); + + // For now we assume static and shared variants export the same set of + // modules. While technically possible, having a different set will + // most likely lead to all sorts of trouble (at least for installed + // libraries) and life is short. + // + if (modules) + parse_modules (ipc, prs); + + assert (!lt.has_prerequisites ()); + if (!prs.empty ()) + lt.prerequisites (move (prs)); + + // Bless the library group with a "trust me it exists" timestamp. Failed + // that, if we add it as a prerequisite (like we do above), the fallback + // file rule won't match. + // + lt.mtime (mtime (ipc.path)); + } + +#else + + pair<path, path> common:: + pkgconfig_search (const dir_path&, + const optional<project_name>&, + const string&, + bool) const + { + return pair<path, path> (); + } + + bool common:: + pkgconfig_load (action, + const scope&, + lib&, + liba*, + libs*, + const optional<project_name>&, + const string&, + const dir_path&, + const dir_paths&, + const dir_paths&) const + { + return false; + } + + void common:: + pkgconfig_load (action, + const scope&, + lib&, + liba*, + libs*, + const pair<path, path>&, + const dir_path&, + const dir_paths&, + const dir_paths&) const + { + assert (false); // Should never be called. + } + +#endif + + void link_rule:: + pkgconfig_save (action a, const file& l, bool la, bool binless) const + { + tracer trace (x, "pkgconfig_save"); + + context& ctx (l.ctx); + + const scope& bs (l.base_scope ()); + const scope& rs (*bs.root_scope ()); + + auto* t (find_adhoc_member<pc> (l)); + assert (t != nullptr); + + // By default we assume things go into install.{include, lib}. 
+ // + using install::resolve_dir; + + dir_path idir (resolve_dir (l, cast<dir_path> (l["install.include"]))); + dir_path ldir (resolve_dir (l, cast<dir_path> (l["install.lib"]))); + + const path& p (t->path ()); + + if (verb >= 2) + text << "cat >" << p; + + if (ctx.dry_run) + return; + + auto_rmfile arm (p); + + try + { + ofdstream os (p); + + { + const project_name& n (project (rs)); + + if (n.empty ()) + fail << "no project name in " << rs; + + lookup vl (rs.vars[ctx.var_version]); + if (!vl) + fail << "no version variable in project " << n << + info << "while generating " << p; + + const string& v (cast<string> (vl)); + + os << "Name: " << n << endl; + os << "Version: " << v << endl; + + // This one is required so make something up if unspecified. + // + os << "Description: "; + if (const string* s = cast_null<string> (rs[ctx.var_project_summary])) + os << *s << endl; + else + os << n << ' ' << v << endl; + + if (const string* u = cast_null<string> (rs[ctx.var_project_url])) + os << "URL: " << *u << endl; + } + + auto save_poptions = [&l, &os] (const variable& var) + { + if (const strings* v = cast_null<strings> (l[var])) + { + for (auto i (v->begin ()); i != v->end (); ++i) + { + const string& o (*i); + size_t n (o.size ()); + + // Filter out -I (both -I<dir> and -I <dir> forms). + // + if (n >= 2 && o[0] == '-' && o[1] == 'I') + { + // In the -I <dir> form also skip the directory. If it is missing + // (dangling -I), stop so that we don't step past the end. + // + if (n == 2 && ++i == v->end ()) + break; + + continue; + } + + os << ' ' << escape (o); + } + } + }; + + // Given a library save its -l-style library name. + // + auto save_library = [&os, this] (const file& l) + { + // If available (it may not, in case of import-installed libraries), + // use the .pc file name to derive the -l library name (in case of + // the shared library, l.path() may contain version). 
+ // + string n; + + auto strip_lib = [&n] () + { + if (n.size () > 3 && + path::traits_type::compare (n.c_str (), 3, "lib", 3) == 0) + n.erase (0, 3); + }; + + if (auto* t = find_adhoc_member<pc> (l)) + { + // We also want to strip the lib prefix unless it is part of the + // target name while keeping custom library prefix/suffix, if any. + // + n = t->path ().leaf ().base ().base ().string (); + + if (path::traits_type::compare (n.c_str (), n.size (), + l.name.c_str (), l.name.size ()) != 0) + strip_lib (); + } + else + { + // Derive -l-name from the file name in a fuzzy, platform-specific + // manner. + // + n = l.path ().leaf ().base ().string (); + + if (cclass != compiler_class::msvc) + strip_lib (); + } + + os << " -l" << n; + }; + + // @@ TODO: support whole archive? + // + + // Cflags. + // + os << "Cflags:"; + os << " -I" << escape (idir.string ()); + save_poptions (c_export_poptions); + save_poptions (x_export_poptions); + os << endl; + + // Libs. + // + // While we generate split shared/static .pc files, in case of static + // we still want to sort things out into Libs/Libs.private. This is + // necessary to distinguish between interface and implementation + // dependencies if we don't have the shared variant (see the load + // logic for details). + // + //@@ TODO: would be nice to weed out duplicates. But is it always + // safe? Think linking archives: will have to keep duplicates in + // the second position, not first. Gets even trickier with + // Libs.private split. + // + { + os << "Libs:"; + + // While we don't need it for a binless library itself, it may be + // necessary to resolve its binfull dependencies. + // + os << " -L" << escape (ldir.string ()); + + // Now process ourselves as if we were being linked to something (so + // pretty similar to link_rule::append_libraries()). 
+ // + bool priv (false); + auto imp = [&priv] (const file&, bool la) {return priv && la;}; + + auto lib = [&os, &save_library] (const file* const* c, + const string& p, + lflags, + bool) + { + const file* l (c != nullptr ? *c : nullptr); + + if (l != nullptr) + { + if (l->is_a<libs> () || l->is_a<liba> ()) // See through libux. + save_library (*l); + } + else + os << ' ' << p; // Something "system'y", pass as is. + }; + + auto opt = [] (const file&, + const string&, + bool, bool) + { + //@@ TODO: should we filter -L similar to -I? + //@@ TODO: how will the Libs/Libs.private work? + //@@ TODO: remember to use escape() + + /* + // If we need an interface value, then use the group (lib{}). + // + if (const target* g = exp && l.is_a<libs> () ? l.group : &l) + { + const variable& var ( + com + ? (exp ? c_export_loptions : c_loptions) + : (t == x + ? (exp ? x_export_loptions : x_loptions) + : var_pool[t + (exp ? ".export.loptions" : ".loptions")])); + + append_options (args, *g, var); + } + */ + }; + + // Pretend we are linking an executable using what would be normal, + // system-default link order. + // + linfo li {otype::e, la ? lorder::a_s : lorder::s_a}; + + process_libraries (a, bs, li, sys_lib_dirs, + l, la, 0, // Link flags. + imp, lib, opt, !binless); + os << endl; + + if (la) + { + os << "Libs.private:"; + + priv = true; + process_libraries (a, bs, li, sys_lib_dirs, + l, la, 0, // Link flags. + imp, lib, opt, false); + os << endl; + } + } + + // If we have modules, list them in the modules variable. We also save + // some extra info about them (yes, the rabbit hole runs deep). This + // code is pretty similar to compiler::search_modules(). + // + if (modules) + { + struct module + { + string name; + path file; + + string pp; + bool symexport; + }; + vector<module> modules; + + for (const target* pt: l.prerequisite_targets[a]) + { + // @@ UTL: we need to (recursively) see through libu*{} (and + // also in search_modules()). 
+ // + if (pt != nullptr && pt->is_a<bmix> ()) + { + // What we have is a binary module interface. What we need is + // a module interface source it was built from. We assume it's + // the first mxx{} target that we see. + // + // Be defensive about NULL entries, the same as for pt above. + // + const target* mt (nullptr); + for (const target* t: pt->prerequisite_targets[a]) + { + if (t != nullptr && (mt = t->is_a (*x_mod))) + break; + } + + // Can/should there be a bmi{} without mxx{}? Can't think of a + // reason. + // + assert (mt != nullptr); + + path p (install::resolve_file (mt->as<file> ())); + + if (p.empty ()) // Not installed. + continue; + + string pp; + if (const string* v = cast_null<string> ((*mt)[x_preprocessed])) + pp = *v; + + modules.push_back ( + module { + cast<string> (pt->state[a].vars[c_module_name]), + move (p), + move (pp), + symexport + }); + } + } + + if (!modules.empty ()) + { + os << endl + << "cxx_modules ="; + + // Module names shouldn't require escaping. + // + for (const module& m: modules) + os << ' ' << m.name << '=' << escape (m.file.string ()); + + os << endl; + + // Module-specific properties. The format is: + // + // <lang>_module_<property>.<module> = <value> + // + for (const module& m: modules) + { + if (!m.pp.empty ()) + os << "cxx_module_preprocessed." << m.name << " = " << m.pp + << endl; + + if (m.symexport) + os << "cxx_module_symexport." 
<< m.name << " = true" << endl; + } + } + } + + os.close (); + arm.cancel (); + } + catch (const io_error& e) + { + fail << "unable to write " << p << ": " << e; + } + } + } +} diff --git a/libbuild2/cc/target.cxx b/libbuild2/cc/target.cxx new file mode 100644 index 0000000..a438898 --- /dev/null +++ b/libbuild2/cc/target.cxx @@ -0,0 +1,101 @@ +// file : libbuild2/cc/target.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/target.hxx> + +#include <libbuild2/context.hxx> + +using namespace std; + +namespace build2 +{ + namespace cc + { + const target_type cc::static_type + { + "cc", + &file::static_type, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &target_search, + false + }; + + extern const char h_ext_def[] = "h"; + + const target_type h::static_type + { + "h", + &cc::static_type, + &target_factory<h>, + nullptr, /* fixed_extension */ + &target_extension_var<h_ext_def>, + &target_pattern_var<h_ext_def>, + nullptr, + &file_search, + false + }; + + extern const char c_ext_def[] = "c"; + + const target_type c::static_type + { + "c", + &cc::static_type, + &target_factory<c>, + nullptr, /* fixed_extension */ + &target_extension_var<c_ext_def>, + &target_pattern_var<c_ext_def>, + nullptr, + &file_search, + false + }; + + const target_type pc::static_type + { + "pc", + &file::static_type, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &target_search, + false + }; + + extern const char pca_ext[] = "static.pc"; // VC14 rejects constexpr. + + const target_type pca::static_type + { + "pca", + &pc::static_type, + &target_factory<pca>, + &target_extension_fix<pca_ext>, + nullptr, /* default_extension */ + &target_pattern_fix<pca_ext>, + &target_print_0_ext_verb, // Fixed extension, no use printing. + &file_search, + false + }; + + extern const char pcs_ext[] = "shared.pc"; // VC14 rejects constexpr. 
+ + const target_type pcs::static_type + { + "pcs", + &pc::static_type, + &target_factory<pcs>, + &target_extension_fix<pcs_ext>, + nullptr, /* default_extension */ + &target_pattern_fix<pcs_ext>, + &target_print_0_ext_verb, // Fixed extension, no use printing. + &file_search, + false + }; + } +} diff --git a/libbuild2/cc/target.hxx b/libbuild2/cc/target.hxx new file mode 100644 index 0000000..885bf68 --- /dev/null +++ b/libbuild2/cc/target.hxx @@ -0,0 +1,96 @@ +// file : libbuild2/cc/target.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_TARGET_HXX +#define LIBBUILD2_CC_TARGET_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> + +#include <libbuild2/cc/export.hxx> + +namespace build2 +{ + namespace cc + { + // This is an abstract base target for all c-common header/source files. + // We use this arrangement during rule matching to detect "unknown" (to + // this rule) source/header files that it cannot handle but should not + // ignore either. For example, a C link rule that sees a C++ source file. + // + class LIBBUILD2_CC_SYMEXPORT cc: public file + { + public: + using file::file; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const = 0; + }; + + // There is hardly a c-family compilation without a C header inclusion. + // As a result, this target type is registered for any c-family module. + // + class LIBBUILD2_CC_SYMEXPORT h: public cc + { + public: + using cc::cc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // This one we define in cc but the target type is only registered by the + // c module. 
This way we can implement rule chaining without jumping + // through too many hoops (like resolving target type dynamically) but + // also without relaxing things too much (i.e., the user still won't be + // able to refer to c{} without loading the c module). + // + class LIBBUILD2_CC_SYMEXPORT c: public cc + { + public: + using cc::cc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + // pkg-config file targets. + // + class LIBBUILD2_CC_SYMEXPORT pc: public file + { + public: + using file::file; + + public: + static const target_type static_type; + }; + + class LIBBUILD2_CC_SYMEXPORT pca: public pc // .static.pc + { + public: + using pc::pc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + + class LIBBUILD2_CC_SYMEXPORT pcs: public pc // .shared.pc + { + public: + using pc::pc; + + public: + static const target_type static_type; + virtual const target_type& dynamic_type () const {return static_type;} + }; + } +} + +#endif // LIBBUILD2_CC_TARGET_HXX diff --git a/libbuild2/cc/types.hxx b/libbuild2/cc/types.hxx new file mode 100644 index 0000000..280dcbf --- /dev/null +++ b/libbuild2/cc/types.hxx @@ -0,0 +1,116 @@ +// file : libbuild2/cc/types.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_TYPES_HXX +#define LIBBUILD2_CC_TYPES_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target-type.hxx> + +namespace build2 +{ + namespace cc + { + // Translation unit information. + // + // We use absolute and normalized header path as the header unit module + // name. 
+ // + // Note that our terminology doesn't exactly align with the (current) + // standard where a header unit is not a module (that is, you either + // import a "module [interface translation unit]" or a "[synthesized] + // header [translation] unit"). On the other hand, lots of the underlying + // mechanics suggest that a header unit is module-like; they end up having + // BMIs (which stand for "binary module interface"), etc. In a sense, a + // header unit is an "interface unit" for (a part of) the global module + // (maybe a partition). + // + enum class unit_type + { + non_modular, + module_iface, + module_impl, + module_header + }; + + struct module_import + { + unit_type type; // Either module_iface or module_header. + string name; + bool exported; // True if re-exported (export import M;). + size_t score; // Match score (see compile::search_modules()). + }; + + using module_imports = vector<module_import>; + + struct module_info + { + string name; // Empty if non-modular. + module_imports imports; // Imported modules. + }; + + struct unit + { + unit_type type = unit_type::non_modular; + build2::cc::module_info module_info; + }; + + // Compiler language. + // + enum class lang {c, cxx}; + + inline ostream& + operator<< (ostream& os, lang l) + { + return os << (l == lang::c ? "C" : "C++"); + } + + // Compile/link output type (executable, static, or shared). + // + enum class otype {e, a, s}; + + struct ltype + { + otype type; + bool utility; // True for utility libraries. + + bool executable () const {return type == otype::e && !utility;} + bool library () const {return type != otype::e || utility;} + bool static_library () const {return type == otype::a || utility;} + bool shared_library () const {return type == otype::s && !utility;} + bool member_library () const {return type != otype::e;} + }; + + // Compile target types. 
+ // + struct compile_target_types + { + const target_type& obj; + const target_type& bmi; + const target_type& hbmi; + }; + + // Library link order. + // + enum class lorder {a, s, a_s, s_a}; + + // Link information: output type and link order. + // + struct linfo + { + otype type; + lorder order; + }; + + // Prerequisite link flags. + // + using lflags = uintptr_t; // To match prerequisite_target::data. + + const lflags lflag_whole = 0x00000001U; // Link whole liba{}/libu*}. + } +} + +#endif // LIBBUILD2_CC_TYPES_HXX diff --git a/libbuild2/cc/utility.cxx b/libbuild2/cc/utility.cxx new file mode 100644 index 0000000..07f3b2e --- /dev/null +++ b/libbuild2/cc/utility.cxx @@ -0,0 +1,114 @@ +// file : libbuild2/cc/utility.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/cc/utility.hxx> + +#include <libbuild2/file.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> // search() + +#include <libbuild2/bin/rule.hxx> +#include <libbuild2/bin/target.hxx> + +using namespace std; + +namespace build2 +{ + namespace cc + { + using namespace bin; + + const dir_path module_dir ("cc"); + const dir_path modules_sidebuild_dir (dir_path (module_dir) /= "modules"); + + lorder + link_order (const scope& bs, otype ot) + { + // Initialize to suppress 'may be used uninitialized' warning produced + // by MinGW GCC 5.4.0. + // + const char* var (nullptr); + + switch (ot) + { + case otype::e: var = "bin.exe.lib"; break; + case otype::a: var = "bin.liba.lib"; break; + case otype::s: var = "bin.libs.lib"; break; + } + + const auto& v (cast<strings> (bs[var])); + return v[0] == "shared" + ? v.size () > 1 && v[1] == "static" ? lorder::s_a : lorder::s + : v.size () > 1 && v[1] == "shared" ? 
lorder::a_s : lorder::a; + } + + const target* + link_member (const bin::libx& x, action a, linfo li, bool exist) + { + if (x.is_a<libul> ()) + { + // For libul{} that is linked to an executable the member choice + // should be dictated by the members of lib{} this libul{} is + // "primarily" for. If both are being built, then it seems natural to + // prefer static over shared since it could be faster (but I am sure + // someone will probably want this configurable). + // + if (li.type == otype::e) + { + // Utility libraries are project-local which means the primarily + // target should be in the same project as us. + // + li.type = lib_rule::build_members (x.root_scope ()).a + ? otype::a + : otype::s; + } + + const target_type& tt (li.type == otype::a + ? libua::static_type + : libus::static_type); + + // Called by the compile rule during execute. + // + return x.ctx.phase == run_phase::match && !exist + ? &search (x, tt, x.dir, x.out, x.name) + : search_existing (x.ctx, tt, x.dir, x.out, x.name); + } + else + { + assert (!exist); + + const lib& l (x.as<lib> ()); + + // Make sure group members are resolved. + // + group_view gv (resolve_members (a, l)); + assert (gv.members != nullptr); + + lorder lo (li.order); + + bool ls (true); + switch (lo) + { + case lorder::a: + case lorder::a_s: + ls = false; // Fall through. + case lorder::s: + case lorder::s_a: + { + if (ls ? l.s == nullptr : l.a == nullptr) + { + if (lo == lorder::a_s || lo == lorder::s_a) + ls = !ls; + else + fail << (ls ? "shared" : "static") << " variant of " << l + << " is not available"; + } + } + } + + return ls ? 
static_cast<const target*> (l.s) : l.a; + } + } + } +} diff --git a/libbuild2/cc/utility.hxx b/libbuild2/cc/utility.hxx new file mode 100644 index 0000000..3ee07bd --- /dev/null +++ b/libbuild2/cc/utility.hxx @@ -0,0 +1,73 @@ +// file : libbuild2/cc/utility.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_CC_UTILITY_HXX +#define LIBBUILD2_CC_UTILITY_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/target.hxx> +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/types.hxx> + +namespace build2 +{ + struct variable; + + namespace cc + { + // To form the complete path do: + // + // root.out_path () / root.root_extra->build_dir / module_dir + // + extern const dir_path module_dir; // cc/ + extern const dir_path modules_sidebuild_dir; // cc/modules/ + + // Compile output type. + // + otype + compile_type (const target&, unit_type); + + compile_target_types + compile_types (otype); + + // Link output type. + // + ltype + link_type (const target&); + + // Library link order. + // + // The reason we pass scope and not the target is because this function is + // called not only for exe/lib but also for obj as part of the library + // meta-information protocol implementation. Normally the bin.*.lib values + // will be project-wide. With this scheme they can be customized on the + // per-directory basis but not per-target which means all exe/lib in the + // same directory have to have the same link order. + // + lorder + link_order (const scope& base, otype); + + inline linfo + link_info (const scope& base, otype ot) + { + return linfo {ot, link_order (base, ot)}; + } + + // Given the link order return the library member to link. That is, liba{} + // or libs{} for lib{} and libua{} or libus{} for libul{}. 
+ // + // If existing is true, then only return the member target if it exists + // (currently only used and supported for utility libraries). + // + const target* + link_member (const bin::libx&, action, linfo, bool existing = false); + } +} + +#include <libbuild2/cc/utility.ixx> + +#endif // LIBBUILD2_CC_UTILITY_HXX diff --git a/libbuild2/cc/utility.ixx b/libbuild2/cc/utility.ixx new file mode 100644 index 0000000..1509bf2 --- /dev/null +++ b/libbuild2/cc/utility.ixx @@ -0,0 +1,73 @@ +// file : libbuild2/cc/utility.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + namespace cc + { + inline otype + compile_type (const target& t, unit_type u) + { + using namespace bin; + + auto test = [&t, u] (const auto& h, const auto& i, const auto& o) + { + return t.is_a (u == unit_type::module_header ? h : + u == unit_type::module_iface ? i : + o); + }; + + return + test (hbmie::static_type, bmie::static_type, obje::static_type) ? otype::e : + test (hbmia::static_type, bmia::static_type, obja::static_type) ? otype::a : + otype::s; + } + + inline ltype + link_type (const target& t) + { + using namespace bin; + + bool u (false); + otype o ( + t.is_a<exe> () || (u = t.is_a<libue> ()) ? otype::e : + t.is_a<liba> () || (u = t.is_a<libua> ()) ? otype::a : + t.is_a<libs> () || (u = t.is_a<libus> ()) ? 
otype::s : + static_cast<otype> (0xFF)); + + return ltype {o, u}; + } + + inline compile_target_types + compile_types (otype t) + { + using namespace bin; + + const target_type* o (nullptr); + const target_type* i (nullptr); + const target_type* h (nullptr); + + switch (t) + { + case otype::e: + o = &obje::static_type; + i = &bmie::static_type; + h = &hbmie::static_type; + break; + case otype::a: + o = &obja::static_type; + i = &bmia::static_type; + h = &hbmia::static_type; + break; + case otype::s: + o = &objs::static_type; + i = &bmis::static_type; + h = &hbmis::static_type; + break; + } + + return compile_target_types {*o, *i, *h}; + } + } +} diff --git a/libbuild2/cc/windows-manifest.cxx b/libbuild2/cc/windows-manifest.cxx new file mode 100644 index 0000000..8d67f0c --- /dev/null +++ b/libbuild2/cc/windows-manifest.cxx @@ -0,0 +1,143 @@ +// file : libbuild2/cc/windows-manifest.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/scope.hxx> +#include <libbuild2/target.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/cc/link-rule.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + // Translate the compiler target CPU value to the processorArchitecture + // attribute value. + // + const char* + windows_manifest_arch (const string& tcpu) + { + const char* pa (tcpu == "i386" || tcpu == "i686" ? "x86" : + tcpu == "x86_64" ? "amd64" : + nullptr); + + if (pa == nullptr) + fail << "unable to translate CPU " << tcpu << " to manifest " + << "processor architecture"; + + return pa; + } + + // Generate a Windows manifest and if necessary create/update the manifest + // file corresponding to the exe{} target. Return the manifest file path + // and its timestamp if unchanged or timestamp_nonexistent otherwise. 
+ // + pair<path, timestamp> link_rule:: + windows_manifest (const file& t, bool rpath_assembly) const + { + tracer trace (x, "link_rule::windows_manifest"); + + const scope& rs (t.root_scope ()); + + const char* pa (windows_manifest_arch (cast<string> (rs[x_target_cpu]))); + + string m; + + m += "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n"; + m += "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n"; + m += " manifestVersion='1.0'>\n"; + + // Program name, version, etc. + // + string name (t.path ().leaf ().string ()); + + m += " <assemblyIdentity name='"; m += name; m += "'\n"; + m += " type='win32'\n"; + m += " processorArchitecture='"; m += pa; m += "'\n"; + m += " version='0.0.0.0'/>\n"; + + // Our rpath-emulating assembly. + // + if (rpath_assembly) + { + m += " <dependency>\n"; + m += " <dependentAssembly>\n"; + m += " <assemblyIdentity name='"; m += name; m += ".dlls'\n"; + m += " type='win32'\n"; + m += " processorArchitecture='"; m += pa; m += "'\n"; + m += " language='*'\n"; + m += " version='0.0.0.0'/>\n"; + m += " </dependentAssembly>\n"; + m += " </dependency>\n"; + } + + // UAC information. Without it Windows will try to guess, which, as you + // can imagine, doesn't end well. + // + m += " <trustInfo xmlns='urn:schemas-microsoft-com:asm.v3'>\n"; + m += " <security>\n"; + m += " <requestedPrivileges>\n"; + m += " <requestedExecutionLevel level='asInvoker' uiAccess='false'/>\n"; + m += " </requestedPrivileges>\n"; + m += " </security>\n"; + m += " </trustInfo>\n"; + + m += "</assembly>\n"; + + // If the manifest file exists, compare to its content. If nothing + // changed (common case), then we can avoid any further updates. + // + // The potentially faster alternative would be to hash it and store an + // entry in depdb. This, however, gets a bit complicated since we will + // need to avoid a race between the depdb and .manifest updates. 
+ // + path mf (t.path () + ".manifest"); + + timestamp mt (mtime (mf)); + + if (mt != timestamp_nonexistent) + { + try + { + ifdstream is (mf); + if (is.read_text () == m) + return make_pair (move (mf), mt); + } + catch (const io_error&) + { + // Whatever the reason we failed for, let's rewrite the file. + } + } + + if (verb >= 3) + text << "cat >" << mf; + + if (!t.ctx.dry_run) + { + auto_rmfile rm (mf); + + try + { + ofdstream os (mf); + os << m; + os.close (); + rm.cancel (); + + } + catch (const io_error& e) + { + fail << "unable to write to " << mf << ": " << e; + } + } + + return make_pair (move (mf), timestamp_nonexistent); + } + } +} diff --git a/libbuild2/cc/windows-rpath.cxx b/libbuild2/cc/windows-rpath.cxx new file mode 100644 index 0000000..5583315 --- /dev/null +++ b/libbuild2/cc/windows-rpath.cxx @@ -0,0 +1,400 @@ +// file : libbuild2/cc/windows-rpath.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <errno.h> // E* + +#include <libbuild2/scope.hxx> +#include <libbuild2/context.hxx> +#include <libbuild2/variable.hxx> +#include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> +#include <libbuild2/diagnostics.hxx> + +#include <libbuild2/bin/target.hxx> + +#include <libbuild2/cc/link-rule.hxx> + +using namespace std; +using namespace butl; + +namespace build2 +{ + namespace cc + { + // Provide limited emulation of the rpath functionality on Windows using a + // side-by-side assembly. In a nutshell, the idea is to create an assembly + // with links to all the prerequisite DLLs. + // + // Note that currently our assemblies contain all the DLLs that the + // executable depends on, recursively. The alternative approach could be + // to also create assemblies for DLLs. This appears to be possible (but we + // will have to use the resource ID 2 for such a manifest). 
And it will + // probably be necessary for DLLs that are loaded dynamically with + // LoadLibrary(). The tricky part is how such nested assemblies will be + // found. Since we are effectively (from the loader's point of view) + // copying the DLLs, we will also have to copy their assemblies (because + // the loader looks for them in the same directory as the DLL). It's not + // clear how well such nested assemblies are supported (e.g., in Wine). + // + // What if the DLL is in the same directory as the executable, will it + // still be found even if there is an assembly? On the other hand, + // handling it as any other won't hurt us much. + // + using namespace bin; + + // Return the greatest (newest) timestamp of all the DLLs that we will be + // adding to the assembly or timestamp_nonexistent if there aren't any. + // + timestamp link_rule:: + windows_rpath_timestamp (const file& t, + const scope& bs, + action a, + linfo li) const + { + timestamp r (timestamp_nonexistent); + + // We need to collect all the DLLs, so go into implementation of both + // shared and static (in case they depend on shared). + // + auto imp = [] (const file&, bool) {return true;}; + + auto lib = [&r] (const file* const* lc, + const string& f, + lflags, + bool sys) + { + const file* l (lc != nullptr ? *lc : nullptr); + + // We don't rpath system libraries. + // + if (sys) + return; + + // Skip static libraries. + // + if (l != nullptr) + { + // This can be an "undiscovered" DLL (see search_library()). + // + if (!l->is_a<libs> () || l->path ().empty ()) // Also covers binless. + return; + } + else + { + // This is an absolute path and we need to decide whether it is + // a shared or static library. + // + // @@ This is so broken: we don't link to DLLs, we link to .lib or + // .dll.a! Should we even bother? Maybe only for "our" DLLs + // (.dll.lib/.dll.a)? But the DLL can also be in a different + // directory (lib/../bin). + // + // Though this can happen on MinGW with direct DLL link... 
+ // + size_t p (path::traits_type::find_extension (f)); + + if (p == string::npos || casecmp (f.c_str () + p + 1, "dll") != 0) + return; + } + + // Ok, this is a DLL. + // + timestamp t (l != nullptr + ? l->load_mtime () + : mtime (f.c_str ())); + + if (t > r) + r = t; + }; + + for (const prerequisite_target& pt: t.prerequisite_targets[a]) + { + if (pt == nullptr || pt.adhoc) + continue; + + bool la; + const file* f; + + if ((la = (f = pt->is_a<liba> ())) || + (la = (f = pt->is_a<libux> ())) || // See through. + ( f = pt->is_a<libs> ())) + process_libraries (a, bs, li, sys_lib_dirs, + *f, la, pt.data, + imp, lib, nullptr, true); + } + + return r; + } + + // Like *_timestamp() but actually collect the DLLs (and weed out the + // duplicates). + // + auto link_rule:: + windows_rpath_dlls (const file& t, + const scope& bs, + action a, + linfo li) const -> windows_dlls + { + windows_dlls r; + + auto imp = [] (const file&, bool) {return true;}; + + auto lib = [&r, &bs] (const file* const* lc, + const string& f, + lflags, + bool sys) + { + const file* l (lc != nullptr ? *lc : nullptr); + + if (sys) + return; + + if (l != nullptr) + { + if (l->is_a<libs> () && !l->path ().empty ()) // Also covers binless. + { + // Get .pdb if there is one. + // + const target_type* tt (bs.find_target_type ("pdb")); + const target* pdb (tt != nullptr + ? find_adhoc_member (*l, *tt) + : nullptr); + r.insert ( + windows_dll { + f, + pdb != nullptr ? &pdb->as<file> ().path ().string () : nullptr, + string () + }); + } + } + else + { + size_t p (path::traits_type::find_extension (f)); + + if (p != string::npos && casecmp (f.c_str () + p + 1, "dll") == 0) + { + // See if we can find a corresponding .pdb. + // + windows_dll wd {f, nullptr, string ()}; + string& pdb (wd.pdb_storage); + + // First try "our" naming: foo.dll.pdb. + // + pdb = f; + pdb += ".pdb"; + + if (!exists (path (pdb))) + { + // Then try the usual naming: foo.pdb. 
+ // + pdb.assign (f, 0, p); + pdb += ".pdb"; + + if (!exists (path (pdb))) + pdb.clear (); + } + + if (!pdb.empty ()) + wd.pdb = &pdb; + + r.insert (move (wd)); + } + } + }; + + for (const prerequisite_target& pt: t.prerequisite_targets[a]) + { + if (pt == nullptr || pt.adhoc) + continue; + + bool la; + const file* f; + + if ((la = (f = pt->is_a<liba> ())) || + (la = (f = pt->is_a<libux> ())) || // See through. + ( f = pt->is_a<libs> ())) + process_libraries (a, bs, li, sys_lib_dirs, + *f, la, pt.data, + imp, lib, nullptr, true); + } + + return r; + } + + const char* + windows_manifest_arch (const string& tcpu); // windows-manifest.cxx + + // The ts argument should be the DLLs timestamp returned by *_timestamp(). + // + // The scratch argument should be true if the DLL set has changed and we + // need to regenerate everything from scratch. Otherwise, we try to avoid + // unnecessary work by comparing the DLLs timestamp against the assembly + // manifest file. + // + void link_rule:: + windows_rpath_assembly (const file& t, + const scope& bs, + action a, + linfo li, + const string& tcpu, + timestamp ts, + bool scratch) const + { + // Assembly paths and name. + // + dir_path ad (path_cast<dir_path> (t.path () + ".dlls")); + string an (ad.leaf ().string ()); + path am (ad / path (an + ".manifest")); + + // First check if we actually need to do anything. Since most of the + // time we won't, we don't want to combine it with the *_dlls() call + // below which allocates memory, etc. + // + if (!scratch) + { + // The corner case here is when _timestamp() returns nonexistent + // signalling that there aren't any DLLs but the assembly manifest + // file exists. This, however, can only happen if we somehow managed + // to transition from the "have DLLs" state to "no DLLs" without going + // through the "from scratch" update. Actually this can happen when + // switching to update-for-install. 
+ // + if (ts != timestamp_nonexistent && ts <= mtime (am)) + return; + } + + // Next collect the set of DLLs that will be in our assembly. We need to + // do this recursively which means we may end up with duplicates. Also, + // it is possible that there aren't/no longer are any DLLs which means + // we just need to clean things up. + // + bool empty (ts == timestamp_nonexistent); + + windows_dlls dlls; + if (!empty) + dlls = windows_rpath_dlls (t, bs, a, li); + + // Clean the assembly directory and make sure it exists. Maybe it would + // have been faster to overwrite the existing manifest rather than + // removing the old one and creating a new one. But this is definitely + // simpler. + // + { + rmdir_status s (rmdir_r (t.ctx, ad, empty, 3)); + + if (empty) + return; + + if (s == rmdir_status::not_exist) + mkdir (ad, 3); + } + + // Symlink or copy the DLLs. + // + { + const scope& as (t.weak_scope ()); // Amalgamation. + + auto link = [&as] (const path& f, const path& l) + { + auto print = [&f, &l] (const char* cmd) + { + if (verb >= 3) + text << cmd << ' ' << f << ' ' << l; + }; + + // First we try to create a symlink. If that fails (e.g., "Windows + // happens"), then we resort to hard links. If that doesn't work + // out either (e.g., not on the same filesystem), then we fall back + // to copies. + // + // For the symlink use a relative target path if both paths are part + // of the same amalgamation. This way if the amalgamation is moved + // as a whole, the links will remain valid. 
+ // + try + { + switch (mkanylink (f, l, + true /* copy */, + f.sub (as.out_path ()) /* relative */)) + { + case entry_type::regular: print ("cp"); break; + case entry_type::symlink: print ("ln -s"); break; + case entry_type::other: print ("ln"); break; + default: assert (false); + } + } + catch (const pair<entry_type, system_error>& e) + { + const char* w (nullptr); + switch (e.first) + { + case entry_type::regular: print ("cp"); w = "copy"; break; + case entry_type::symlink: print ("ln -s"); w = "symlink"; break; + case entry_type::other: print ("ln"); w = "hardlink"; break; + default: assert (false); + } + + fail << "unable to make " << w << ' ' << l << ": " << e.second; + } + }; + + for (const windows_dll& wd: dlls) + { + //@@ Would be nice to avoid copying. Perhaps reuse buffers + // by adding path::assign() and traits::leaf(). + // + path dp (wd.dll); // DLL path. + path dn (dp.leaf ()); // DLL name. + + link (dp, ad / dn); + + // Link .pdb if there is one. + // + if (wd.pdb != nullptr) + { + path pp (*wd.pdb); + link (pp, ad / pp.leaf ()); + } + } + } + + if (verb >= 3) + text << "cat >" << am; + + if (t.ctx.dry_run) + return; + + auto_rmfile rm (am); + + try + { + ofdstream os (am); + + const char* pa (windows_manifest_arch (tcpu)); + + os << "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n" + << "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n" + << " manifestVersion='1.0'>\n" + << " <assemblyIdentity name='" << an << "'\n" + << " type='win32'\n" + << " processorArchitecture='" << pa << "'\n" + << " version='0.0.0.0'/>\n"; + + + + for (const windows_dll& wd: dlls) + os << " <file name='" << path (wd.dll).leaf () << "'/>\n"; + + os << "</assembly>\n"; + + os.close (); + rm.cancel (); + } + catch (const io_error& e) + { + fail << "unable to write to " << am << ": " << e; + } + } + } +} diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx index bb7c61d..4e7080a 100644 --- a/libbuild2/module.cxx +++ b/libbuild2/module.cxx @@ -39,6 +39,7 @@ 
namespace build2 static const char* bundled_modules[] = { "bash", "bin", + "cc", "in", "version" }; |