authorKaren Arutyunov <karen@codesynthesis.com>2019-08-24 17:41:30 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2019-08-28 15:01:48 +0300
commit4bdf53837e010073de802070d4e6087410662d3e (patch)
tree2820d3964877d1a7d498833da325aa3d3a699353 /libbuild2
parentea24f530048cbce0c5335ca3fd3632c8ce34315a (diff)
Move cc build system module to separate library
Diffstat (limited to 'libbuild2')
-rw-r--r--  libbuild2/buildfile  2
-rw-r--r--  libbuild2/cc/buildfile  74
-rw-r--r--  libbuild2/cc/common.cxx  1031
-rw-r--r--  libbuild2/cc/common.hxx  358
-rw-r--r--  libbuild2/cc/compile-rule.cxx  6098
-rw-r--r--  libbuild2/cc/compile-rule.hxx  189
-rw-r--r--  libbuild2/cc/export.hxx  38
-rw-r--r--  libbuild2/cc/gcc.cxx  263
-rw-r--r--  libbuild2/cc/guess.cxx  1892
-rw-r--r--  libbuild2/cc/guess.hxx  246
-rw-r--r--  libbuild2/cc/init.cxx  493
-rw-r--r--  libbuild2/cc/init.hxx  36
-rw-r--r--  libbuild2/cc/install-rule.cxx  355
-rw-r--r--  libbuild2/cc/install-rule.hxx  82
-rw-r--r--  libbuild2/cc/lexer+char-literal.test.testscript  67
-rw-r--r--  libbuild2/cc/lexer+comment.test.testscript  88
-rw-r--r--  libbuild2/cc/lexer+line.test.testscript  67
-rw-r--r--  libbuild2/cc/lexer+number.test.testscript  48
-rw-r--r--  libbuild2/cc/lexer+preprocessor.test.testscript  73
-rw-r--r--  libbuild2/cc/lexer+raw-string-literal.test.testscript  90
-rw-r--r--  libbuild2/cc/lexer+string-literal.test.testscript  65
-rw-r--r--  libbuild2/cc/lexer.cxx  1129
-rw-r--r--  libbuild2/cc/lexer.hxx  190
-rw-r--r--  libbuild2/cc/lexer.test.cxx  80
-rw-r--r--  libbuild2/cc/link-rule.cxx  3043
-rw-r--r--  libbuild2/cc/link-rule.hxx  188
-rw-r--r--  libbuild2/cc/module.cxx  781
-rw-r--r--  libbuild2/cc/module.hxx  103
-rw-r--r--  libbuild2/cc/msvc.cxx  502
-rw-r--r--  libbuild2/cc/parser+module.test.testscript  147
-rw-r--r--  libbuild2/cc/parser.cxx  263
-rw-r--r--  libbuild2/cc/parser.hxx  55
-rw-r--r--  libbuild2/cc/parser.test.cxx  67
-rw-r--r--  libbuild2/cc/pkgconfig.cxx  1550
-rw-r--r--  libbuild2/cc/target.cxx  101
-rw-r--r--  libbuild2/cc/target.hxx  96
-rw-r--r--  libbuild2/cc/types.hxx  116
-rw-r--r--  libbuild2/cc/utility.cxx  114
-rw-r--r--  libbuild2/cc/utility.hxx  73
-rw-r--r--  libbuild2/cc/utility.ixx  73
-rw-r--r--  libbuild2/cc/windows-manifest.cxx  143
-rw-r--r--  libbuild2/cc/windows-rpath.cxx  400
-rw-r--r--  libbuild2/module.cxx  1
43 files changed, 20869 insertions, 1 deletion
diff --git a/libbuild2/buildfile b/libbuild2/buildfile
index 57f4895..aad4e78 100644
--- a/libbuild2/buildfile
+++ b/libbuild2/buildfile
@@ -5,7 +5,7 @@
# NOTE: remember to update bundled_modules in libbuild2/module.cxx if adding
# a new module.
#
-./: lib{build2} bash/ bin/ in/ version/
+./: lib{build2} bash/ bin/ cc/ in/ version/
import int_libs = libbutl%lib{butl}
diff --git a/libbuild2/cc/buildfile b/libbuild2/cc/buildfile
new file mode 100644
index 0000000..5b3d8eb
--- /dev/null
+++ b/libbuild2/cc/buildfile
@@ -0,0 +1,74 @@
+# file : libbuild2/cc/buildfile
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+import int_libs = libbutl%lib{butl}
+import imp_libs = libpkgconf%lib{pkgconf}
+
+include ../
+int_libs += ../lib{build2}
+
+include ../bin/
+int_libs += ../bin/lib{build2-bin}
+
+./: lib{build2-cc}: libul{build2-cc}: {hxx ixx txx cxx}{** -**.test...} \
+ $imp_libs $int_libs
+
+# Unit tests.
+#
+exe{*.test}:
+{
+ test = true
+ install = false
+}
+
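+# For each cxx{*.test...} source below we assemble a test executable. For
+# example, cxx{lexer.test...} produces exe{lexer.test...} together with the
+# matching testscript{lexer+*.test...} files (e.g.,
+# lexer+comment.test.testscript) as its tests.
+#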
+for t: cxx{**.test...}
+{
+ d = $directory($t)
+ n = $name($t)...
+ b = $path.base($name($t))
+
+ ./: $d/exe{$n}: $t $d/{hxx ixx txx}{+$n} $d/testscript{+$n +$b+*.test...}
+ $d/exe{$n}: libul{build2-cc}: bin.whole = false
+}
+
+# Build options.
+#
+obja{*}: cxx.poptions += -DLIBBUILD2_CC_STATIC_BUILD
+objs{*}: cxx.poptions += -DLIBBUILD2_CC_SHARED_BUILD
+
+# Export options.
+#
+lib{build2-cc}:
+{
+ cxx.export.poptions = "-I$out_root" "-I$src_root"
+ cxx.export.libs = $int_libs
+}
+
+liba{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_STATIC
+libs{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_SHARED
+
+# For pre-releases use the complete version to make sure they cannot be used
+# in place of another pre-release or the final version. See the version module
+# for details on the version.* variable values.
+#
+# And because this is a build system module, we also embed the same value as
+# the interface version (note that we cannot use build.version.interface for
+# bundled modules because we could be built with a different version of the
+# build system).
+#
+ver = ($version.pre_release \
+ ? "$version.project_id" \
+ : "$version.major.$version.minor")
+
+lib{build2-cc}: bin.lib.version = @"-$ver"
+libs{build2-cc}: bin.lib.load_suffix = "-$ver"
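+
+# For example, for release 0.12.0 the suffix is -0.12 (so on Linux the shared
+# library would be named libbuild2-cc-0.12.so) while a pre-release embeds the
+# complete version.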
+
+# Install into the libbuild2/cc/ subdirectory of, say, /usr/include/
+# recreating subdirectories.
+#
+{hxx ixx txx}{*}:
+{
+ install = include/libbuild2/cc/
+ install.subdirs = true
+}
diff --git a/libbuild2/cc/common.cxx b/libbuild2/cc/common.cxx
new file mode 100644
index 0000000..bfcb00c
--- /dev/null
+++ b/libbuild2/cc/common.cxx
@@ -0,0 +1,1031 @@
+// file : libbuild2/cc/common.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/file.hxx> // import()
+#include <libbuild2/scope.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/utility.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // Recursively process prerequisite libraries. If proc_impl returns false,
+ // then only process interface (*.export.libs), otherwise -- interface and
+ // implementation (prerequisite and from *.libs, unless overridden).
+ //
+ // Note that here we assume that an interface library is also an
+ // implementation (since we don't use *.export.libs in static link). We
+ // currently have this restriction to make sure the target in
+ // *.export.libs is up-to-date (which will happen automatically if it is
+ // listed as a prerequisite of this library).
+ //
+ // Storing a reference to library path in proc_lib is legal (it comes
+ // either from the target's path or from one of the *.libs variables
+ // neither of which should change on this run).
+ //
+ // Note that the order of processing is:
+ //
+ // 1. options
+ // 2. lib itself (if self is true)
+ // 3. dependency libs (prerequisite_targets, left to right, depth-first)
+ // 4. dependency libs (*.libs variables).
+ //
+ // The first argument to proc_lib is a pointer to the last element of an
+ // array that contains the current library dependency chain all the way to
+ // the library passed to process_libraries(). The first element of this
+ // array is NULL.
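+ //
+ // For example, a caller can collect the paths of all the non-system
+ // libraries this library depends on along these lines (a sketch; r is
+ // assumed to be a strings variable):
+ //
+ //   auto imp = [] (const file&, bool la) {return la;};
+ //
+ //   auto lib = [&r] (const file* const*, const string& p, lflags, bool s)
+ //   {
+ //     if (!s) r.push_back (p);
+ //   };
+ //
+ //   process_libraries (a, bs, li, sys_lib_dirs,
+ //                      l, l.is_a<liba> (), 0,
+ //                      imp, lib, nullptr);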
+ //
+ void common::
+ process_libraries (
+ action a,
+ const scope& top_bs,
+ linfo top_li,
+ const dir_paths& top_sysd,
+ const file& l,
+ bool la,
+ lflags lf,
+ const function<bool (const file&,
+ bool la)>& proc_impl, // Implementation?
+ const function<void (const file* const*, // Can be NULL.
+ const string& path, // Library path.
+ lflags, // Link flags.
+ bool sys)>& proc_lib, // True if system library.
+ const function<void (const file&,
+ const string& type, // cc.type
+ bool com, // cc. or x.
+ bool exp)>& proc_opt, // *.export.
+ bool self /*= false*/, // Call proc_lib on l?
+ small_vector<const file*, 16>* chain) const
+ {
+ small_vector<const file*, 16> chain_storage;
+ if (chain == nullptr)
+ {
+ chain = &chain_storage;
+ chain->push_back (nullptr);
+ }
+
+ // See what type of library this is (C, C++, etc). Use it to decide
+ // which x.libs variable name to use. If it's unknown, then we only
+ // look into prerequisites. Note: lookup starting from rule-specific
+ // variables (target should already be matched).
+ //
+ const string* t (cast_null<string> (l.state[a][c_type]));
+
+ bool impl (proc_impl && proc_impl (l, la));
+ bool cc (false), same (false);
+
+ auto& vp (top_bs.ctx.var_pool);
+ lookup c_e_libs;
+ lookup x_e_libs;
+
+ if (t != nullptr)
+ {
+ cc = *t == "cc";
+ same = !cc && *t == x;
+
+ // The explicit export override should be set on the liba/libs{}
+ // target itself. Note also that we only check for *.libs. If one
+ // doesn't have any libraries but needs to set, say, *.loptions, then
+ // *.libs should be set to NULL or empty (this is why we check for
+ // the result being defined).
+ //
+ if (impl)
+ c_e_libs = l.vars[c_export_libs]; // Override.
+ else if (l.group != nullptr) // lib{} group.
+ c_e_libs = l.group->vars[c_export_libs];
+
+ if (!cc)
+ {
+ const variable& var (same
+ ? x_export_libs
+ : vp[*t + ".export.libs"]);
+
+ if (impl)
+ x_e_libs = l.vars[var]; // Override.
+ else if (l.group != nullptr) // lib{} group.
+ x_e_libs = l.group->vars[var];
+ }
+
+ // Process options first.
+ //
+ if (proc_opt)
+ {
+ // If all we know is it's a C-common library, then in both cases we
+ // only look for cc.export.*.
+ //
+ if (cc)
+ proc_opt (l, *t, true, true);
+ else
+ {
+ if (impl)
+ {
+ // Interface and implementation: as discussed above, we can have
+ // two situations: overridden export or default export.
+ //
+ if (c_e_libs.defined () || x_e_libs.defined ())
+ {
+ // NOTE: should this not be from l.vars rather than l? Or
+ // perhaps we can assume non-common values will be set on
+ // libs{}/liba{}.
+ //
+ proc_opt (l, *t, true, true);
+ proc_opt (l, *t, false, true);
+ }
+ else
+ {
+ // For default export we use the same options as were used to
+ // build the library.
+ //
+ proc_opt (l, *t, true, false);
+ proc_opt (l, *t, false, false);
+ }
+ }
+ else
+ {
+ // Interface: only add *.export.* (interface dependencies).
+ //
+ proc_opt (l, *t, true, true);
+ proc_opt (l, *t, false, true);
+ }
+ }
+ }
+ }
+
+ // Determine if an absolute path is to a system library. Note that
+ // we assume both paths to be normalized.
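+ // For example, with sysd containing /usr/lib, /usr/lib/libfoo.a is a
+ // system library while /opt/lib/libfoo.a is not.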
+ //
+ auto sys = [] (const dir_paths& sysd, const string& p) -> bool
+ {
+ size_t pn (p.size ());
+
+ for (const dir_path& d: sysd)
+ {
+ const string& ds (d.string ()); // Can be "/", otherwise no slash.
+ size_t dn (ds.size ());
+
+ if (pn > dn &&
+ p.compare (0, dn, ds) == 0 &&
+ (path::traits_type::is_separator (ds[dn - 1]) ||
+ path::traits_type::is_separator (p[dn])))
+ return true;
+ }
+
+ return false;
+ };
+
+ // Next process the library itself if requested.
+ //
+ if (self && proc_lib)
+ {
+ chain->push_back (&l);
+
+ // Note that while normally the path is assigned, in case of an import
+ // stub the path to the DLL may not be known and so the path will be
+ // empty (but proc_lib() will use the import stub).
+ //
+ const path& p (l.path ());
+
+ bool s (t != nullptr // If cc library (matched or imported).
+ ? cast_false<bool> (l.vars[c_system])
+ : !p.empty () && sys (top_sysd, p.string ()));
+
+ proc_lib (&chain->back (), p.string (), lf, s);
+ }
+
+ const scope& bs (t == nullptr || cc ? top_bs : l.base_scope ());
+ optional<linfo> li; // Calculate lazily.
+ const dir_paths* sysd (nullptr); // Resolve lazily.
+
+ // Find system search directories corresponding to this library, i.e.,
+ // from its project and for its type (C, C++, etc).
+ //
+ auto find_sysd = [&top_sysd, t, cc, same, &bs, &sysd, this] ()
+ {
+ // Use the search dirs corresponding to this library scope/type.
+ //
+ sysd = (t == nullptr || cc)
+ ? &top_sysd // Imported library, use importer's sysd.
+ : &cast<dir_paths> (
+ bs.root_scope ()->vars[same
+ ? x_sys_lib_dirs
+ : bs.ctx.var_pool[*t + ".sys_lib_dirs"]]);
+ };
+
+ auto find_linfo = [top_li, t, cc, &bs, &l, &li] ()
+ {
+ li = (t == nullptr || cc)
+ ? top_li
+ : link_info (bs, link_type (l).type);
+ };
+
+ // Only go into prerequisites (implementation) if instructed and we are
+ // not using explicit export. Otherwise, interface dependencies come
+ // from the lib{}:*.export.libs below.
+ //
+ if (impl && !c_e_libs.defined () && !x_e_libs.defined ())
+ {
+ for (const prerequisite_target& pt: l.prerequisite_targets[a])
+ {
+ // Note: adhoc prerequisites are not part of the library meta-
+ // information protocol.
+ //
+ if (pt == nullptr || pt.adhoc)
+ continue;
+
+ bool la;
+ const file* f;
+
+ if ((la = (f = pt->is_a<liba> ())) ||
+ (la = (f = pt->is_a<libux> ())) ||
+ ( f = pt->is_a<libs> ()))
+ {
+ if (sysd == nullptr) find_sysd ();
+ if (!li) find_linfo ();
+
+ process_libraries (a, bs, *li, *sysd,
+ *f, la, pt.data,
+ proc_impl, proc_lib, proc_opt, true, chain);
+ }
+ }
+ }
+
+ // Process libraries (recursively) from *.export.libs (of type names)
+ // handling import, etc.
+ //
+ // If it is not a C-common library, then it probably doesn't have any of
+ // the *.libs.
+ //
+ if (t != nullptr)
+ {
+ optional<dir_paths> usrd; // Extract lazily.
+
+ // Determine if a "simple path" is a system library.
+ //
+ auto sys_simple = [&sysd, &sys, &find_sysd] (const string& p) -> bool
+ {
+ bool s (!path::traits_type::absolute (p));
+
+ if (!s)
+ {
+ if (sysd == nullptr) find_sysd ();
+
+ s = sys (*sysd, p);
+ }
+
+ return s;
+ };
+
+ auto proc_int = [&l,
+ &proc_impl, &proc_lib, &proc_opt, chain,
+ &sysd, &usrd,
+ &find_sysd, &find_linfo, &sys_simple,
+ &bs, a, &li, this] (const lookup& lu)
+ {
+ const vector<name>* ns (cast_null<vector<name>> (lu));
+ if (ns == nullptr || ns->empty ())
+ return;
+
+ for (const name& n: *ns)
+ {
+ if (n.simple ())
+ {
+ // This is something like -lpthread or shell32.lib so should be
+ // a valid path. But it can also be an absolute library path
+ // (e.g., something that may come from our .static/shared.pc
+ // files).
+ //
+ if (proc_lib)
+ proc_lib (nullptr, n.value, 0, sys_simple (n.value));
+ }
+ else
+ {
+ // This is a potentially project-qualified target.
+ //
+ if (sysd == nullptr) find_sysd ();
+ if (!li) find_linfo ();
+
+ const file& t (resolve_library (a, bs, n, *li, *sysd, usrd));
+
+ if (proc_lib)
+ {
+ // This can happen if the target is mentioned in *.export.libs
+ // (i.e., it is an interface dependency) but not in the
+ // library's prerequisites (i.e., it is not an implementation
+ // dependency).
+ //
+ // Note that we used to just check for path being assigned but
+ // on Windows import-installed DLLs may legally have empty
+ // paths.
+ //
+ if (t.mtime () == timestamp_unknown)
+ fail << "interface dependency " << t << " is out of date" <<
+ info << "mentioned in *.export.libs of target " << l <<
+ info << "is it a prerequisite of " << l << "?";
+ }
+
+ // Process it recursively.
+ //
+ // @@ Where can we get the link flags? Should we try to find
+ // them in the library's prerequisites? What about installed
+ // stuff?
+ //
+ process_libraries (a, bs, *li, *sysd,
+ t, t.is_a<liba> () || t.is_a<libux> (), 0,
+ proc_impl, proc_lib, proc_opt, true, chain);
+ }
+ }
+ };
+
+ // Process libraries from *.libs (of type strings).
+ //
+ auto proc_imp = [&proc_lib, &sys_simple] (const lookup& lu)
+ {
+ const strings* ns (cast_null<strings> (lu));
+ if (ns == nullptr || ns->empty ())
+ return;
+
+ for (const string& n: *ns)
+ {
+ // This is something like -lpthread or shell32.lib so should be a
+ // valid path.
+ //
+ proc_lib (nullptr, n, 0, sys_simple (n));
+ }
+ };
+
+ // Note: the same structure as when processing options above.
+ //
+ // If all we know is it's a C-common library, then in both cases we
+ // only look for cc.export.libs.
+ //
+ if (cc)
+ {
+ if (c_e_libs) proc_int (c_e_libs);
+ }
+ else
+ {
+ if (impl)
+ {
+ // Interface and implementation: as discussed above, we can have
+ // two situations: overridden export or default export.
+ //
+ if (c_e_libs.defined () || x_e_libs.defined ())
+ {
+ if (c_e_libs) proc_int (c_e_libs);
+ if (x_e_libs) proc_int (x_e_libs);
+ }
+ else
+ {
+ // For default export we use the same options/libs as were used
+ // to build the library. Since libraries in (non-export) *.libs
+ // are not targets, we don't need to recurse.
+ //
+ if (proc_lib)
+ {
+ proc_imp (l[c_libs]);
+ proc_imp (l[same ? x_libs : vp[*t + ".libs"]]);
+ }
+ }
+ }
+ else
+ {
+ // Interface: only add *.export.* (interface dependencies).
+ //
+ if (c_e_libs) proc_int (c_e_libs);
+ if (x_e_libs) proc_int (x_e_libs);
+ }
+ }
+ }
+
+ // Remove this library from the chain.
+ //
+ if (self && proc_lib)
+ chain->pop_back ();
+ }
+
+ // The name can be an absolute or relative target name (for example,
+ // /tmp/libfoo/lib{foo} or ../libfoo/lib{foo}) or a project-qualified
+ // relative target name (e.g., libfoo%lib{foo}).
+ //
+ // Note that in case of the relative target that comes from export.libs,
+ // the resolution happens relative to the base scope of the target from
+ // which this export.libs came, which is exactly what we want.
+ //
+ // Note that the scope, search paths, and the link order should all be
+ // derived from the library target that mentioned this name. This way we
+ // will select exactly the same target as the library's matched rule and
+ // that's the only way to guarantee it will be up-to-date.
+ //
+ const file& common::
+ resolve_library (action a,
+ const scope& s,
+ name n,
+ linfo li,
+ const dir_paths& sysd,
+ optional<dir_paths>& usrd) const
+ {
+ if (n.type != "lib" && n.type != "liba" && n.type != "libs")
+ fail << "target name " << n << " is not a library";
+
+ const target* xt (nullptr);
+
+ if (!n.qualified ())
+ {
+ // Search for an existing target with this name "as if" it was a
+ // prerequisite.
+ //
+ xt = search_existing (n, s);
+
+ if (xt == nullptr)
+ fail << "unable to find library " << n;
+ }
+ else
+ {
+ // This is import.
+ //
+ auto rp (s.find_target_type (n, location ())); // Note: changes name.
+ const target_type* tt (rp.first);
+ optional<string>& ext (rp.second);
+
+ if (tt == nullptr)
+ fail << "unknown target type '" << n.type << "' in library " << n;
+
+ // @@ OUT: for now we assume out is undetermined, just like in
+ // search (name, scope).
+ //
+ dir_path out;
+
+ prerequisite_key pk {n.proj, {tt, &n.dir, &out, &n.value, ext}, &s};
+ xt = search_library_existing (a, sysd, usrd, pk);
+
+ if (xt == nullptr)
+ {
+ if (n.qualified ())
+ xt = import_existing (s.ctx, pk);
+ }
+
+ if (xt == nullptr)
+ fail << "unable to find library " << pk;
+ }
+
+ // If this is lib{}/libu*{}, pick appropriate member.
+ //
+ if (const libx* l = xt->is_a<libx> ())
+ xt = link_member (*l, a, li); // Pick lib*{e,a,s}{}.
+
+ return xt->as<file> ();
+ }
+
+ // Insert a target verifying that it already exists if requested. Return
+ // the lock.
+ //
+ template <typename T>
+ ulock common::
+ insert_library (context& ctx,
+ T*& r,
+ const string& name,
+ const dir_path& d,
+ optional<string> ext,
+ bool exist,
+ tracer& trace)
+ {
+ auto p (ctx.targets.insert_locked (T::static_type,
+ d,
+ dir_path (),
+ name,
+ move (ext),
+ true, // Implied.
+ trace));
+
+ assert (!exist || !p.second.owns_lock ());
+ r = &p.first.template as<T> ();
+ return move (p.second);
+ }
+
+ // Note that pk's scope should not be NULL (even if dir is absolute).
+ //
+ target* common::
+ search_library (action act,
+ const dir_paths& sysd,
+ optional<dir_paths>& usrd,
+ const prerequisite_key& p,
+ bool exist) const
+ {
+ tracer trace (x, "search_library");
+
+ assert (p.scope != nullptr);
+
+ // @@ This is hairy enough to warrant a separate implementation for
+ // Windows.
+
+ // Note: since we are searching for a (presumably) installed library,
+ // utility libraries do not apply.
+ //
+ bool l (p.is_a<lib> ());
+ const optional<string>& ext (l ? nullopt : p.tk.ext); // Only liba/libs.
+
+ // First figure out what we need to search for.
+ //
+ const string& name (*p.tk.name);
+
+ // liba
+ //
+ path an;
+ optional<string> ae;
+
+ if (l || p.is_a<liba> ())
+ {
+ // We are trying to find a library in the search paths extracted from
+ // the compiler. It would only be natural if we used the library
+ // prefix/extension that correspond to this compiler and/or its
+ // target.
+ //
+ // Unlike MinGW, VC's .lib/.dll.lib naming is by no means standard and
+ // we might need to search for other names. In fact, there is no
+ // reliable way to guess from the file name what kind of library it
+ // is, static or import and we will have to do deep inspection of such
+ // alternative names. However, if we did find .dll.lib, then we can
+ // assume that .lib is the static library without any deep inspection
+ // overhead.
+ //
+ const char* e ("");
+
+ if (tsys == "win32-msvc")
+ {
+ an = path (name);
+ e = "lib";
+ }
+ else
+ {
+ an = path ("lib" + name);
+ e = "a";
+ }
+
+ ae = ext ? ext : string (e);
+ if (!ae->empty ())
+ {
+ an += '.';
+ an += *ae;
+ }
+ }
+
+ // libs
+ //
+ path sn;
+ optional<string> se;
+
+ if (l || p.is_a<libs> ())
+ {
+ const char* e ("");
+
+ if (tsys == "win32-msvc")
+ {
+ sn = path (name);
+ e = "dll.lib";
+ }
+ else
+ {
+ sn = path ("lib" + name);
+
+ if (tsys == "darwin") e = "dylib";
+ else if (tsys == "mingw32") e = "dll.a"; // See search code below.
+ else e = "so";
+ }
+
+ se = ext ? ext : string (e);
+ if (!se->empty ())
+ {
+ sn += '.';
+ sn += *se;
+ }
+ }
+
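+ // To summarize, for name "foo" and no explicitly specified extension the
+ // above gives:
+ //
+ //              liba         libs
+ //
+ // win32-msvc   foo.lib      foo.dll.lib
+ // mingw32      libfoo.a     libfoo.dll.a
+ // darwin       libfoo.a     libfoo.dylib
+ // other        libfoo.a     libfoo.so
+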
+ // Now search.
+ //
+ liba* a (nullptr);
+ libs* s (nullptr);
+
+ pair<path, path> pc; // pkg-config .pc file paths.
+ path f; // Reuse the buffer.
+
+ auto search = [&a, &s, &pc,
+ &an, &ae,
+ &sn, &se,
+ &name, ext,
+ &p, &f, exist, &trace, this] (const dir_path& d) -> bool
+ {
+ context& ctx (p.scope->ctx);
+
+ timestamp mt;
+
+ // libs
+ //
+ // Look for the shared library first. The order is important for VC:
+ // only if we found .dll.lib can we safely assume that just .lib is a
+ // static library.
+ //
+ if (!sn.empty ())
+ {
+ f = d;
+ f /= sn;
+ mt = mtime (f);
+
+ if (mt != timestamp_nonexistent)
+ {
+ // On Windows what we found is the import library which we need
+ // to make the first ad hoc member of libs{}.
+ //
+ if (tclass == "windows")
+ {
+ libi* i (nullptr);
+ insert_library (ctx, i, name, d, se, exist, trace);
+
+ ulock l (
+ insert_library (ctx, s, name, d, nullopt, exist, trace));
+
+ if (!exist)
+ {
+ if (l.owns_lock ())
+ {
+ s->member = i; // We are first.
+ l.unlock ();
+ }
+ else
+ assert (find_adhoc_member<libi> (*s) == i);
+
+ i->mtime (mt);
+ i->path (move (f));
+
+ // Presumably there is a DLL somewhere, we just don't know
+ // where (and it's possible we might have to look for one if we
+ // decide we need to do rpath emulation for installed
+ // libraries as well). We will represent this as empty path
+ // but valid timestamp (aka "trust me, it's there").
+ //
+ s->mtime (mt);
+ s->path (empty_path);
+ }
+ }
+ else
+ {
+ insert_library (ctx, s, name, d, se, exist, trace);
+
+ s->mtime (mt);
+ s->path (move (f));
+ }
+ }
+ else if (!ext && tsys == "mingw32")
+ {
+ // Above we searched for the import library (.dll.a) but if it's
+ // not found, then we also search for the .dll (unless the
+ // extension was specified explicitly) since we can link to it
+ // directly. Note also that the resulting libs{} would end up
+ // being the .dll.
+ //
+ se = string ("dll");
+ f = f.base (); // Remove .a from .dll.a.
+ mt = mtime (f);
+
+ if (mt != timestamp_nonexistent)
+ {
+ insert_library (ctx, s, name, d, se, exist, trace);
+
+ s->mtime (mt);
+ s->path (move (f));
+ }
+ }
+ }
+
+ // liba
+ //
+ // If we didn't find .dll.lib then we cannot assume .lib is static.
+ //
+ if (!an.empty () && (s != nullptr || tsys != "win32-msvc"))
+ {
+ f = d;
+ f /= an;
+
+ if ((mt = mtime (f)) != timestamp_nonexistent)
+ {
+ // Enter the target. Note that because the search paths are
+ // normalized, the result is automatically normalized as well.
+ //
+ // Note that this target is outside any project which we treat
+ // as out trees.
+ //
+ insert_library (ctx, a, name, d, ae, exist, trace);
+ a->mtime (mt);
+ a->path (move (f));
+ }
+ }
+
+ // Alternative search for VC.
+ //
+ if (tsys == "win32-msvc")
+ {
+ const scope& rs (*p.scope->root_scope ());
+ const process_path& ld (cast<process_path> (rs["bin.ld.path"]));
+
+ if (s == nullptr && !sn.empty ())
+ s = msvc_search_shared (ld, d, p, exist);
+
+ if (a == nullptr && !an.empty ())
+ a = msvc_search_static (ld, d, p, exist);
+ }
+
+ // Look for binary-less libraries via pkg-config .pc files. Note that
+ // it is possible we have already found one of them as binfull but the
+ // other is binless.
+ //
+ {
+ bool na (a == nullptr && !an.empty ()); // Need static.
+ bool ns (s == nullptr && !sn.empty ()); // Need shared.
+
+ if (na || ns)
+ {
+ // Only consider the common .pc file if we can be sure there
+ // is no binfull variant.
+ //
+ pair<path, path> r (
+ pkgconfig_search (d, p.proj, name, na && ns /* common */));
+
+ if (na && !r.first.empty ())
+ {
+ insert_library (ctx, a, name, d, nullopt, exist, trace);
+ a->mtime (timestamp_unreal);
+ a->path (empty_path);
+ }
+
+ if (ns && !r.second.empty ())
+ {
+ insert_library (ctx, s, name, d, nullopt, exist, trace);
+ s->mtime (timestamp_unreal);
+ s->path (empty_path);
+ }
+
+ // Only keep these .pc paths if we found anything via them.
+ //
+ if ((na && a != nullptr) || (ns && s != nullptr))
+ pc = move (r);
+ }
+ }
+
+ return a != nullptr || s != nullptr;
+ };
+
+ // First try user directories (i.e., -L).
+ //
+ bool sys (false);
+
+ if (!usrd)
+ usrd = extract_library_dirs (*p.scope);
+
+ const dir_path* pd (nullptr);
+ for (const dir_path& d: *usrd)
+ {
+ if (search (d))
+ {
+ pd = &d;
+ break;
+ }
+ }
+
+ // Next try system directories (i.e., those extracted from the compiler).
+ //
+ if (pd == nullptr)
+ {
+ for (const dir_path& d: sysd)
+ {
+ if (search (d))
+ {
+ pd = &d;
+ break;
+ }
+ }
+
+ sys = true;
+ }
+
+ if (pd == nullptr)
+ return nullptr;
+
+ // Enter (or find) the lib{} target group.
+ //
+ lib* lt;
+ insert_library (
+ p.scope->ctx, lt, name, *pd, l ? p.tk.ext : nullopt, exist, trace);
+
+ // Result.
+ //
+ target* r (l ? lt : (p.is_a<liba> () ? static_cast<target*> (a) : s));
+
+ // Assume the rest is already done if existing.
+ //
+ if (exist)
+ return r;
+
+ // If we cannot acquire the lock then this means the target has already
+ // been matched (though not clear by whom) and we assume all of this
+ // has already been done.
+ //
+ target_lock ll (lock (act, *lt));
+
+ // Set lib{} group members to indicate what's available. Note that we
+ // must be careful here since it's possible we have already imported some
+ // of its members.
+ //
+ if (ll)
+ {
+ if (a != nullptr) lt->a = a;
+ if (s != nullptr) lt->s = s;
+ }
+
+ target_lock al (a != nullptr ? lock (act, *a) : target_lock ());
+ target_lock sl (s != nullptr ? lock (act, *s) : target_lock ());
+
+ if (!al) a = nullptr;
+ if (!sl) s = nullptr;
+
+ if (a != nullptr) a->group = lt;
+ if (s != nullptr) s->group = lt;
+
+ // Mark as a "cc" library (unless already marked) and set the system
+ // flag.
+ //
+ auto mark_cc = [sys, this] (target& t) -> bool
+ {
+ auto p (t.vars.insert (c_type));
+
+ if (p.second)
+ {
+ p.first.get () = string ("cc");
+
+ if (sys)
+ t.vars.assign (c_system) = true;
+ }
+
+ return p.second;
+ };
+
+ // If the library already has cc.type, then assume it was either
+ // already imported or was matched by a rule.
+ //
+ if (a != nullptr && !mark_cc (*a)) a = nullptr;
+ if (s != nullptr && !mark_cc (*s)) s = nullptr;
+
+ // Add the "using static/shared library" macro (used, for example, to
+ // handle DLL export). The absence of either of these macros would
+ // mean some other build system that cannot distinguish between the
+ // two (and no pkg-config information).
+ //
+ auto add_macro = [this] (target& t, const char* suffix)
+ {
+ // If there is already a value (either in cc.export or x.export),
+ // don't add anything: we don't want to be accumulating defines nor
+ // messing with custom values. And if we are adding, then use the
+ // generic cc.export.
+ //
+ // The only way we could already have this value is if this same
+ // library was also imported as a project (as opposed to installed).
+ // Unlikely but possible. In this case the values were set by the
+ // export stub and we shouldn't touch them.
+ //
+ if (!t.vars[x_export_poptions])
+ {
+ auto p (t.vars.insert (c_export_poptions));
+
+ if (p.second)
+ {
+ // The "standard" macro name will be LIB<NAME>_{STATIC,SHARED},
+ // where <name> is the target name. Here we want to strike a
+ // balance between being unique and not too noisy.
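+ // For example, for a library target named "hello" this adds
+ // -DLIBHELLO_STATIC or -DLIBHELLO_SHARED.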
+ //
+ string d ("-DLIB");
+
+ d += sanitize_identifier (
+ ucase (const_cast<const string&> (t.name)));
+
+ d += '_';
+ d += suffix;
+
+ strings o;
+ o.push_back (move (d));
+ p.first.get () = move (o);
+ }
+ }
+ };
+
+ if (ll && (a != nullptr || s != nullptr))
+ {
+ // Try to extract library information from pkg-config. We only add the
+ // default macro if we could not extract more precise information. The
+ // idea is that in .pc files that we generate, we copy those macros
+ // (or custom ones) from *.export.poptions.
+ //
+ if (pc.first.empty () && pc.second.empty ())
+ {
+ if (!pkgconfig_load (act, *p.scope,
+ *lt, a, s,
+ p.proj, name,
+ *pd, sysd, *usrd))
+ {
+ if (a != nullptr) add_macro (*a, "STATIC");
+ if (s != nullptr) add_macro (*s, "SHARED");
+ }
+ }
+ else
+ pkgconfig_load (act, *p.scope, *lt, a, s, pc, *pd, sysd, *usrd);
+ }
+
+ // If we have the lock (meaning this is the first time), set the
+ // target's recipe to noop. Failing that we will keep re-locking it,
+ // updating its members, etc.
+ //
+ if (al) match_recipe (al, noop_recipe);
+ if (sl) match_recipe (sl, noop_recipe);
+ if (ll) match_recipe (ll, noop_recipe);
+
+ return r;
+ }
+
+ dir_paths common::
+ extract_library_dirs (const scope& bs) const
+ {
+ dir_paths r;
+
+ // Extract user-supplied search paths (i.e., -L, /LIBPATH).
+ //
+ auto extract = [&bs, &r, this] (const value& val, const variable& var)
+ {
+ const auto& v (cast<strings> (val));
+
+ for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+ {
+ const string& o (*i);
+
+ dir_path d;
+
+ try
+ {
+ if (cclass == compiler_class::msvc)
+ {
+ // /LIBPATH:<dir> (case-insensitive).
+ //
+ if ((o[0] == '/' || o[0] == '-') &&
+ casecmp (o.c_str () + 1, "LIBPATH:", 8) == 0)
+ d = dir_path (o, 9, string::npos);
+ else
+ continue;
+ }
+ else
+ {
+ // -L can either be in the "-L<dir>" or "-L <dir>" form.
+ //
+ if (o == "-L")
+ {
+ if (++i == e)
+ break; // Let the compiler complain.
+
+ d = dir_path (*i);
+ }
+ else if (o.compare (0, 2, "-L") == 0)
+ d = dir_path (o, 2, string::npos);
+ else
+ continue;
+ }
+ }
+ catch (const invalid_path& e)
+ {
+ fail << "invalid directory '" << e.path << "'"
+ << " in option '" << o << "'"
+ << " in variable " << var
+ << " for scope " << bs;
+ }
+
+ // Ignore relative paths. Or maybe we should warn?
+ //
+ if (!d.relative ())
+ r.push_back (move (d));
+ }
+ };
+
+ if (auto l = bs[c_loptions]) extract (*l, c_loptions);
+ if (auto l = bs[x_loptions]) extract (*l, x_loptions);
+
+ return r;
+ }
+ }
+}
diff --git a/libbuild2/cc/common.hxx b/libbuild2/cc/common.hxx
new file mode 100644
index 0000000..31219a3
--- /dev/null
+++ b/libbuild2/cc/common.hxx
@@ -0,0 +1,358 @@
+// file : libbuild2/cc/common.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_COMMON_HXX
+#define LIBBUILD2_CC_COMMON_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/guess.hxx> // compiler_id
+#include <libbuild2/cc/target.hxx> // h{}
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Data entries that define a concrete c-family module (e.g., c or cxx).
+ // These classes are used as virtual bases by the rules as well as the
+ // modules. This way the member variables can be referenced as is, without
+ // any extra decorations (in other words, it is a bunch of data members
+ // that can be shared between several classes/instances).
+ //
+ struct config_data
+ {
+ lang x_lang;
+
+ const char* x; // Module name ("c", "cxx").
+ const char* x_name; // Compiler name ("c", "c++").
+ const char* x_default; // Compiler default ("gcc", "g++").
+ const char* x_pext; // Preprocessed source extension (".i", ".ii").
+
+ // Array of modules that can hint us the toolchain, terminate with
+ // NULL.
+ //
+ const char* const* x_hinters;
+
+ const variable& config_x;
+ const variable& config_x_id; // <type>[-<variant>]
+ const variable& config_x_version;
+ const variable& config_x_target;
+ const variable& config_x_std;
+ const variable& config_x_poptions;
+ const variable& config_x_coptions;
+ const variable& config_x_loptions;
+ const variable& config_x_aoptions;
+ const variable& config_x_libs;
+ const variable* config_x_importable_headers;
+
+ const variable& x_path; // Compiler process path.
+ const variable& x_sys_lib_dirs; // System library search directories.
+ const variable& x_sys_inc_dirs; // System header search directories.
+
+ const variable& x_std;
+ const variable& x_poptions;
+ const variable& x_coptions;
+ const variable& x_loptions;
+ const variable& x_aoptions;
+ const variable& x_libs;
+ const variable* x_importable_headers;
+
+ const variable& c_poptions; // cc.*
+ const variable& c_coptions;
+ const variable& c_loptions;
+ const variable& c_aoptions;
+ const variable& c_libs;
+
+ const variable& x_export_poptions;
+ const variable& x_export_coptions;
+ const variable& x_export_loptions;
+ const variable& x_export_libs;
+
+ const variable& c_export_poptions; // cc.export.*
+ const variable& c_export_coptions;
+ const variable& c_export_loptions;
+ const variable& c_export_libs;
+
+ const variable& x_stdlib; // x.stdlib
+
+ const variable& c_runtime; // cc.runtime
+ const variable& c_stdlib; // cc.stdlib
+
+ const variable& c_type; // cc.type
+ const variable& c_system; // cc.system
+ const variable& c_module_name; // cc.module_name
+ const variable& c_reprocess; // cc.reprocess
+
+ const variable& x_preprocessed; // x.preprocessed
+ const variable* x_symexport; // x.features.symexport
+
+ const variable& x_id;
+ const variable& x_id_type;
+ const variable& x_id_variant;
+
+ const variable& x_class;
+
+ const variable& x_version;
+ const variable& x_version_major;
+ const variable& x_version_minor;
+ const variable& x_version_patch;
+ const variable& x_version_build;
+
+ const variable& x_signature;
+ const variable& x_checksum;
+
+ const variable& x_pattern;
+
+ const variable& x_target;
+ const variable& x_target_cpu;
+ const variable& x_target_vendor;
+ const variable& x_target_system;
+ const variable& x_target_version;
+ const variable& x_target_class;
+ };
+
+ struct data: config_data
+ {
+ const char* x_compile; // Rule names.
+ const char* x_link;
+ const char* x_install;
+ const char* x_uninstall;
+
+ // Cached values for some commonly-used variables/values.
+ //
+
+ compiler_type ctype; // x.id.type
+ const string& cvariant; // x.id.variant
+ compiler_class cclass; // x.class
+ uint64_t cmaj; // x.version.major
+ uint64_t cmin; // x.version.minor
+ const process_path& cpath; // x.path
+
+ const target_triplet& ctgt; // x.target
+ const string& tsys; // x.target.system
+ const string& tclass; // x.target.class
+
+ const strings& tstd; // Translated x_std value (options).
+
+ bool modules; // x.features.modules
+ bool symexport; // x.features.symexport
+
+ const strings* import_hdr; // x.importable_headers (NULL if unused/empty).
+
+ const dir_paths& sys_lib_dirs; // x.sys_lib_dirs
+ const dir_paths& sys_inc_dirs; // x.sys_inc_dirs
+
+ size_t sys_lib_dirs_extra; // First extra path (size if none).
+ size_t sys_inc_dirs_extra; // First extra path (size if none).
+
+ const target_type& x_src; // Source target type (c{}, cxx{}).
+ const target_type* x_mod; // Module target type (mxx{}), if any.
+
+ // Array of target types that are considered the X-language headers
+ // (excluding h{} except for C). Keep them in the most likely to appear
+ // order with the "real header" first and terminated with NULL.
+ //
+ const target_type* const* x_hdr;
+
+ template <typename T>
+ bool
+ x_header (const T& t, bool c_hdr = true) const
+ {
+ for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht)
+ if (t.is_a (**ht))
+ return true;
+
+ return c_hdr && t.is_a (h::static_type);
+ }
+
+ // Array of target types that can be #include'd. Used to reverse-lookup
+ // extensions to target types. Keep them in the most likely to appear
+ // order and terminate with NULL.
+ //
+ const target_type* const* x_inc;
+
+ // Aggregate-like constructor with from-base support.
+ //
+ data (const config_data& cd,
+ const char* compile,
+ const char* link,
+ const char* install,
+ const char* uninstall,
+ compiler_type ct,
+ const string& cv,
+ compiler_class cl,
+ uint64_t mj, uint64_t mi,
+ const process_path& path,
+ const target_triplet& tgt,
+ const strings& std,
+ bool fm,
+ bool fs,
+ const dir_paths& sld,
+ const dir_paths& sid,
+ size_t sle,
+ size_t sie,
+ const target_type& src,
+ const target_type* mod,
+ const target_type* const* hdr,
+ const target_type* const* inc)
+ : config_data (cd),
+ x_compile (compile),
+ x_link (link),
+ x_install (install),
+ x_uninstall (uninstall),
+ ctype (ct), cvariant (cv), cclass (cl),
+ cmaj (mj), cmin (mi),
+ cpath (path),
+ ctgt (tgt), tsys (ctgt.system), tclass (ctgt.class_),
+ tstd (std),
+ modules (fm),
+ symexport (fs),
+ import_hdr (nullptr),
+ sys_lib_dirs (sld), sys_inc_dirs (sid),
+ sys_lib_dirs_extra (sle), sys_inc_dirs_extra (sie),
+ x_src (src), x_mod (mod), x_hdr (hdr), x_inc (inc) {}
+ };
+
+ class LIBBUILD2_CC_SYMEXPORT common: public data
+ {
+ public:
+ common (data&& d): data (move (d)) {}
+
+ // Library handling.
+ //
+ public:
+ void
+ process_libraries (
+ action,
+ const scope&,
+ linfo,
+ const dir_paths&,
+ const file&,
+ bool,
+ lflags,
+ const function<bool (const file&, bool)>&,
+ const function<void (const file* const*, const string&, lflags, bool)>&,
+ const function<void (const file&, const string&, bool, bool)>&,
+ bool = false,
+ small_vector<const file*, 16>* = nullptr) const;
+
+ const target*
+ search_library (action a,
+ const dir_paths& sysd,
+ optional<dir_paths>& usrd,
+ const prerequisite& p) const
+ {
+ const target* r (p.target.load (memory_order_consume));
+
+ if (r == nullptr)
+ {
+ if ((r = search_library (a, sysd, usrd, p.key ())) != nullptr)
+ {
+ const target* e (nullptr);
+ if (!p.target.compare_exchange_strong (
+ e, r,
+ memory_order_release,
+ memory_order_consume))
+ assert (e == r);
+ }
+ }
+
+ return r;
+ }
+
+ public:
+ const file&
+ resolve_library (action,
+ const scope&,
+ name,
+ linfo,
+ const dir_paths&,
+ optional<dir_paths>&) const;
+
+ template <typename T>
+ static ulock
+ insert_library (context&,
+ T*&,
+ const string&,
+ const dir_path&,
+ optional<string>,
+ bool,
+ tracer&);
+
+ target*
+ search_library (action,
+ const dir_paths&,
+ optional<dir_paths>&,
+ const prerequisite_key&,
+ bool existing = false) const;
+
+ const target*
+ search_library_existing (action a,
+ const dir_paths& sysd,
+ optional<dir_paths>& usrd,
+ const prerequisite_key& pk) const
+ {
+ return search_library (a, sysd, usrd, pk, true);
+ }
+
+ dir_paths
+ extract_library_dirs (const scope&) const;
+
+ // Alternative search logic for VC (msvc.cxx).
+ //
+ bin::liba*
+ msvc_search_static (const process_path&,
+ const dir_path&,
+ const prerequisite_key&,
+ bool existing) const;
+
+ bin::libs*
+ msvc_search_shared (const process_path&,
+ const dir_path&,
+ const prerequisite_key&,
+ bool existing) const;
+
+ // The pkg-config file searching and loading (pkgconfig.cxx)
+ //
+ using pkgconfig_callback = function<bool (dir_path&& d)>;
+
+ bool
+ pkgconfig_search (const dir_path&, const pkgconfig_callback&) const;
+
+ pair<path, path>
+ pkgconfig_search (const dir_path&,
+ const optional<project_name>&,
+ const string&,
+ bool) const;
+
+ void
+ pkgconfig_load (action, const scope&,
+ bin::lib&, bin::liba*, bin::libs*,
+ const pair<path, path>&,
+ const dir_path&,
+ const dir_paths&,
+ const dir_paths&) const;
+
+ bool
+ pkgconfig_load (action, const scope&,
+ bin::lib&, bin::liba*, bin::libs*,
+ const optional<project_name>&,
+ const string&,
+ const dir_path&,
+ const dir_paths&,
+ const dir_paths&) const;
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_COMMON_HXX
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
new file mode 100644
index 0000000..8cebef0
--- /dev/null
+++ b/libbuild2/cc/compile-rule.cxx
@@ -0,0 +1,6098 @@
+// file : libbuild2/cc/compile-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/compile-rule.hxx>
+
+#include <cstdlib> // exit()
+#include <cstring> // strlen(), strchr()
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/depdb.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx> // mtime()
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/config/utility.hxx> // create_project()
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/parser.hxx>
+#include <libbuild2/cc/target.hxx> // h
+#include <libbuild2/cc/module.hxx>
+#include <libbuild2/cc/utility.hxx>
+
+using std::exit;
+using std::strlen;
+
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // Module type/info string serialization.
+ //
+ // The string representation is a space-separated list of module names
+ // or quoted paths for header units with the following rules:
+ //
+ // 1. If this is a module unit, then the first name is the module name
+ // itself followed by either '!' for an interface or header unit and
+ // by '+' for an implementation unit.
+ //
+ // 2. If an imported module is re-exported, then the module name is
+ // followed by '*'.
+ //
+ // For example:
+ //
+ // foo! foo.core* foo.base* foo.impl
+ // foo.base+ foo.impl
+ // foo.base foo.impl
+ // "/usr/include/stdio.h"!
+ // "/usr/include/stdio.h"! "/usr/include/stddef.h"
+ //
+ // NOTE: currently we omit the imported header units since we have no need
+ // for this information (everything is handled by the mapper). Plus,
+ // resolving an import declaration to an absolute path would require
+ // some effort.
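+ //
+ // For example, to_module_info() below parses "foo! foo.core* foo.impl"
+ // back into a module_iface unit with module name "foo" and imports
+ // "foo.core" (re-exported) and "foo.impl".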
+ //
+ static string
+ to_string (unit_type ut, const module_info& mi)
+ {
+ string s;
+
+ if (ut != unit_type::non_modular)
+ {
+ if (ut == unit_type::module_header) s += '"';
+ s += mi.name;
+ if (ut == unit_type::module_header) s += '"';
+
+ s += (ut == unit_type::module_impl ? '+' : '!');
+ }
+
+ for (const module_import& i: mi.imports)
+ {
+ if (!s.empty ())
+ s += ' ';
+
+ if (i.type == unit_type::module_header) s += '"';
+ s += i.name;
+ if (i.type == unit_type::module_header) s += '"';
+
+ if (i.exported)
+ s += '*';
+ }
+
+ return s;
+ }
+
+ static pair<unit_type, module_info>
+ to_module_info (const string& s)
+ {
+ unit_type ut (unit_type::non_modular);
+ module_info mi;
+
+ for (size_t b (0), e (0), n (s.size ()), m; e < n; )
+ {
+ // Let's handle paths with spaces seeing that we already quote them.
+ //
+ char d (s[b = e] == '"' ? '"' : ' ');
+
+ if ((m = next_word (s, n, b, e, d)) == 0)
+ break;
+
+ char c (d == ' ' ? s[e - 1] : // Before delimiter.
+ e + 1 < n ? s[e + 1] : // After delimiter.
+ '\0');
+
+ switch (c)
+ {
+ case '!':
+ case '+':
+ case '*': break;
+ default: c = '\0';
+ }
+
+ string w (s, b, m - (d == ' ' && c != '\0' ? 1 : 0));
+
+ unit_type t (c == '+' ? unit_type::module_impl :
+ d == ' ' ? unit_type::module_iface :
+ unit_type::module_header);
+
+ if (c == '!' || c == '+')
+ {
+ ut = t;
+ mi.name = move (w);
+ }
+ else
+ mi.imports.push_back (module_import {t, move (w), c == '*', 0});
+
+ // Skip to the next word (quote and space or just space).
+ //
+ e += (d == '"' ? 2 : 1);
+ }
+
+ return pair<unit_type, module_info> (move (ut), move (mi));
+ }
+
+ // preprocessed
+ //
+ template <typename T>
+ inline bool
+ operator< (preprocessed l, T r) // Template because of VC14 bug.
+ {
+ return static_cast<uint8_t> (l) < static_cast<uint8_t> (r);
+ }
+
+ preprocessed
+ to_preprocessed (const string& s)
+ {
+ if (s == "none") return preprocessed::none;
+ if (s == "includes") return preprocessed::includes;
+ if (s == "modules") return preprocessed::modules;
+ if (s == "all") return preprocessed::all;
+ throw invalid_argument ("invalid preprocessed value '" + s + "'");
+ }
+
+ struct compile_rule::match_data
+ {
+ explicit
+ match_data (unit_type t, const prerequisite_member& s)
+ : type (t), src (s) {}
+
+ unit_type type;
+ preprocessed pp = preprocessed::none;
+ bool symexport = false; // Target uses __symexport.
+ bool touch = false; // Target needs to be touched.
+ timestamp mt = timestamp_unknown; // Target timestamp.
+ prerequisite_member src;
+ auto_rmfile psrc; // Preprocessed source, if any.
+ path dd; // Dependency database path.
+ size_t headers = 0; // Number of imported header units.
+ module_positions modules = {0, 0, 0}; // Positions of imported modules.
+ };
+
+ compile_rule::
+ compile_rule (data&& d)
+ : common (move (d)),
+ rule_id (string (x) += ".compile 4")
+ {
+ static_assert (sizeof (match_data) <= target::data_size,
+ "insufficient space");
+ }
+
+ size_t compile_rule::
+ append_lang_options (cstrings& args, const match_data& md) const
+ {
+ size_t r (args.size ());
+
+ // Normally there will be one or two options/arguments.
+ //
+ const char* o1 (nullptr);
+ const char* o2 (nullptr);
+
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ switch (x_lang)
+ {
+ case lang::c: o1 = "/TC"; break;
+ case lang::cxx: o1 = "/TP"; break;
+ }
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ // For GCC we ignore the preprocessed value since it is handled via
+ // -fpreprocessed -fdirectives-only.
+ //
+ // Clang has *-cpp-output (but not c++-module-cpp-output) and they
+ // handle comments and line continuations. However, currently this
+ // is only by accident since these modes are essentially equivalent
+ // to their cpp-output-less versions.
+ //
+ switch (md.type)
+ {
+ case unit_type::non_modular:
+ case unit_type::module_impl:
+ {
+ o1 = "-x";
+ switch (x_lang)
+ {
+ case lang::c: o2 = "c"; break;
+ case lang::cxx: o2 = "c++"; break;
+ }
+ break;
+ }
+ case unit_type::module_iface:
+ case unit_type::module_header:
+ {
+ // Here things get rather compiler-specific. We also assume
+ // the language is C++.
+ //
+ bool h (md.type == unit_type::module_header);
+
+ //@@ MODHDR TODO: should we try to distinguish c-header vs
+ // c++-header based on the source target type?
+
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // In GCC compiling a header unit requires -fmodule-header
+ // in addition to -x c/c++-header. Probably because relying
+ // on just -x would be ambiguous with its PCH support.
+ //
+ if (h)
+ args.push_back ("-fmodule-header");
+
+ o1 = "-x";
+ o2 = h ? "c++-header" : "c++";
+ break;
+ }
+ case compiler_type::clang:
+ {
+ o1 = "-x";
+ o2 = h ? "c++-header" : "c++-module";
+ break;
+ }
+ default:
+ assert (false);
+ }
+ break;
+ }
+ }
+ break;
+ }
+ }
+
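+ // For example, an MSVC C++ translation unit gets /TP while a Clang
+ // module interface unit gets -x c++-module.
+ //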
+ if (o1 != nullptr) args.push_back (o1);
+ if (o2 != nullptr) args.push_back (o2);
+
+ return args.size () - r;
+ }
+
+ inline void compile_rule::
+ append_symexport_options (cstrings& args, const target& t) const
+ {
+ // With VC if a BMI is compiled with dllexport, then when such BMI is
+ // imported, it is auto-magically treated as dllimport. Let's hope
+ // other compilers follow suit.
+ //
+ args.push_back (t.is_a<bmis> () && tclass == "windows"
+ ? "-D__symexport=__declspec(dllexport)"
+ : "-D__symexport=");
+ }
+
+ bool compile_rule::
+ match (action a, target& t, const string&) const
+ {
+ tracer trace (x, "compile_rule::match");
+
+ // Note: unit type will be refined in apply().
+ //
+ unit_type ut (t.is_a<hbmix> () ? unit_type::module_header :
+ t.is_a<bmix> () ? unit_type::module_iface :
+ unit_type::non_modular);
+
+ // Link-up to our group (this is the obj/bmi{} target group protocol
+ // which means this can be done whether we match or not).
+ //
+ if (t.group == nullptr)
+ t.group = &search (t,
+ (ut == unit_type::module_header ? hbmi::static_type:
+ ut == unit_type::module_iface ? bmi::static_type :
+ obj::static_type),
+ t.dir, t.out, t.name);
+
+ // See if we have a source file. Iterate in reverse so that a source
+ // file specified for a member overrides the one specified for the
+ // group. Also "see through" groups.
+ //
+ for (prerequisite_member p: reverse_group_prerequisite_members (a, t))
+ {
+ // If excluded or ad hoc, then don't factor it into our tests.
+ //
+ if (include (a, t, p) != include_type::normal)
+ continue;
+
+ // For a header unit we check the "real header" plus the C header.
+ //
+ if (ut == unit_type::module_header ? p.is_a (**x_hdr) || p.is_a<h> () :
+ ut == unit_type::module_iface ? p.is_a (*x_mod) :
+ p.is_a (x_src))
+ {
+ // Save in the target's auxiliary storage.
+ //
+ t.data (match_data (ut, p));
+ return true;
+ }
+ }
+
+ l4 ([&]{trace << "no " << x_lang << " source file for target " << t;});
+ return false;
+ }
+
+ // Append or hash library options from a pair of *.export.* variables
+ // (first one is cc.export.*) recursively, prerequisite libraries first.
+ //
+ void compile_rule::
+ append_lib_options (const scope& bs,
+ cstrings& args,
+ action a,
+ const target& t,
+ linfo li) const
+ {
+ // See through utility libraries.
+ //
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&args, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ // Note that in our model *.export.poptions are always "interface",
+ // even if set on liba{}/libs{}, unlike loptions.
+ //
+ if (!exp) // Ignore libux.
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x
+ ? x_export_poptions
+ : l.ctx.var_pool[t + ".export.poptions"]));
+
+ append_options (args, l, var);
+ };
+
+ // In case we don't have the "small function object" optimization.
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ // Should be already searched and matched for libraries.
+ //
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ void compile_rule::
+ hash_lib_options (const scope& bs,
+ sha256& cs,
+ action a,
+ const target& t,
+ linfo li) const
+ {
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&cs, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ if (!exp)
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x
+ ? x_export_poptions
+ : l.ctx.var_pool[t + ".export.poptions"]));
+
+ hash_options (cs, l, var);
+ };
+
+ // The same logic as in append_lib_options().
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ // Append library prefixes based on the *.export.poptions variables
+ // recursively, prerequisite libraries first.
+ //
+ void compile_rule::
+ append_lib_prefixes (const scope& bs,
+ prefix_map& m,
+ action a,
+ target& t,
+ linfo li) const
+ {
+ auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+ auto opt = [&m, this] (
+ const file& l, const string& t, bool com, bool exp)
+ {
+ if (!exp)
+ return;
+
+ const variable& var (
+ com
+ ? c_export_poptions
+ : (t == x
+ ? x_export_poptions
+ : l.ctx.var_pool[t + ".export.poptions"]));
+
+ append_prefixes (m, l, var);
+ };
+
+ // The same logic as in append_lib_options().
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ if (const target* pt = p.load ())
+ {
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, li);
+
+ bool la;
+ if (!((la = pt->is_a<liba> ()) ||
+ (la = pt->is_a<libux> ()) ||
+ pt->is_a<libs> ()))
+ continue;
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ pt->as<file> (), la, 0, // Hack: lflags unused.
+ impf, nullptr, optf);
+ }
+ }
+ }
+
+ // Update the target during the match phase. Return true if it has changed
+ // or if the passed timestamp is not timestamp_unknown and is older than
+ // the target.
+ //
+ // This function is used to make sure header dependencies are up to date.
+ //
+ // There would normally be a lot of headers for every source file (think
+ // all the system headers) and just calling execute_direct() on all of
+ // them can get expensive. At the same time, most of these headers are
+ // existing files that we will never be updating (again, system headers,
+ // for example) and the rule that will match them is the fallback
+ // file_rule. That rule has an optimization: it returns noop_recipe (which
+ // causes the target state to be automatically set to unchanged) if the
+ // file is known to be up to date. So we do the update "smartly".
+ //
+ static bool
+ update (tracer& trace, action a, const target& t, timestamp ts)
+ {
+ const path_target* pt (t.is_a<path_target> ());
+
+ if (pt == nullptr)
+ ts = timestamp_unknown;
+
+ target_state os (t.matched_state (a));
+
+ if (os == target_state::unchanged)
+ {
+ if (ts == timestamp_unknown)
+ return false;
+ else
+ {
+ // We expect the timestamp to be known (i.e., existing file).
+ //
+ timestamp mt (pt->mtime ());
+ assert (mt != timestamp_unknown);
+ return mt > ts;
+ }
+ }
+ else
+ {
+ // We only want to return true if our call to execute() actually
+ // caused an update. In particular, the target could already have been
+ // in target_state::changed because of a dependency extraction run for
+ // some other source file.
+ //
+ // @@ MT perf: so we are going to switch the phase and execute for
+ // any generated header.
+ //
+ phase_switch ps (t.ctx, run_phase::execute);
+ target_state ns (execute_direct (a, t));
+
+ if (ns != os && ns != target_state::unchanged)
+ {
+ l6 ([&]{trace << "updated " << t
+ << "; old state " << os
+ << "; new state " << ns;});
+ return true;
+ }
+ else
+ return ts != timestamp_unknown ? pt->newer (ts) : false;
+ }
+ }
+
+ recipe compile_rule::
+ apply (action a, target& xt) const
+ {
+ tracer trace (x, "compile_rule::apply");
+
+ file& t (xt.as<file> ()); // Either obj*{} or bmi*{}.
+
+ match_data& md (t.data<match_data> ());
+
+ context& ctx (t.ctx);
+
+ // Note: until refined below, non-BMI-generating translation unit is
+ // assumed non-modular.
+ //
+ unit_type ut (md.type);
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ otype ot (compile_type (t, ut));
+ linfo li (link_info (bs, ot)); // Link info for selecting libraries.
+ compile_target_types tts (compile_types (ot));
+
+ // Derive file name from target name.
+ //
+ string e; // Primary target extension (module or object).
+ {
+ const char* o ("o"); // Object extension (.o or .obj).
+
+ if (tsys == "win32-msvc")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe."; break;
+ case otype::a: e = "lib."; break;
+ case otype::s: e = "dll."; break;
+ }
+ o = "obj";
+ }
+ else if (tsys == "mingw32")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe."; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "dll."; break;
+ }
+ }
+ else if (tsys == "darwin")
+ {
+ switch (ot)
+ {
+ case otype::e: e = ""; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "dylib."; break;
+ }
+ }
+ else
+ {
+ switch (ot)
+ {
+ case otype::e: e = ""; break;
+ case otype::a: e = "a."; break;
+ case otype::s: e = "so."; break;
+ }
+ }
+
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ e += (ut != unit_type::non_modular ? "gcm" : o);
+ break;
+ }
+ case compiler_type::clang:
+ {
+ e += (ut != unit_type::non_modular ? "pcm" : o);
+ break;
+ }
+ case compiler_type::msvc:
+ {
+ e += (ut != unit_type::non_modular ? "ifc" : o);
+ break;
+ }
+ case compiler_type::icc:
+ {
+ assert (ut == unit_type::non_modular);
+ e += o;
+ }
+ }
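+
+ // To illustrate (a summary of the switches above, not an exhaustive
+ // list): with GCC on Linux the primary target extension comes out as
+ // "o" (executable object), "a.o" (static), or "so.o" (shared) for
+ // non-modular units and "gcm", "a.gcm", or "so.gcm" for module
+ // interfaces; with MSVC the corresponding values would be "exe.obj",
+ // "lib.obj", "dll.obj" and "exe.ifc", "lib.ifc", "dll.ifc".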
+
+ // If we are compiling a module, then the obj*{} is an ad hoc member
+ // of bmi*{}. For now neither GCC nor Clang produces an object file
+ // for a header unit (but something tells me this is going to change).
+ //
+ if (ut == unit_type::module_iface)
+ {
+ // The module interface unit can be the same as an implementation
+ // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could
+ // collide. So we add the module extension to the target name.
+ //
+ file& obj (add_adhoc_member<file> (t, tts.obj, e.c_str ()));
+
+ if (obj.path ().empty ())
+ obj.derive_path (o);
+ }
+ }
+
+ const path& tp (t.derive_path (e.c_str ()));
+
+ // Inject dependency on the output directory.
+ //
+ const fsdir* dir (inject_fsdir (a, t));
+
+ // Match all the existing prerequisites. The injection code takes care
+ // of the ones it is adding.
+ //
+ // When cleaning, ignore prerequisites that are not in the same or a
+ // subdirectory of our project root.
+ //
+ auto& pts (t.prerequisite_targets[a]);
+ optional<dir_paths> usr_lib_dirs; // Extract lazily.
+
+ // Start asynchronous matching of prerequisites. Wait with unlocked
+ // phase to allow phase switching.
+ //
+ wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true);
+
+ size_t start (pts.size ()); // Index of the first to be added.
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target* pt (nullptr);
+ include_type pi (include (a, t, p));
+
+ if (!pi)
+ continue;
+
+ // A dependency on a library is there so that we can get its
+ // *.export.poptions, modules, etc. This is the library
+ // meta-information protocol. See also append_lib_options().
+ //
+ if (pi == include_type::normal &&
+ (p.is_a<libx> () ||
+ p.is_a<liba> () ||
+ p.is_a<libs> () ||
+ p.is_a<libux> ()))
+ {
+ if (a.operation () == update_id)
+ {
+ // Handle (phase two) imported libraries. We know that for such
+ // libraries we don't need to do match() in order to get options
+ // (if any, they would be set by search_library()).
+ //
+ if (p.proj ())
+ {
+ if (search_library (a,
+ sys_lib_dirs,
+ usr_lib_dirs,
+ p.prerequisite) != nullptr)
+ continue;
+ }
+
+ pt = &p.search (t);
+
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, li);
+ }
+ else
+ continue;
+ }
+ //
+ // For modules we pick only what we import, which is done below, so
+ // skip it here. One corner case is clean: we assume that someone
+ // else (normally library/executable) also depends on it and will
+ // clean it up.
+ //
+ else if (pi == include_type::normal &&
+ (p.is_a<bmi> () || p.is_a (tts.bmi) ||
+ p.is_a<hbmi> () || p.is_a (tts.hbmi)))
+ continue;
+ else
+ {
+ pt = &p.search (t);
+
+ if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ()))
+ continue;
+ }
+
+ match_async (a, *pt, ctx.count_busy (), t[a].task_count);
+ pts.push_back (prerequisite_target (pt, pi));
+ }
+
+ wg.wait ();
+
+ // Finish matching all the targets that we have started.
+ //
+ for (size_t i (start), n (pts.size ()); i != n; ++i)
+ {
+ const target*& pt (pts[i]);
+
+ // Making sure a library is updated before us will only restrict
+ // parallelism. But we do need to match it in order to get its imports
+ // resolved and prerequisite_targets populated. So we match it but
+ // then unmatch if it is safe. And thanks to the two-pass prerequisite
+ // match in link::apply() it will be safe unless someone is building
+ // an obj?{} target directly.
+ //
+ if (build2::match (
+ a,
+ *pt,
+ pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()
+ ? unmatch::safe
+ : unmatch::none))
+ pt = nullptr; // Ignore in execute.
+ }
+
+ // Inject additional prerequisites. We only do it when performing update
+ // since chances are we will have to update some of our prerequisites in
+ // the process (auto-generated source code, header units).
+ //
+ if (a == perform_update_id)
+ {
+ // The cached prerequisite target should be the same as what is in
+ // t.prerequisite_targets since we used standard search() and match()
+ // above.
+ //
+ const file& src (*md.src.search (t).is_a<file> ());
+
+ // Figure out if __symexport is used. While normally it is specified
+ // on the project root (which we cached), it can be overridden with
+ // a target-specific value for installed modules (which we sidebuild
+ // as part of our project).
+ //
+ // @@ MODHDR MSVC: are we going to do the same for header units? I
+ // guess we will figure it out when MSVC supports header units.
+ // Also see hashing below.
+ //
+ if (ut == unit_type::module_iface)
+ {
+ lookup l (src.vars[x_symexport]);
+ md.symexport = l ? cast<bool> (l) : symexport;
+ }
+
+ // Make sure the output directory exists.
+ //
+ // Is this the right thing to do? It does smell a bit, but then we do
+ // worse things in inject_prerequisites() below. There is also no way
+ // to postpone this until update since we need to extract and inject
+ // header dependencies now (we don't want to be calling search() and
+ // match() in update), which means we need to cache them now as well.
+ // So the only alternative, it seems, is to cache the updates to the
+ // database until later, which will surely complicate (and slow down)
+ // things.
+ //
+ if (dir != nullptr)
+ {
+ // We can do it properly by using execute_direct(). But this means
+ // we will be switching to the execute phase with all the associated
+ // overheads. At the same time, in case of update, creation of a
+ // directory is not going to change the external state in any way
+ // that would affect any parallel efforts in building the internal
+ // state. So we are just going to create the directory directly.
+ // Note, however, that we cannot modify the fsdir{} target since
+ // this can very well be happening in parallel. But that's not a
+ // problem since fsdir{}'s update is idempotent.
+ //
+ fsdir_rule::perform_update_direct (a, t);
+ }
+
+ // Note: the leading '@' is reserved for the module map prefix (see
+ // extract_modules()) and no other line must start with it.
+ //
+ depdb dd (tp + ".d");
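+
+ // To illustrate the resulting layout (a sketch only; the exact entries
+ // are produced by the checks below and by extract_headers() and
+ // extract_modules(), with the paths and checksums obviously
+ // hypothetical):
+ //
+ //   <rule id>
+ //   <compiler checksum>
+ //   <options checksum>
+ //   /tmp/hello/hello.cxx
+ //   <translation unit checksum>
+ //   /usr/include/stdio.h
+ //   @ '/usr/include/c++/9/iostream' /tmp/hello/.../iostream.gcm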
+
+ // First should come the rule name/version.
+ //
+ if (dd.expect (rule_id) != nullptr)
+ l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+ // Then the compiler checksum. Note that here we assume it
+ // incorporates the (default) target so that if the compiler changes
+ // but only in what it targets, then the checksum will still change.
+ //
+ if (dd.expect (cast<string> (rs[x_checksum])) != nullptr)
+ l4 ([&]{trace << "compiler mismatch forcing update of " << t;});
+
+ // Then the options checksum.
+ //
+ // The idea is to keep them exactly as they are passed to the compiler
+ // since the order may be significant.
+ //
+ {
+ sha256 cs;
+
+ // These flags affect how we compile the source and/or the format of
+ // depdb so factor them in.
+ //
+ cs.append (&md.pp, sizeof (md.pp));
+
+ if (ut == unit_type::module_iface)
+ cs.append (&md.symexport, sizeof (md.symexport));
+
+ if (import_hdr != nullptr)
+ hash_options (cs, *import_hdr);
+
+ if (md.pp != preprocessed::all)
+ {
+ hash_options (cs, t, c_poptions);
+ hash_options (cs, t, x_poptions);
+
+ // Hash *.export.poptions from prerequisite libraries.
+ //
+ hash_lib_options (bs, cs, a, t, li);
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ hash_option_values (
+ cs, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ();});
+ }
+
+ hash_options (cs, t, c_coptions);
+ hash_options (cs, t, x_coptions);
+ hash_options (cs, tstd);
+
+ if (ot == otype::s)
+ {
+ // On Darwin, Win32 -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ cs.append ("-fPIC");
+ }
+
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "options mismatch forcing update of " << t;});
+ }
+
+ // Finally the source file.
+ //
+ if (dd.expect (src.path ()) != nullptr)
+ l4 ([&]{trace << "source file mismatch forcing update of " << t;});
+
+ // If any of the above checks resulted in a mismatch (different
+ // compiler, options, or source file) or if the depdb is newer than
+ // the target (interrupted update), then do unconditional update.
+ //
+ // Note that load_mtime() can only be used in the execute phase so we
+ // have to check for a cached value manually.
+ //
+ bool u;
+ timestamp mt;
+
+ if (dd.writing ())
+ u = true;
+ else
+ {
+ if ((mt = t.mtime ()) == timestamp_unknown)
+ t.mtime (mt = mtime (tp)); // Cache.
+
+ u = dd.mtime > mt;
+ }
+
+ if (u)
+ mt = timestamp_nonexistent; // Treat as if it doesn't exist.
+
+ // Update prerequisite targets (normally just the source file).
+ //
+ // This is an unusual place and time to do it. But we have to do it
+ // before extracting dependencies. The reasoning for source file is
+ // pretty clear. What other prerequisites could we have? While
+ // normally they will be some other sources (as in, static content
+ // from src_root), it's possible they are some auto-generated stuff.
+ // And it's possible they affect the preprocessor result. Say some ad
+ // hoc/out-of-band compiler input file that is passed via the command
+ // line. So, to be safe, we make sure everything is up to date.
+ //
+ for (const target* pt: pts)
+ {
+ if (pt == nullptr || pt == dir)
+ continue;
+
+ u = update (trace, a, *pt, u ? timestamp_unknown : mt) || u;
+ }
+
+ // Check if the source is already preprocessed to a certain degree.
+ // This determines which of the following steps we perform and on
+ // what source (original or preprocessed).
+ //
+ // Note: must be set on the src target.
+ //
+ if (const string* v = cast_null<string> (src[x_preprocessed]))
+ try
+ {
+ md.pp = to_preprocessed (*v);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "invalid " << x_preprocessed.name << " variable value "
+ << "for target " << src << ": " << e;
+ }
+
+ // If we have no #include directives (or header unit imports), then
+ // skip header dependency extraction.
+ //
+ pair<auto_rmfile, bool> psrc (auto_rmfile (), false);
+ if (md.pp < preprocessed::includes)
+ {
+ // Note: trace is used in a test.
+ //
+ l5 ([&]{trace << "extracting headers from " << src;});
+ psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt);
+ }
+
+ // Next we "obtain" the translation unit information. What exactly
+ // "obtain" entails is tricky: If things changed, then we re-parse the
+ // translation unit. Otherwise, we re-create this information from
+ // depdb. We, however, have to do it here and now in case the database
+ // is invalid and we still have to fall back to re-parsing.
+ //
+ // Store the translation unit's checksum to detect ignorable changes
+ // (whitespaces, comments, etc).
+ //
+ {
+ optional<string> cs;
+ if (string* l = dd.read ())
+ cs = move (*l);
+ else
+ u = true; // Database is invalid, force re-parse.
+
+ unit tu;
+ for (bool first (true);; first = false)
+ {
+ if (u)
+ {
+ // Flush depdb since it can be used (as a module map) by
+ // parse_unit().
+ //
+ if (dd.writing ())
+ dd.flush ();
+
+ auto p (parse_unit (a, t, li, src, psrc.first, md, dd.path));
+
+ if (!cs || *cs != p.second)
+ {
+ assert (first); // Unchanged TU has a different checksum?
+ dd.write (p.second);
+ }
+ //
+ // Don't clear if it was forced or the checksum should not be
+ // relied upon.
+ //
+ else if (first && !p.second.empty ())
+ {
+ // Clear the update flag and set the touch flag. Unless there
+ // is no object file, of course. See also the md.mt logic
+ // below.
+ //
+ if (mt != timestamp_nonexistent)
+ {
+ u = false;
+ md.touch = true;
+ }
+ }
+
+ tu = move (p.first);
+ }
+
+ if (modules)
+ {
+ if (u || !first)
+ {
+ string s (to_string (tu.type, tu.module_info));
+
+ if (first)
+ dd.expect (s);
+ else
+ dd.write (s);
+ }
+ else
+ {
+ if (string* l = dd.read ())
+ {
+ auto p (to_module_info (*l));
+ tu.type = p.first;
+ tu.module_info = move (p.second);
+ }
+ else
+ {
+ u = true; // Database is invalid, force re-parse.
+ continue;
+ }
+ }
+ }
+
+ break;
+ }
+
+ // Make sure the translation unit type matches the resulting target
+ // type.
+ //
+ switch (tu.type)
+ {
+ case unit_type::non_modular:
+ case unit_type::module_impl:
+ {
+ if (ut != unit_type::non_modular)
+ fail << "translation unit " << src << " is not a module interface" <<
+ info << "consider using " << x_src.name << "{} instead";
+ break;
+ }
+ case unit_type::module_iface:
+ {
+ if (ut != unit_type::module_iface)
+ fail << "translation unit " << src << " is a module interface" <<
+ info << "consider using " << x_mod->name << "{} instead";
+ break;
+ }
+ case unit_type::module_header:
+ {
+ assert (ut == unit_type::module_header);
+ break;
+ }
+ }
+
+ // Refine the non-modular/module-impl decision from match().
+ //
+ ut = md.type = tu.type;
+
+ // Note: trace is used in a test.
+ //
+ l5 ([&]{trace << "extracting modules from " << t;});
+
+ // Extract the module dependency information in addition to header
+ // dependencies.
+ //
+ // NOTE: assumes that no further targets will be added into
+ // t.prerequisite_targets!
+ //
+ if (modules)
+ {
+ extract_modules (a, bs, t, li,
+ tts, src,
+ md, move (tu.module_info), dd, u);
+
+ // Currently in VC module interface units must be compiled from
+ // the original source (something to do with having to detect and
+ // store header boundaries in the .ifc files).
+ //
+ // @@ MODHDR MSVC: should we do the same for header units? I guess
+ // we will figure it out when MSVC supports header units.
+ //
+ if (ctype == compiler_type::msvc)
+ {
+ if (ut == unit_type::module_iface)
+ psrc.second = false;
+ }
+ }
+ }
+
+ // If anything got updated, then we didn't rely on the cache. However,
+ // the cached data could actually have been valid and the compiler run
+ // in extract_headers() as well as the code above merely validated it.
+ //
+ // We do need to update the database timestamp, however. Failing that,
+ // we will keep re-validating the cached data over and over again.
+ //
+ // @@ DRYRUN: note that for dry-run we would keep re-touching the
+ // database on every run (because u is true). So for now we suppress
+ // it (the file will be re-validated on the real run anyway). It feels
+ // like support for reusing the (partially) preprocessed output (see
+ // note below) should help solve this properly (i.e., we don't want
+ // to keep re-validating the file on every subsequent dry-run as well
+ // on the real run).
+ //
+ if (u && dd.reading () && !ctx.dry_run)
+ dd.touch = true;
+
+ dd.close ();
+ md.dd = move (dd.path);
+
+ // If the preprocessed output is suitable for compilation, then pass
+ // it along.
+ //
+ if (psrc.second)
+ {
+ md.psrc = move (psrc.first);
+
+ // Without modules keeping the (partially) preprocessed output
+ // around doesn't buy us much: if the source/headers haven't changed
+ // then neither will the object file. Modules make things more
+ // interesting: now we may have to recompile an otherwise unchanged
+ // translation unit because a BMI it depends on has changed. In this
+ // case re-processing the translation unit would be a waste and
+ // compiling the original source would break distributed
+ // compilation.
+ //
+ // Note also that the long term trend will (hopefully) be for
+ // modularized projects to get rid of #include's which means the
+ // need for producing this partially preprocessed output will
+ // (hopefully) gradually disappear.
+ //
+ if (modules)
+ md.psrc.active = false; // Keep.
+ }
+
+ // Above we may have ignored changes to the translation unit. The
+ // problem is, unless we also update the target's timestamp, we will
+ // keep re-checking this on subsequent runs and it is not cheap.
+ // Updating the target's timestamp is not without problems either: it
+ // will cause a re-link on a subsequent run. So, essentially, we
+ // somehow need to remember two timestamps: one for checking
+ // "preprocessor prerequisites" above and one for checking other
+ // prerequisites (like modules) below. So what we are going to do is
+ // store the first in the target file (so we do touch it) and the
+ // second in depdb (which is never newer than the target).
+ //
+ // Perhaps when we start keeping the partially preprocessed output
+ // this will fall away? Yes, please.
+ //
+ md.mt = u ? timestamp_nonexistent : dd.mtime;
+ }
+
+ switch (a)
+ {
+ case perform_update_id: return [this] (action a, const target& t)
+ {
+ return perform_update (a, t);
+ };
+ case perform_clean_id: return [this] (action a, const target& t)
+ {
+ return perform_clean (a, t);
+ };
+ default: return noop_recipe; // Configure update.
+ }
+ }
+
+ // Reverse-lookup target type(s) from extension.
+ //
+ small_vector<const target_type*, 2> compile_rule::
+ map_extension (const scope& s, const string& n, const string& e) const
+ {
+ // We will just have to try all of the possible ones, in the "most
+ // likely to match" order.
+ //
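+ // For example (an assumed setup), if a project maps the '.h'
+ // extension to both h{} and the X-language header type, then for
+ // e == "h" this would return both candidates and the caller
+ // disambiguates by looking for an explicitly spelled-out target
+ // (see enter_header() below).
+ //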
+ auto test = [&s, &n, &e] (const target_type& tt) -> bool
+ {
+ // Call the extension derivation function. Here we know that it will
+ // only use the target type and name from the target key so we can
+ // pass bogus values for the rest.
+ //
+ target_key tk {&tt, nullptr, nullptr, &n, nullopt};
+
+ // This is like prerequisite search.
+ //
+ optional<string> de (tt.default_extension (tk, s, nullptr, true));
+
+ return de && *de == e;
+ };
+
+ small_vector<const target_type*, 2> r;
+
+ for (const target_type* const* p (x_inc); *p != nullptr; ++p)
+ if (test (**p))
+ r.push_back (*p);
+
+ return r;
+ }
+
+ void compile_rule::
+ append_prefixes (prefix_map& m, const target& t, const variable& var) const
+ {
+ tracer trace (x, "compile_rule::append_prefixes");
+
+ // If this target does not belong to any project (e.g., an "imported as
+ // installed" library), then it can't possibly generate any headers for
+ // us.
+ //
+ const scope& bs (t.base_scope ());
+ const scope* rs (bs.root_scope ());
+ if (rs == nullptr)
+ return;
+
+ const dir_path& out_base (t.dir);
+ const dir_path& out_root (rs->out_path ());
+
+ if (auto l = t[var])
+ {
+ const auto& v (cast<strings> (l));
+
+ for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+ {
+ // -I can either be in the "-Ifoo" or "-I foo" form. For VC it can
+ // also be /I.
+ //
+ const string& o (*i);
+
+ if (o.size () < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+ continue;
+
+ dir_path d;
+
+ try
+ {
+ if (o.size () == 2)
+ {
+ if (++i == e)
+ break; // Let the compiler complain.
+
+ d = dir_path (*i);
+ }
+ else
+ d = dir_path (*i, 2, string::npos);
+ }
+ catch (const invalid_path& e)
+ {
+ fail << "invalid directory '" << e.path << "'"
+ << " in option '" << o << "'"
+ << " in variable " << var
+ << " for target " << t;
+ }
+
+ l6 ([&]{trace << "-I " << d;});
+
+ if (d.relative ())
+ fail << "relative directory " << d
+ << " in option '" << o << "'"
+ << " in variable " << var
+ << " for target " << t;
+
+ // If the directory is not normalized, we can complain or normalize
+ // it. Let's go with normalizing to minimize questions/complaints.
+ //
+ if (!d.normalized (false)) // Allow non-canonical dir separators.
+ d.normalize ();
+
+ // If we are not inside our project root, then ignore.
+ //
+ if (!d.sub (out_root))
+ continue;
+
+ // If the target directory is a sub-directory of the include
+ // directory, then the prefix is the difference between the
+ // two. Otherwise, leave it empty.
+ //
+ // The idea here is to make this "canonical" setup work auto-
+ // magically:
+ //
+ // 1. We include all files with a prefix, e.g., <foo/bar>.
+ // 2. The library target is in the foo/ sub-directory, e.g.,
+ // /tmp/foo/.
+ // 3. The poptions variable contains -I/tmp.
+ //
+ dir_path p (out_base.sub (d) ? out_base.leaf (d) : dir_path ());
+
+ // We use the target's directory as out_base but that doesn't work
+ // well for targets that are stashed in subdirectories. So as a
+ // heuristic, we are going to also enter the outer directories of
+ // the original prefix. It is, however, possible that another -I
+ // option after this one will produce one of these outer prefixes as
+ // its original prefix, in which case we should override it.
+ //
+ // So we are going to assign the original prefix priority value 0
+ // (highest) and then increment it for each outer prefix.
+ //
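+ // For example (hypothetical paths), with -I/tmp and out_base
+ // /tmp/foo/bar/ we would enter 'foo/bar' (priority 0), 'foo'
+ // (priority 1), and '' (priority 2), all mapping to /tmp.
+ //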
+ auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio)
+ {
+ auto j (m.find (p));
+
+ if (j != m.end ())
+ {
+ prefix_value& v (j->second);
+
+ // We used to reject duplicates but it seems this can be
+ // reasonably expected to work according to the order of the
+ // -I options.
+ //
+ // Seeing that we normally have more "specific" -I paths first
+ // (so that we don't pick up installed headers, etc.), we ignore
+ // it.
+ //
+ if (v.directory == d)
+ {
+ if (v.priority > prio)
+ v.priority = prio;
+ }
+ else if (v.priority <= prio)
+ {
+ if (verb >= 4)
+ trace << "ignoring mapping for prefix '" << p << "'\n"
+ << " existing mapping to " << v.directory
+ << " priority " << v.priority << '\n'
+ << " another mapping to " << d
+ << " priority " << prio;
+ }
+ else
+ {
+ if (verb >= 4)
+ trace << "overriding mapping for prefix '" << p << "'\n"
+ << " existing mapping to " << v.directory
+ << " priority " << v.priority << '\n'
+ << " new mapping to " << d
+ << " priority " << prio;
+
+ v.directory = move (d);
+ v.priority = prio;
+ }
+ }
+ else
+ {
+ l6 ([&]{trace << "'" << p << "' -> " << d << " priority "
+ << prio;});
+ m.emplace (move (p), prefix_value {move (d), prio});
+ }
+ };
+
+#if 1
+ // Enter all outer prefixes, including prefixless.
+ //
+ // The prefixless part is fuzzy but seems to be doing the right
+ // thing ignoring/overriding-wise, at least in cases where one of
+ // the competing -I paths is a subdirectory of another. But the
+ // proper solution will be to keep all the prefixless entries (by
+ // changing prefix_map to a multimap) since for them we have an
+ // extra check (target must be explicitly spelled out in a
+ // buildfile).
+ //
+ for (size_t prio (0);; ++prio)
+ {
+ bool e (p.empty ());
+ enter ((e ? move (p) : p), (e ? move (d) : d), prio);
+ if (e)
+ break;
+ p = p.directory ();
+ }
+#else
+ size_t prio (0);
+ for (bool e (false); !e; ++prio)
+ {
+ dir_path n (p.directory ());
+ e = n.empty ();
+ enter ((e ? move (p) : p), (e ? move (d) : d), prio);
+ p = move (n);
+ }
+#endif
+ }
+ }
+ }
+
+ auto compile_rule::
+ build_prefix_map (const scope& bs,
+ action a,
+ target& t,
+ linfo li) const -> prefix_map
+ {
+ prefix_map m;
+
+ // First process our own.
+ //
+ append_prefixes (m, t, c_poptions);
+ append_prefixes (m, t, x_poptions);
+
+ // Then process the include directories from prerequisite libraries.
+ //
+ append_lib_prefixes (bs, m, a, t, li);
+
+ return m;
+ }
+
+ // Return the next make prerequisite starting from the specified
+ // position and update position to point to the start of the
+ // following prerequisite or l.size() if there are none left.
+ //
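+ // For example (an illustrative fragment; assuming the position
+ // starts after the 'foo.o:' target part), given the line:
+ //
+ //   foo.o: foo.cxx my\ dir/bar.hxx $$tmp/baz.hxx \
+ //
+ // successive calls would return 'foo.cxx', 'my dir/bar.hxx', and
+ // '$tmp/baz.hxx', leaving the position at l.size ().
+ //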
+ static string
+ next_make (const string& l, size_t& p)
+ {
+ size_t n (l.size ());
+
+ // Skip leading spaces.
+ //
+ for (; p != n && l[p] == ' '; p++) ;
+
+ // Lines containing multiple prerequisites are 80 characters max.
+ //
+ string r;
+ r.reserve (n);
+
+ // Scan the next prerequisite while watching out for escape sequences.
+ //
+ for (; p != n && l[p] != ' '; p++)
+ {
+ char c (l[p]);
+
+ if (p + 1 != n)
+ {
+ if (c == '$')
+ {
+ // Got to be another (escaped) '$'.
+ //
+ if (l[p + 1] == '$')
+ ++p;
+ }
+ else if (c == '\\')
+ {
+ // This may or may not be an escape sequence depending on whether
+ // what follows is "escapable".
+ //
+ switch (c = l[++p])
+ {
+ case '\\': break;
+ case ' ': break;
+ default: c = '\\'; --p; // Restore.
+ }
+ }
+ }
+
+ r += c;
+ }
+
+ // Skip trailing spaces.
+ //
+ for (; p != n && l[p] == ' '; p++) ;
+
+ // Skip final '\'.
+ //
+ if (p == n - 1 && l[p] == '\\')
+ p++;
+
+ return r;
+ }
+
+ // VC /showIncludes output. The first line is the file being compiled
+ // (handled by our caller). Then we have the list of headers, one per
+ // line, in this form (text can presumably be translated):
+ //
+ // Note: including file: C:\Program Files (x86)\[...]\iostream
+ //
+ // Finally, if we hit a non-existent header, then we end with an error
+ // line in this form:
+ //
+ // x.cpp(3): fatal error C1083: Cannot open include file: 'd/h.hpp':
+ // No such file or directory
+ //
+ // Distinguishing between the include note and the include error is
+ // easy: we can just check for C1083. Distinguishing between the note and
+ // other errors/warnings is harder: an error could very well end with
+ // what looks like a path so we cannot look for the note but rather have
+ // to look for an error. Here we assume that a line containing ' CNNNN:'
+ // is an error. Should be robust enough in the face of language
+ // translation, etc.
+ //
+ // It turns out C1083 is also used when we are unable to open the main
+ // source file and the error line (which is printed after the first line
+ // containing the file name) looks like this:
+ //
+ // c1xx: fatal error C1083: Cannot open source file: 's.cpp': No such
+ // file or directory
+
+ size_t
+ msvc_sense_diag (const string&, char); // msvc.cxx
+
+ // Extract the include path from the VC /showIncludes output line. Return
+ // empty string if the line is not an include note or include error. Set
+ // the good_error flag if it is an include error (which means the process
+ // will terminate with the error status that needs to be ignored).
+ //
+ static string
+ next_show (const string& l, bool& good_error)
+ {
+ // The include error should be the last line that we handle.
+ //
+ assert (!good_error);
+
+ size_t p (msvc_sense_diag (l, 'C'));
+ if (p == string::npos)
+ {
+ // Include note.
+ //
+ // We assume the path is always at the end but need to handle both
+ // absolute Windows and POSIX ones.
+ //
+ // Note that VC appears to always write the absolute path to the
+ // included file even if it is ""-included and the source path is
+ // relative. Aren't we lucky today?
+ //
+ p = l.rfind (':');
+
+ if (p != string::npos)
+ {
+ // See if this one is part of the Windows drive letter.
+ //
+ if (p > 1 && p + 1 < l.size () && // 2 chars before, 1 after.
+ l[p - 2] == ' ' &&
+ alpha (l[p - 1]) &&
+ path::traits_type::is_separator (l[p + 1]))
+ p = l.rfind (':', p - 2);
+ }
+
+ if (p != string::npos)
+ {
+ // VC uses indentation to indicate the include nesting so there
+ // could be any number of spaces after ':'. Skip them.
+ //
+ p = l.find_first_not_of (' ', p + 1);
+ }
+
+ if (p == string::npos)
+ fail << "unable to parse /showIncludes include note line \""
+ << l << '"';
+
+ return string (l, p);
+ }
+ else if (l.compare (p, 4, "1083") == 0 &&
+ l.compare (0, 5, "c1xx:") != 0 /* Not the main source file. */ )
+ {
+ // Include error.
+ //
+ // The path is conveniently quoted with ''. Or so we thought: turns
+ // out different translations (e.g., Chinese) can use different quote
+ // characters. But the overall structure seems to be stable:
+ //
+ // ...C1083: <translated>: 'd/h.hpp': <translated>
+ //
+ // Plus, it seems the quote character could be multi-byte.
+ //
+ size_t p1 (l.find (':', p + 5));
+ size_t p2 (l.rfind (':'));
+
+ if (p1 != string::npos &&
+ p2 != string::npos &&
+ (p2 - p1) > 4 && // At least ": 'x':".
+ l[p1 + 1] == ' ' &&
+ l[p2 + 1] == ' ')
+ {
+ p1 += 3; // First character of the path.
+ p2 -= 1; // One past last character of the path.
+
+ // Skip any non-printable ASCII characters before/after (the multi-
+ // byte quote case).
+ //
+ auto printable = [] (char c) { return c >= 0x20 && c <= 0x7e; };
+
+ for (; p1 != p2 && !printable (l[p1]); ++p1) ;
+ for (; p2 != p1 && !printable (l[p2 - 1]); --p2) ;
+
+ if (p1 != p2)
+ {
+ good_error = true;
+ return string (l, p1, p2 - p1);
+ }
+ }
+
+ fail << "unable to parse /showIncludes include error line \""
+ << l << '"' << endf;
+ }
+ else
+ {
+ // Some other error.
+ //
+ return string ();
+ }
+ }
+
+ void
+ msvc_sanitize_cl (cstrings&); // msvc.cxx
+
+ // GCC module mapper handler.
+ //
+ // Note that the input stream is non-blocking while output is blocking
+ // and this function should be prepared to handle closed input stream.
+ // Any unhandled io_error is handled by the caller as a generic module
+ // mapper io error.
+ //
+ struct compile_rule::module_mapper_state
+ {
+ size_t headers = 0; // Number of header units imported.
+ size_t skip; // Number of depdb entries to skip.
+ string data; // Auxiliary data.
+
+ explicit
+ module_mapper_state (size_t skip_count): skip (skip_count) {}
+ };
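+
+ // An example session (illustrative only; the request values and
+ // resolved paths are hypothetical) as it would appear in the
+ // verb >= 3 trace below:
+ //
+ //   > HELLO 1 GCC ident
+ //   < HELLO 0 build2 .
+ //   > INCLUDE <iostream> /usr/include/c++/9/iostream
+ //   < INCLUDE
+ //   > IMPORT <string> /usr/include/c++/9/string
+ //   < IMPORT /tmp/hello/.../string.gcm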
+
+ void compile_rule::
+ gcc_module_mapper (module_mapper_state& st,
+ action a, const scope& bs, file& t, linfo li,
+ ifdstream& is,
+ ofdstream& os,
+ depdb& dd, bool& update, bool& bad_error,
+ optional<prefix_map>& pfx_map, srcout_map& so_map) const
+ {
+ tracer trace (x, "compile_rule::gcc_module_mapper");
+
+ // Read in the request line.
+ //
+ // Because the dynamic mapper is only used during preprocessing, we
+ // can assume there is no batching and expect to see one line at a
+ // time.
+ //
+ string rq;
+#if 1
+ if (!eof (getline (is, rq)))
+ {
+ if (rq.empty ())
+ rq = "<empty>"; // Not to confuse with EOF.
+ }
+#else
+ for (char buf[4096]; !is.eof (); )
+ {
+ streamsize n (is.readsome (buf, sizeof (buf) - 1));
+ buf[n] = '\0';
+
+ if (char* p = strchr (buf, '\n'))
+ {
+ *p = '\0';
+
+ if (++p != buf + n)
+ fail << "batched module mapper request: '" << p << "'";
+
+ rq += buf;
+ break;
+ }
+ else
+ rq += buf;
+ }
+#endif
+
+ if (rq.empty ()) // EOF
+ return;
+
+ // @@ MODHDR: Should we print the pid we are talking to? It gets hard to
+ // follow once things get nested. But if all our diag will
+ // include some kind of id (chain, thread?), then this will
+ // not be strictly necessary.
+ //
+ if (verb >= 3)
+ text << " > " << rq;
+
+ // Check for a command. If match, remove it and the following space from
+ // the request string saving it in cmd (for diagnostics) unless the
+ // second argument is false, and return true.
+ //
+ const char* cmd (nullptr);
+ auto command = [&rq, &cmd] (const char* c, bool r = true)
+ {
+ size_t n (strlen (c));
+ bool m (rq.compare (0, n, c) == 0 && rq[n] == ' ');
+
+ if (m && r)
+ {
+ cmd = c;
+ rq.erase (0, n + 1);
+ }
+
+ return m;
+ };
+
+ string rs;
+ for (;;) // Breakout loop.
+ {
+ // Each command is responsible for handling its auxiliary data while we
+ // just clear it.
+ //
+ string data (move (st.data));
+
+ if (command ("HELLO"))
+ {
+ // HELLO <ver> <kind> <ident>
+ //
+ //@@ MODHDR TODO: check protocol version.
+
+ // We don't use "repository path" (whatever it is) so we pass '.'.
+ //
+ rs = "HELLO 0 build2 .";
+ }
+ //
+ // Turns out it's easiest to handle IMPORT together with INCLUDE since
+ // it can also trigger a re-search, etc. In a sense, IMPORT is all of
+ // the INCLUDE logic (skipping translation) plus the BMI dependency
+ // synthesis.
+ //
+ else if (command ("INCLUDE") || command ("IMPORT"))
+ {
+ // INCLUDE [<"']<name>[>"'] <path>
+ // IMPORT [<"']<name>[>"'] <path>
+ // IMPORT '<path>'
+ //
+ // <path> is the resolved path or empty if the header is not found.
+ // It can be relative if it is derived from a relative path (either
+ // via -I or includer). If <name> is single-quoted, then it cannot
+ // be re-searched (e.g., implicitly included stdc-predef.h) and in
+ // this case <path> is never empty.
+ //
+ // In case of re-search or include translation we may have to split
+ // handling the same include or import across multiple commands.
+ // Here are the scenarios in question:
+ //
+ // INCLUDE --> SEARCH -?-> INCLUDE
+ // IMPORT --> SEARCH -?-> IMPORT
+ // INCLUDE --> IMPORT -?-> IMPORT
+ //
+ // The problem is we may not necessarily get the "followup" command
+ // (the question marks above). We may not get the followup after
+ // SEARCH because, for example, the newly found header has already
+ // been included/imported using a different style/path. Similarly,
+ // the IMPORT response may not be followed up with the IMPORT
+ // command because this header has already been imported, for
+ // example, using an import declaration. Throw into this #pragma
+ // once, include guards, and how exactly the compiler deals with
+ // them and things become truly unpredictable and hard to reason
+ // about. As a result, for each command we have to keep the build
+ // state consistent, specifically, without any "dangling" matched
+ // targets (which would lead to skewed dependency counts). Note: the
+ // include translation is no longer a problem since we respond with
+ // an immediate BMI.
+ //
+ // To keep things simple we are going to always add a target that we
+ // matched to our prerequisite_targets. This includes the header
+ // target when building the BMI: while not ideal, this should be
+ // harmless provided we don't take its state/mtime into account.
+ //
+ // One thing we do want to handle specially is the "maybe-followup"
+ // case discussed above. It is hard to distinguish from an unrelated
+ // INCLUDE/IMPORT (we could have saved <name> and maybe correlated
+ // based on that). But if we don't, then we will keep matching and
+ // adding each target twice. What we can do, however, is check
+ // whether this target is already in prerequisite_targets and skip
+ // it if that's the case, which is a valid thing to do whether it is
+ // a followup or an unrelated command. In fact, for a followup, we
+ // only need to check the last element in prerequisite_targets.
+ //
+ // This approach strikes a reasonable balance between keeping things
+ // simple and handling normal cases without too much overhead. Note
+ // that we may still end up matching and adding the same targets
+ // multiple times for pathological cases, like when the same header
+ // is included using a different style/path, etc. We could, however,
+ // take care of this by searching the entire prerequisite_targets,
+ // which is always an option (and which would probably be required
+ // if the compiler were to send the INCLUDE command before checking
+ // for #pragma once or include guards, which GCC does not do).
+ //
+ // One thing that we cannot do without distinguishing followup and
+ // unrelated commands is verify the remapped header found by the
+ // compiler resolves to the expected target. So we will also do the
+ // correlation via <name>.
+ //
+ bool imp (cmd[1] == 'M');
+
+ path f; // <path> or <name> if doesn't exist
+ string n; // [<"']<name>[>"']
+ bool exists; // <path> is not empty
+ bool searchable; // <name> is not single-quoted
+ {
+ char q (rq[0]); // Opening quote.
+ q = (q == '<' ? '>' :
+ q == '"' ? '"' :
+ q == '\'' ? '\'' : '\0'); // Closing quote.
+
+ size_t s (rq.size ()), qp; // Quote position.
+ if (q == '\0' || (qp = rq.find (q, 1)) == string::npos)
+ break; // Malformed command.
+
+ n.assign (rq, 0, qp + 1);
+
+ size_t p (qp + 1);
+ if (imp && q == '\'' && p == s) // IMPORT '<path>'
+ {
+ exists = true;
+ // Leave f empty and fall through.
+ }
+ else
+ {
+ if (p != s && rq[p++] != ' ') // Skip following space, if any.
+ break;
+
+ exists = (p != s);
+
+ if (exists)
+ {
+ rq.erase (0, p);
+ f = path (move (rq));
+ assert (!f.empty ());
+ }
+ //else // Leave f empty and fall through.
+ }
+
+ if (f.empty ())
+ {
+ rq.erase (0, 1); // Opening quote.
+ rq.erase (qp - 1); // Closing quote and trailing space, if any.
+ f = path (move (rq));
+ }
+
+ // Complete relative paths so as not to confuse them with non-existent.
+ //
+ if (exists && !f.absolute ())
+ f.complete ();
+
+ searchable = (q != '\'');
+ }
+
+ // The skip_count logic: in a nutshell (and similar to the non-
+ // mapper case), we may have "processed" some portion of the headers
+ // based on the depdb cache and we need to avoid re-processing them
+ // here. See the skip_count discussion for details.
+ //
+ // Note also that we need to be careful not to decrement the
+ // count for re-searches and include translation.
+ //
+ bool skip (st.skip != 0);
+
+ // The first part is the same for both INCLUDE and IMPORT: resolve
+ // the header path to target, update it, and trigger re-search if
+ // necessary.
+ //
+ const file* ht (nullptr);
+ auto& pts (t.prerequisite_targets[a]);
+
+ // If this is a followup command (or indistinguishable from one),
+ // then as a sanity check verify the header found by the compiler
+ // resolves to the expected target.
+ //
+ if (data == n)
+ {
+ assert (!skip); // We shouldn't be re-searching while skipping.
+
+ if (exists)
+ {
+ pair<const file*, bool> r (
+ enter_header (a, bs, t, li,
+ move (f), false /* cache */,
+ pfx_map, so_map));
+
+ if (!r.second) // Shouldn't be remapped.
+ ht = r.first;
+ }
+
+ if (ht != pts.back ())
+ {
+ ht = static_cast<const file*> (pts.back ().target);
+ rs = "ERROR expected header '" + ht->path ().string () +
+ "' to be found instead";
+ bad_error = true; // We expect an error from the compiler.
+ break;
+ }
+
+ // Fall through.
+ }
+ else
+ {
+ // Enter, update, and see if we need to re-search this header.
+ //
+ bool updated (false), remapped;
+ try
+ {
+ pair<const file*, bool> er (
+ enter_header (a, bs, t, li,
+ move (f), false /* cache */,
+ pfx_map, so_map));
+
+ ht = er.first;
+ remapped = er.second;
+
+ if (remapped && !searchable)
+ {
+ rs = "ERROR remapping non-re-searchable header " + n;
+ bad_error = true;
+ break;
+ }
+
+ // If we couldn't enter this header as a target (as opposed to
+ // not finding a rule to update it), then our diagnostics won't
+ // really add anything to the compiler's.
+ //
+ if (ht == nullptr)
+ {
+ assert (!exists); // Sanity check.
+ throw failed ();
+ }
+
+ // Note that we explicitly update even for IMPORT (instead of,
+ // say, letting the BMI rule do it implicitly) since we may need
+ // to cause a re-search (see below).
+ //
+ if (!skip)
+ {
+ if (pts.empty () || pts.back () != ht)
+ {
+ optional<bool> ir (inject_header (a, t,
+ *ht, false /* cache */,
+ timestamp_unknown));
+ assert (ir); // Not from cache.
+ updated = *ir;
+ }
+ else
+ assert (exists);
+ }
+ else
+ assert (exists && !remapped); // Maybe this should be an error.
+ }
+ catch (const failed&)
+ {
+ // If the header does not exist or could not be updated, do we
+ // want our diagnostics, the compiler's, or both? We definitely
+ // want the compiler's since it points to the exact location.
+ // Ours could also be helpful. So while it will look a bit
+ // messy, let's keep both (it would have been nicer to print
+ // ours after the compiler's but that isn't easy).
+ //
+ rs = !exists
+ ? string ("INCLUDE")
+ : ("ERROR unable to update header '" +
+ (ht != nullptr ? ht->path () : f).string () + "'");
+
+ bad_error = true;
+ break;
+ }
+
+ if (!imp) // Indirect prerequisite (see above).
+ update = updated || update;
+
+ // A mere update is not enough to cause a re-search. It either had
+ // to also not exist or be remapped.
+ //
+ if ((updated && !exists) || remapped)
+ {
+ rs = "SEARCH";
+ st.data = move (n); // Followup correlation.
+ break;
+ }
+
+ // Fall through.
+ }
+
+ // Now handle INCLUDE and IMPORT differences.
+ //
+ const string& hp (ht->path ().string ());
+
+ // Reduce include translation to the import case.
+ //
+ if (!imp && import_hdr != nullptr)
+ {
+ const strings& ih (*import_hdr);
+
+ auto i (lower_bound (ih.begin (),
+ ih.end (),
+ hp,
+ [] (const string& x, const string& y)
+ {
+ return path::traits_type::compare (x, y) < 0;
+ }));
+
+ imp = (i != ih.end () && *i == hp);
+ }
+
+ if (imp)
+ {
+ try
+ {
+ // Synthesize the BMI dependency then update and add the BMI
+ // target as a prerequisite.
+ //
+ const file& bt (make_header_sidebuild (a, bs, li, *ht));
+
+ if (!skip)
+ {
+ optional<bool> ir (inject_header (a, t,
+ bt, false /* cache */,
+ timestamp_unknown));
+ assert (ir); // Not from cache.
+ update = *ir || update;
+ }
+
+ const string& bp (bt.path ().string ());
+
+ if (!skip)
+ {
+ // @@ MODHDR: we write normalized path while the compiler will
+ // look for the original. In particular, this means
+ // that paths with `..` won't work. Maybe write
+ // original for mapping and normalized for our use?
+ //
+ st.headers++;
+ dd.expect ("@ '" + hp + "' " + bp);
+ }
+ else
+ st.skip--;
+
+ rs = "IMPORT " + bp;
+ }
+ catch (const failed&)
+ {
+ rs = "ERROR unable to update header unit '" + hp + "'";
+ bad_error = true;
+ break;
+ }
+ }
+ else
+ {
+ if (!skip)
+ dd.expect (hp);
+ else
+ st.skip--;
+
+ rs = "INCLUDE";
+ }
+ }
+
+ break;
+ }
+
+ if (rs.empty ())
+ {
+ rs = "ERROR unexpected command '";
+
+ if (cmd != nullptr)
+ {
+ rs += cmd; // Add the command back.
+ rs += ' ';
+ }
+
+ rs += rq;
+ rs += "'";
+
+ bad_error = true;
+ }
+
+ if (verb >= 3)
+ text << " < " << rs;
+
+ os << rs << endl;
+ }
+
+ // Enter as a target a header file. Depending on the cache flag, the file
+ // is assumed to either have come from the depdb cache or from the
+ // compiler run.
+ //
+ // Return the header target and an indication of whether it was remapped
+ // or NULL if the header does not exist and cannot be generated. In the
+ // latter case the passed header path is guaranteed to be still valid but
+ // might have been adjusted (e.g., normalized, etc).
+ //
+ // Note: this used to be a lambda inside extract_headers() so refer to the
+ // body of that function for the overall picture.
+ //
+ pair<const file*, bool> compile_rule::
+ enter_header (action a, const scope& bs, file& t, linfo li,
+ path&& f, bool cache,
+ optional<prefix_map>& pfx_map, srcout_map& so_map) const
+ {
+ tracer trace (x, "compile_rule::enter_header");
+
+ // Find or maybe insert the target. The directory is only moved from if
+ // insert is true.
+ //
+ auto find = [&trace, &t, this] (dir_path&& d,
+ path&& f,
+ bool insert) -> const file*
+ {
+ // Split the file into its name part and extension. Here we can assume
+ // the name part is a valid filesystem name.
+ //
+ // Note that if the file has no extension, we record an empty
+ // extension rather than NULL (which would signify that the default
+ // extension should be added).
+ //
+ string e (f.extension ());
+ string n (move (f).string ());
+
+ if (!e.empty ())
+ n.resize (n.size () - e.size () - 1); // One for the dot.
+
+ // See if this directory is part of any project out_root hierarchy and
+ // if so determine the target type.
+ //
+ // Note that this will miss all the headers that come from src_root
+ // (so they will be treated as generic C headers below). Generally, we
+ // don't have the ability to determine that some file belongs to
+ // src_root of some project. But that's not a problem for our
+ // purposes: it is only important for us to accurately determine
+ // target types for headers that could be auto-generated.
+ //
+ // While at it also try to determine if this target is from the src or
+ // out tree of said project.
+ //
+ dir_path out;
+
+ // It's possible the extension-to-target type mapping is ambiguous
+ // (usually because both C and X-language headers use the same .h
+ // extension). In this case we will first try to find one that matches
+ // an explicit target (similar logic to when insert is false).
+ //
+ small_vector<const target_type*, 2> tts;
+
+ const scope& bs (t.ctx.scopes.find (d));
+ if (const scope* rs = bs.root_scope ())
+ {
+ tts = map_extension (bs, n, e);
+
+ if (bs.out_path () != bs.src_path () && d.sub (bs.src_path ()))
+ out = out_src (d, *rs);
+ }
+
+ // If it is outside any project, or the project doesn't have such an
+ // extension, assume it is a plain old C header.
+ //
+ if (tts.empty ())
+ {
+ // If the project doesn't "know" this extension then we can't
+ // possibly find an explicit target of this type.
+ //
+ if (!insert)
+ return nullptr;
+
+ tts.push_back (&h::static_type);
+ }
+
+ // Find or insert target.
+ //
+ // Note that in case of the target type ambiguity we first try to find
+ // an explicit target that resolves this ambiguity.
+ //
+ const target* r (nullptr);
+
+ if (!insert || tts.size () > 1)
+ {
+ // Note that we skip any target type-specific searches (like for an
+ // existing file) and go straight for the target object since we
+ // need to find the target explicitly spelled out.
+ //
+ // Also, it doesn't feel like we should be able to resolve an
+ // absolute path with a spelled-out extension to multiple targets.
+ //
+ for (const target_type* tt: tts)
+ if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr)
+ break;
+
+ // Note: we can't do this because of the in-source builds where
+ // there won't be explicit targets for non-generated headers.
+ //
+ // This should be harmless, however, since in our world generated
+ // headers are normally spelled-out as explicit targets. And if not,
+ // we will still get an error, just a bit less specific.
+ //
+#if 0
+ if (r == nullptr && insert)
+ {
+ f = d / n;
+ if (!e.empty ())
+ {
+ f += '.';
+ f += e;
+ }
+
+ diag_record dr (fail);
+ dr << "mapping of header " << f << " to target type is ambiguous";
+ for (const target_type* tt: tts)
+ dr << info << "could be " << tt->name << "{}";
+ dr << info << "spell-out its target to resolve this ambiguity";
+ }
+#endif
+ }
+
+ // @@ OPT: move d, out, n
+ //
+ if (r == nullptr && insert)
+ r = &search (t, *tts[0], d, out, n, &e, nullptr);
+
+ return static_cast<const file*> (r);
+ };
+
+ // If it's not absolute then it either does not (yet) exist or is a
+ // relative ""-include (see init_args() for details). Reduce the second
+ // case to absolute.
+ //
+ // Note: we now always use absolute path to the translation unit so this
+ // no longer applies. But let's keep it for posterity.
+ //
+#if 0
+ if (f.relative () && rels.relative ())
+ {
+ // If the relative source path has a directory component, make sure
+ // it matches since ""-include will always start with that (none of
+ // the compilers we support try to normalize this path). Failed that
+ // we may end up searching for a generated header in a random
+ // (working) directory.
+ //
+ const string& fs (f.string ());
+ const string& ss (rels.string ());
+
+ size_t p (path::traits::rfind_separator (ss));
+
+ if (p == string::npos || // No directory.
+ (fs.size () > p + 1 &&
+ path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0))
+ {
+ path t (work / f); // The rels path is relative to work.
+
+ if (exists (t))
+ f = move (t);
+ }
+ }
+#endif
+
+ const file* pt (nullptr);
+ bool remapped (false);
+
+ // If still relative then it does not exist.
+ //
+ if (f.relative ())
+ {
+ // This is probably as often an error as an auto-generated file, so
+ // trace at level 4.
+ //
+ l4 ([&]{trace << "non-existent header '" << f << "'";});
+
+ f.normalize ();
+
+ // The relative path might still contain '..' (e.g., ../foo.hxx;
+ // presumably ""-include'ed). We don't attempt to support auto-
+ // generated headers with such inclusion styles.
+ //
+ if (f.normalized ())
+ {
+ if (!pfx_map)
+ pfx_map = build_prefix_map (bs, a, t, li);
+
+ // First try the whole file. Then just the directory.
+ //
+ // @@ Has to be a separate map since the prefix can be the same as
+ // the file name.
+ //
+ // auto i (pfx_map->find (f));
+
+ // Find the most qualified prefix of which we are a sub-path.
+ //
+ if (!pfx_map->empty ())
+ {
+ dir_path d (f.directory ());
+ auto i (pfx_map->find_sup (d));
+
+ if (i != pfx_map->end ())
+ {
+ const dir_path& pd (i->second.directory);
+
+ l4 ([&]{trace << "prefix '" << d << "' mapped to " << pd;});
+
+ // If this is a prefixless mapping, then only use it if we can
+ // resolve it to an existing target (i.e., it is explicitly
+ // spelled out in a buildfile).
+ //
+ // Note that at some point we will probably have a list of
+ // directories.
+ //
+ pt = find (pd / d, f.leaf (), !i->first.empty ());
+ if (pt != nullptr)
+ {
+ f = pd / f;
+ l4 ([&]{trace << "mapped as auto-generated " << f;});
+ }
+ else
+ l4 ([&]{trace << "no explicit target in " << pd;});
+ }
+ else
+ l4 ([&]{trace << "no prefix map entry for '" << d << "'";});
+ }
+ else
+ l4 ([&]{trace << "prefix map is empty";});
+ }
+ }
+ else
+ {
+ // We used to just normalize the path but that could result in an
+ // invalid path (e.g., for some system/compiler headers on CentOS 7
+ // with Clang 3.4) because of the symlinks (if a directory component
+ // is a symlink, then any following `..` are resolved relative to the
+ // target; see path::normalize() for background).
+ //
+ // Initially, to fix this, we realized (i.e., realpath(3)) it instead.
+ // But that turned out also not to be quite right since now we have
+ // all the symlinks resolved: conceptually it feels correct to keep
+ // the original header names since that's how the user chose to
+ // arrange things and practically this is how the compilers see/report
+ // them (e.g., the GCC module mapper).
+ //
+ // So now we have a pretty elaborate scheme where we try to use the
+ // normalized path if possible and fallback to realized. Normalized
+ // paths will work for situations where `..` does not cross symlink
+ // boundaries, which is the sane case. And for the insane case we only
+ // really care about out-of-project files (i.e., system/compiler
+ // headers). In other words, if you have the insane case inside your
+ // project, then you are on your own.
+ //
+ // All of this is unless the path comes from the depdb, in which case
+ // we've already done that. This is also where we handle src-out remap
+ // (again, not needed if cached).
+ //
+ if (!cache)
+ {
+ // Interestingly, on most platforms and with most compilers (Clang
+ // on Linux being a notable exception) most system/compiler headers
+ // are already normalized.
+ //
+ path_abnormality a (f.abnormalities ());
+ if (a != path_abnormality::none)
+ {
+ // While we can reasonably expect this path to exist, things do go
+ // south from time to time (like compiling under wine with file
+ // wlantypes.h included as WlanTypes.h).
+ //
+ try
+ {
+ // If we have any parent components, then we have to verify the
+ // normalized path matches realized.
+ //
+ path r;
+ if ((a & path_abnormality::parent) == path_abnormality::parent)
+ {
+ r = f;
+ r.realize ();
+ }
+
+ try
+ {
+ f.normalize ();
+
+ // Note that we might still need to resolve symlinks in the
+ // normalized path.
+ //
+ if (!r.empty () && f != r && path (f).realize () != r)
+ f = move (r);
+ }
+ catch (const invalid_path&)
+ {
+ assert (!r.empty ()); // Shouldn't have failed if no `..`.
+ f = move (r); // Fallback to realize.
+ }
+ }
+ catch (const invalid_path&)
+ {
+ fail << "invalid header path '" << f.string () << "'";
+ }
+ catch (const system_error& e)
+ {
+ fail << "invalid header path '" << f.string () << "': " << e;
+ }
+ }
+
+ if (!so_map.empty ())
+ {
+ // Find the most qualified prefix of which we are a sub-path.
+ //
+ auto i (so_map.find_sup (f));
+ if (i != so_map.end ())
+ {
+ // Ok, there is an out tree for this header. Remap to a path
+ // from the out tree and see if there is a target for it.
+ //
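+ // For example (hypothetical paths), with src /tmp/hello/ mapped
+ // to out /tmp/hello-out/, header /tmp/hello/gen/foo.hxx would be
+ // remapped to /tmp/hello-out/gen/foo.hxx, provided an explicit
+ // target for it exists there.
+ //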
+ dir_path d (i->second);
+ d /= f.leaf (i->first).directory ();
+ pt = find (move (d), f.leaf (), false); // d is not moved from.
+
+ if (pt != nullptr)
+ {
+ path p (d / f.leaf ());
+ l4 ([&]{trace << "remapping " << f << " to " << p;});
+ f = move (p);
+ remapped = true;
+ }
+ }
+ }
+ }
+
+ if (pt == nullptr)
+ {
+ l6 ([&]{trace << "entering " << f;});
+ pt = find (f.directory (), f.leaf (), true);
+ }
+ }
+
+ return make_pair (pt, remapped);
+ }
+
+ // Update and add (unless add is false) to the list of prerequisite
+ // targets a header or header unit target. Depending on the cache flag,
+ // the target is assumed to either have come from the depdb cache or from
+ // the compiler run.
+ //
+ // Return the indication of whether it has changed or, if the passed
+ // timestamp is not timestamp_unknown, is older than the target. If the
+ // header came from the cache and it no longer exists nor can be
+ // generated, then return nullopt.
+ //
+ // Note: this used to be a lambda inside extract_headers() so refer to the
+ // body of that function for the overall picture.
+ //
+ optional<bool> compile_rule::
+ inject_header (action a, file& t,
+ const file& pt, bool cache, timestamp mt) const
+ {
+ tracer trace (x, "compile_rule::inject_header");
+
+ // Match to a rule.
+ //
+ // If we are reading the cache, then it is possible the file has since
+ // been removed (think of a header in /usr/local/include that has been
+ // uninstalled and now we need to use one from /usr/include). This will
+ // lead to the match failure which we translate to a restart.
+ //
+ if (!cache)
+ build2::match (a, pt);
+ else if (!build2::try_match (a, pt).first)
+ return nullopt;
+
+ bool r (update (trace, a, pt, mt));
+
+ // Add to our prerequisite target list.
+ //
+ t.prerequisite_targets[a].push_back (&pt);
+
+ return r;
+ }
+
+ // Extract and inject header dependencies. Return the preprocessed source
+ // file as well as an indication if it is usable for compilation (see
+ // below for details).
+ //
+ // This is also the place where we handle header units which are a lot
+ // more like auto-generated headers than modules. In particular, if a
+ // header unit BMI is out-of-date, then we have to re-preprocess this
+ // translation unit.
+ //
+ pair<auto_rmfile, bool> compile_rule::
+ extract_headers (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const file& src,
+ match_data& md,
+ depdb& dd,
+ bool& update,
+ timestamp mt) const
+ {
+ tracer trace (x, "compile_rule::extract_headers");
+
+ otype ot (li.type);
+
+ bool reprocess (cast_false<bool> (t[c_reprocess]));
+
+ auto_rmfile psrc;
+ bool puse (true);
+
+ // If things go wrong (and they often do in this area), give the user a
+ // bit extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while extracting header dependencies from " << src;
+ });
+
+ const scope& rs (*bs.root_scope ());
+
+ // Preprocessor mode that preserves as much information as possible while
+ // still performing inclusions. Also serves as a flag indicating whether
+ // this compiler uses the separate preprocess and compile setup.
+ //
+ const char* pp (nullptr);
+
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // -fdirectives-only is available since GCC 4.3.0.
+ //
+ if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+ pp = "-fdirectives-only";
+
+ break;
+ }
+ case compiler_type::clang:
+ {
+ // -frewrite-includes is available since vanilla Clang 3.2.0.
+ //
+ // Apple Clang 5.0 is based on LLVM 3.3svn so it should have this
+ // option (4.2 is based on 3.2svn so it may or may not have it and,
+ // no, we are not going to try to find out).
+ //
+ if (cvariant == "apple"
+ ? (cmaj >= 5)
+ : (cmaj > 3 || (cmaj == 3 && cmin >= 2)))
+ pp = "-frewrite-includes";
+
+ break;
+ }
+ case compiler_type::msvc:
+ {
+ // Asking MSVC to preserve comments doesn't really buy us anything
+ // but does cause some extra buggy behavior.
+ //
+ //pp = "/C";
+ break;
+ }
+ case compiler_type::icc:
+ break;
+ }
+
+ // Initialize lazily, only if required.
+ //
+ environment env;
+ cstrings args;
+ string out; // Storage.
+
+ // Some compilers in certain modes (e.g., when also producing the
+ // preprocessed output) are incapable of writing the dependency
+ // information to stdout. In this case we use a temporary file.
+ //
+ auto_rmfile drm;
+
+ // Here is the problem: neither GCC nor Clang allow -MG (treat missing
+ // header as generated) when we produce any kind of other output (-MD).
+ // And that's probably for the best since otherwise the semantics get
+ // pretty hairy (e.g., what is the exit code and state of the output?).
+ //
+ // One thing to note about generated headers: if we detect one, then,
+ // after generating it, we re-run the compiler since we need to get
+ // this header's dependencies.
+ //
+ // So this is how we are going to work around this problem: we first run
+ // with -E but without -MG. If there are any errors (maybe because of
+ // generated headers, maybe not), we restart with -MG and without -E. If
+ // this fixes the error (so it was a generated header after all), then
+ // we have to restart at which point we go back to -E and no -MG. And we
+ // keep yo-yoing like this. Missing generated headers will probably be
+ // a fairly rare occurrence so this shouldn't be too expensive.
+ //
+ // Actually, there is another error case we would like to handle: an
+ // outdated generated header that is now causing an error (e.g., because
+ // of a check that is now triggering #error or some such). So there are
+ // actually three error cases: outdated generated header, missing
+ // generated header, and some other error. To handle the outdated case
+ // we need the compiler to produce the dependency information even in
+ // case of an error. Clang does it, for VC we parse diagnostics
+ // ourselves, but GCC does not (but a patch has been submitted).
+ //
+ // So the final plan is then as follows:
+ //
+    // 1. Start without -MG and with suppressed diagnostics.
+ // 2. If error but we've updated a header, then repeat step 1.
+ // 3. Otherwise, restart with -MG and diagnostics.
+ //
+ // Note that below we don't even check if the compiler supports the
+ // dependency info on error. We just try to use it and if it's not
+ // there we ignore the io error since the compiler has failed.
+ //
+ bool args_gen; // Current state of args.
+ size_t args_i (0); // Start of the -M/-MD "tail".
+
+ // Ok, all good then? Not so fast, the rabbit hole is deeper than it
+ // seems: When we run with -E we have to discard diagnostics. This is
+ // not a problem for errors since they will be shown on the re-run but
+ // it is for (preprocessor) warnings.
+ //
+ // Clang's -frewrite-includes is nice in that it preserves the warnings
+ // so they will be shown during the compilation of the preprocessed
+ // source. They are also shown during -E but that we discard. And unlike
+ // GCC, in Clang -M does not imply -w (disable warnings) so it would
+ // have been shown in -M -MG re-runs but we suppress that with explicit
+ // -w. All is good in the Clang land then (even -Werror works nicely).
+ //
+    // GCC's -fdirectives-only, on the other hand, processes all the
+ // directives so they are gone from the preprocessed source. Here is
+ // what we are going to do to work around this: we will detect if any
+ // diagnostics has been written to stderr on the -E run. If that's the
+ // case (but the compiler indicated success) then we assume they are
+ // warnings and disable the use of the preprocessed output for
+ // compilation. This in turn will result in compilation from source
+ // which will display the warnings. Note that we may still use the
+ // preprocessed output for other things (e.g., C++ module dependency
+ // discovery). BTW, another option would be to collect all the
+ // diagnostics and then dump it if the run is successful, similar to
+ // the VC semantics (and drawbacks) described below.
+ //
+ // Finally, for VC, things are completely different: there is no -MG
+ // equivalent and we handle generated headers by analyzing the
+ // diagnostics. This means that unlike in the above two cases, the
+ // preprocessor warnings are shown during dependency extraction, not
+ // compilation. Not ideal but that's the best we can do. Or is it -- we
+ // could implement ad hoc diagnostics sensing... It appears warnings are
+ // in the C4000-C4999 code range though there can also be note lines
+ // which don't have any C-code.
+ //
+ // BTW, triggering a warning in the VC preprocessor is not easy; there
+ // is no #warning and pragmas are passed through to the compiler. One
+ // way to do it is to redefine a macro, for example:
+ //
+ // hello.cxx(4): warning C4005: 'FOO': macro redefinition
+ // hello.cxx(3): note: see previous definition of 'FOO'
+ //
+ // So seeing that it is hard to trigger a legitimate VC preprocessor
+ // warning, for now, we will just treat them as errors by adding /WX.
+ //
+ // Finally, if we are using the module mapper, then all this mess falls
+ // away: we only run the compiler once, we let the diagnostics through,
+ // we get a compiler error (with location information) if a header is
+ // not found, and there is no problem with outdated generated headers
+ // since we update/remap them before the compiler has a chance to read
+ // them. Overall, this "dependency mapper" approach is how it should
+ // have been done from the beginning.
+
+ // Note: diagnostics sensing is currently only supported if dependency
+ // info is written to a file (see above).
+ //
+ bool sense_diag (false);
+
+ // And here is another problem: if we have an already generated header
+ // in src and the one in out does not yet exist, then the compiler will
+ // pick the one in src and we won't even notice. Note that this is not
+ // only an issue with mixing in- and out-of-tree builds (which does feel
+ // wrong but is oh so convenient): this is also a problem with
+ // pre-generated headers, a technique we use to make installing the
+ // generator by end-users optional by shipping pre-generated headers.
+ //
+ // This is a nasty problem that doesn't seem to have a perfect solution
+ // (except, perhaps, C++ modules). So what we are going to do is try to
+ // rectify the situation by detecting and automatically remapping such
+ // mis-inclusions. It works as follows.
+ //
+ // First we will build a map of src/out pairs that were specified with
+ // -I. Here, for performance and simplicity, we will assume that they
+ // always come in pairs with out first and src second. We build this
+ // map lazily only if we are running the preprocessor and reuse it
+ // between restarts.
+ //
+ // With the map in hand we can then check each included header for
+ // potentially having a doppelganger in the out tree. If this is the
+ // case, then we calculate a corresponding header in the out tree and,
+ // (this is the most important part), check if there is a target for
+ // this header in the out tree. This should be fairly accurate and not
+ // require anything explicit from the user except perhaps for a case
+ // where the header is generated out of nothing (so there is no need to
+ // explicitly mention its target in the buildfile). But this probably
+ // won't be very common.
+ //
+    // One tricky area in this setup is target groups: if the generated
+ // sources are mentioned in the buildfile as a group, then there might
+ // be no header target (yet). The way we solve this is by requiring code
+ // generator rules to cooperate and create at least the header target as
+ // part of the group creation. While not all members of the group may be
+ // generated depending on the options (e.g., inline files might be
+ // suppressed), headers are usually non-optional.
+ //
+ // Note that we use path_map instead of dir_path_map to allow searching
+ // using path (file path).
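+    //
+    // For example (hypothetical paths), given the pair
+    //
+    //   -I/tmp/hello-out/libhello -I/tmp/hello/libhello
+    //
+    // an inclusion of /tmp/hello/libhello/gen.hxx would be remapped to
+    // /tmp/hello-out/libhello/gen.hxx provided there is a target for the
+    // latter.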
+ //
+ srcout_map so_map; // path_map<dir_path>
+
+ // Dynamic module mapper.
+ //
+ bool mod_mapper (false);
+
+ // The gen argument to init_args() is in/out. The caller signals whether
+ // to force the generated header support and on return it signals
+ // whether this support is enabled. The first call to init_args is
+ // expected to have gen false.
+ //
+ // Return NULL if the dependency information goes to stdout and a
+ // pointer to the temporary file path otherwise.
+ //
+ auto init_args = [a, &t, ot, li, reprocess,
+ &src, &md, &psrc, &sense_diag, &mod_mapper,
+ &rs, &bs,
+ pp, &env, &args, &args_gen, &args_i, &out, &drm,
+ &so_map, this]
+ (bool& gen) -> const path*
+ {
+ const path* r (nullptr);
+
+ if (args.empty ()) // First call.
+ {
+ assert (!gen);
+
+ // We use absolute/relative paths in the dependency output to
+ // distinguish existing headers from (missing) generated. Which
+ // means we have to (a) use absolute paths in -I and (b) pass
+ // absolute source path (for ""-includes). That (b) is a problem:
+ // if we use an absolute path, then all the #line directives will be
+ // absolute and all the diagnostics will have long, noisy paths
+ // (actually, we will still have long paths for diagnostics in
+ // headers).
+ //
+ // To work around this we used to pass a relative path to the source
+ // file and then check every relative path in the dependency output
+ // for existence in the source file's directory. This is not without
+ // issues: it is theoretically possible for a generated header that
+ // is <>-included and found via -I to exist in the source file's
+ // directory. Note, however, that this is a lot more likely to
+ // happen with prefix-less inclusion (e.g., <foo>) and in this case
+ // we assume the file is in the project anyway. And if there is a
+ // conflict with a prefixed include (e.g., <bar/foo>), then, well,
+ // we will just have to get rid of quoted includes (which are
+ // generally a bad idea, anyway).
+ //
+ // But then this approach (relative path) fell apart further when we
+ // tried to implement precise changed detection: the preprocessed
+        // output would change depending on where it was compiled because
+ // of #line (which we could work around) and __FILE__/assert()
+ // (which we can't really do anything about). So it looks like using
+ // the absolute path is the lesser of all the evils (and there are
+ // many).
+ //
+ // Note that we detect and diagnose relative -I directories lazily
+ // when building the include prefix map.
+ //
+ args.push_back (cpath.recall_string ());
+
+ // If we are re-processing the translation unit, then allow the
+ // translation unit to detect header/module dependency extraction.
+ // This can be used to work around separate preprocessing bugs in
+ // the compiler.
+ //
+ if (reprocess)
+ args.push_back ("-D__build2_preprocess");
+
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ // Add *.export.poptions from prerequisite libraries.
+ //
+ append_lib_options (bs, args, a, t, li);
+
+          // Populate the src-out map with the -I$out_base -I$src_base pairs.
+ //
+ {
+ // Try to be fast and efficient by reusing buffers as much as
+ // possible.
+ //
+ string ds;
+
+          // Innermost scope of the previous -I if it looks like an out_base,
+          // plus the difference between the scope path and the -I path
+          // (normally empty).
+ //
+ const scope* s (nullptr);
+ dir_path p;
+
+ for (auto i (args.begin ()), e (args.end ()); i != e; ++i)
+ {
+ // -I can either be in the "-Ifoo" or "-I foo" form. For VC it
+ // can also be /I.
+ //
+ const char* o (*i);
+ size_t n (strlen (o));
+
+ if (n < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+ {
+ s = nullptr;
+ continue;
+ }
+
+ if (n == 2)
+ {
+ if (++i == e)
+ break; // Let the compiler complain.
+
+ ds = *i;
+ }
+ else
+ ds.assign (o + 2, n - 2);
+
+ if (!ds.empty ())
+ {
+ // Note that we don't normalize the paths since it would be
+                // quite expensive and normally the pairs we are interested in
+ // are already normalized (since they are usually specified as
+ // -I$src/out_*). We just need to add a trailing directory
+ // separator if it's not already there.
+ //
+ if (!dir_path::traits_type::is_separator (ds.back ()))
+ ds += dir_path::traits_type::directory_separator;
+
+ dir_path d (move (ds), dir_path::exact); // Move the buffer in.
+
+ // Ignore invalid paths (buffer is not moved).
+ //
+ if (!d.empty ())
+ {
+ // Ignore any paths containing '.', '..' components. Allow
+                  // any directory separators though (think -I$src_root/foo
+ // on Windows).
+ //
+ if (d.absolute () && d.normalized (false))
+ {
+ // If we have a candidate out_base, see if this is its
+ // src_base.
+ //
+ if (s != nullptr)
+ {
+ const dir_path& bp (s->src_path ());
+
+ if (d.sub (bp))
+ {
+ if (p.empty () || d.leaf (bp) == p)
+ {
+ // We've got a pair.
+ //
+ so_map.emplace (move (d), s->out_path () / p);
+ s = nullptr; // Taken.
+ continue;
+ }
+ }
+
+ // Not a pair. Fall through to consider as out_base.
+ //
+ s = nullptr;
+ }
+
+ // See if this path is inside a project with an out-of-
+ // tree build and is in the out directory tree.
+ //
+ const scope& bs (t.ctx.scopes.find (d));
+ if (bs.root_scope () != nullptr)
+ {
+ const dir_path& bp (bs.out_path ());
+ if (bp != bs.src_path ())
+ {
+ bool e;
+ if ((e = (d == bp)) || d.sub (bp))
+ {
+ s = &bs;
+ if (e)
+ p.clear ();
+ else
+ p = d.leaf (bp);
+ }
+ }
+ }
+ }
+ else
+ s = nullptr;
+
+ ds = move (d).string (); // Move the buffer out.
+ }
+ else
+ s = nullptr;
+ }
+ else
+ s = nullptr;
+ }
+ }
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+
+ // Some compile options (e.g., -std, -m) affect the preprocessor.
+ //
+ // Currently Clang supports importing "header modules" even when in
+ // the TS mode. And "header modules" support macros which means
+ // imports have to be resolved during preprocessing. Which poses a
+ // bit of a chicken and egg problem for us. For now, the workaround
+ // is to remove the -fmodules-ts option when preprocessing. Hopefully
+ // there will be a "pure modules" mode at some point.
+ //
+ // @@ MODHDR Clang: should be solved with the dynamic module mapper
+ // if/when Clang supports it?
+ //
+
+ // Don't treat warnings as errors.
+ //
+ const char* werror (nullptr);
+ switch (cclass)
+ {
+ case compiler_class::gcc: werror = "-Werror"; break;
+ case compiler_class::msvc: werror = "/WX"; break;
+ }
+
+ bool clang (ctype == compiler_type::clang);
+
+ append_options (args, t, c_coptions, werror);
+ append_options (args, t, x_coptions, werror);
+ append_options (args, tstd,
+ tstd.size () - (modules && clang ? 1 : 0));
+
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ args.push_back ("/nologo");
+
+ // See perform_update() for details on overriding the default
+ // exceptions and runtime.
+ //
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ args.push_back ("/P"); // Preprocess to file.
+ args.push_back ("/showIncludes"); // Goes to stdout (with diag).
+ if (pp != nullptr)
+ args.push_back (pp); // /C (preserve comments).
+ args.push_back ("/WX"); // Warning as error (see above).
+
+ msvc_sanitize_cl (args);
+
+ psrc = auto_rmfile (t.path () + x_pext);
+
+ if (cast<uint64_t> (rs[x_version_major]) >= 18)
+ {
+ args.push_back ("/Fi:");
+ args.push_back (psrc.path.string ().c_str ());
+ }
+ else
+ {
+ out = "/Fi" + psrc.path.string ();
+ args.push_back (out.c_str ());
+ }
+
+ append_lang_options (args, md); // Compile as.
+ gen = args_gen = true;
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ if (ot == otype::s)
+ {
+                  // On Darwin and Win32 -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ // Setup the dynamic module mapper if needed.
+ //
+ // Note that it's plausible in the future we will use it even if
+ // modules are disabled, for example, to implement better -MG.
+                // In which case it would probably be better called a
+ // "dependency mapper".
+ //
+ if (modules)
+ {
+ if (ctype == compiler_type::gcc)
+ {
+ args.push_back ("-fmodule-mapper=<>");
+ mod_mapper = true;
+ }
+ }
+
+ // Depending on the compiler, decide whether (and how) we can
+ // produce preprocessed output as a side effect of dependency
+ // extraction.
+ //
+ // Note: -MM -MG skips missing <>-included.
+
+ // Clang's -M does not imply -w (disable warnings). We also
+ // don't need them in the -MD case (see above) so disable for
+ // both.
+ //
+ if (clang)
+ args.push_back ("-w");
+
+ append_lang_options (args, md);
+
+ if (pp != nullptr)
+ {
+ // With the GCC module mapper the dependency information is
+ // written directly to depdb by the mapper.
+ //
+ if (ctype == compiler_type::gcc && mod_mapper)
+ {
+ // Note that in this mode we don't have -MG re-runs. In a
+ // sense we are in the -MG mode (or, more precisely, the "no
+ // -MG required" mode) right away.
+ //
+ args.push_back ("-E");
+ args.push_back (pp);
+ gen = args_gen = true;
+ r = &drm.path; // Bogus/hack to force desired process start.
+ }
+ else
+ {
+ // Previously we used '*' as a target name but it gets
+ // expanded to the current directory file names by GCC (4.9)
+ // that comes with MSYS2 (2.4). Yes, this is the (bizarre)
+ // behavior of GCC being executed in the shell with -MQ '*'
+ // option and not just -MQ *.
+ //
+ args.push_back ("-MQ"); // Quoted target name.
+ args.push_back ("^"); // Old versions can't do empty.
+
+ // Note that the options are carefully laid out to be easy
+ // to override (see below).
+ //
+ args_i = args.size ();
+
+ args.push_back ("-MD");
+ args.push_back ("-E");
+ args.push_back (pp);
+
+ // Dependency output.
+ //
+ // GCC until version 8 was not capable of writing the
+ // dependency information to stdout. We also either need to
+ // sense the diagnostics on the -E runs (which we currently
+ // can only do if we don't need to read stdout) or we could
+ // be communicating with the module mapper via stdin/stdout.
+ //
+ if (ctype == compiler_type::gcc)
+ {
+ // Use the .t extension (for "temporary"; .d is taken).
+ //
+ r = &(drm = auto_rmfile (t.path () + ".t")).path;
+ }
+
+ args.push_back ("-MF");
+ args.push_back (r != nullptr ? r->string ().c_str () : "-");
+
+ sense_diag = (ctype == compiler_type::gcc);
+ gen = args_gen = false;
+ }
+
+ // Preprocessor output.
+ //
+ psrc = auto_rmfile (t.path () + x_pext);
+ args.push_back ("-o");
+ args.push_back (psrc.path.string ().c_str ());
+ }
+ else
+ {
+ args.push_back ("-MQ");
+ args.push_back ("^");
+ args.push_back ("-M");
+ args.push_back ("-MG"); // Treat missing headers as generated.
+ gen = args_gen = true;
+ }
+
+ break;
+ }
+ }
+
+ args.push_back (src.path ().string ().c_str ());
+ args.push_back (nullptr);
+
+ // Note: only doing it here.
+ //
+ if (!env.empty ())
+ env.push_back (nullptr);
+ }
+ else
+ {
+ assert (gen != args_gen && args_i != 0);
+
+ size_t i (args_i);
+
+ if (gen)
+ {
+ // Overwrite.
+ //
+ args[i++] = "-M";
+ args[i++] = "-MG";
+ args[i++] = src.path ().string ().c_str ();
+ args[i] = nullptr;
+
+ if (ctype == compiler_type::gcc)
+ {
+ sense_diag = false;
+ }
+ }
+ else
+ {
+ // Restore.
+ //
+ args[i++] = "-MD";
+ args[i++] = "-E";
+ args[i++] = pp;
+ args[i] = "-MF";
+
+ if (ctype == compiler_type::gcc)
+ {
+ r = &drm.path;
+ sense_diag = true;
+ }
+ }
+
+ args_gen = gen;
+ }
+
+ return r;
+ };
+
+ // Build the prefix map lazily only if we have non-existent files.
+ // Also reuse it over restarts since it doesn't change.
+ //
+ optional<prefix_map> pfx_map;
+
+ // If any prerequisites that we have extracted changed, then we have to
+ // redo the whole thing. The reason for this is auto-generated headers:
+ // the updated header may now include a yet-non-existent header. Unless
+ // we discover this and generate it (which, BTW, will trigger another
+ // restart since that header, in turn, can also include auto-generated
+ // headers), we will end up with an error during compilation proper.
+ //
+ // One complication with this restart logic is that we will see a
+ // "prefix" of prerequisites that we have already processed (i.e., they
+ // are already in our prerequisite_targets list) and we don't want to
+ // keep redoing this over and over again. One thing to note, however, is
+ // that the prefix that we have seen on the previous run must appear
+ // exactly the same in the subsequent run. The reason for this is that
+ // none of the files that it can possibly be based on have changed and
+ // thus it should be exactly the same. To put it another way, the
+ // presence or absence of a file in the dependency output can only
+ // depend on the previous files (assuming the compiler outputs them as
+    // it encounters them and it is hard to think of a reason why someone
+    // would do otherwise). And we have already made sure that all those
+ // files are up to date. And here is the way we are going to exploit
+ // this: we are going to keep track of how many prerequisites we have
+ // processed so far and on restart skip right to the next one.
+ //
+ // And one more thing: most of the time this list of headers would stay
+ // unchanged and extracting them by running the compiler every time is a
+ // bit wasteful. So we are going to cache them in the depdb. If the db
+ // hasn't been invalidated yet (e.g., because the compiler options have
+ // changed), then we start by reading from it. If anything is out of
+ // date then we use the same restart and skip logic to switch to the
+ // compiler run.
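+    //
+    // To illustrate (hypothetical): if the first run processed five headers
+    // and then hit a missing generated one, skip_count will be 5 and the
+    // restarted run will skip the first five entries in the dependency
+    // output, continuing from where it left off.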
+ //
+ size_t skip_count (0);
+
+ // Enter as a target, update, and add to the list of prerequisite
+ // targets a header file. Depending on the cache flag, the file is
+ // assumed to either have come from the depdb cache or from the compiler
+ // run. Return true if the extraction process should be restarted.
+ //
+ auto add = [a, &bs, &t, li,
+ &pfx_map, &so_map,
+ &dd, &skip_count,
+ this] (path hp, bool cache, timestamp mt) -> bool
+ {
+ const file* ht (enter_header (a, bs, t, li,
+ move (hp), cache,
+ pfx_map, so_map).first);
+ if (ht == nullptr)
+ {
+ diag_record dr;
+ dr << fail << "header '" << hp
+ << "' not found and cannot be generated";
+
+ if (verb < 4)
+ dr << info << "re-run with --verbose=4 for more information";
+ }
+
+ if (optional<bool> u = inject_header (a, t, *ht, cache, mt))
+ {
+ // Verify/add it to the dependency database.
+ //
+ if (!cache)
+ dd.expect (ht->path ());
+
+ skip_count++;
+ return *u;
+ }
+
+ dd.write (); // Invalidate this line.
+ return true;
+ };
+
+ // As above but for a header unit. Note that currently it is only used
+ // for the cached case (the other case is handled by the mapper).
+ //
+ auto add_unit = [a, &bs, &t, li,
+ &pfx_map, &so_map,
+ &dd, &skip_count, &md,
+ this] (path hp, path bp, timestamp mt) -> bool
+ {
+ const file* ht (enter_header (a, bs, t, li,
+ move (hp), true /* cache */,
+ pfx_map, so_map).first);
+ if (ht == nullptr)
+ fail << "header '" << hp << "' not found and cannot be generated";
+
+ // Again, looks like we have to update the header explicitly since
+ // we want to restart rather than fail if it cannot be updated.
+ //
+ if (inject_header (a, t, *ht, true /* cache */, mt))
+ {
+ const file& bt (make_header_sidebuild (a, bs, li, *ht));
+
+ // It doesn't look like we need the cache semantics here since given
+ // the header, we should be able to build its BMI. In other words, a
+ // restart is not going to change anything.
+ //
+ optional<bool> u (inject_header (a, t,
+ bt, false /* cache */, mt));
+ assert (u); // Not from cache.
+
+ if (bt.path () == bp)
+ {
+ md.headers++;
+ skip_count++;
+ return *u;
+ }
+ }
+
+ dd.write (); // Invalidate this line.
+ return true;
+ };
+
+ // See init_args() above for details on generated header support.
+ //
+ bool gen (false);
+ optional<bool> force_gen;
+ optional<size_t> force_gen_skip; // Skip count at last force_gen run.
+
+ const path* drmp (nullptr); // Points to drm.path () if active.
+
+ // If nothing so far has invalidated the dependency database, then try
+ // the cached data before running the compiler.
+ //
+ bool cache (!update);
+
+ for (bool restart (true); restart; cache = false)
+ {
+ restart = false;
+
+ if (cache)
+ {
+ // If any, this is always the first run.
+ //
+ assert (skip_count == 0);
+
+ // We should always end with a blank line.
+ //
+ for (;;)
+ {
+ string* l (dd.read ());
+
+ // If the line is invalid, run the compiler.
+ //
+ if (l == nullptr)
+ {
+ restart = true;
+ break;
+ }
+
+ if (l->empty ()) // Done, nothing changed.
+ {
+ // If modules are enabled, then we keep the preprocessed output
+ // around (see apply() for details).
+ //
+ return modules
+ ? make_pair (auto_rmfile (t.path () + x_pext, false), true)
+ : make_pair (auto_rmfile (), false);
+ }
+
+ // This can be a header or a header unit (mapping). The latter
+ // is single-quoted.
+ //
+ // If this header (unit) came from the depdb, make sure it is no
+ // older than the target (if it has changed since the target was
+ // updated, then the cached data is stale).
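+          //
+          // For example (hypothetical paths), a cached header line and a
+          // header unit mapping line could look along these lines:
+          //
+          //   /usr/include/stdio.h
+          //   @ '/tmp/hello/hdr.hxx' /tmp/hello-out/hdr.hxx.gcm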
+ //
+ if ((*l)[0] == '@')
+ {
+ size_t p (l->find ('\'', 3));
+
+ if (p != string::npos)
+ {
+ path h (*l, 3, p - 3);
+ path b (move (l->erase (0, p + 2)));
+
+ restart = add_unit (move (h), move (b), mt);
+ }
+ else
+ restart = true; // Corrupt database?
+ }
+ else
+ restart = add (path (move (*l)), true, mt);
+
+ if (restart)
+ {
+ update = true;
+ l6 ([&]{trace << "restarting (cache)";});
+ break;
+ }
+ }
+ }
+ else
+ {
+ try
+ {
+ if (force_gen)
+ gen = *force_gen;
+
+ if (args.empty () || gen != args_gen)
+ drmp = init_args (gen);
+
+ if (verb >= 3)
+ print_process (args.data ()); // Disable pipe mode.
+
+ process pr;
+
+ try
+ {
+ // Assume the preprocessed output (if produced) is usable
+ // until proven otherwise.
+ //
+ puse = true;
+
+ // Save the timestamp just before we start preprocessing. If
+ // we depend on any header that has been updated since, then
+ // we should assume we've "seen" the old copy and re-process.
+ //
+ timestamp pmt (system_clock::now ());
+
+ // In some cases we may need to ignore the error return status.
+ // The good_error flag keeps track of that. Similarly, sometimes
+ // we expect the error return status based on the output that we
+ // see. The bad_error flag is for that.
+ //
+ bool good_error (false), bad_error (false);
+
+ // If we have no generated header support, then suppress all
+ // diagnostics (if things go badly we will restart with this
+ // support).
+ //
+ if (drmp == nullptr) // Dependency info goes to stdout.
+ {
+ assert (!sense_diag); // Note: could support with fdselect().
+
+ // For VC with /P the dependency info and diagnostics all go
+ // to stderr so redirect it to stdout.
+ //
+ pr = process (
+ cpath,
+ args.data (),
+ 0,
+ -1,
+ cclass == compiler_class::msvc ? 1 : gen ? 2 : -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+ }
+ else // Dependency info goes to a temporary file.
+ {
+ pr = process (cpath,
+ args.data (),
+ mod_mapper ? -1 : 0,
+ mod_mapper ? -1 : 2, // Send stdout to stderr.
+ gen ? 2 : sense_diag ? -1 : -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ // Monitor for module mapper requests and/or diagnostics. If
+ // diagnostics is detected, mark the preprocessed output as
+ // unusable for compilation.
+ //
+ if (mod_mapper || sense_diag)
+ {
+ module_mapper_state mm_state (skip_count);
+
+ const char* w (nullptr);
+ try
+ {
+ // For now we don't need to do both so let's use a simpler
+ // blocking implementation. Note that the module mapper
+ // also needs to be adjusted when switching to the
+ // non-blocking version.
+ //
+#if 1
+ assert (mod_mapper != sense_diag);
+
+ if (mod_mapper)
+ {
+ w = "module mapper request";
+
+ // Note: the order is important (see the non-blocking
+                    // version for details).
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::skip,
+ ifdstream::badbit);
+ ofdstream os (move (pr.out_fd));
+
+ do
+ {
+ gcc_module_mapper (mm_state,
+ a, bs, t, li,
+ is, os,
+ dd, update, bad_error,
+ pfx_map, so_map);
+ } while (!is.eof ());
+
+ os.close ();
+ is.close ();
+ }
+
+ if (sense_diag)
+ {
+ w = "diagnostics";
+ ifdstream is (move (pr.in_efd), fdstream_mode::skip);
+ puse = puse && (is.peek () == ifdstream::traits_type::eof ());
+ is.close ();
+ }
+#else
+ fdselect_set fds;
+ auto add = [&fds] (const auto_fd& afd) -> fdselect_state*
+ {
+ int fd (afd.get ());
+ fdmode (fd, fdstream_mode::non_blocking);
+ fds.push_back (fd);
+ return &fds.back ();
+ };
+
+ // Note that while we read both streams until eof in
+ // normal circumstances, we cannot use fdstream_mode::skip
+ // for the exception case on both of them: we may end up
+ // being blocked trying to read one stream while the
+ // process may be blocked writing to the other. So in case
+ // of an exception we only skip the diagnostics and close
+ // the mapper stream hard. The latter should happen first
+ // so the order of the following variable is important.
+ //
+ ifdstream es;
+ ofdstream os;
+ ifdstream is;
+
+ fdselect_state* ds (nullptr);
+ if (sense_diag)
+ {
+ w = "diagnostics";
+ ds = add (pr.in_efd);
+ es.open (move (pr.in_efd), fdstream_mode::skip);
+ }
+
+ fdselect_state* ms (nullptr);
+ if (mod_mapper)
+ {
+ w = "module mapper request";
+ ms = add (pr.in_ofd);
+ is.open (move (pr.in_ofd));
+ os.open (move (pr.out_fd)); // Note: blocking.
+ }
+
+ // Set each state pointer to NULL when the respective
+ // stream reaches eof.
+ //
+ while (ds != nullptr || ms != nullptr)
+ {
+ w = "output";
+ ifdselect (fds);
+
+ // First read out the diagnostics in case the mapper
+ // interaction produces more. To make sure we don't get
+ // blocked by full stderr, the mapper should only handle
+ // one request at a time.
+ //
+ if (ds != nullptr && ds->ready)
+ {
+ w = "diagnostics";
+
+ for (char buf[4096];;)
+ {
+ streamsize c (sizeof (buf));
+ streamsize n (es.readsome (buf, c));
+
+ if (puse && n > 0)
+ puse = false;
+
+ if (n < c)
+ break;
+ }
+
+ if (es.eof ())
+ {
+ es.close ();
+ ds->fd = nullfd;
+ ds = nullptr;
+ }
+ }
+
+ if (ms != nullptr && ms->ready)
+ {
+ w = "module mapper request";
+
+ gcc_module_mapper (mm_state,
+ a, bs, t, li,
+ is, os,
+ dd, update, bad_error,
+ pfx_map, so_map);
+ if (is.eof ())
+ {
+ os.close ();
+ is.close ();
+ ms->fd = nullfd;
+ ms = nullptr;
+ }
+ }
+ }
+#endif
+ }
+ catch (const io_error& e)
+ {
+ if (pr.wait ())
+ fail << "io error handling " << x_lang << " compiler "
+ << w << ": " << e;
+
+ // Fall through.
+ }
+
+ if (mod_mapper)
+ md.headers += mm_state.headers;
+ }
+
+ // The idea is to reduce this to the stdout case.
+ //
+ pr.wait ();
+
+ // With -MG we want to read dependency info even if there is
+ // an error (in case an outdated header file caused it). But
+ // with the GCC module mapper an error is non-negotiable, so
+ // to speak, and so we want to skip all of that. In fact, we
+ // now write directly to depdb without generating and then
+                  // parsing an intermediate dependency makefile.
+ //
+ pr.in_ofd = (ctype == compiler_type::gcc && mod_mapper)
+ ? auto_fd (nullfd)
+ : fdopen (*drmp, fdopen_mode::in);
+ }
+
+ if (pr.in_ofd != nullfd)
+ {
+ // We may not read all the output (e.g., due to a restart).
+ // Before we used to just close the file descriptor to signal
+ // to the other end that we are not interested in the rest.
+ // This works fine with GCC but Clang (3.7.0) finds this
+ // impolite and complains, loudly (broken pipe). So now we are
+ // going to skip until the end.
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::text | fdstream_mode::skip,
+ ifdstream::badbit);
+
+ size_t skip (skip_count);
+ string l; // Reuse.
+ for (bool first (true), second (false); !restart; )
+ {
+ if (eof (getline (is, l)))
+ break;
+
+ l6 ([&]{trace << "header dependency line '" << l << "'";});
+
+ // Parse different dependency output formats.
+ //
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ if (first)
+ {
+ // The first line should be the file we are compiling.
+ // If it is not, then something went wrong even before
+ // we could compile anything (e.g., file does not
+ // exist). In this case the first line (and everything
+ // after it) is presumably diagnostics.
+ //
+ // It can, however, be a command line warning, for
+ // example:
+ //
+ // cl : Command line warning D9025 : overriding '/W3' with '/W4'
+ //
+ // So we try to detect and skip them assuming they
+ // will also show up during the compilation proper.
+ //
+ if (l != src.path ().leaf ().string ())
+ {
+ // D8XXX are errors while D9XXX are warnings.
+ //
+ size_t p (msvc_sense_diag (l, 'D'));
+ if (p != string::npos && l[p] == '9')
+ continue;
+
+ text << l;
+ bad_error = true;
+ break;
+ }
+
+ first = false;
+ continue;
+ }
+
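+                        // A typical /showIncludes line, which next_show()
+                        // parses, looks along these lines (hypothetical
+                        // path):
+                        //
+                        // Note: including file:   C:\tmp\hello.hxx
+                        //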
+ string f (next_show (l, good_error));
+
+ if (f.empty ()) // Some other diagnostics.
+ {
+ text << l;
+ bad_error = true;
+ break;
+ }
+
+ // Skip until where we left off.
+ //
+ if (skip != 0)
+ {
+ // We can't be skipping over a non-existent header.
+ //
+ assert (!good_error);
+ skip--;
+ }
+ else
+ {
+ restart = add (path (move (f)), false, pmt);
+
+ // If the header does not exist (good_error), then
+ // restart must be true. Except that it is possible
+ // that someone running in parallel has already
+ // updated it. In this case we must force a restart
+ // since we haven't yet seen what's after this
+ // at-that-time-non-existent header.
+ //
+ // We also need to force the target update (normally
+ // done by add()).
+ //
+ if (good_error)
+ restart = true;
+ //
+ // And if we have updated the header (restart is
+ // true), then we may end up in this situation: an old
+ // header got included which caused the preprocessor
+ // to fail down the line. So if we are restarting, set
+ // the good error flag in case the process fails
+ // because of something like this (and if it is for a
+ // valid reason, then we will pick it up on the next
+ // round).
+ //
+ else if (restart)
+ good_error = true;
+
+ if (restart)
+ {
+ update = true;
+ l6 ([&]{trace << "restarting";});
+ }
+ }
+
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ // Make dependency declaration.
+ //
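+                        // With -MQ ^ it looks along these lines
+                        // (hypothetical paths):
+                        //
+                        // ^: hello.cxx /usr/include/stdio.h \
+                        //  /tmp/hello-out/gen.hxx
+                        //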
+ size_t pos (0);
+
+ if (first)
+ {
+ // Empty/invalid output should mean the wait() call
+ // below will return false.
+ //
+ if (l.empty () ||
+ l[0] != '^' || l[1] != ':' || l[2] != ' ')
+ {
+ // @@ Hm, we don't seem to redirect stderr to stdout
+ // for this class of compilers so I wonder why
+ // we are doing this?
+ //
+ if (!l.empty ())
+ text << l;
+
+ bad_error = true;
+ break;
+ }
+
+ first = false;
+ second = true;
+
+ // While normally we would have the source file on the
+ // first line, if too long, it will be moved to the
+ // next line and all we will have on this line is:
+ // "^: \".
+ //
+ if (l.size () == 4 && l[3] == '\\')
+ continue;
+ else
+ pos = 3; // Skip "^: ".
+
+ // Fall through to the 'second' block.
+ }
+
+ if (second)
+ {
+ second = false;
+ next_make (l, pos); // Skip the source file.
+ }
+
+ while (pos != l.size ())
+ {
+ string f (next_make (l, pos));
+
+ // Skip until where we left off.
+ //
+ if (skip != 0)
+ {
+ skip--;
+ continue;
+ }
+
+ restart = add (path (move (f)), false, pmt);
+
+ if (restart)
+ {
+ // The same "preprocessor may fail down the line"
+ // logic as above.
+ //
+ good_error = true;
+
+ update = true;
+ l6 ([&]{trace << "restarting";});
+ break;
+ }
+ }
+
+ break;
+ }
+ }
+
+ if (bad_error)
+ break;
+ }
+
+ // In case of VC, we are parsing stderr and if things go
+ // south, we need to copy the diagnostics for the user to see.
+ //
+ if (bad_error && cclass == compiler_class::msvc)
+ {
+ // We used to just dump the whole rdbuf but it turns out VC
+ // may continue writing include notes interleaved with the
+ // diagnostics. So we have to filter them out.
+ //
+ for (; !eof (getline (is, l)); )
+ {
+ size_t p (msvc_sense_diag (l, 'C'));
+ if (p != string::npos && l.compare (p, 4, "1083") != 0)
+ diag_stream_lock () << l << endl;
+ }
+ }
+
+ is.close ();
+
+ // This is tricky: it is possible that in parallel someone has
+ // generated all our missing headers and we wouldn't restart
+ // normally.
+ //
+ // In this case we also need to force the target update (which
+ // is normally done by add()).
+ //
+ if (force_gen && *force_gen)
+ {
+ restart = update = true;
+ force_gen = false;
+ }
+ }
+
+ if (pr.wait ())
+ {
+ if (!bad_error) // Ignore expected successes (we are done).
+ continue;
+
+ fail << "expected error exit status from " << x_lang
+ << " compiler";
+ }
+ else if (pr.exit->normal ())
+ {
+ if (good_error) // Ignore expected errors (restart).
+ continue;
+ }
+
+ // Fall through.
+ }
+ catch (const io_error& e)
+ {
+ if (pr.wait ())
+ fail << "unable to read " << x_lang << " compiler header "
+ << "dependency output: " << e;
+
+ // Fall through.
+ }
+
+ assert (pr.exit && !*pr.exit);
+ const process_exit& e (*pr.exit);
+
+ // For normal exit we assume the child process issued some
+ // diagnostics.
+ //
+ if (e.normal ())
+ {
+ // If this run was with the generated header support then we
+ // have issued diagnostics and it's time to give up.
+ //
+ if (gen)
+ throw failed ();
+
+ // Just to recap, being here means something is wrong with the
+ // source: it can be a missing generated header, it can be an
+ // outdated generated header (e.g., some check triggered #error
+ // which will go away if only we updated the generated header),
+ // or it can be a real error that is not going away.
+ //
+ // So this is what we are going to do here: if anything got
+ // updated on this run (i.e., the compiler has produced valid
+ // dependency information even though there were errors and we
+ // managed to find and update a header based on this
+          // information), then we restart in the same mode hoping that
+ // this fixes things. Otherwise, we force the generated header
+ // support which will either uncover a missing generated header
+ // or will issue diagnostics.
+ //
+ if (restart)
+ l6 ([&]{trace << "trying again without generated headers";});
+ else
+ {
+ // In some pathological situations we may end up switching
+ // back and forth indefinitely without making any headway. So
+ // we use skip_count to track our progress.
+ //
+ // Examples that have been encountered so far:
+ //
+ // - Running out of disk space.
+ //
+ // - Using __COUNTER__ in #if which is incompatible with the
+ // GCC's -fdirectives-only mode.
+ //
+ // - A Clang bug: https://bugs.llvm.org/show_bug.cgi?id=35580
+ //
+ // So let's show the yo-yo'ing command lines and ask the user
+ // to investigate.
+ //
+ // Note: we could restart one more time but this time without
+ // suppressing diagnostics. This could be useful since, say,
+ // running out of disk space may not reproduce on its own (for
+ // example, because we have removed all the partially
+ // preprocessed source files).
+ //
+ if (force_gen_skip && *force_gen_skip == skip_count)
+ {
+ diag_record dr (fail);
+
+ dr << "inconsistent " << x_lang << " compiler behavior" <<
+ info << "run the following two commands to investigate";
+
+ dr << info;
+ print_process (dr, args.data ()); // No pipes.
+
+ init_args ((gen = true));
+ dr << info << "";
+ print_process (dr, args.data ()); // No pipes.
+ }
+
+ restart = true;
+ force_gen = true;
+ force_gen_skip = skip_count;
+ l6 ([&]{trace << "restarting with forced generated headers";});
+ }
+ continue;
+ }
+ else
+ run_finish (args, pr); // Throws.
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ // In a multi-threaded program that fork()'ed but did not exec(),
+ // it is unwise to try to do any kind of cleanup (like unwinding
+ // the stack and running destructors).
+ //
+ if (e.child)
+ {
+ drm.cancel ();
+ exit (1);
+ }
+
+ throw failed ();
+ }
+ }
+ }
+
+ // Add the terminating blank line (we are updating depdb).
+ //
+ dd.expect ("");
+
+ puse = puse && !reprocess && !psrc.path.empty ();
+ return make_pair (move (psrc), puse);
+ }
+
+ // Return the translation unit information (first) and its checksum
+ // (second). If the checksum is empty, then it should not be used.
+ //
+ pair<unit, string> compile_rule::
+ parse_unit (action a,
+ file& t,
+ linfo li,
+ const file& src,
+ auto_rmfile& psrc,
+ const match_data& md,
+ const path& dd) const
+ {
+ tracer trace (x, "compile_rule::parse_unit");
+
+ otype ot (li.type);
+
+ // If things go wrong give the user a bit extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while parsing " << src;
+ });
+
+    // For some compilers (GCC, Clang) the preprocessed output is only
+ // partially preprocessed. For others (VC), it is already fully
+ // preprocessed (well, almost: it still has comments but we can handle
+ // that). Plus, the source file might already be (sufficiently)
+ // preprocessed.
+ //
+ // So the plan is to start the compiler process that writes the fully
+ // preprocessed output to stdout and reduce the already preprocessed
+ // case to it.
+ //
+ environment env;
+ cstrings args;
+ small_vector<string, 2> header_args; // Header unit options storage.
+
+ const path* sp; // Source path.
+
+ // @@ MODHDR: If we are reprocessing, then will need module mapper for
+ // include translation. Hairy... Can't we add support for
+ // include translation in file mapper?
+ //
+ bool reprocess (cast_false<bool> (t[c_reprocess]));
+
+ bool ps; // True if extracting from psrc.
+ if (md.pp < preprocessed::modules)
+ {
+ // If we were instructed to reprocess the source during compilation,
+ // then also reprocess it here. While the preprocessed output may be
+ // usable for our needs, to be safe we assume it is not (and later we
+ // may extend cc.reprocess to allow specifying where reprocessing is
+ // needed).
+ //
+ ps = !psrc.path.empty () && !reprocess;
+ sp = &(ps ? psrc.path : src.path ());
+
+ // VC's preprocessed output, if present, is fully preprocessed.
+ //
+ if (cclass != compiler_class::msvc || !ps)
+ {
+ // This should match with how we setup preprocessing and is pretty
+ // similar to init_args() from extract_headers().
+ //
+ args.push_back (cpath.recall_string ());
+
+ if (reprocess)
+ args.push_back ("-D__build2_preprocess");
+
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ append_lib_options (t.base_scope (), args, a, t, li);
+
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+
+ // Make sure we don't fail because of warnings.
+ //
+ // @@ Can be both -WX and /WX.
+ //
+ const char* werror (nullptr);
+ switch (cclass)
+ {
+ case compiler_class::gcc: werror = "-Werror"; break;
+ case compiler_class::msvc: werror = "/WX"; break;
+ }
+
+ bool clang (ctype == compiler_type::clang);
+
+ append_options (args, t, c_coptions, werror);
+ append_options (args, t, x_coptions, werror);
+ append_options (args, tstd,
+ tstd.size () - (modules && clang ? 1 : 0));
+
+ append_headers (env, args, header_args, a, t, md, dd);
+
+ switch (cclass)
+ {
+ case compiler_class::msvc:
+ {
+ args.push_back ("/nologo");
+
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ args.push_back ("/E");
+ // args.push_back ("/C"); // See above.
+
+ msvc_sanitize_cl (args);
+
+ append_lang_options (args, md); // Compile as.
+
+ break;
+ }
+ case compiler_class::gcc:
+ {
+ if (ot == otype::s)
+ {
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ args.push_back ("-E");
+ append_lang_options (args, md);
+
+ // Options that trigger preprocessing of partially preprocessed
+ // output are a bit of a compiler-specific voodoo.
+ //
+ if (ps)
+ {
+ if (ctype == compiler_type::gcc)
+ {
+ // Note that only these two *plus* -x do the trick.
+ //
+ args.push_back ("-fpreprocessed");
+ args.push_back ("-fdirectives-only");
+ }
+ }
+
+ break;
+ }
+ }
+
+ args.push_back (sp->string ().c_str ());
+ args.push_back (nullptr);
+ }
+
+ if (!env.empty ())
+ env.push_back (nullptr);
+ }
+ else
+ {
+ // Extracting directly from source.
+ //
+ ps = false;
+ sp = &src.path ();
+ }
+
+ // Preprocess and parse.
+ //
+ for (;;) // Breakout loop.
+ try
+ {
+ // Disarm the removal of the preprocessed file in case of an error.
+ // We re-arm it below.
+ //
+ if (ps)
+ psrc.active = false;
+
+ process pr;
+
+ try
+ {
+ if (args.empty ())
+ {
+ pr = process (process_exit (0)); // Successfully exited.
+ pr.in_ofd = fdopen (*sp, fdopen_mode::in);
+ }
+ else
+ {
+ if (verb >= 3)
+ print_process (args);
+
+ // We don't want to see warnings multiple times so ignore all
+ // diagnostics.
+ //
+ pr = process (cpath,
+ args.data (),
+ 0, -1, -2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+ }
+
+ // Use binary mode to obtain consistent positions.
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::binary | fdstream_mode::skip);
+
+ parser p;
+ unit tu (p.parse (is, *sp));
+
+ is.close ();
+
+ if (pr.wait ())
+ {
+ if (ps)
+ psrc.active = true; // Re-arm.
+
+ unit_type& ut (tu.type);
+ module_info& mi (tu.module_info);
+
+ if (!modules)
+ {
+ if (ut != unit_type::non_modular || !mi.imports.empty ())
+ fail << "modules support required by " << src;
+ }
+ else
+ {
+ // Sanity checks.
+ //
+ // If we are compiling a module interface, make sure the
+ // translation unit has the necessary declarations.
+ //
+ if (ut != unit_type::module_iface && src.is_a (*x_mod))
+ fail << src << " is not a module interface unit";
+
+ // A header unit should look like a non-modular translation unit.
+ //
+ if (md.type == unit_type::module_header)
+ {
+ if (ut != unit_type::non_modular)
+ fail << "module declaration in header unit " << src;
+
+ ut = md.type;
+ mi.name = src.path ().string ();
+ }
+
+ // Prior to 15.5 (19.12) VC was not using the 'export module M;'
+              // syntax so we use the prerequisite type to distinguish
+ // between interface and implementation units.
+ //
+ if (ctype == compiler_type::msvc && cmaj == 19 && cmin <= 11)
+ {
+ if (ut == unit_type::module_impl && src.is_a (*x_mod))
+ ut = unit_type::module_iface;
+ }
+ }
+
+ // If we were forced to reprocess, assume the checksum is not
+ // accurate (parts of the translation unit could have been
+ // #ifdef'ed out; see __build2_preprocess).
+ //
+ return pair<unit, string> (
+ move (tu),
+ reprocess ? string () : move (p.checksum));
+ }
+
+ // Fall through.
+ }
+ catch (const io_error& e)
+ {
+ if (pr.wait ())
+ fail << "unable to read " << x_lang << " preprocessor output: "
+ << e;
+
+ // Fall through.
+ }
+
+ assert (pr.exit && !*pr.exit);
+ const process_exit& e (*pr.exit);
+
+ // What should we do with a normal error exit? Remember we suppressed
+ // the compiler's diagnostics. We used to issue a warning and continue
+ // with the assumption that the compilation step will fail with
+ // diagnostics. The problem with this approach is that we may fail
+ // before that because the information we return (e.g., module name)
+ // is bogus. So looks like failing is the only option.
+ //
+ if (e.normal ())
+ {
+ fail << "unable to preprocess " << src <<
+ info << "re-run with -s -V to display failing command" <<
+ info << "then run failing command to display compiler diagnostics";
+ }
+ else
+ run_finish (args, pr); // Throws.
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+ }
+
+ throw failed ();
+ }
+
+ // Extract and inject module dependencies.
+ //
+ void compile_rule::
+ extract_modules (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const compile_target_types& tts,
+ const file& src,
+ match_data& md,
+ module_info&& mi,
+ depdb& dd,
+ bool& update) const
+ {
+ tracer trace (x, "compile_rule::extract_modules");
+
+ // If things go wrong, give the user a bit extra context.
+ //
+ auto df = make_diag_frame (
+ [&src](const diag_record& dr)
+ {
+ if (verb != 0)
+ dr << info << "while extracting module dependencies from " << src;
+ });
+
+ unit_type ut (md.type);
+ module_imports& is (mi.imports);
+
+ // Search and match all the modules we depend on. If this is a module
+ // implementation unit, then treat the module itself as if it was
+ // imported (we insert it first since for some compilers we have to
+ // differentiate between this special module and real imports). Note:
+ // move.
+ //
+ if (ut == unit_type::module_impl)
+ is.insert (
+ is.begin (),
+ module_import {unit_type::module_iface, move (mi.name), false, 0});
+
+ // The change to the set of imports would have required a change to
+ // source code (or options). Changes to the bmi{}s themselves will be
+ // detected via the normal prerequisite machinery. However, the same set
+ // of imports could be resolved to a different set of bmi{}s (in a sense
+ // similar to changing the source file). To detect this we calculate and
+ // store a hash of all (not just direct) bmi{}'s paths.
+ //
+ sha256 cs;
+
+ if (!is.empty ())
+ md.modules = search_modules (a, bs, t, li, tts.bmi, src, is, cs);
+
+ if (dd.expect (cs.string ()) != nullptr)
+ update = true;
+
+ // Save the module map for compilers that use it.
+ //
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // We don't need to redo this if the above hash hasn't changed and
+ // the database is still valid.
+ //
+ if (dd.writing () || !dd.skip ())
+ {
+ auto write = [&dd] (const string& name, const path& file, bool q)
+ {
+ dd.write ("@ ", false);
+ if (q) dd.write ('\'', false);
+ dd.write (name, false);
+ if (q) dd.write ('\'', false);
+ dd.write (' ', false);
+ dd.write (file);
+ };
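+
+          // For example (hypothetical paths), write() produces depdb lines
+          // along these lines, with header unit names quoted:
+          //
+          // @ hello.core /tmp/hello-out/hello-core.mxx.gcm
+          // @ '/tmp/hello/hdr.hxx' /tmp/hello-out/hdr.hxx.gcm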
+
+ // The output mapping is provided in the same way as input.
+ //
+ if (ut == unit_type::module_iface ||
+ ut == unit_type::module_header)
+ write (mi.name, t.path (), ut == unit_type::module_header);
+
+ if (size_t start = md.modules.start)
+ {
+ // Note that we map both direct and indirect imports to override
+ // any module paths that might be stored in the BMIs (or
+ // resolved relative to "repository path", whatever that is).
+ //
+ const auto& pts (t.prerequisite_targets[a]);
+ for (size_t i (start); i != pts.size (); ++i)
+ {
+ if (const target* m = pts[i])
+ {
+ // Save a variable lookup by getting the module name from
+ // the import list (see search_modules()).
+ //
+ // Note: all real modules (not header units).
+ //
+ write (is[i - start].name, m->as<file> ().path (), false);
+ }
+ }
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Set the cc.module_name rule-specific variable if this is an interface
+ // unit. Note that it may seem like a good idea to set it on the bmi{}
+ // group to avoid duplication. We, however, cannot do it MT-safely since
+ // we don't match the group.
+ //
+ // @@ MODHDR TODO: do we need this for header units? Currently we don't
+ // see header units here.
+ //
+ if (ut == unit_type::module_iface /*|| ut == unit_type::module_header*/)
+ {
+ if (value& v = t.state[a].assign (c_module_name))
+ assert (cast<string> (v) == mi.name);
+ else
+ v = move (mi.name); // Note: move.
+ }
+ }
+
+ inline bool
+ std_module (const string& m)
+ {
+ size_t n (m.size ());
+ return (n >= 3 &&
+ m[0] == 's' && m[1] == 't' && m[2] == 'd' &&
+ (n == 3 || m[3] == '.'));
+    }
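+
+    // For example: std_module ("std") and std_module ("std.core") return
+    // true while std_module ("stdio") returns false.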
+
+ // Resolve imported modules to bmi*{} targets.
+ //
+ module_positions compile_rule::
+ search_modules (action a,
+ const scope& bs,
+ file& t,
+ linfo li,
+ const target_type& btt,
+ const file& src,
+ module_imports& imports,
+ sha256& cs) const
+ {
+ tracer trace (x, "compile_rule::search_modules");
+
+ // NOTE: currently we don't see header unit imports (they are
+ // handled by extract_headers() and are not in imports).
+
+ // So we have a list of imports and a list of "potential" module
+ // prerequisites. They are potential in the sense that they may or may
+ // not be required by this translation unit. In other words, they are
+ // the pool where we can resolve actual imports.
+ //
+ // Because we may not need all of these prerequisites, we cannot just go
+ // ahead and match all of them (and they can even have cycles; see rule
+ // synthesis). This poses a bit of a problem: the only way to discover
+ // the module's actual name (see cc.module_name) is by matching it.
+ //
+ // One way to solve this would be to make the user specify the module
+ // name for each mxx{} explicitly. This will be a major pain, however.
+ // Another would be to require encoding of the module name in the
+ // interface unit file name. For example, hello.core -> hello-core.mxx.
+ // This is better but still too restrictive: some will want to call it
+ // hello_core.mxx or HelloCore.mxx (because that's their file naming
+ // convention) or place it in a subdirectory, say, hello/core.mxx.
+ //
+ // In the above examples one common theme about all the file names is
+ // that they contain, in one form or another, the "tail" of the module
+ // name ('core'). So what we are going to do is require that the
+ // interface file names contain enough of the module name tail to
+ // unambiguously resolve all the module imports. On our side we are
+ // going to implement a "fuzzy" module name to file name match. This
+ // should be reliable enough since we will always verify our guesses
+ // once we match the target and extract the actual module name. Plus,
+ // the user will always have the option of resolving any impasses by
+ // specifying the module name explicitly.
+ //
+ // So, the fuzzy match: the idea is that each match gets a score, the
+ // number of characters in the module name that got matched. A match
+ // with the highest score is used. And we use the (length + 1) for a
+ // match against an actual module name.
+ //
+ // Actually, the scoring system is a bit more elaborate than that.
+ // Consider module name core.window and two files, window.mxx and
+ // abstract-window.mxx: which one is likely to define this module?
+ // Clearly the first, but in the above-described scheme they will get
+ // the same score. More generally, consider these "obvious" (to the
+ // human) situations:
+ //
+ // window.mxx vs abstract-window.mxx
+ // details/window.mxx vs abstract-window.mxx
+ // gtk-window.mxx vs gtk-abstract-window.mxx
+ //
+ // To handle such cases we are going to combine the above primary score
+ // with the following secondary scores (in that order):
+ //
+ // a) Strength of separation between matched and unmatched parts:
+ //
+ // '\0' > directory separator > other separator > unseparated
+ //
+ // Here '\0' signifies nothing to separate (unmatched part is empty).
+ //
+ // b) Shortness of the unmatched part.
+ //
+ // For std.* modules we only accept non-fuzzy matches (think std.core vs
+ // some core.mxx). And if such a module is unresolved, then we assume it
+ // is pre-built and will be found by some other means (e.g., VC's
+ // IFCPATH).
+ //
+ auto match_max = [] (const string& m) -> size_t
+ {
+ // The primary and sub-scores are packed in the following decimal
+ // representation:
+ //
+ // PPPPABBBB
+ //
+ // We use decimal instead of binary packing to make it easier to
+ // separate fields in the trace messages, during debugging, etc.
+ //
+ return m.size () * 100000 + 99999; // Maximum match score.
+ };
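+
+      // For example, for module name hello.core (10 characters) the
+      // maximum score is 10 * 100000 + 99999 = 1099999.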
+
+ auto match = [] (const string& f, const string& m) -> size_t
+ {
+ auto file_sep = [] (char c) -> char
+ {
+          // Return the character (translating directory separator to '/') if
+ // it is a separator and '\0' otherwise (so can be used as bool).
+ //
+ return (c == '_' || c == '-' || c == '.' ? c :
+ path::traits_type::is_separator (c) ? '/' : '\0');
+ };
+
+ auto case_sep = [] (char c1, char c2)
+ {
+ return (alpha (c1) &&
+ alpha (c2) &&
+ (ucase (c1) == c1) != (ucase (c2) == c2));
+ };
+
+ size_t fn (f.size ()), fi (fn);
+ size_t mn (m.size ()), mi (mn);
+
+ // True if the previous character was counted as a real (that is,
+ // non-case changing) separator.
+ //
+ bool fsep (false);
+ bool msep (false);
+
+ // Scan backwards for as long as we match. Keep track of the previous
+ // character for case change detection.
+ //
+ for (char fc, mc, fp ('\0'), mp ('\0');
+ fi != 0 && mi != 0;
+ fp = fc, mp = mc, --fi, --mi)
+ {
+ fc = f[fi - 1];
+ mc = m[mi - 1];
+
+ if (casecmp (fc, mc) == 0)
+ {
+ fsep = msep = false;
+ continue;
+ }
+
+ // We consider all separators equal and character case change being
+          // a separator. Some examples of the latter:
+ //
+ // foo.bar
+ // fooBAR
+ // FOObar
+ //
+ bool fs (file_sep (fc));
+ bool ms (mc == '_' || mc == '.');
+
+ if (fs && ms)
+ {
+ fsep = msep = true;
+ continue;
+ }
+
+ // Only if one is a real separator do we consider case change.
+ //
+ if (fs || ms)
+ {
+ bool fa (false), ma (false);
+ if ((fs || (fa = case_sep (fp, fc))) &&
+ (ms || (ma = case_sep (mp, mc))))
+ {
+ // Stay on this character if imaginary punctuation (note: cannot
+ // be both true).
+ //
+ if (fa) {++fi; msep = true;}
+ if (ma) {++mi; fsep = true;}
+
+ continue;
+ }
+ }
+
+ break; // No match.
+ }
+
+ // "Uncount" real separators.
+ //
+ if (fsep) fi++;
+ if (msep) mi++;
+
+ // Use the number of characters matched in the module name and not
+ // in the file (this may not be the same because of the imaginary
+ // separators).
+ //
+ size_t ps (mn - mi);
+
+ // The strength of separation sub-score.
+ //
+ // Check for case change between the last character that matched and
+ // the first character that did not.
+ //
+ size_t as (0);
+ if (fi == 0) as = 9;
+ else if (char c = file_sep (f[fi - 1])) as = c == '/' ? 8 : 7;
+ else if (fi != fn && case_sep (f[fi], f[fi - 1])) as = 7;
+
+ // The length of the unmatched part sub-score.
+ //
+ size_t bs (9999 - fi);
+
+ return ps * 100000 + as * 10000 + bs;
+ };
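+
+      // To illustrate, matching module name core.window against the
+      // (hypothetical) file name stems window and abstract-window: both
+      // match 6 characters of the module name (ps = 6), but the former
+      // leaves nothing unmatched (as = 9, bs = 9999) while the latter
+      // stops at '-' (as = 7, bs = 9990), so window wins with 699999
+      // against 679990.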
+
+ auto& pts (t.prerequisite_targets[a]);
+ size_t start (pts.size ()); // Index of the first to be added.
+
+ // We have two parallel vectors: module names/scores in imports and
+ // targets in prerequisite_targets (offset with start). Pre-allocate
+ // NULL entries in the latter.
+ //
+ size_t n (imports.size ());
+ pts.resize (start + n, nullptr);
+
+ // Oh, yes, there is one "minor" complication. It's the last one, I
+ // promise. It has to do with module re-exporting (export import M;).
+ // In this case (currently) all implementations simply treat it as a
+ // shallow (from the BMI's point of view) reference to the module (or an
+      // implicit import, if you will). Do you see where it's going? Nowhere
+      // good, that's right. This shallow reference means that the compiler
+ // should be able to find BMIs for all the re-exported modules,
+ // recursive. The good news is we are actually in a pretty good shape to
+ // handle this: after match all our prerequisite BMIs will have their
+ // prerequisite BMIs known, recursively. The only bit that is missing is
+ // the re-export flag of some sorts. As well as deciding where to handle
+ // it: here or in append_modules(). After some meditation it became
+ // clear handling it here will be simpler: we need to weed out
+ // duplicates for which we can re-use the imports vector. And we may
+ // also need to save this "flattened" list of modules in depdb.
+ //
+ // Ok, so, here is the plan:
+ //
+ // 1. There is no good place in prerequisite_targets to store the
+ // exported flag (no, using the marking facility across match/execute
+ // is a bad idea). So what we are going to do is put re-exported
+ // bmi{}s at the back and store (in the target's data pad) the start
+ // position. One bad aspect about this part is that we assume those
+ // bmi{}s have been matched by the same rule. But let's not kid
+ // ourselves, there will be no other rule that matches bmi{}s.
+ //
+ // 2. Once we have matched all the bmi{}s we are importing directly
+ // (with all the re-exported by us at the back), we will go over them
+ // and copy all of their re-exported bmi{}s (using the position we
+ // saved on step #1). The end result will be a recursively-explored
+ // list of imported bmi{}s that append_modules() can simply convert
+ // to the list of options.
+ //
+ // One issue with this approach is that these copied targets will be
+ // executed which means we need to adjust their dependent counts
+ // (which is normally done by match). While this seems conceptually
+ // correct (especially if you view re-exports as implicit imports),
+ // it's just extra overhead (we know they will be updated). So what
+ // we are going to do is save another position, that of the start of
+ // these copied-over targets, and will only execute up to this point.
+ //
+ // And after implementing this came the reality check: all the current
+ // implementations require access to all the imported BMIs, not only
+ // the re-exported ones. Some (like Clang) store references to imported
+ // BMI files so we actually don't need to pass any extra options (unless
+ // things get moved) but they still need access to the BMIs (and things
+ // will most likely have to be done differently for distributed
+ // compilation).
+ //
+ // So the revised plan: on the off chance that some implementation will
+ // do it differently, we will continue maintaining the
+ // imported/re-exported split, and how much to copy over can be made
+ // compiler-specific.
+ //
+ // As a first sub-step of step #1, move all the re-exported imports to
+ // the end of the vector. This will make sure they end up at the end
+ // of prerequisite_targets. Note: the special first import, if any,
+ // should be unaffected.
+ //
+ stable_sort (imports.begin (), imports.end (),
+ [] (const module_import& x, const module_import& y)
+ {
+ return !x.exported && y.exported;
+ });
+
+ // Go over the prerequisites once.
+ //
+ // For (direct) library prerequisites, check their prerequisite bmi{}s
+ // (which should be searched and matched with module names discovered;
+ // see the library meta-information protocol for details).
+ //
+ // For our own bmi{} prerequisites, check if each (better) matches
+ // any of the imports.
+
+ // For the fuzzy check, see if a file name (better) resolves any of our
+ // imports and, if so, make it the new selection. For the exact check,
+ // the name is the actual module name and it can only resolve one
+ // import (there are no duplicates).
+ //
+ // Set done to true if all the imports have now been resolved to actual
+ // module names (which means we can stop searching). This will happen
+ // if all the modules come from libraries, which will be fairly common
+ // (think of all the tests), so it's worth optimizing for.
+ //
+ bool done (false);
+
+ auto check_fuzzy = [&trace, &imports, &pts, &match, &match_max, start, n]
+ (const target* pt, const string& name)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ module_import& m (imports[i]);
+
+ if (std_module (m.name)) // No fuzzy std.* matches.
+ continue;
+
+ if (m.score > match_max (m.name)) // Resolved to module name.
+ continue;
+
+ size_t s (match (name, m.name));
+
+ l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+ if (s > m.score)
+ {
+ pts[start + i] = pt;
+ m.score = s;
+ }
+ }
+ };
+
+ // If resolved, return the "slot" in pts (we don't want to create a
+ // side build until we know we match; see below for details).
+ //
+ auto check_exact = [&trace, &imports, &pts, &match_max, start, n, &done]
+ (const string& name) -> const target**
+ {
+ const target** r (nullptr);
+ done = true;
+
+ for (size_t i (0); i != n; ++i)
+ {
+ module_import& m (imports[i]);
+
+ size_t ms (match_max (m.name));
+
+ if (m.score > ms) // Resolved to module name (no effect on done).
+ continue;
+
+ if (r == nullptr)
+ {
+ size_t s (name == m.name ? ms + 1 : 0);
+
+ l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+ if (s > m.score)
+ {
+ r = &pts[start + i].target;
+ m.score = s;
+ continue; // Scan the rest to detect if all done.
+ }
+ }
+
+ done = false;
+ }
+
+ return r;
+ };
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ const target* pt (p.load ()); // Should be cached for libraries.
+
+ if (pt != nullptr)
+ {
+ const target* lt (nullptr);
+
+ if (const libx* l = pt->is_a<libx> ())
+ lt = link_member (*l, a, li);
+ else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ())
+ lt = pt;
+
+ // If this is a library, check its bmi{}s and mxx{}s.
+ //
+ if (lt != nullptr)
+ {
+ for (const target* bt: lt->prerequisite_targets[a])
+ {
+ if (bt == nullptr)
+ continue;
+
+ // Note that here we (try to) use whatever flavor of bmi*{} is
+ // available.
+ //
+ // @@ MOD: BMI compatibility check.
+ // @@ UTL: we need to (recursively) see through libu*{} (and
+ // also in pkgconfig_save()).
+ //
+ if (bt->is_a<bmix> ())
+ {
+ const string& n (
+ cast<string> (bt->state[a].vars[c_module_name]));
+
+ if (const target** p = check_exact (n))
+ *p = bt;
+ }
+ else if (bt->is_a (*x_mod))
+ {
+ // This is an installed library with a list of module sources
+ // (the sources are specified as prerequisites but the fallback
+ // file rule puts them into prerequisite_targets for us).
+ //
+ // The module names should be specified but if they are not,
+ // assume something else is going on and ignore it.
+ //
+ const string* n (cast_null<string> (bt->vars[c_module_name]));
+
+ if (n == nullptr)
+ continue;
+
+ if (const target** p = check_exact (*n))
+ *p = &make_module_sidebuild (a, bs, *lt, *bt, *n);
+ }
+ else
+ continue;
+
+ if (done)
+ break;
+ }
+
+ if (done)
+ break;
+
+ continue;
+ }
+
+ // Fall through.
+ }
+
+ // While it would have been even better not to search for a target, we
+ // need to get hold of the corresponding mxx{} (it is unlikely but
+ // possible for the bmi{} to have a different name).
+ //
+ // While we want to use group_prerequisite_members() below, we cannot
+ // call resolve_group() since we will be doing it "speculatively" for
+ // modules that we may use but also for modules that may use us. This
+ // quickly leads to deadlocks. So instead we are going to perform an
+ // ad hoc group resolution.
+ //
+ const target* pg;
+ if (p.is_a<bmi> ())
+ {
+ pg = pt != nullptr ? pt : &p.search (t);
+ pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}.
+ }
+ else if (p.is_a (btt))
+ {
+ pg = &search (t, bmi::static_type, p.key ());
+ if (pt == nullptr) pt = &p.search (t);
+ }
+ else
+ continue;
+
+ // Find the mxx{} prerequisite and extract its "file name" for the
+ // fuzzy match unless the user specified the module name explicitly.
+ //
+ for (prerequisite_member p:
+ prerequisite_members (a, t, group_prerequisites (*pt, pg)))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ if (p.is_a (*x_mod))
+ {
+ // Check for an explicit module name. Only look for an existing
+ // target (which means the name can only be specified on the
+ // target itself, not target type/pattern-spec).
+ //
+ const target* t (p.search_existing ());
+ const string* n (t != nullptr
+ ? cast_null<string> (t->vars[c_module_name])
+ : nullptr);
+ if (n != nullptr)
+ {
+ if (const target** p = check_exact (*n))
+ *p = pt;
+ }
+ else
+ {
+ // Fuzzy match.
+ //
+ string f;
+
+ // Add the directory part if it is relative. The idea is to
+ // include it into the module match, say hello.core vs
+ // hello/mxx{core}.
+ //
+ // @@ MOD: Why not for absolute? Good question. What if it
+ // contains special components, say, ../mxx{core}?
+ //
+ const dir_path& d (p.dir ());
+
+ if (!d.empty () && d.relative ())
+ f = d.representation (); // Includes trailing slash.
+
+ f += p.name ();
+ check_fuzzy (pt, f);
+ }
+ break;
+ }
+ }
+
+ if (done)
+ break;
+ }
+
+ // Diagnose unresolved modules.
+ //
+ if (!done)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (pts[start + i] == nullptr && !std_module (imports[i].name))
+ {
+ // It would have been nice to print the location of the import
+ // declaration. And we could save it during parsing at the expense
+ // of a few paths (that can be pooled). The question is what to do
+ // when we re-create this information from depdb? We could have
+ // saved the location information there but the relative paths
+ // (e.g., from the #line directives) could end up being wrong if
+ // we re-run from a different working directory.
+ //
+ // It seems the only workable approach is to extract full location
+ // info during parsing, not save it in depdb, and, when re-creating,
+ // fall back to just the src path without any line/column information.
+ // This will probably cover the majority of cases (most of the time
+ // it will be a misspelled module name, not a removal of a module
+ // from a buildfile).
+ //
+ // But at this stage this doesn't seem worth the trouble.
+ //
+ fail (relative (src)) << "unable to resolve module "
+ << imports[i].name;
+ }
+ }
+ }
+
+ // Match in parallel and wait for completion.
+ //
+ match_members (a, t, pts, start);
+
+ // Post-process the list of our (direct) imports. While at it, calculate
+ // the checksum of all (direct and indirect) bmi{} paths.
+ //
+ size_t exported (n);
+ size_t copied (pts.size ());
+
+ for (size_t i (0); i != n; ++i)
+ {
+ const module_import& m (imports[i]);
+
+ // Determine the position of the first re-exported bmi{}.
+ //
+ if (m.exported && exported == n)
+ exported = i;
+
+ const target* bt (pts[start + i]);
+
+ if (bt == nullptr)
+ continue; // Unresolved (std.*).
+
+ // Verify our guesses against extracted module names but don't waste
+ // time if it was a match against the actual module name.
+ //
+ const string& in (m.name);
+
+ if (m.score <= match_max (in))
+ {
+ const string& mn (cast<string> (bt->state[a].vars[c_module_name]));
+
+ if (in != mn)
+ {
+ // Note: matched, so the group should be resolved.
+ //
+ for (prerequisite_member p: group_prerequisite_members (a, *bt))
+ {
+ if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ if (p.is_a (*x_mod)) // Got to be there.
+ {
+ fail (relative (src))
+ << "failed to correctly guess module name from " << p <<
+ info << "guessed: " << in <<
+ info << "actual: " << mn <<
+ info << "consider adjusting module interface file names or" <<
+ info << "consider specifying module name with " << x
+ << ".module_name";
+ }
+ }
+ }
+ }
+
+ // Hash (we know it's a file).
+ //
+ cs.append (static_cast<const file&> (*bt).path ().string ());
+
+ // Copy over bmi{}s from our prerequisites weeding out duplicates.
+ //
+ if (size_t j = bt->data<match_data> ().modules.start)
+ {
+ // Hard to say whether we should reserve or not. We will probably
+ // get quite a bit of duplication.
+ //
+ auto& bpts (bt->prerequisite_targets[a]);
+ for (size_t m (bpts.size ()); j != m; ++j)
+ {
+ const target* et (bpts[j]);
+
+ if (et == nullptr)
+ continue; // Unresolved (std.*).
+
+ const string& mn (cast<string> (et->state[a].vars[c_module_name]));
+
+ if (find_if (imports.begin (), imports.end (),
+ [&mn] (const module_import& i)
+ {
+ return i.name == mn;
+ }) == imports.end ())
+ {
+ pts.push_back (et);
+ cs.append (static_cast<const file&> (*et).path ().string ());
+
+ // Add to the list of imports for further duplicate suppression.
+ // We could have stored a reference to the name (e.g., in score)
+ // but it's probably not worth it if we have a small string
+ // optimization.
+ //
+ imports.push_back (
+ module_import {unit_type::module_iface, mn, true, 0});
+ }
+ }
+ }
+ }
+
+ if (copied == pts.size ()) // No copied tail.
+ copied = 0;
+
+ if (exported == n) // No (own) re-exported imports.
+ exported = copied;
+ else
+ exported += start; // Rebase.
+
+ return module_positions {start, exported, copied};
+ }
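+
+ // To illustrate the returned positions: with two unrelated entries
+ // already in prerequisite_targets, three direct imports of which the
+ // last one is re-exported, and one bmi{} copied over from a
+ // prerequisite, the result would be module_positions {2, 4, 5}.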
+
+ // Find or create a modules sidebuild subproject returning its root
+ // directory.
+ //
+ dir_path compile_rule::
+ find_modules_sidebuild (const scope& rs) const
+ {
+ // First figure out where we are going to build. We want to avoid
+ // multiple sidebuilds so the outermost scope that has loaded the
+ // cc.config module and that is within our amalgamation seems like a
+ // good place.
+ //
+ const scope* as (&rs);
+ {
+ const scope* ws (as->weak_scope ());
+ if (as != ws)
+ {
+ const scope* s (as);
+ do
+ {
+ s = s->parent_scope ()->root_scope ();
+
+ // Use cc.core.vars as a proxy for {c,cxx}.config (a bit smelly).
+ //
+ // This is also the module that registers the scope operation
+ // callback that cleans up the subproject.
+ //
+ if (cast_false<bool> ((*s)["cc.core.vars.loaded"]))
+ as = s;
+
+ } while (s != ws);
+ }
+ }
+
+ // We build modules in a subproject (since there might be no full
+ // language support loaded in the amalgamation, only *.config). So the
+ // first step is to check if the project has already been created and/or
+ // loaded and if not, then to go ahead and do so.
+ //
+ dir_path pd (as->out_path () /
+ as->root_extra->build_dir /
+ modules_sidebuild_dir /=
+ x);
+
+ const scope* ps (&rs.ctx.scopes.find (pd));
+
+ if (ps->out_path () != pd)
+ {
+ // Switch the phase to load then create and load the subproject.
+ //
+ phase_switch phs (rs.ctx, run_phase::load);
+
+ // Re-test again now that we are in exclusive phase (another thread
+ // could have already created and loaded the subproject).
+ //
+ ps = &rs.ctx.scopes.find (pd);
+
+ if (ps->out_path () != pd)
+ {
+ // The project might already be created in which case we just need
+ // to load it.
+ //
+ optional<bool> altn (false); // Standard naming scheme.
+ if (!is_src_root (pd, altn))
+ {
+ // Copy our standard and force modules.
+ //
+ string extra;
+
+ if (const string* std = cast_null<string> (rs[x_std]))
+ extra += string (x) + ".std = " + *std + '\n';
+
+ extra += string (x) + ".features.modules = true";
+
+ config::create_project (
+ pd,
+ as->out_path ().relative (pd), /* amalgamation */
+ {}, /* boot_modules */
+ extra, /* root_pre */
+ {string (x) + '.'}, /* root_modules */
+ "", /* root_post */
+ false, /* config */
+ false, /* buildfile */
+ "the cc module",
+ 2); /* verbosity */
+ }
+
+ ps = &load_project (as->rw () /* lock */,
+ pd,
+ pd,
+ false /* forwarded */);
+ }
+ }
+
+ // Some sanity checks.
+ //
+#ifndef NDEBUG
+ assert (ps->root ());
+ const module* m (ps->lookup_module<module> (x));
+ assert (m != nullptr && m->modules);
+#endif
+
+ return pd;
+ }
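+
+ // For example, for an amalgamation with out_path /prj, the standard
+ // build/ directory, and x == "cxx", the resulting subproject would
+ // live somewhere like /prj/build/cc/modules/cxx/ (the exact
+ // modules_sidebuild_dir component is defined elsewhere).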
+
+ // Synthesize a dependency for building a module binary interface on
+ // the side.
+ //
+ const file& compile_rule::
+ make_module_sidebuild (action a,
+ const scope& bs,
+ const target& lt,
+ const target& mt,
+ const string& mn) const
+ {
+ tracer trace (x, "compile_rule::make_module_sidebuild");
+
+ // Note: see also make_header_sidebuild() below.
+
+ dir_path pd (find_modules_sidebuild (*bs.root_scope ()));
+
+ // We need to come up with a file/target name that will be unique enough
+ // not to conflict with other modules. If we assume that within an
+ // amalgamation there is only one "version" of each module, then the
+ // module name itself seems like a good fit. We just replace '.' with
+ // '-'.
+ //
+ string mf;
+ transform (mn.begin (), mn.end (),
+ back_inserter (mf),
+ [] (char c) {return c == '.' ? '-' : c;});
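+
+ // So, for example, module hello.core becomes the file/target name
+ // hello-core.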
+
+ // It seems natural to build a BMI type that corresponds to the library
+ // type. After all, this is where the object file part of the BMI is
+ // going to come from (though things will probably be different for
+ // module-only libraries).
+ //
+ const target_type& tt (compile_types (link_type (lt).type).bmi);
+
+ // Store the BMI target in the subproject root. If the target already
+ // exists then we assume all this is already done (otherwise why would
+ // someone have created such a target).
+ //
+ if (const file* bt = bs.ctx.targets.find<file> (
+ tt,
+ pd,
+ dir_path (), // Always in the out tree.
+ mf,
+ nullopt, // Use default extension.
+ trace))
+ return *bt;
+
+ prerequisites ps;
+ ps.push_back (prerequisite (mt));
+
+ // We've added the mxx{} but it may import other modules from this
+ // library. Or from (direct) dependencies of this library. We add them
+ // all as prerequisites so that the standard module search logic can
+ // sort things out. This is pretty similar to what we do in link when
+ // synthesizing dependencies for bmi{}'s.
+ //
+ // Note: lt is matched and so the group is resolved.
+ //
+ ps.push_back (prerequisite (lt));
+ for (prerequisite_member p: group_prerequisite_members (a, lt))
+ {
+ if (include (a, lt, p) != include_type::normal) // Excluded/ad hoc.
+ continue;
+
+ // @@ TODO: will probably need revision if using sidebuild for
+ // non-installed libraries (e.g., direct BMI dependencies
+ // will probably have to be translated to mxx{} or some such).
+ //
+ if (p.is_a<libx> () ||
+ p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ())
+ {
+ ps.push_back (p.as_prerequisite ());
+ }
+ }
+
+ auto p (bs.ctx.targets.insert_locked (
+ tt,
+ move (pd),
+ dir_path (), // Always in the out tree.
+ move (mf),
+ nullopt, // Use default extension.
+ true, // Implied.
+ trace));
+ file& bt (static_cast<file&> (p.first));
+
+ // Note that this is racy and someone might have created this target
+ // while we were preparing the prerequisite list.
+ //
+ if (p.second.owns_lock ())
+ bt.prerequisites (move (ps));
+
+ return bt;
+ }
+
+ // Synthesize a dependency for building a header unit binary interface on
+ // the side.
+ //
+ const file& compile_rule::
+ make_header_sidebuild (action,
+ const scope& bs,
+ linfo li,
+ const file& ht) const
+ {
+ tracer trace (x, "compile_rule::make_header_sidebuild");
+
+ // Note: similar to make_module_sidebuild() above.
+
+ dir_path pd (find_modules_sidebuild (*bs.root_scope ()));
+
+ // What should we use as a file/target name? On one hand we want it
+ // unique enough so that <stdio.h> and <custom/stdio.h> don't end up
+ // with the same BMI. On the other, we need the same headers resolving
+ // to the same target, regardless of how they were imported. So it feels
+ // like the name should be the absolute and normalized (actualized on
+ // case-insensitive filesystems) header path. We could try to come up
+ // with something by sanitizing certain characters, etc. But then the
+ // names will be very long and ugly, they will run into path length
+ // limits, etc. So instead we will use the file name plus an abbreviated
+ // hash of the whole path, something like stdio-211321fe6de7.
+ //
+ string mf;
+ {
+ // @@ MODHDR: Can we assume the path is actualized since the header
+ // target came from enter_header()? No, not anymore: it
+ // is now normally just normalized.
+ //
+ const path& hp (ht.path ());
+ mf = hp.leaf ().make_base ().string ();
+ mf += '-';
+ mf += sha256 (hp.string ()).abbreviated_string (12);
+ }
+
+ const target_type& tt (compile_types (li.type).hbmi);
+
+ if (const file* bt = bs.ctx.targets.find<file> (
+ tt,
+ pd,
+ dir_path (), // Always in the out tree.
+ mf,
+ nullopt, // Use default extension.
+ trace))
+ return *bt;
+
+ prerequisites ps;
+ ps.push_back (prerequisite (ht));
+
+ auto p (bs.ctx.targets.insert_locked (
+ tt,
+ move (pd),
+ dir_path (), // Always in the out tree.
+ move (mf),
+ nullopt, // Use default extension.
+ true, // Implied.
+ trace));
+ file& bt (static_cast<file&> (p.first));
+
+ // Note that this is racy and someone might have created this target
+ // while we were preparing the prerequisite list.
+ //
+ if (p.second.owns_lock ())
+ bt.prerequisites (move (ps));
+
+ return bt;
+ }
+
+ // Filter cl.exe noise (msvc.cxx).
+ //
+ void
+ msvc_filter_cl (ifdstream&, const path& src);
+
+ // Append header unit-related options.
+ //
+ // Note that this function is called for both full preprocessing and
+ // compilation proper and in the latter case it is followed by a call
+ // to append_modules().
+ //
+ void compile_rule::
+ append_headers (environment&,
+ cstrings& args,
+ small_vector<string, 2>& stor,
+ action,
+ const file&,
+ const match_data& md,
+ const path& dd) const
+ {
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ if (md.headers != 0)
+ {
+ string s (relative (dd).string ());
+ s.insert (0, "-fmodule-mapper=");
+ s += "?@"; // Cookie (aka line prefix).
+ stor.push_back (move (s));
+ }
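+
+ // The resulting option would be along these lines (assuming the
+ // depdb path is, say, hello.o.d):
+ //
+ // -fmodule-mapper=hello.o.d?@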
+
+ break;
+ }
+ case compiler_type::clang:
+ case compiler_type::msvc:
+ case compiler_type::icc:
+ break;
+ }
+
+ // Shallow-copy storage to args. Why not do it as we go along pushing
+ // into storage? Because of potential reallocations.
+ //
+ for (const string& a: stor)
+ args.push_back (a.c_str ());
+ }
+
+ // Append module-related options.
+ //
+ // Note that this function is only called for the compilation proper and
+ // after a call to append_headers() (so watch out for duplicate options).
+ //
+ void compile_rule::
+ append_modules (environment& env,
+ cstrings& args,
+ small_vector<string, 2>& stor,
+ action a,
+ const file& t,
+ const match_data& md,
+ const path& dd) const
+ {
+ unit_type ut (md.type);
+ const module_positions& ms (md.modules);
+
+ dir_path stdifc; // See the VC case below.
+
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // Use the module map stored in depdb.
+ //
+ // Note that it is also used to specify the output BMI file.
+ //
+ if (md.headers == 0 && // Done in append_headers()?
+ (ms.start != 0 ||
+ ut == unit_type::module_iface ||
+ ut == unit_type::module_header))
+ {
+ string s (relative (dd).string ());
+ s.insert (0, "-fmodule-mapper=");
+ s += "?@"; // Cookie (aka line prefix).
+ stor.push_back (move (s));
+ }
+
+ break;
+ }
+ case compiler_type::clang:
+ {
+ if (ms.start == 0)
+ return;
+
+ // Clang embeds module file references so we only need to specify
+ // our direct imports.
+ //
+ // If/when we get the ability to specify the mapping in a file, we
+ // will pass the whole list.
+ //
+#if 0
+ // In Clang the module implementation's unit .pcm is special and
+ // must be "loaded".
+ //
+ if (ut == unit_type::module_impl)
+ {
+ const file& f (pts[ms.start]->as<file> ());
+ string s (relative (f.path ()).string ());
+ s.insert (0, "-fmodule-file=");
+ stor.push_back (move (s));
+ }
+
+ // Use the module map stored in depdb for others.
+ //
+ string s (relative (dd).string ());
+ s.insert (0, "-fmodule-file-map=@=");
+ stor.push_back (move (s));
+#else
+ auto& pts (t.prerequisite_targets[a]);
+ for (size_t i (ms.start),
+ n (ms.copied != 0 ? ms.copied : pts.size ());
+ i != n;
+ ++i)
+ {
+ const target* pt (pts[i]);
+
+ if (pt == nullptr)
+ continue;
+
+ // Here we use whatever bmi type has been added. And we know all
+ // of these are bmi's.
+ //
+ const file& f (pt->as<file> ());
+ string s (relative (f.path ()).string ());
+
+ // In Clang the module implementation's unit .pcm is special and
+ // must be "loaded".
+ //
+ if (ut == unit_type::module_impl && i == ms.start)
+ s.insert (0, "-fmodule-file=");
+ else
+ {
+ s.insert (0, 1, '=');
+ s.insert (0, cast<string> (f.state[a].vars[c_module_name]));
+ s.insert (0, "-fmodule-file=");
+ }
+
+ stor.push_back (move (s));
+ }
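+
+ // For example, a direct import of hello.core whose BMI is
+ // hello-core.pcm would result in (with the path made relative):
+ //
+ // -fmodule-file=hello.core=hello-core.pcm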
+#endif
+ break;
+ }
+ case compiler_type::msvc:
+ {
+ if (ms.start == 0)
+ return;
+
+ auto& pts (t.prerequisite_targets[a]);
+ for (size_t i (ms.start), n (pts.size ());
+ i != n;
+ ++i)
+ {
+ const target* pt (pts[i]);
+
+ if (pt == nullptr)
+ continue;
+
+ // Here we use whatever bmi type has been added. And we know all
+ // of these are bmi's.
+ //
+ const file& f (pt->as<file> ());
+
+ // In VC std.* modules can only come from a single directory
+ // specified with the IFCPATH environment variable or the
+ // /module:stdIfcDir option.
+ //
+ if (std_module (cast<string> (f.state[a].vars[c_module_name])))
+ {
+ dir_path d (f.path ().directory ());
+
+ if (stdifc.empty ())
+ {
+ // Go one directory up since /module:stdIfcDir will look in
+ // either Release or Debug subdirectories. Keeping the result
+ // absolute feels right.
+ //
+ stor.push_back ("/module:stdIfcDir");
+ stor.push_back (d.directory ().string ());
+ stdifc = move (d);
+ }
+ else if (d != stdifc) // Absolute and normalized.
+ fail << "multiple std.* modules in different directories";
+ }
+ else
+ {
+ stor.push_back ("/module:reference");
+ stor.push_back (relative (f.path ()).string ());
+ }
+ }
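+
+ // For example, an import of hello.core whose BMI is hello-core.ifc
+ // would result in:
+ //
+ // /module:reference hello-core.ifc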
+ break;
+ }
+ case compiler_type::icc:
+ break;
+ }
+
+ // Shallow-copy storage to args. Why not do it as we go along pushing
+ // into storage? Because of potential reallocations.
+ //
+ for (const string& a: stor)
+ args.push_back (a.c_str ());
+
+ // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it
+ // if we are using our own std modules.
+ //
+ if (!stdifc.empty ())
+ env.push_back ("IFCPATH");
+ }
+
+ target_state compile_rule::
+ perform_update (action a, const target& xt) const
+ {
+ const file& t (xt.as<file> ());
+ const path& tp (t.path ());
+
+ match_data md (move (t.data<match_data> ()));
+ unit_type ut (md.type);
+
+ context& ctx (t.ctx);
+
+ // While all our prerequisites are already up-to-date, we still have to
+ // execute them to keep the dependency counts straight. Actually, no, we
+ // may also have to update the modules.
+ //
+ // Note that this also takes care of forcing update on any ad hoc
+ // prerequisite change.
+ //
+ auto pr (
+ execute_prerequisites<file> (
+ md.src.type (),
+ a, t,
+ md.mt,
+ [s = md.modules.start] (const target&, size_t i)
+ {
+ return s != 0 && i >= s; // Only compare timestamps for modules.
+ },
+ md.modules.copied)); // See search_modules() for details.
+
+ const file& s (pr.second);
+ const path* sp (&s.path ());
+
+ if (pr.first)
+ {
+ if (md.touch)
+ {
+ touch (ctx, tp, false, 2);
+ t.mtime (system_clock::now ());
+ ctx.skip_count.fetch_add (1, memory_order_relaxed);
+ }
+ // Note: else mtime should be cached.
+
+ return *pr.first;
+ }
+
+ // Make sure depdb is no older than any of our prerequisites (see md.mt
+ // logic description above for details). Also save the sequence start
+ // time if doing mtime checks (see the depdb::check_mtime() call below).
+ //
+ timestamp start (depdb::mtime_check ()
+ ? system_clock::now ()
+ : timestamp_unknown);
+
+ touch (ctx, md.dd, false, verb_never);
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ otype ot (compile_type (t, ut));
+ linfo li (link_info (bs, ot));
+ compile_target_types tts (compile_types (ot));
+
+ environment env;
+ cstrings args {cpath.recall_string ()};
+
+ // If we are building a module interface, then the target is bmi*{} and
+ // its ad hoc member is obj*{}. For header units there is no obj*{}.
+ //
+ path relm;
+ path relo (ut == unit_type::module_header
+ ? path ()
+ : relative (ut == unit_type::module_iface
+ ? find_adhoc_member<file> (t, tts.obj)->path ()
+ : tp));
+
+ // Build the command line.
+ //
+ if (md.pp != preprocessed::all)
+ {
+ append_options (args, t, c_poptions);
+ append_options (args, t, x_poptions);
+
+ // Add *.export.poptions from prerequisite libraries.
+ //
+ append_lib_options (bs, args, a, t, li);
+
+ // Extra system header dirs (last).
+ //
+ assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+ append_option_values (
+ args, "-I",
+ sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ if (md.symexport)
+ append_symexport_options (args, t);
+ }
+
+ append_options (args, t, c_coptions);
+ append_options (args, t, x_coptions);
+ append_options (args, tstd);
+
+ string out, out1; // Output options storage.
+ small_vector<string, 2> header_args; // Header unit options storage.
+ small_vector<string, 2> module_args; // Module options storage.
+
+ size_t out_i (0); // Index of the -o option.
+ size_t lang_n (0); // Number of lang options.
+
+ if (cclass == compiler_class::msvc)
+ {
+ // The /F*: option variants with separate names only became available
+ // in VS2013/12.0. Why do we bother? Because the command line suddenly
+ // becomes readable.
+ //
+ uint64_t ver (cast<uint64_t> (rs[x_version_major]));
+
+ args.push_back ("/nologo");
+
+ // While we want to keep the low-level build as "pure" as possible,
+ // the two misguided defaults, exceptions and runtime, just have to be
+ // fixed. Otherwise the default build is pretty much unusable. But we
+ // also make sure that the user can easily disable our defaults: if we
+ // see any relevant options explicitly specified, we take our hands
+ // off.
+ //
+ // For C, it looks like no /EH* (exceptions supported but no C++
+ // objects destroyed) is a reasonable default.
+ //
+ if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+ args.push_back ("/EHsc");
+
+ // The runtime is a bit more interesting. At first it may seem like a
+ // good idea to be a bit clever and use the static runtime if we are
+ // building obja{}. And for obje{} we could decide which runtime to
+ // use based on the library link order: if it is static-only, then we
+ // could assume the static runtime. But it is indeed too clever: when
+ // building liba{} we have no idea who is going to use it. It could be
+ // an exe{} that links both static and shared libraries (and is
+ // therefore built with the shared runtime). And to safely use the
+ // static runtime, everything must be built with /MT and there should
+ // be no DLLs in the picture. So we are going to play it safe and
+ // always default to the shared runtime.
+ //
+ // In a similar vein, it would seem reasonable to use the debug runtime
+ // if we are compiling with debug. But, again, there will be fireworks
+ // if we have some projects built with debug and some without and then
+ // we try to link them together (which is not an unreasonable thing to
+ // do). So by default we will always use the release runtime.
+ //
+ if (!find_option_prefixes ({"/MD", "/MT"}, args))
+ args.push_back ("/MD");
+
+ msvc_sanitize_cl (args);
+
+ append_headers (env, args, header_args, a, t, md, md.dd);
+ append_modules (env, args, module_args, a, t, md, md.dd);
+
+ // The presence of /Zi or /ZI causes the compiler to write debug info
+ // to the .pdb file. By default it is a shared file called vcNN.pdb
+ // (where NN is the VC version) created (wait for it) in the current
+ // working directory (and not the directory of the .obj file). Also,
+ // because it is shared, there is a special Windows service that
+ // serializes access. We, of course, want none of that so we will
+ // create a .pdb per object file.
+ //
+ // Note that this also changes the name of the .idb file (used for
+ // minimal rebuild and incremental compilation): cl.exe takes the /Fd
+ // value and replaces the .pdb extension with .idb.
+ //
+ // Note also that what we are doing here appears to be incompatible
+ // with PCH (/Y* options) and /Gm (minimal rebuild).
+ //
+ if (find_options ({"/Zi", "/ZI"}, args))
+ {
+ if (ver >= 18)
+ args.push_back ("/Fd:");
+ else
+ out1 = "/Fd";
+
+ out1 += relo.string ();
+ out1 += ".pdb";
+
+ args.push_back (out1.c_str ());
+ }
+
+ if (ver >= 18)
+ {
+ args.push_back ("/Fo:");
+ args.push_back (relo.string ().c_str ());
+ }
+ else
+ {
+ out = "/Fo" + relo.string ();
+ args.push_back (out.c_str ());
+ }
+
+ // @@ MODHDR MSVC
+ //
+ if (ut == unit_type::module_iface)
+ {
+ relm = relative (tp);
+
+ args.push_back ("/module:interface");
+ args.push_back ("/module:output");
+ args.push_back (relm.string ().c_str ());
+ }
+
+ // Note: no way to indicate that the source is already preprocessed.
+
+ args.push_back ("/c"); // Compile only.
+ append_lang_options (args, md); // Compile as.
+ args.push_back (sp->string ().c_str ()); // Note: relied on being last.
+ }
+ else
+ {
+ if (ot == otype::s)
+ {
+ // On Darwin and Win32, -fPIC is the default.
+ //
+ if (tclass == "linux" || tclass == "bsd")
+ args.push_back ("-fPIC");
+ }
+
+ append_headers (env, args, header_args, a, t, md, md.dd);
+ append_modules (env, args, module_args, a, t, md, md.dd);
+
+ // Note: the order of the following options is relied upon below.
+ //
+ out_i = args.size (); // Index of the -o option.
+
+ if (ut == unit_type::module_iface || ut == unit_type::module_header)
+ {
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // Output module file is specified in the mapping file, the
+ // same as input.
+ //
+ if (ut != unit_type::module_header) // No object file.
+ {
+ args.push_back ("-o");
+ args.push_back (relo.string ().c_str ());
+ args.push_back ("-c");
+ }
+ break;
+ }
+ case compiler_type::clang:
+ {
+ relm = relative (tp);
+
+ args.push_back ("-o");
+ args.push_back (relm.string ().c_str ());
+ args.push_back ("--precompile");
+
+ // Without this option Clang's .pcm will reference source files.
+ // In our case this file may be transient (.ii). Plus, it won't
+ // play nice with distributed compilation.
+ //
+ args.push_back ("-Xclang");
+ args.push_back ("-fmodules-embed-all-files");
+
+ break;
+ }
+ case compiler_type::msvc:
+ case compiler_type::icc:
+ assert (false);
+ }
+ }
+ else
+ {
+ args.push_back ("-o");
+ args.push_back (relo.string ().c_str ());
+ args.push_back ("-c");
+ }
+
+ lang_n = append_lang_options (args, md);
+
+ if (md.pp == preprocessed::all)
+ {
+ // Note that the mode we select must still handle comments and line
+ // continuations. So some more compiler-specific voodoo.
+ //
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // -fdirectives-only is available since GCC 4.3.0.
+ //
+ if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+ {
+ args.push_back ("-fpreprocessed");
+ args.push_back ("-fdirectives-only");
+ }
+ break;
+ }
+ case compiler_type::clang:
+ {
+ // Clang handles comments and line continuations in the
+ // preprocessed source (it does not have -fpreprocessed).
+ //
+ break;
+ }
+ case compiler_type::icc:
+ break; // Compile as normal source for now.
+ case compiler_type::msvc:
+ assert (false);
+ }
+ }
+
+ args.push_back (sp->string ().c_str ());
+ }
+
+ args.push_back (nullptr);
+
+ if (!env.empty ())
+ env.push_back (nullptr);
+
+ // With verbosity level 2 print the command line as if we are compiling
+ // the source file, not its preprocessed version (so that it's easy to
+ // copy and re-run, etc). Only at level 3 and above print the real deal.
+ //
+ if (verb == 1)
+ text << x_name << ' ' << s;
+ else if (verb == 2)
+ print_process (args);
+
+ // If we have the (partially) preprocessed output, switch to that.
+ //
+ bool psrc (!md.psrc.path.empty ());
+ bool pact (md.psrc.active);
+ if (psrc)
+ {
+ args.pop_back (); // nullptr
+ args.pop_back (); // sp
+
+ sp = &md.psrc.path;
+
+ // This should match with how we setup preprocessing.
+ //
+ switch (ctype)
+ {
+ case compiler_type::gcc:
+ {
+ // The -fpreprocessed is implied by .i/.ii. But not when compiling
+ // a header unit (there is no .hi/.hii).
+ //
+ if (ut == unit_type::module_header)
+ args.push_back ("-fpreprocessed");
+ else
+ // Pop -x since it takes precedence over the extension.
+ //
+ // @@ I wonder why bother and not just add -fpreprocessed? Are
+ // we trying to save an option or does something break?
+ //
+ for (; lang_n != 0; --lang_n)
+ args.pop_back ();
+
+ args.push_back ("-fdirectives-only");
+ break;
+ }
+ case compiler_type::clang:
+ {
+ // Note that without -x Clang will treat .i/.ii as fully
+ // preprocessed.
+ //
+ break;
+ }
+ case compiler_type::msvc:
+ {
+ // Nothing to do (/TP or /TC already there).
+ //
+ break;
+ }
+ case compiler_type::icc:
+ assert (false);
+ }
+
+ args.push_back (sp->string ().c_str ());
+ args.push_back (nullptr);
+
+ // Let's keep the preprocessed file in case of an error but only at
+ // verbosity level 3 and up (when one actually sees it mentioned on
+ // the command line). We also have to re-arm on success (see below).
+ //
+ if (pact && verb >= 3)
+ md.psrc.active = false;
+ }
+
+ if (verb >= 3)
+ print_process (args);
+
+ // @@ DRYRUN: Currently we discard the (partially) preprocessed file on
+ // dry-run which is a waste. Even if we keep the file around (like we do
+ // for the error case; see above), we currently have no support for
+ // re-using the previously preprocessed output. However, everything
+ // points towards us needing this in the near future since with modules
+ // we may be out of date without needing to re-preprocess the
+ // translation unit (i.e., one of the imported modules' BMIs has
+ // changed).
+ //
+ if (!ctx.dry_run)
+ {
+ try
+ {
+ // VC cl.exe sends diagnostics to stdout. It also prints the file
+ // name being compiled as the first line. So for cl.exe we redirect
+ // stdout to a pipe, filter that noise out, and send the rest to
+ // stderr.
+ //
+ // For other compilers redirect stdout to stderr, in case any of
+ // them tries to pull off something similar. For sane compilers this
+ // should be harmless.
+ //
+ bool filter (ctype == compiler_type::msvc);
+
+ process pr (cpath,
+ args.data (),
+ 0, (filter ? -1 : 2), 2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ if (filter)
+ {
+ try
+ {
+ ifdstream is (
+ move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+
+ msvc_filter_cl (is, *sp);
+
+ // If anything remains in the stream, send it all to stderr.
+ // Note that the eof check is important: if the stream is at
+ // eof, this and all subsequent writes to the diagnostics stream
+ // will fail (and you won't see a thing).
+ //
+ if (is.peek () != ifdstream::traits_type::eof ())
+ diag_stream_lock () << is.rdbuf ();
+
+ is.close ();
+ }
+ catch (const io_error&) {} // Assume exits with error.
+ }
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+ }
+
+ // Remove preprocessed file (see above).
+ //
+ if (pact && verb >= 3)
+ md.psrc.active = true;
+
+ // Clang's module compilation requires two separate compiler
+ // invocations.
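+ //
+ // Roughly (the second invocation also gets
+ // -Wno-unused-command-line-argument):
+ //
+ // clang++ ... -o hello.pcm --precompile hello.mxx
+ // clang++ ... -o hello.o -c hello.pcm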
+ //
+ if (ctype == compiler_type::clang && ut == unit_type::module_iface)
+ {
+ // Adjust the command line. First discard everything after -o then
+ // build the new "tail".
+ //
+ args.resize (out_i + 1);
+ args.push_back (relo.string ().c_str ()); // Produce .o.
+ args.push_back ("-c"); // By compiling .pcm.
+ args.push_back ("-Wno-unused-command-line-argument");
+ args.push_back (relm.string ().c_str ());
+ args.push_back (nullptr);
+
+ if (verb >= 2)
+ print_process (args);
+
+ if (!ctx.dry_run)
+ {
+ // Remove the target file if this fails. If we don't do that, we
+ // will end up with a broken build that is up-to-date.
+ //
+ auto_rmfile rm (relm);
+
+ try
+ {
+ process pr (cpath,
+ args.data (),
+ 0, 2, 2,
+ nullptr, // CWD
+ env.empty () ? nullptr : env.data ());
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+
+ rm.cancel ();
+ }
+ }
+
+ timestamp now (system_clock::now ());
+
+ if (!ctx.dry_run)
+ depdb::check_mtime (start, md.dd, tp, now);
+
+ // Should we go to the filesystem and get the new mtime? We know the
+ // file has been modified, so instead just use the current clock time.
+ // It has the advantage of subsecond precision. Plus, in
+ // case of dry-run, the file won't be modified.
+ //
+ t.mtime (now);
+ return target_state::changed;
+ }
+
+ target_state compile_rule::
+ perform_clean (action a, const target& xt) const
+ {
+ const file& t (xt.as<file> ());
+
+ clean_extras extras;
+
+ switch (ctype)
+ {
+ case compiler_type::gcc: extras = {".d", x_pext, ".t"}; break;
+ case compiler_type::clang: extras = {".d", x_pext}; break;
+ case compiler_type::msvc: extras = {".d", x_pext, ".idb", ".pdb"};break;
+ case compiler_type::icc: extras = {".d"}; break;
+ }
+
+ return perform_clean_extra (a, t, extras);
+ }
+ }
+}
diff --git a/libbuild2/cc/compile-rule.hxx b/libbuild2/cc/compile-rule.hxx
new file mode 100644
index 0000000..93972a2
--- /dev/null
+++ b/libbuild2/cc/compile-rule.hxx
@@ -0,0 +1,189 @@
+// file : libbuild2/cc/compile-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_COMPILE_RULE_HXX
+#define LIBBUILD2_CC_COMPILE_RULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/rule.hxx>
+#include <libbuild2/filesystem.hxx> // auto_rmfile
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ class depdb;
+
+ namespace cc
+ {
+ // The order is arranged so that the integral values indicate whether
+ // one is "stronger" than another.
+ //
+ enum class preprocessed: uint8_t {none, includes, modules, all};
+
+ // Positions of the re-exported bmi{}s. See search_modules() for
+ // details.
+ //
+ struct module_positions
+ {
+ size_t start; // First imported bmi*{}, 0 if none.
+ size_t exported; // First re-exported bmi*{}, 0 if none.
+ size_t copied; // First copied-over bmi*{}, 0 if none.
+ };
+
+ class LIBBUILD2_CC_SYMEXPORT compile_rule: public rule, virtual common
+ {
+ public:
+ compile_rule (data&&);
+
+ virtual bool
+ match (action, target&, const string&) const override;
+
+ virtual recipe
+ apply (action, target&) const override;
+
+ target_state
+ perform_update (action, const target&) const;
+
+ target_state
+ perform_clean (action, const target&) const;
+
+ private:
+ struct match_data;
+ using environment = small_vector<const char*, 2>;
+
+ void
+ append_lib_options (const scope&,
+ cstrings&,
+ action,
+ const target&,
+ linfo) const;
+
+ void
+ hash_lib_options (const scope&,
+ sha256&,
+ action,
+ const target&,
+ linfo) const;
+
+ // Mapping of include prefixes (e.g., foo in <foo/bar>) for auto-
+ // generated headers to directories where they will be generated.
+ //
+ // We are using a prefix map of directories (dir_path_map) instead of
+ // just a map in order to also cover sub-paths (e.g., <foo/more/bar> if
+ // we continue with the example). Specifically, we need to make sure we
+ // don't treat foobar as a sub-directory of foo.
+ //
+ // The priority is used to decide who should override whom. Lesser
+ // values are considered higher priority. See append_prefixes() for
+ // details.
+ //
+ // @@ The keys should be normalized.
+ //
+ struct prefix_value
+ {
+ dir_path directory;
+ size_t priority;
+ };
+ using prefix_map = dir_path_map<prefix_value>;
+
+ void
+ append_prefixes (prefix_map&, const target&, const variable&) const;
+
+ void
+ append_lib_prefixes (const scope&,
+ prefix_map&,
+ action,
+ target&,
+ linfo) const;
+
+ prefix_map
+ build_prefix_map (const scope&, action, target&, linfo) const;
+
+ small_vector<const target_type*, 2>
+ map_extension (const scope&, const string&, const string&) const;
+
+ // Src-to-out re-mapping. See extract_headers() for details.
+ //
+ using srcout_map = path_map<dir_path>;
+
+ struct module_mapper_state;
+
+ void
+ gcc_module_mapper (module_mapper_state&,
+ action, const scope&, file&, linfo,
+ ifdstream&, ofdstream&,
+ depdb&, bool&, bool&,
+ optional<prefix_map>&, srcout_map&) const;
+
+ pair<const file*, bool>
+ enter_header (action, const scope&, file&, linfo,
+ path&&, bool,
+ optional<prefix_map>&, srcout_map&) const;
+
+ optional<bool>
+ inject_header (action, file&, const file&, bool, timestamp) const;
+
+ pair<auto_rmfile, bool>
+ extract_headers (action, const scope&, file&, linfo,
+ const file&, match_data&,
+ depdb&, bool&, timestamp) const;
+
+ pair<unit, string>
+ parse_unit (action, file&, linfo,
+ const file&, auto_rmfile&,
+ const match_data&, const path&) const;
+
+ void
+ extract_modules (action, const scope&, file&, linfo,
+ const compile_target_types&,
+ const file&, match_data&,
+ module_info&&, depdb&, bool&) const;
+
+ module_positions
+ search_modules (action, const scope&, file&, linfo,
+ const target_type&,
+ const file&, module_imports&, sha256&) const;
+
+ dir_path
+ find_modules_sidebuild (const scope&) const;
+
+ const file&
+ make_module_sidebuild (action, const scope&, const target&,
+ const target&, const string&) const;
+
+ const file&
+ make_header_sidebuild (action, const scope&, linfo, const file&) const;
+
+ void
+ append_headers (environment&, cstrings&, small_vector<string, 2>&,
+ action, const file&,
+ const match_data&, const path&) const;
+
+ void
+ append_modules (environment&, cstrings&, small_vector<string, 2>&,
+ action, const file&,
+ const match_data&, const path&) const;
+
+ // Compiler-specific language selection option. Return the number of
+ // options (arguments, really) appended.
+ //
+ size_t
+ append_lang_options (cstrings&, const match_data&) const;
+
+ void
+ append_symexport_options (cstrings&, const target&) const;
+
+ private:
+ const string rule_id;
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_COMPILE_RULE_HXX
diff --git a/libbuild2/cc/export.hxx b/libbuild2/cc/export.hxx
new file mode 100644
index 0000000..16118d6
--- /dev/null
+++ b/libbuild2/cc/export.hxx
@@ -0,0 +1,38 @@
+// file : libbuild2/cc/export.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+// Normally we don't export class templates (but do complete specializations),
+// inline functions, and classes with only inline member functions. Exporting
+// classes that inherit from non-exported/imported bases (e.g., std::string)
+// will end up badly. The only known workarounds are to not inherit or to not
+// export. Also, MinGW GCC doesn't like seeing non-exported functions being
+// used before their inline definition. The workaround is to reorder code. In
+// the end it's all trial and error.
+
+#if defined(LIBBUILD2_CC_STATIC) // Using static.
+# define LIBBUILD2_CC_SYMEXPORT
+#elif defined(LIBBUILD2_CC_STATIC_BUILD) // Building static.
+# define LIBBUILD2_CC_SYMEXPORT
+#elif defined(LIBBUILD2_CC_SHARED) // Using shared.
+# ifdef _WIN32
+# define LIBBUILD2_CC_SYMEXPORT __declspec(dllimport)
+# else
+# define LIBBUILD2_CC_SYMEXPORT
+# endif
+#elif defined(LIBBUILD2_CC_SHARED_BUILD) // Building shared.
+# ifdef _WIN32
+# define LIBBUILD2_CC_SYMEXPORT __declspec(dllexport)
+# else
+# define LIBBUILD2_CC_SYMEXPORT
+# endif
+#else
+// If none of the above macros are defined, then we assume we are being used
+// by some third-party build system that cannot/doesn't signal the library
+// type. Note that this fallback works for both static and shared but in case
+// of shared will be sub-optimal compared to having dllimport.
+//
+# define LIBBUILD2_CC_SYMEXPORT // Using static or shared.
+#endif
diff --git a/libbuild2/cc/gcc.cxx b/libbuild2/cc/gcc.cxx
new file mode 100644
index 0000000..632805c
--- /dev/null
+++ b/libbuild2/cc/gcc.cxx
@@ -0,0 +1,263 @@
+// file : libbuild2/cc/gcc.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+#include <libbuild2/cc/module.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // Extract system header search paths from GCC (gcc/g++) or compatible
+ // (Clang, Intel) using the -v -E </dev/null method.
+ //
+ dir_paths config_module::
+ gcc_header_search_paths (const process_path& xc, scope& rs) const
+ {
+ dir_paths r;
+
+ cstrings args;
+ string std; // Storage.
+
+ args.push_back (xc.recall_string ());
+ append_options (args, rs, c_coptions);
+ append_options (args, rs, x_coptions);
+ append_options (args, tstd);
+
+ // Compile as.
+ //
+ auto langopt = [this] () -> const char*
+ {
+ switch (x_lang)
+ {
+ case lang::c: return "c";
+ case lang::cxx: return "c++";
+ }
+
+ assert (false); // Can't get here.
+ return nullptr;
+ };
+
+ args.push_back ("-x");
+ args.push_back (langopt ());
+ args.push_back ("-v");
+ args.push_back ("-E");
+ args.push_back ("-");
+ args.push_back (nullptr);
+
+ if (verb >= 3)
+ print_process (args);
+
+ try
+ {
+ // Open pipe to stderr, redirect stdin and stdout to /dev/null.
+ //
+ process pr (xc, args.data (), -2, -2, -1);
+
+ try
+ {
+ ifdstream is (
+ move (pr.in_efd), fdstream_mode::skip, ifdstream::badbit);
+
+ // Normally the system header paths appear between the following
+ // lines:
+ //
+ // #include <...> search starts here:
+ // End of search list.
+ //
+ // The exact text depends on the current locale. What we can rely on
+ // is the presence of the "#include <...>" substring in the
+ // "opening" line and the fact that the paths are indented with a
+ // single space character, unlike the "closing" line.
+ //
+ // Note that on Mac OS we will also see some framework paths among
+ // system header paths, followed with a comment. For example:
+ //
+ // /Library/Frameworks (framework directory)
+ //
+ // For now we ignore framework paths and to filter them out we will
+ // only consider valid paths to existing directories, skipping those
+ // which we fail to normalize or stat.
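+ //
+ // For reference, a typical fragment of the English-locale output:
+ //
+ // #include <...> search starts here:
+ // /usr/lib/gcc/x86_64-linux-gnu/9/include
+ // /usr/local/include
+ // /usr/include
+ // End of search list.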
+ //
+ string s;
+ for (bool found (false); getline (is, s); )
+ {
+ if (!found)
+ found = s.find ("#include <...>") != string::npos;
+ else
+ {
+ if (s[0] != ' ')
+ break;
+
+ try
+ {
+ dir_path d (s, 1, s.size () - 1);
+
+ if (d.absolute () && exists (d, true) &&
+ find (r.begin (), r.end (), d.normalize ()) == r.end ())
+ r.emplace_back (move (d));
+ }
+ catch (const invalid_path&) {}
+ }
+ }
+
+ is.close (); // Don't block.
+
+ if (!pr.wait ())
+ {
+ // We have read stderr so better print some diagnostics.
+ //
+ diag_record dr (fail);
+
+ dr << "failed to extract " << x_lang << " header search paths" <<
+ info << "command line: ";
+
+ print_process (dr, args);
+ }
+ }
+ catch (const io_error&)
+ {
+ pr.wait ();
+ fail << "error reading " << x_lang << " compiler -v -E output";
+ }
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+
+ // It's highly unlikely not to have any system header directories. More
+ // likely, we have misinterpreted the compiler output.
+ //
+ if (r.empty ())
+ fail << "unable to extract " << x_lang << " compiler system header "
+ << "search paths";
+
+ return r;
+ }
+
+ // Extract system library search paths from GCC (gcc/g++) or compatible
+ // (Clang, Intel) using the -print-search-dirs option.
+ //
+ dir_paths config_module::
+ gcc_library_search_paths (const process_path& xc, scope& rs) const
+ {
+ dir_paths r;
+
+ cstrings args;
+ string std; // Storage.
+
+ args.push_back (xc.recall_string ());
+ append_options (args, rs, c_coptions);
+ append_options (args, rs, x_coptions);
+ append_options (args, tstd);
+ append_options (args, rs, c_loptions);
+ append_options (args, rs, x_loptions);
+ args.push_back ("-print-search-dirs");
+ args.push_back (nullptr);
+
+ if (verb >= 3)
+ print_process (args);
+
+ // Open pipe to stdout.
+ //
+ process pr (run_start (xc,
+ args.data (),
+ 0, /* stdin */
+ -1 /* stdout */));
+
+ string l;
+ try
+ {
+ ifdstream is (
+ move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+
+ // The output of -print-search-dirs is a bunch of lines that start
+ // with "<name>: =" where name can be "install", "programs", or
+ // "libraries". If you have an English locale, that is. If you set
+ // your LC_ALL="tr_TR", then it becomes "kurulum", "programlar", and
+ // "kitaplıklar". Also, Clang omits "install" while GCC and Intel icc
+ // print all three. The "libraries" line seems to always be last,
+ // however.
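+ //
+ // For example (GCC on Linux, trimmed):
+ //
+ // install: /usr/lib/gcc/x86_64-linux-gnu/9/
+ // programs: =/usr/lib/gcc/x86_64-linux-gnu/9/:...
+ // libraries: =/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/x86_64-linux-gnu/:...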
+ //
+ string s;
+ for (bool found (false); !found && getline (is, s); )
+ {
+ found = (s.compare (0, 12, "libraries: =") == 0);
+
+ size_t p (found ? 9 : s.find (": ="));
+
+ if (p != string::npos)
+ l.assign (s, p + 3, string::npos);
+ }
+
+ is.close (); // Don't block.
+ }
+ catch (const io_error&)
+ {
+ pr.wait ();
+ fail << "error reading " << x_lang << " compiler -print-search-dirs "
+ << "output";
+ }
+
+ run_finish (args, pr);
+
+ if (l.empty ())
+ fail << "unable to extract " << x_lang << " compiler system library "
+ << "search paths";
+
+ // Now the fun part: figuring out which delimiter is used. Normally it
+ // is ':' but on Windows it is ';' (or can be; who knows for sure). Also
+ // note that these paths are absolute (or should be). So here is what we
+ // are going to do: first look for ';'. If found, then that's the
+ // delimiter. If not found, then there are two cases: it is either a
+ // single Windows path or the delimiter is ':'. To distinguish these two
+ // cases we check if the path starts with a Windows drive.
+ //
+ char d (';');
+ string::size_type e (l.find (d));
+
+ if (e == string::npos &&
+ (l.size () < 2 || l[0] == '/' || l[1] != ':'))
+ {
+ d = ':';
+ e = l.find (d);
+ }
+
+ // Now chop it up. We already have the position of the first delimiter
+ // (if any).
+ //
+ for (string::size_type b (0);; e = l.find (d, (b = e + 1)))
+ {
+ dir_path d (l, b, (e != string::npos ? e - b : e));
+
+ if (find (r.begin (), r.end (), d.normalize ()) == r.end ())
+ r.emplace_back (move (d));
+
+ if (e == string::npos)
+ break;
+ }
+
+ return r;
+ }
+ }
+}
diff --git a/libbuild2/cc/guess.cxx b/libbuild2/cc/guess.cxx
new file mode 100644
index 0000000..02a2f5a
--- /dev/null
+++ b/libbuild2/cc/guess.cxx
@@ -0,0 +1,1892 @@
+// file : libbuild2/cc/guess.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/guess.hxx>
+
+#include <map>
+#include <cstring> // strlen(), strchr()
+
+#include <libbuild2/diagnostics.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ string
+ to_string (compiler_type t)
+ {
+ string r;
+
+ switch (t)
+ {
+ case compiler_type::clang: r = "clang"; break;
+ case compiler_type::gcc: r = "gcc"; break;
+ case compiler_type::msvc: r = "msvc"; break;
+ case compiler_type::icc: r = "icc"; break;
+ }
+
+ return r;
+ }
+
+ compiler_id::
+ compiler_id (const std::string& id)
+ {
+ using std::string;
+
+ size_t p (id.find ('-'));
+
+ if (id.compare (0, p, "gcc" ) == 0) type = compiler_type::gcc;
+ else if (id.compare (0, p, "clang") == 0) type = compiler_type::clang;
+ else if (id.compare (0, p, "msvc" ) == 0) type = compiler_type::msvc;
+ else if (id.compare (0, p, "icc" ) == 0) type = compiler_type::icc;
+ else
+ throw invalid_argument (
+ "invalid compiler type '" + string (id, 0, p) + "'");
+
+ if (p != string::npos)
+ {
+ variant.assign (id, p + 1, string::npos);
+
+ if (variant.empty ())
+ throw invalid_argument ("empty compiler variant");
+ }
+ }
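+
+ // So, for example, "gcc" yields type gcc with an empty variant while
+ // "clang-apple" yields type clang with variant "apple".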
+
+ string compiler_id::
+ string () const
+ {
+ std::string r (to_string (type));
+
+ if (!variant.empty ())
+ {
+ r += '-';
+ r += variant;
+ }
+
+ return r;
+ }
+
+ string
+ to_string (compiler_class c)
+ {
+ string r;
+
+ switch (c)
+ {
+ case compiler_class::gcc: r = "gcc"; break;
+ case compiler_class::msvc: r = "msvc"; break;
+ }
+
+ return r;
+ }
+
+ // Standard library detection for GCC-class compilers.
+ //
+ // The src argument should detect the standard library based on the
+ // preprocessor macros and output the result in the stdlib:="XXX" form.
+ //
+ static string
+ stdlib (lang xl,
+ const process_path& xp,
+ const strings* c_po, const strings* x_po,
+ const strings* c_co, const strings* x_co,
+ const char* src)
+ {
+ cstrings args {xp.recall_string ()};
+ if (c_po != nullptr) append_options (args, *c_po);
+ if (x_po != nullptr) append_options (args, *x_po);
+ if (c_co != nullptr) append_options (args, *c_co);
+ if (x_co != nullptr) append_options (args, *x_co);
+ args.push_back ("-x");
+ switch (xl)
+ {
+ case lang::c: args.push_back ("c"); break;
+ case lang::cxx: args.push_back ("c++"); break;
+ }
+ args.push_back ("-E");
+ args.push_back ("-"); // Read stdin.
+ args.push_back (nullptr);
+
+      // The source we are going to preprocess may contain #include's which
+ // may fail to resolve if, for example, there is no standard library
+ // (-nostdinc/-nostdinc++). So we are going to suppress diagnostics and
+ // assume the error exit code means no standard library (of course it
+ // could also be because there is something wrong with the compiler or
+ // options but that we simply leave to blow up later).
+ //
+ process pr (run_start (3 /* verbosity */,
+ xp,
+ args.data (),
+ -1 /* stdin */,
+ -1 /* stdout */,
+ false /* error */));
+ string l, r;
+ try
+ {
+ // Here we have to simultaneously write to stdin and read from stdout
+ // with both operations having the potential to block. For now we
+ // assume that src fits into the pipe's buffer.
+ //
+ ofdstream os (move (pr.out_fd));
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::skip,
+ ifdstream::badbit);
+
+ os << src << endl;
+ os.close ();
+
+ while (!eof (getline (is, l)))
+ {
+ size_t p (l.find_first_not_of (' '));
+
+ if (p != string::npos && l.compare (p, 9, "stdlib:=\"") == 0)
+ {
+ p += 9;
+ r = string (l, p, l.size () - p - 1); // One for closing \".
+ break;
+ }
+ }
+
+ is.close ();
+ }
+ catch (const io_error&)
+ {
+ // Presumably the child process failed. Let run_finish() deal with
+ // that.
+ }
+
+ if (!run_finish (args.data (), pr, false /* error */, l))
+ r = "none";
+
+ if (r.empty ())
+ fail << "unable to determine " << xl << " standard library";
+
+ return r;
+ }
+
+ // C standard library detection on POSIX (i.e., non-Windows) systems.
+ // Notes:
+ //
+ // - We place platform macro-based checks (__FreeBSD__, __APPLE__, etc)
+ // after library macro-based ones in case a non-default libc is used.
+ //
+ static const char* c_stdlib_src =
+"#if !defined(__STDC_HOSTED__) || __STDC_HOSTED__ == 1 \n"
+"# include <stddef.h> /* Forces defining __KLIBC__ for klibc. */ \n"
+"# include <limits.h> /* Includes features.h for glibc. */ \n"
+"# include <sys/types.h> /* Includes sys/cdefs.h for bionic. */ \n"
+" /* Includes sys/features.h for newlib. */ \n"
+" /* Includes features.h for uclibc. */ \n"
+"# if defined(__KLIBC__) \n"
+" stdlib:=\"klibc\" \n"
+"# elif defined(__BIONIC__) \n"
+" stdlib:=\"bionic\" \n"
+"# elif defined(__NEWLIB__) \n"
+" stdlib:=\"newlib\" \n"
+"# elif defined(__UCLIBC__) \n"
+" stdlib:=\"uclibc\" \n"
+"# elif defined(__dietlibc__) /* Also has to be defined manually by */ \n"
+" stdlib:=\"dietlibc\" /* or some wrapper. */ \n"
+"# elif defined(__MUSL__) /* This libc refuses to define __MUSL__ */ \n"
+" stdlib:=\"musl\" /* so it has to be defined by user. */ \n"
+"# elif defined(__GLIBC__) /* Check for glibc last since some libc's */ \n"
+" stdlib:=\"glibc\" /* pretend to be it. */ \n"
+"# elif defined(__FreeBSD__) \n"
+" stdlib:=\"freebsd\" \n"
+"# elif defined(__APPLE__) \n"
+" stdlib:=\"apple\" \n"
+"# else \n"
+" stdlib:=\"other\" \n"
+"# endif \n"
+"#else \n"
+" stdlib:=\"none\" \n"
+"#endif \n";
+
+ // Pre-guess the compiler type based on the compiler executable name and
+ // also return the start of that name in the path (used to derive the
+    // toolchain pattern). Return the invalid compiler type and npos if we
+    // can't make a guess (for example, because the compiler name is a
+    // generic 'c++'). Note that it
+ // only guesses the type, not the variant.
+ //
+ static pair<compiler_type, size_t>
+ pre_guess (lang xl, const path& xc, const optional<compiler_id>& xi)
+ {
+ tracer trace ("cc::pre_guess");
+
+ // Analyze the last path component only.
+ //
+ const string& s (xc.string ());
+ size_t s_p (path::traits_type::find_leaf (s));
+ size_t s_n (s.size ());
+
+ // Name separator characters (e.g., '-' in 'g++-4.8').
+ //
+ auto sep = [] (char c) -> bool
+ {
+ return c == '-' || c == '_' || c == '.';
+ };
+
+ auto stem = [&sep, &s, s_p, s_n] (const char* x) -> size_t
+ {
+ size_t m (strlen (x));
+ size_t p (s.find (x, s_p, m));
+
+ return (p != string::npos &&
+ ( p == s_p || sep (s[p - 1])) && // Separated beginning.
+ ((p + m) == s_n || sep (s[p + m]))) // Separated end.
+ ? p
+ : string::npos;
+ };
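+
+      // For example, with stem "gcc": "gcc", "gcc-5", and "arm-gcc" all
+      // match while "gcc2" and "libgcc" do not (no separating character).
+      //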
+
+ using type = compiler_type;
+ using pair = std::pair<type, size_t>;
+
+ // If the user specified the compiler id, then only check the stem for
+ // that compiler.
+ //
+ auto check = [&xi, &stem] (type t, const char* s) -> optional<pair>
+ {
+ if (!xi || xi->type == t)
+ {
+ size_t p (stem (s));
+
+ if (p != string::npos)
+ return pair (t, p);
+ }
+
+ return nullopt;
+ };
+
+ // Warn if the user specified a C compiler instead of C++ or vice versa.
+ //
+ lang o; // Other language.
+ const char* as (nullptr); // Actual stem.
+ const char* es (nullptr); // Expected stem.
+
+ switch (xl)
+ {
+ case lang::c:
+ {
+ // Keep msvc last since 'cl' is very generic.
+ //
+ if (auto r = check (type::gcc, "gcc") ) return *r;
+ if (auto r = check (type::clang, "clang")) return *r;
+ if (auto r = check (type::icc, "icc") ) return *r;
+ if (auto r = check (type::msvc, "cl") ) return *r;
+
+ if (check (type::gcc, as = "g++") ) es = "gcc";
+ else if (check (type::clang, as = "clang++")) es = "clang";
+ else if (check (type::icc, as = "icpc") ) es = "icc";
+ else if (check (type::msvc, as = "c++") ) es = "cc";
+
+ o = lang::cxx;
+ break;
+ }
+ case lang::cxx:
+ {
+ // Keep msvc last since 'cl' is very generic.
+ //
+ if (auto r = check (type::gcc, "g++") ) return *r;
+ if (auto r = check (type::clang, "clang++")) return *r;
+ if (auto r = check (type::icc, "icpc") ) return *r;
+ if (auto r = check (type::msvc, "cl") ) return *r;
+
+ if (check (type::gcc, as = "gcc") ) es = "g++";
+ else if (check (type::clang, as = "clang")) es = "clang++";
+ else if (check (type::icc, as = "icc") ) es = "icpc";
+ else if (check (type::msvc, as = "cc") ) es = "c++";
+
+ o = lang::c;
+ break;
+ }
+ }
+
+ if (es != nullptr)
+ warn << xc << " looks like a " << o << " compiler" <<
+ info << "should it be '" << es << "' instead of '" << as << "'?";
+
+ // If the user specified the id, then continue as if we pre-guessed.
+ //
+ if (xi)
+ return pair (xi->type, string::npos);
+
+ l4 ([&]{trace << "unable to guess compiler type of " << xc;});
+
+ return pair (invalid_compiler_type, string::npos);
+ }
+
+ // Guess the compiler type and variant by running it. If the pre argument
+ // is not empty, then only "confirm" the pre-guess. Return empty result if
+ // unable to guess.
+ //
+ struct guess_result
+ {
+ compiler_id id;
+ string signature;
+ string checksum;
+ process_path path;
+
+ guess_result () = default;
+ guess_result (compiler_id i, string&& s)
+ : id (move (i)), signature (move (s)) {}
+
+ bool
+ empty () const {return id.empty ();}
+ };
+
+ // Allowed to change pre if succeeds.
+ //
+ static guess_result
+ guess (const char* xm,
+ lang,
+ const path& xc,
+ const optional<compiler_id>& xi,
+ compiler_type& pre)
+ {
+ tracer trace ("cc::guess");
+
+ assert (!xi || xi->type == pre);
+
+ guess_result r;
+
+ process_path xp;
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << " to override";
+ });
+
+ // Only search in PATH (specifically, omitting the current
+ // executable's directory on Windows).
+ //
+ xp = run_search (xc,
+ false /* init */, // Note: result is cached.
+ dir_path () /* fallback */,
+ true /* path_only */);
+ }
+
+ using type = compiler_type;
+ const type invalid = invalid_compiler_type;
+
+ // Start with -v. This will cover gcc and clang.
+ //
+ // While icc also writes what may seem like something we can use to
+ // detect it:
+ //
+ // icpc version 16.0.2 (gcc version 4.9.0 compatibility)
+ //
+ // That first word is actually the executable name. So if we rename
+ // icpc to foocpc, we will get:
+ //
+ // foocpc version 16.0.2 (gcc version 4.9.0 compatibility)
+ //
+ // In fact, if someone renames icpc to g++, there will be no way for
+ // us to detect this. Oh, well, their problem.
+ //
+ if (r.empty () && (pre == invalid ||
+ pre == type::gcc ||
+ pre == type::clang))
+ {
+ auto f = [&xi] (string& l, bool last) -> guess_result
+ {
+ if (xi)
+ {
+ // The signature line is first in Clang and last in GCC.
+ //
+ if (xi->type != type::gcc || last)
+ return guess_result (*xi, move (l));
+ }
+
+ // The gcc/g++ -v output will have a last line in the form:
+ //
+ // "gcc version X.Y.Z ..."
+ //
+ // The "version" word can probably be translated. For example:
+ //
+ // gcc version 3.4.4
+ // gcc version 4.2.1
+ // gcc version 4.8.2 (GCC)
+ // gcc version 4.8.5 (Ubuntu 4.8.5-2ubuntu1~14.04.1)
+ // gcc version 4.9.2 (Ubuntu 4.9.2-0ubuntu1~14.04)
+ // gcc version 5.1.0 (Ubuntu 5.1.0-0ubuntu11~14.04.1)
+ // gcc version 6.0.0 20160131 (experimental) (GCC)
+ //
+ if (last && l.compare (0, 4, "gcc ") == 0)
+ return guess_result (compiler_id {type::gcc, ""}, move (l));
+
+ // The Apple clang/clang++ -v output will have a line (currently
+ // first) in the form:
+ //
+ // "Apple (LLVM|clang) version X.Y.Z ..."
+ //
+ // Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)
+ // Apple clang version 4.0 (tags/Apple/clang-421.0.60) (based on LLVM 3.1svn)
+ // Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)
+ // Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)
+ // Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn)
+ // Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn)
+ // Apple LLVM version 6.0 (clang-600.0.57) (based on LLVM 3.5svn)
+ // Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn)
+ // Apple LLVM version 7.0.0 (clang-700.0.53)
+ // Apple LLVM version 7.0.0 (clang-700.1.76)
+ // Apple LLVM version 7.0.2 (clang-700.1.81)
+ // Apple LLVM version 7.3.0 (clang-703.0.16.1)
+ //
+ // Note that the gcc/g++ "aliases" for clang/clang++ also include
+ // this line but it is (currently) preceded by "Configured with:
+ // ...".
+ //
+ // Check for Apple clang before the vanilla one since the above line
+ // also includes "clang".
+ //
+ if (l.compare (0, 6, "Apple ") == 0 &&
+ (l.compare (6, 5, "LLVM ") == 0 ||
+ l.compare (6, 6, "clang ") == 0))
+ return guess_result (compiler_id {type::clang, "apple"}, move (l));
+
+ // The vanilla clang/clang++ -v output will have a first line in the
+ // form:
+ //
+ // "[... ]clang version X.Y.Z[-...] ..."
+ //
+ // The "version" word can probably be translated. For example:
+ //
+ // FreeBSD clang version 3.4.1 (tags/RELEASE_34/dot1-final 208032) 20140512
+ // Ubuntu clang version 3.5.0-4ubuntu2~trusty2 (tags/RELEASE_350/final) (based on LLVM 3.5.0)
+ // Ubuntu clang version 3.6.0-2ubuntu1~trusty1 (tags/RELEASE_360/final) (based on LLVM 3.6.0)
+ // clang version 3.7.0 (tags/RELEASE_370/final)
+ //
+ if (l.find ("clang ") != string::npos)
+ return guess_result (compiler_id {type::clang, ""}, move (l));
+
+ return guess_result ();
+ };
+
+ // The -v output contains other information (such as the compiler
+ // build configuration for gcc or the selected gcc installation for
+      // clang) which makes sense to include in the compiler checksum. So
+      // we ask run() to calculate it for every line of the -v output.
+ //
+ // One notable consequence of this is that if the locale changes
+ // (e.g., via LC_ALL), then the compiler signature will most likely
+ // change as well because of the translated text.
+ //
+ sha256 cs;
+
+ // Suppress all the compiler errors because we may be trying an
+ // unsupported option (but still consider the exit code).
+ //
+ r = run<guess_result> (3, xp, "-v", f, false, false, &cs);
+
+ if (r.empty ())
+ {
+ if (xi)
+ {
+          // Fall back to --version below in case this GCC/Clang-like
+ // compiler doesn't support -v.
+ //
+ //fail << "unable to obtain " << xc << " signature with -v";
+ }
+ }
+ else
+ {
+ // If this is clang-apple and pre-guess was gcc then change it so
+ // that we don't issue any warnings.
+ //
+ if (r.id.type == type::clang &&
+ r.id.variant == "apple" &&
+ pre == type::gcc)
+ pre = type::clang;
+
+ r.checksum = cs.string ();
+ }
+ }
+
+    // Next try --version to detect icc as well as to obtain the signature
+    // of GCC/Clang-like compilers in case -v above didn't work.
+ //
+ if (r.empty () && (pre == invalid ||
+ pre == type::icc ||
+ pre == type::gcc ||
+ pre == type::clang))
+ {
+ auto f = [&xi] (string& l, bool) -> guess_result
+ {
+ // Assume the first line is the signature.
+ //
+ if (xi)
+ return guess_result (*xi, move (l));
+
+ // The first line has the " (ICC) " in it, for example:
+ //
+ // icpc (ICC) 9.0 20060120
+ // icpc (ICC) 11.1 20100414
+ // icpc (ICC) 12.1.0 20110811
+ // icpc (ICC) 14.0.0 20130728
+ // icpc (ICC) 15.0.2 20150121
+ // icpc (ICC) 16.0.2 20160204
+ // icc (ICC) 16.0.2 20160204
+ //
+ if (l.find (" (ICC) ") != string::npos)
+ return guess_result (compiler_id {type::icc, ""}, move (l));
+
+ return guess_result ();
+ };
+
+ r = run<guess_result> (3, xp, "--version", f, false);
+
+ if (r.empty ())
+ {
+ if (xi)
+ fail << "unable to obtain " << xc << " signature with --version";
+ }
+ }
+
+ // Finally try to run it without any options to detect msvc.
+ //
+ if (r.empty () && (pre == invalid || pre == type::msvc))
+ {
+ auto f = [&xi] (string& l, bool) -> guess_result
+ {
+ // Assume the first line is the signature.
+ //
+ if (xi)
+ return guess_result (*xi, move (l));
+
+ // Check for "Microsoft (R)" and "C/C++" in the first line as a
+ // signature since all other words/positions can be translated. For
+ // example:
+ //
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.6030 for 80x86
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 14.00.50727.762 for 80x86
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86
+ // Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64
+ // Microsoft (R) C/C++ Optimizing Compiler Version 17.00.50727.1 for x86
+ // Microsoft (R) C/C++ Optimizing Compiler Version 18.00.21005.1 for x86
+ // Microsoft (R) C/C++ Optimizing Compiler Version 19.00.23026 for x86
+ // Microsoft (R) C/C++ Optimizing Compiler Version 19.10.24629 for x86
+ //
+ // In the recent versions the architecture is either "x86", "x64",
+ // or "ARM".
+ //
+ if (l.find ("Microsoft (R)") != string::npos &&
+ l.find ("C/C++") != string::npos)
+ return guess_result (compiler_id {type::msvc, ""}, move (l));
+
+ return guess_result ();
+ };
+
+ // One can pass extra options/arguments to cl.exe with the CL and _CL_
+ // environment variables. However, if such extra options are passed
+ // without anything to compile, then cl.exe no longer prints usage and
+ // exits successfully but instead issues an error and fails. So we are
+ // going to unset these variables for our test (interestingly, only CL
+      // seems to cause the problem but let's unset both, for good measure).
+ //
+ const char* env[] = {"CL=", "_CL_=", nullptr};
+
+ r = run<guess_result> (3, process_env (xp, env), f, false);
+
+ if (r.empty ())
+ {
+ if (xi)
+ fail << "unable to obtain " << xc << " signature";
+ }
+ }
+
+ if (!r.empty ())
+ {
+ if (pre != invalid && r.id.type != pre)
+ {
+ l4 ([&]{trace << "compiler type guess mismatch"
+ << ", pre-guessed " << pre
+ << ", determined " << r.id.type;});
+
+ r = guess_result ();
+ }
+ else
+ {
+ l5 ([&]{trace << xc << " is " << r.id << ": '"
+ << r.signature << "'";});
+
+ r.path = move (xp);
+ }
+ }
+ else
+ l4 ([&]{trace << "unable to determine compiler type of " << xc;});
+
+ return r;
+ }
+
+ // Try to derive the toolchain pattern.
+ //
+ // The s argument is the stem to look for in the leaf of the path. The ls
+ // and rs arguments are the left/right separator characters. If either is
+ // NULL, then the stem should be the prefix/suffix of the leaf,
+ // respectively. Note that a path that is equal to stem is not considered
+ // a pattern.
+ //
+ // Note that the default right separator includes digits to handle cases
+ // like clang++37 (FreeBSD).
+ //
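+    // For example, for /usr/bin/g++-4.9 and stem "g++" the resulting
+    // pattern is /usr/bin/*-4.9 while for plain g++ the result is empty
+    // (path equal to stem).
+    //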
+ static string
+ pattern (const path& xc,
+ const char* s,
+ const char* ls = "-_.",
+ const char* rs = "-_.0123456789")
+ {
+ string r;
+ size_t sn (strlen (s));
+
+ if (xc.size () > sn)
+ {
+ string l (xc.leaf ().string ());
+ size_t ln (l.size ());
+
+ size_t b;
+ if (ln >= sn && (b = l.find (s)) != string::npos)
+ {
+ // Check left separators.
+ //
+ if (b == 0 || (ls != nullptr && strchr (ls, l[b - 1]) != nullptr))
+ {
+ // Check right separators.
+ //
+ size_t e (b + sn);
+ if (e == ln || (rs != nullptr && strchr (rs, l[e]) != nullptr))
+ {
+ l.replace (b, sn, "*", 1);
+ path p (xc.directory ());
+ p /= l;
+ r = move (p).string ();
+ }
+ }
+ }
+ }
+
+ return r;
+ }
+
+ static compiler_info
+ guess_gcc (const char* xm,
+ lang xl,
+ const path& xc,
+ const string* xv,
+ const string* xt,
+ const strings* c_po, const strings* x_po,
+ const strings* c_co, const strings* x_co,
+ const strings*, const strings*,
+ guess_result&& gr)
+ {
+ tracer trace ("cc::guess_gcc");
+
+ const process_path& xp (gr.path);
+
+ // Extract the version. The signature line has the following format
+ // though language words can be translated and even rearranged (see
+ // examples above).
+ //
+ // "gcc version A.B.C[ ...]"
+ //
+ compiler_version v;
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".version to override";
+ });
+
+ // Treat the custom version as just a tail of the signature.
+ //
+ const string& s (xv == nullptr ? gr.signature : *xv);
+
+ // Scan the string as words and look for one that looks like a
+ // version.
+ //
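+      // For example, for the signature
+      //
+      //   "gcc version 4.9.2 (Ubuntu 4.9.2-0ubuntu1~14.04)"
+      //
+      // we end up with major 4, minor 9, patch 2, and build
+      // "(Ubuntu 4.9.2-0ubuntu1~14.04)".
+      //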
+ size_t b (0), e (0);
+ while (next_word (s, b, e))
+ {
+ // The third argument to find_first_not_of() is the length of the
+ // first argument, not the length of the interval to check. So to
+ // limit it to [b, e) we are also going to compare the result to the
+ // end of the word position (first space). In fact, we can just
+ // check if it is >= e.
+ //
+ if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+ break;
+ }
+
+ if (b == e)
+ fail << "unable to extract gcc version from '" << s << "'";
+
+ v.string.assign (s, b, string::npos);
+
+ // Split the version into components.
+ //
+ size_t vb (b), ve (b);
+ auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t
+ {
+ try
+ {
+ if (next_word (s, e, vb, ve, '.'))
+ return stoull (string (s, vb, ve - vb));
+ }
+ catch (const invalid_argument&) {}
+ catch (const out_of_range&) {}
+
+ fail << "unable to extract gcc " << m << " version from '"
+ << string (s, b, e - b) << "'" << endf;
+ };
+
+ v.major = next ("major");
+ v.minor = next ("minor");
+ v.patch = next ("patch");
+
+ if (e != s.size ())
+ v.build.assign (s, e + 1, string::npos);
+ }
+
+ // Figure out the target architecture. This is actually a lot trickier
+ // than one would have hoped.
+ //
+ // There is the -dumpmachine option but gcc doesn't adjust it per the
+ // compile options (e.g., -m32). However, starting with 4.6 it has the
+ // -print-multiarch option which gives (almost) the right answer. The
+ // "almost" part has to do with it not honoring the -arch option (which
+ // is really what this compiler is building for). To get to that, we
+ // would have to resort to a hack like this:
+ //
+ // gcc -v -E - 2>&1 | grep cc1
+ // .../cc1 ... -mtune=generic -march=x86-64
+ //
+    // Also, -print-multiarch will print an empty line if the compiler
+ // actually wasn't built with multi-arch support.
+ //
+    // So this is what we are going to do for the time being: first try
+    // -print-multiarch. If that works out (recent gcc configured with
+    // multi-arch support), then use the result. Otherwise, fall back to
+    // -dumpmachine (older gcc or no multi-arch support).
+ //
+ string t, ot;
+
+ if (xt == nullptr)
+ {
+ cstrings args {xp.recall_string (), "-print-multiarch"};
+ if (c_co != nullptr) append_options (args, *c_co);
+ if (x_co != nullptr) append_options (args, *x_co);
+ args.push_back (nullptr);
+
+ // The output of both -print-multiarch and -dumpmachine is a single
+ // line containing just the target triplet.
+ //
+ auto f = [] (string& l, bool) {return move (l);};
+
+ t = run<string> (3, xp, args.data (), f, false);
+
+ if (t.empty ())
+ {
+ l5 ([&]{trace << xc << " doesn's support -print-multiarch, "
+ << "falling back to -dumpmachine";});
+
+ args[1] = "-dumpmachine";
+ t = run<string> (3, xp, args.data (), f, false);
+ }
+
+ if (t.empty ())
+ fail << "unable to extract target architecture from " << xc
+ << " using -print-multiarch or -dumpmachine output" <<
+ info << "use config." << xm << ".target to override";
+
+ ot = t;
+ }
+ else
+ ot = t = *xt;
+
+ // Parse the target into triplet (for further tests) ignoring any
+ // failures.
+ //
+ target_triplet tt;
+ try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+ // Derive the toolchain pattern. Try cc/c++ as a fallback.
+ //
+ string pat (pattern (xc, xl == lang::c ? "gcc" : "g++"));
+
+ if (pat.empty ())
+ pat = pattern (xc, xl == lang::c ? "cc" : "c++");
+
+ // Runtime and standard library.
+ //
+      // GCC always uses libgcc (even on MinGW). Its documentation says that
+      // you should usually specify -lgcc even with -nostdlib.
+ //
+ string rt ("libgcc");
+ string csl (tt.system == "mingw32"
+ ? "msvc"
+ : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+ string xsl;
+ switch (xl)
+ {
+ case lang::c: xsl = csl; break;
+ case lang::cxx:
+ {
+        // While GCC only supports its own C++ standard library (libstdc++)
+ // we still run the test to detect the "none" case (-nostdinc++).
+ //
+ const char* src =
+ "#include <bits/c++config.h> \n"
+ "stdlib:=\"libstdc++\" \n";
+
+ xsl = stdlib (xl, xp, c_po, x_po, c_co, x_co, src);
+ break;
+ }
+ }
+
+ return compiler_info {
+ move (gr.path),
+ move (gr.id),
+ compiler_class::gcc,
+ move (v),
+ move (gr.signature),
+ move (gr.checksum), // Calculated on whole -v output.
+ move (t),
+ move (ot),
+ move (pat),
+ "",
+ move (rt),
+ move (csl),
+ move (xsl)};
+ }
+
+ static compiler_info
+ guess_clang (const char* xm,
+ lang xl,
+ const path& xc,
+ const string* xv,
+ const string* xt,
+ const strings* c_po, const strings* x_po,
+ const strings* c_co, const strings* x_co,
+ const strings* c_lo, const strings* x_lo,
+ guess_result&& gr)
+ {
+ const process_path& xp (gr.path);
+
+ // Extract the version. Here we will try to handle both vanilla and
+ // Apple clang since the signature lines are fairly similar. They have
+ // the following format though language words can probably be translated
+ // and even rearranged (see examples above).
+ //
+ // "[... ]clang version A.B.C[( |-)...]"
+ // "Apple (clang|LLVM) version A.B[.C] ..."
+ //
+ compiler_version v;
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".version to override";
+ });
+
+ // Treat the custom version as just a tail of the signature.
+ //
+ const string& s (xv == nullptr ? gr.signature : *xv);
+
+ // Some overrides for testing.
+ //
+ //s = "clang version 3.7.0 (tags/RELEASE_370/final)";
+ //
+ //gr.id.variant = "apple";
+ //s = "Apple LLVM version 7.3.0 (clang-703.0.16.1)";
+ //s = "Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)";
+
+ // Scan the string as words and look for one that looks like a
+ // version. Use '-' as a second delimiter to handle versions like
+ // "3.6.0-2ubuntu1~trusty1".
+ //
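+      // For example, "clang version 3.6.0-2ubuntu1~trusty1 (...)" yields
+      // major 3, minor 6, patch 0, and build "2ubuntu1~trusty1 (...)".
+      //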
+ size_t b (0), e (0);
+ while (next_word (s, b, e, ' ', '-'))
+ {
+ // The third argument to find_first_not_of() is the length of the
+ // first argument, not the length of the interval to check. So to
+ // limit it to [b, e) we are also going to compare the result to the
+ // end of the word position (first space). In fact, we can just
+ // check if it is >= e.
+ //
+ if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+ break;
+ }
+
+ if (b == e)
+ fail << "unable to extract clang version from '" << s << "'";
+
+ v.string.assign (s, b, string::npos);
+
+ // Split the version into components.
+ //
+ size_t vb (b), ve (b);
+ auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t
+ {
+ try
+ {
+ if (next_word (s, e, vb, ve, '.'))
+ return stoull (string (s, vb, ve - vb));
+
+ if (opt)
+ return 0;
+ }
+ catch (const invalid_argument&) {}
+ catch (const out_of_range&) {}
+
+ fail << "unable to extract clang " << m << " version from '"
+ << string (s, b, e - b) << "'" << endf;
+ };
+
+ v.major = next ("major", false);
+ v.minor = next ("minor", false);
+ v.patch = next ("patch", gr.id.variant == "apple");
+
+ if (e != s.size ())
+ v.build.assign (s, e + 1, string::npos);
+ }
+
+ // Figure out the target architecture.
+ //
+ // Unlike gcc, clang doesn't have -print-multiarch. Its -dumpmachine,
+ // however, respects the compile options (e.g., -m32).
+ //
+ string t, ot;
+
+ if (xt == nullptr)
+ {
+ cstrings args {xp.recall_string (), "-dumpmachine"};
+ if (c_co != nullptr) append_options (args, *c_co);
+ if (x_co != nullptr) append_options (args, *x_co);
+ args.push_back (nullptr);
+
+ // The output of -dumpmachine is a single line containing just the
+ // target triplet.
+ //
+ auto f = [] (string& l, bool) {return move (l);};
+ t = run<string> (3, xp, args.data (), f, false);
+
+ if (t.empty ())
+ fail << "unable to extract target architecture from " << xc
+ << " using -dumpmachine output" <<
+ info << "use config." << xm << ".target to override";
+
+ ot = t;
+ }
+ else
+ ot = t = *xt;
+
+ // Parse the target into triplet (for further tests) ignoring any
+ // failures.
+ //
+ target_triplet tt;
+ try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+ // For Clang on Windows targeting MSVC we remap the target to match
+ // MSVC's.
+ //
+ if (tt.system == "windows-msvc")
+ {
+ // Keep the CPU and replace the rest.
+ //
+ // @@ Note that currently there is no straightforward way to determine
+ // the VC version Clang is using. See:
+ //
+ // http://lists.llvm.org/pipermail/cfe-dev/2017-December/056240.html
+ //
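+        // For example, a Clang-reported x86_64-pc-windows-msvc would be
+        // remapped to x86_64-microsoft-win32-msvc14.1.
+        //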
+ tt.vendor = "microsoft";
+ tt.system = "win32-msvc";
+ tt.version = "14.1";
+ t = tt.string ();
+ }
+
+ // Derive the toolchain pattern. Try clang/clang++, the gcc/g++ alias,
+ // as well as cc/c++.
+ //
+ string pat (pattern (xc, xl == lang::c ? "clang" : "clang++"));
+
+ if (pat.empty ())
+ pat = pattern (xc, xl == lang::c ? "gcc" : "g++");
+
+ if (pat.empty ())
+ pat = pattern (xc, xl == lang::c ? "cc" : "c++");
+
+ // Runtime and standard library.
+ //
+ // Clang can use libgcc, its own compiler-rt, or, on Windows targeting
+      // MSVC, the VC runtime. As usual, there is no straightforward way to
+      // query this and the mailing list is silent. See:
+ //
+ // http://lists.llvm.org/pipermail/cfe-dev/2018-January/056494.html
+ //
+ // So for now we will just look for --rtlib (note: linker option) and if
+ // none specified, assume some platform-specific defaults.
+ //
+ string rt;
+ {
+ auto find_rtlib = [] (const strings* ops) -> const string*
+ {
+ return ops != nullptr
+ ? find_option_prefix ("--rtlib=", *ops, false)
+ : nullptr;
+ };
+
+ const string* o;
+ if ((o = find_rtlib (x_lo)) != nullptr ||
+ (o = find_rtlib (c_lo)) != nullptr)
+ {
+ rt = string (*o, 8);
+ }
+ else if (tt.system == "win32-msvc") rt = "msvc";
+ else if (tt.system == "linux-gnu" ||
+ tt.system == "freebsd") rt = "libgcc";
+ else /* Mac OS, etc. */ rt = "compiler-rt";
+ }
+
+ string csl (tt.system == "win32-msvc" || tt.system == "mingw32"
+ ? "msvc"
+ : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+
+ string xsl;
+ switch (xl)
+ {
+ case lang::c: xsl = csl; break;
+ case lang::cxx:
+ {
+ // All Clang versions that we care to support have __has_include()
+ // so we use it to determine which standard library is available.
+ //
+ // Note that we still include the corresponding headers to verify
+ // things are usable. For the "other" case we include some
+        // standard header to detect the "none" case (e.g., -nostdinc++).
+ //
+ const char* src =
+ "#if __has_include(<__config>) \n"
+ " #include <__config> \n"
+ " stdlib:=\"libc++\" \n"
+ "#elif __has_include(<bits/c++config.h>) \n"
+ " #include <bits/c++config.h> \n"
+ " stdlib:=\"libstdc++\" \n"
+ "#else \n"
+ " #include <cstddef> \n"
+ " stdlib:=\"other\" \n"
+ "#endif \n";
+
+ xsl = tt.system == "win32-msvc"
+ ? "msvcp"
+ : stdlib (xl, xp, c_po, x_po, c_co, x_co, src);
+ break;
+ }
+ }
+
+ return compiler_info {
+ move (gr.path),
+ move (gr.id),
+ compiler_class::gcc,
+ move (v),
+ move (gr.signature),
+ move (gr.checksum), // Calculated on whole -v output.
+ move (t),
+ move (ot),
+ move (pat),
+ "",
+ move (rt),
+ move (csl),
+ move (xsl)};
+ }
+
+ static compiler_info
+ guess_icc (const char* xm,
+ lang xl,
+ const path& xc,
+ const string* xv,
+ const string* xt,
+ const strings* c_po, const strings* x_po,
+ const strings* c_co, const strings* x_co,
+ const strings*, const strings*,
+ guess_result&& gr)
+ {
+ const process_path& xp (gr.path);
+
+ // Extract the version. If the version has the fourth component, then
+ // the signature line (extracted with --version) won't include it. So we
+ // will have to get a more elaborate line with -V. We will also have to
+ // do it to get the compiler target that respects the -m option: icc
+ // doesn't support -print-multiarch like gcc and its -dumpmachine
+ // doesn't respect -m like clang. In fact, its -dumpmachine is
+ // completely broken as it appears to print the compiler's host and not
+ // the target (e.g., .../bin/ia32/icpc prints x86_64-linux-gnu).
+ //
+ // Some examples of the signature lines from -V output:
+ //
+ // Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047
+ // Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116
+ // Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010
+ // Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074
+ // Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064
+ // Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427
+ // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.2.181 Build 20160204
+ // Intel(R) C++ Intel(R) 64 Compiler for applications running on IA-32, Version 16.0.2.181 Build 20160204
+ // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204
+ // Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204
+ //
+ // We should probably also assume the language words can be translated
+ // and even rearranged.
+ //
+ auto f = [] (string& l, bool)
+ {
+ return l.compare (0, 5, "Intel") == 0 && (l[5] == '(' || l[5] == ' ')
+ ? move (l)
+ : string ();
+ };
+
+ if (xv == nullptr)
+ {
+ string& s (gr.signature);
+ s.clear ();
+
+ // The -V output is sent to STDERR.
+ //
+ s = run<string> (3, xp, "-V", f, false);
+
+ if (s.empty ())
+ fail << "unable to extract signature from " << xc << " -V output";
+
+ if (s.find (xl == lang::c ? " C " : " C++ ") == string::npos)
+ fail << xc << " does not appear to be the Intel " << xl
+ << " compiler" <<
+ info << "extracted signature: '" << s << "'";
+ }
+
+    // Scan the string as words and look for the version. It consists only
+    // of digits and periods and contains at least one period.
+ //
+ compiler_version v;
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".version to override";
+ });
+
+ // Treat the custom version as just a tail of the signature.
+ //
+ const string& s (xv == nullptr ? gr.signature : *xv);
+
+ // Some overrides for testing.
+ //
+ //s = "Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047";
+ //s = "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116";
+ //s = "Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010";
+ //s = "Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074";
+ //s = "Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064";
+ //s = "Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427";
+
+ size_t b (0), e (0);
+ while (next_word (s, b, e, ' ', ',') != 0)
+ {
+ // The third argument to find_first_not_of() is the length of the
+ // first argument, not the length of the interval to check. So to
+ // limit it to [b, e) we are also going to compare the result to the
+ // end of the word position (first space). In fact, we can just
+ // check if it is >= e. Similar logic for find_first_of() except
+        // that we add space to the list of characters to make sure we don't
+ // go too far.
+ //
+ if (s.find_first_not_of ("1234567890.", b, 11) >= e &&
+ s.find_first_of (". ", b, 2) < e)
+ break;
+ }
+
+ if (b == e)
+ fail << "unable to extract icc version from '" << s << "'";
+
+ v.string.assign (s, b, string::npos);
+
+ // Split the version into components.
+ //
+ size_t vb (b), ve (b);
+ auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t
+ {
+ try
+ {
+ if (next_word (s, e, vb, ve, '.'))
+ return stoull (string (s, vb, ve - vb));
+
+ if (opt)
+ return 0;
+ }
+ catch (const invalid_argument&) {}
+ catch (const out_of_range&) {}
+
+ fail << "unable to extract icc " << m << " version from '"
+ << string (s, b, e - b) << "'" << endf;
+ };
+
+ v.major = next ("major", false);
+ v.minor = next ("minor", false);
+ v.patch = next ("patch", true);
+
+ if (vb != ve && next_word (s, e, vb, ve, '.'))
+ v.build.assign (s, vb, ve - vb);
+
+ if (e != s.size ())
+ {
+ if (!v.build.empty ())
+ v.build += ' ';
+
+ v.build.append (s, e + 1, string::npos);
+ }
+ }
+
+ // Figure out the target CPU by re-running the compiler with -V and
+ // compile options (which may include, e.g., -m32). The output will
+ // contain two CPU keywords: the first is the host and the second is the
+ // target (hopefully this won't get rearranged by the translation).
+ //
+ // The CPU keywords (based on the above samples) appear to be:
+ //
+ // "32-bit"
+ // "IA-32"
+ // "Intel" "64"
+ // "Intel(R)" "64"
+ // "Intel(R)" "MIC" (-dumpmachine says: x86_64-k1om-linux)
+ //
+ string t, ot;
+
+ if (xt == nullptr)
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".target to override";
+ });
+
+ cstrings args {xp.recall_string (), "-V"};
+ if (c_co != nullptr) append_options (args, *c_co);
+ if (x_co != nullptr) append_options (args, *x_co);
+ args.push_back (nullptr);
+
+ // The -V output is sent to STDERR.
+ //
+ t = run<string> (3, xp, args.data (), f, false);
+
+ if (t.empty ())
+ fail << "unable to extract target architecture from " << xc
+ << " -V output";
+
+ string arch;
+ for (size_t b (0), e (0), n;
+ (n = next_word (t, b, e, ' ', ',')) != 0; )
+ {
+ if (t.compare (b, n, "Intel(R)", 8) == 0 ||
+ t.compare (b, n, "Intel", 5) == 0)
+ {
+ if ((n = next_word (t, b, e, ' ', ',')) != 0)
+ {
+ if (t.compare (b, n, "64", 2) == 0)
+ {
+ arch = "x86_64";
+ }
+ else if (t.compare (b, n, "MIC", 3) == 0)
+ {
+ arch = "x86_64"; // Plus "-k1om-linux" from -dumpmachine below.
+ }
+ }
+ else
+ break;
+ }
+ else if (t.compare (b, n, "IA-32", 5) == 0 ||
+ t.compare (b, n, "32-bit", 6) == 0)
+ {
+ arch = "i386";
+ }
+ }
+
+ if (arch.empty ())
+ fail << "unable to extract icc target architecture from '"
+ << t << "'";
+
+ // So we have the CPU but we still need the rest of the triplet. While
+ // icc currently doesn't support cross-compilation (at least on Linux)
+ // and we could have just used the build triplet (i.e., the
+ // architecture on which we are running), who knows what will happen
+ // in the future. So instead we are going to use -dumpmachine and
+ // substitute the CPU.
+ //
+ {
+ auto f = [] (string& l, bool) {return move (l);};
+ t = run<string> (3, xp, "-dumpmachine", f);
+ }
+
+ if (t.empty ())
+ fail << "unable to extract target architecture from " << xc
+ << " using -dumpmachine output";
+
+      // The first component in the triplet is always the CPU.
+ //
+ size_t p (t.find ('-'));
+
+ if (p == string::npos)
+ fail << "unable to parse icc target architecture '" << t << "'";
+
+ t.swap (arch);
+ t.append (arch, p, string::npos);
+
+ ot = t;
+ }
+ else
+ ot = t = *xt;
+
+ // Parse the target into triplet (for further tests) ignoring any
+ // failures.
+ //
+ target_triplet tt;
+ try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+ // Derive the toolchain pattern.
+ //
+ string pat (pattern (xc, xl == lang::c ? "icc" : "icpc"));
+
+ // Runtime and standard library.
+ //
+ // For now we assume that unless it is Windows, we are targeting
+ // Linux/GCC.
+ //
+ string rt (tt.system == "win32-msvc" ? "msvc" : "libgcc");
+ string csl (tt.system == "win32-msvc"
+ ? "msvc"
+ : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+ string xsl;
+ switch (xl)
+ {
+ case lang::c: xsl = csl; break;
+ case lang::cxx:
+ {
+ xsl = tt.system == "win32-msvc" ? "msvcp" : "libstdc++";
+ break;
+ }
+ }
+
+ return compiler_info {
+ move (gr.path),
+ move (gr.id),
+ compiler_class::gcc, //@@ TODO: msvc on Windows?
+ move (v),
+ move (gr.signature),
+ "",
+ move (t),
+ move (ot),
+ move (pat),
+ "",
+ move (rt),
+ move (csl),
+ move (xsl)};
+ }
+
+ static compiler_info
+ guess_msvc (const char* xm,
+ lang xl,
+ const path& xc,
+ const string* xv,
+ const string* xt,
+ const strings*, const strings*,
+ const strings*, const strings*,
+ const strings*, const strings*,
+ guess_result&& gr)
+ {
+ // Extract the version. The signature line has the following format
+ // though language words can be translated and even rearranged (see
+ // examples above).
+ //
+ // "Microsoft (R) C/C++ Optimizing Compiler Version A.B.C[.D] for CPU"
+ //
+ // The CPU keywords (based on the above samples) appear to be:
+ //
+ // "80x86"
+ // "x86"
+ // "x64"
+ // "ARM"
+ //
+ compiler_version v;
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".version to override";
+ });
+
+ // Treat the custom version as just a tail of the signature.
+ //
+ const string& s (xv == nullptr ? gr.signature : *xv);
+
+ // Some overrides for testing.
+ //
+ //string s;
+ //s = "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86";
+ //s = "Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64";
+ //s = "Compilateur d'optimisation Microsoft (R) C/C++ version 19.16.27026.1 pour x64";
+
+ // Scan the string as words and look for the version.
+ //
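+      // For example, from "... version 19.16.27026.1 pour x64" we get
+      // major 19, minor 16, patch 27026, and build "1".
+      //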
+ size_t b (0), e (0);
+ while (next_word (s, b, e, ' ', ','))
+ {
+ // The third argument to find_first_not_of() is the length of the
+ // first argument, not the length of the interval to check. So to
+ // limit it to [b, e) we are also going to compare the result to the
+ // end of the word position (first space). In fact, we can just
+ // check if it is >= e.
+ //
+ if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+ break;
+ }
+
+ if (b == e)
+ fail << "unable to extract msvc version from '" << s << "'";
+
+ v.string.assign (s, b, e - b);
+
+ // Split the version into components.
+ //
+ size_t vb (b), ve (b);
+ auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t
+ {
+ try
+ {
+ if (next_word (s, e, vb, ve, '.'))
+ return stoull (string (s, vb, ve - vb));
+ }
+ catch (const invalid_argument&) {}
+ catch (const out_of_range&) {}
+
+ fail << "unable to extract msvc " << m << " version from '"
+ << string (s, b, e - b) << "'" << endf;
+ };
+
+ v.major = next ("major");
+ v.minor = next ("minor");
+ v.patch = next ("patch");
+
+ if (next_word (s, e, vb, ve, '.'))
+ v.build.assign (s, vb, ve - vb);
+ }
+
+ // Figure out the target architecture.
+ //
+ string t, ot;
+
+ if (xt == nullptr)
+ {
+ auto df = make_diag_frame (
+ [&xm](const diag_record& dr)
+ {
+ dr << info << "use config." << xm << ".target to override";
+ });
+
+ const string& s (gr.signature);
+
+ // Scan the string as words and look for the CPU.
+ //
+ string arch;
+
+ for (size_t b (0), e (0), n;
+ (n = next_word (s, b, e, ' ', ',')) != 0; )
+ {
+ if (s.compare (b, n, "x64", 3) == 0 ||
+ s.compare (b, n, "x86", 3) == 0 ||
+ s.compare (b, n, "ARM", 3) == 0 ||
+ s.compare (b, n, "80x86", 5) == 0)
+ {
+ arch.assign (s, b, n);
+ break;
+ }
+ }
+
+ if (arch.empty ())
+ fail << "unable to extract msvc target architecture from "
+ << "'" << s << "'";
+
+ // Now we need to map x86, x64, and ARM to the target triplets. The
+      // problem is, there aren't any established ones so we have to invent
+ // them ourselves. Based on the discussion in
+ // <libbutl/target-triplet.mxx>, we need something in the
+ // CPU-VENDOR-OS-ABI form.
+ //
+ // The CPU part is fairly straightforward with x86 mapped to 'i386'
+ // (or maybe 'i686'), x64 to 'x86_64', and ARM to 'arm' (it could also
+      // include the version, e.g., 'armv8').
+ //
+ // The (toolchain) VENDOR is also straightforward: 'microsoft'. Why
+ // not omit it? Two reasons: firstly, there are other compilers with
+ // the otherwise same target, for example Intel C/C++, and it could be
+ // useful to distinguish between them. Secondly, by having all four
+ // components we remove any parsing ambiguity.
+ //
+      // OS-ABI is where things are not as clear cut. The OS part probably
+      // shouldn't be just 'windows' since we have Win32 and WinCE. And
+ // WinRT. And Universal Windows Platform (UWP). So perhaps the
+ // following values for OS: 'win32', 'wince', 'winrt', 'winup'.
+ //
+ // For 'win32' the ABI part could signal the Microsoft C/C++ runtime
+ // by calling it 'msvc'. And seeing that the runtimes are incompatible
+ // from version to version, we should probably add the 'X.Y' version
+ // at the end (so we essentially mimic the DLL name, for example,
+ // msvcr120.dll). Some suggested we also encode the runtime type
+ // (those pesky /M* options) though I am not sure: the only
+ // "redistributable" runtime is multi-threaded release DLL.
+ //
+ // The ABI part for the other OS values needs thinking. For 'winrt'
+ // and 'winup' it probably makes sense to encode the WINAPI_FAMILY
+ // macro value (perhaps also with the version). Some of its values:
+ //
+ // WINAPI_FAMILY_APP Windows 10
+ // WINAPI_FAMILY_PC_APP Windows 8.1
+ // WINAPI_FAMILY_PHONE_APP Windows Phone 8.1
+ //
+ // For 'wince' we may also want to add the OS version, for example,
+ // 'wince4.2'.
+ //
+ // Putting it all together, Visual Studio 2015 will then have the
+ // following target triplets:
+ //
+ // x86 i386-microsoft-win32-msvc14.0
+ // x64 x86_64-microsoft-win32-msvc14.0
+ // ARM arm-microsoft-winup-???
+ //
+ if (arch == "ARM")
+ fail << "cl.exe ARM/WinRT/UWP target is not yet supported";
+ else
+ {
+ if (arch == "x64")
+ t = "x86_64-microsoft-win32-msvc";
+ else if (arch == "x86" || arch == "80x86")
+ t = "i386-microsoft-win32-msvc";
+ else
+ assert (false);
+
+ // Mapping of compiler versions to runtime versions:
+ //
+ // Note that VC 15 has runtime version 14.1 but the DLLs are still
+ // called *140.dll (they are said to be backwards-compatible).
+ //
+ // And VC 16 seems to have the runtime version 14.1 (and not 14.2,
+ // as one might expect; DLLs are still *140.dll but there are now _1
+ // and _2 variants for, say, msvcp140.dll). We will, however, call
+ // it 14.2 (which is the version of the "toolset") in our target
+ // triplet.
+ //
+ // year ver cl crt/dll toolset
+ //
+ // 2019 16.1 19.21 14.2/140 14.21
+ // 2019 16.0 19.20 14.2/140
+ // 2017 15.9 19.16 14.1/140
+ // 2017 15.8 19.15 14.1/140
+ // 2017 15.7 19.14 14.1/140
+ // 2017 15.6 19.13 14.1/140
+ // 2017 15.5 19.12 14.1/140
+ // 2017 15.3 19.11 14.1/140
+ // 2017 15 19.10 14.1/140
+ // 2015 14 19.00 14.0/140
+ // 2013 12 18.00 12.0/120
+ // 2012 11 17.00 11.0/110
+ // 2010 10 16.00 10.0/100
+ // 2008 9 15.00 9.0/90
+ // 2005 8 14.00 8.0/80
+ // 2003 7.1 13.10 7.1/71
+ //
+ // _MSC_VER is the numeric cl version, e.g., 1921 for 19.21.
+ //
+ /**/ if (v.major == 19 && v.minor >= 20) t += "14.2";
+ else if (v.major == 19 && v.minor >= 10) t += "14.1";
+ else if (v.major == 19 && v.minor == 0) t += "14.0";
+ else if (v.major == 18 && v.minor == 0) t += "12.0";
+ else if (v.major == 17 && v.minor == 0) t += "11.0";
+ else if (v.major == 16 && v.minor == 0) t += "10.0";
+ else if (v.major == 15 && v.minor == 0) t += "9.0";
+ else if (v.major == 14 && v.minor == 0) t += "8.0";
+ else if (v.major == 13 && v.minor == 10) t += "7.1";
+ else fail << "unable to map msvc compiler version '" << v.string
+ << "' to runtime version";
+ }
+
+ ot = t;
+ }
+ else
+ ot = t = *xt;
+
+ // Derive the toolchain pattern.
+ //
+ // If the compiler name is/starts with 'cl' (e.g., cl.exe, cl-14),
+ // then replace it with '*' and use it as a pattern for lib, link,
+ // etc.
+ //
+ string cpat (pattern (xc, "cl", nullptr, ".-"));
+ string bpat (cpat); // Binutils pattern is the same as toolchain.
+
+ // Runtime and standard library.
+ //
+ string rt ("msvc");
+ string csl ("msvc");
+ string xsl;
+ switch (xl)
+ {
+ case lang::c: xsl = csl; break;
+ case lang::cxx: xsl = "msvcp"; break;
+ }
+
+ return compiler_info {
+ move (gr.path),
+ move (gr.id),
+ compiler_class::msvc,
+ move (v),
+ move (gr.signature),
+ "",
+ move (t),
+ move (ot),
+ move (cpat),
+ move (bpat),
+ move (rt),
+ move (csl),
+ move (xsl)};
+ }
+
+ // Compiler checks can be expensive (we often need to run the compiler
+ // several times) so we cache the result.
+ //
+ static map<string, compiler_info> cache;
+
+ const compiler_info&
+ guess (const char* xm,
+ lang xl,
+ const path& xc,
+ const string* xis,
+ const string* xv,
+ const string* xt,
+ const strings* c_po, const strings* x_po,
+ const strings* c_co, const strings* x_co,
+ const strings* c_lo, const strings* x_lo)
+ {
+ // First check the cache.
+ //
+ string key;
+ {
+ sha256 cs;
+ cs.append (static_cast<size_t> (xl));
+ cs.append (xc.string ());
+ if (xis != nullptr) cs.append (*xis);
+ if (c_po != nullptr) hash_options (cs, *c_po);
+ if (x_po != nullptr) hash_options (cs, *x_po);
+ if (c_co != nullptr) hash_options (cs, *c_co);
+ if (x_co != nullptr) hash_options (cs, *x_co);
+ if (c_lo != nullptr) hash_options (cs, *c_lo);
+ if (x_lo != nullptr) hash_options (cs, *x_lo);
+ key = cs.string ();
+
+ auto i (cache.find (key));
+ if (i != cache.end ())
+ return i->second;
+ }
+
+ // Parse the user-specified compiler id (config.x.id).
+ //
+ optional<compiler_id> xi;
+ if (xis != nullptr)
+ {
+ try
+ {
+ xi = compiler_id (*xis);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "invalid compiler id '" << *xis << "' "
+ << "specified in variable config." << xm << ".id: " << e;
+ }
+ }
+
+ pair<compiler_type, size_t> pre (pre_guess (xl, xc, xi));
+ compiler_type& type (pre.first);
+
+      // If we could pre-guess the type based on the executable name, then
+ // try the test just for that compiler.
+ //
+ guess_result gr;
+
+ if (type != invalid_compiler_type)
+ {
+ gr = guess (xm, xl, xc, xi, type);
+
+ if (gr.empty ())
+ {
+ warn << xc << " looks like " << type << " but it is not" <<
+ info << "use config." << xm << " to override";
+
+ type = invalid_compiler_type; // Clear pre-guess.
+ }
+ }
+
+ if (gr.empty ())
+ gr = guess (xm, xl, xc, xi, type);
+
+ if (gr.empty ())
+ fail << "unable to guess " << xl << " compiler type of " << xc <<
+ info << "use config." << xm << ".id to specify explicitly";
+
+ compiler_info r;
+ const compiler_id& id (gr.id);
+
+ switch (id.type)
+ {
+ case compiler_type::gcc:
+ {
+ r = guess_gcc (xm, xl, xc, xv, xt,
+ c_po, x_po, c_co, x_co, c_lo, x_lo,
+ move (gr));
+ break;
+ }
+ case compiler_type::clang:
+ {
+ r = guess_clang (xm, xl, xc, xv, xt,
+ c_po, x_po, c_co, x_co, c_lo, x_lo,
+ move (gr));
+ break;
+ }
+ case compiler_type::msvc:
+ {
+ r = guess_msvc (xm, xl, xc, xv, xt,
+ c_po, x_po, c_co, x_co, c_lo, x_lo,
+ move (gr));
+ break;
+ }
+ case compiler_type::icc:
+ {
+ r = guess_icc (xm, xl, xc, xv, xt,
+ c_po, x_po, c_co, x_co, c_lo, x_lo,
+ move (gr));
+ break;
+ }
+ }
+
+ // By default use the signature line to generate the checksum.
+ //
+ if (r.checksum.empty ())
+ r.checksum = sha256 (r.signature).string ();
+
+ // Derive binutils pattern unless this has already been done by the
+ // compiler-specific code.
+ //
+ // When cross-compiling the whole toolchain is normally prefixed with
+ // the target triplet, e.g., x86_64-w64-mingw32-{gcc,g++,ar,ld}. But
+ // oftentimes it is not quite canonical (and sometimes -- outright
+ // bogus). So instead we are going to first try to derive the prefix
+ // using the pre-guessed position of the compiler name. Note that we
+ // still want to try the target in case we could not pre-guess (think
+ // x86_64-w64-mingw32-c++).
+ //
+ // BTW, for GCC we also get gcc-{ar,ranlib} (but not -ld) which add
+ // support for the LTO plugin though it seems more recent GNU binutils
+ // (2.25) are able to load the plugin when needed automatically. So it
+ // doesn't seem we should bother trying to support this on our end (one
+ // way we could do it is by passing config.bin.{ar,ranlib} as hints).
+ //
+ // It's also normal for native (i.e., non-cross-compiler) builds of GCC
+ // and Clang to not have binutils installed in the same directory and
+      // instead rely on the system ones. In this case, if the compiler is
+ // specified with the absolute path, the pattern will be the fallback
+ // search directory (though it feels like it should be checked first
+ // rather than last).
+ //
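+      // For example, for x86_64-w64-mingw32-g++ the pre-guessed position of
+      // the "g++" stem gives us the "x86_64-w64-mingw32-" prefix and thus
+      // the x86_64-w64-mingw32-* pattern.
+      //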
+ if (r.bin_pattern.empty ())
+ {
+ if (pre.second != 0 &&
+ pre.second != string::npos &&
+ !path::traits_type::is_separator (xc.string ()[pre.second - 1]))
+ {
+ r.bin_pattern.assign (xc.string (), 0, pre.second);
+ r.bin_pattern += '*'; // '-' or similar is already there.
+ }
+ }
+
+ if (r.bin_pattern.empty ())
+ {
+ const string& t (r.target);
+ size_t n (t.size ());
+
+ if (xc.size () > n + 1)
+ {
+ const string& l (xc.leaf ().string ());
+
+ if (l.size () > n + 1 && l.compare (0, n, t) == 0 && l[n] == '-')
+ {
+ path p (xc.directory ());
+ p /= t;
+ p += "-*";
+ r.bin_pattern = move (p).string ();
+ }
+ }
+ }
+
+ // If we could not derive the pattern, then see if we can come up with a
+ // fallback search directory.
+ //
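+      // For example, for /usr/local/bin/g++ neither pattern applies and we
+      // end up with the /usr/local/bin/ fallback directory (note the
+      // trailing slash).
+      //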
+ if (r.bin_pattern.empty ())
+ {
+ const path& p (r.path.recall.empty () ? xc : r.path.recall);
+
+ if (!p.simple ())
+ r.bin_pattern = p.directory ().representation (); // Trailing slash.
+ }
+
+ return (cache[key] = move (r));
+ }
+
+ path
+ guess_default (lang xl, const string& cid, const string& pat)
+ {
+ compiler_id id (cid);
+ const char* s (nullptr);
+
+ using type = compiler_type;
+
+ switch (xl)
+ {
+ case lang::c:
+ {
+ switch (id.type)
+ {
+ case type::gcc: s = "gcc"; break;
+ case type::clang: s = "clang"; break;
+ case type::icc: s = "icc"; break;
+ case type::msvc: s = "cl"; break;
+ }
+
+ break;
+ }
+ case lang::cxx:
+ {
+ switch (id.type)
+ {
+ case type::gcc: s = "g++"; break;
+ case type::clang: s = "clang++"; break;
+ case type::icc: s = "icpc"; break;
+ case type::msvc: s = "cl"; break;
+ }
+
+ break;
+ }
+ }
+
+ return path (apply_pattern (s, &pat));
+ }
+ }
+}
diff --git a/libbuild2/cc/guess.hxx b/libbuild2/cc/guess.hxx
new file mode 100644
index 0000000..3677cc7
--- /dev/null
+++ b/libbuild2/cc/guess.hxx
@@ -0,0 +1,246 @@
+// file : libbuild2/cc/guess.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_GUESS_HXX
+#define LIBBUILD2_CC_GUESS_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Compiler id consisting of a type and optional variant. If the variant
+ // is not empty, then the id is spelled out as 'type-variant', similar to
+ // target triplets (this also means that the type cannot contain '-').
+ //
+ // Currently recognized compilers and their ids:
+ //
+ // gcc GCC gcc/g++
+ // clang Vanilla Clang clang/clang++
+ // clang-apple Apple Clang clang/clang++ and the gcc/g++ "alias"
+ // msvc Microsoft cl.exe
+ // icc Intel icc/icpc
+ //
+ // Note that the user can provide a custom id with one of the predefined
+ // types and a custom variant (say 'gcc-tasking').
+ //
+ enum class compiler_type
+ {
+ gcc = 1, // 0 value represents invalid type.
+ clang,
+ msvc,
+ icc
+ // Update compiler_id(string) and to_string() if adding a new type.
+ };
+
+ const compiler_type invalid_compiler_type = static_cast<compiler_type> (0);
+
+ string
+ to_string (compiler_type);
+
+ inline ostream&
+ operator<< (ostream& o, const compiler_type& t)
+ {
+ return o << to_string (t);
+ }
+
+ struct compiler_id
+ {
+ compiler_type type = invalid_compiler_type;
+ std::string variant;
+
+ bool
+ empty () const {return type == invalid_compiler_type;}
+
+ std::string
+ string () const;
+
+ compiler_id ()
+ : type (invalid_compiler_type) {}
+
+ compiler_id (compiler_type t, std::string v)
+ : type (t), variant (move (v)) {}
+
+ explicit
+ compiler_id (const std::string&);
+ };
+
+ inline ostream&
+ operator<< (ostream& o, const compiler_id& id)
+ {
+ return o << id.string ();
+ }
+
+ // Compiler class describes a set of compilers that follow more or less
+ // the same command line interface. Compilers that don't belong to any of
+ // the existing classes are in classes of their own (say, Sun CC would be
+ // on its own if we were to support it).
+ //
+ // Currently defined compiler classes:
+ //
+ // gcc gcc, clang, clang-apple, icc (on non-Windows)
+ // msvc msvc, clang-cl, icc (Windows)
+ //
+ enum class compiler_class
+ {
+ gcc,
+ msvc
+ };
+
+ string
+ to_string (compiler_class);
+
+ inline ostream&
+ operator<< (ostream& o, compiler_class c)
+ {
+ return o << to_string (c);
+ }
+
+ // Compiler version. Here we map the various compiler version formats to
+ // something that resembles the MAJOR.MINOR.PATCH-BUILD form of the
+ // Semantic Versioning. While the MAJOR.MINOR part is relatively
+ // straightforward, PATCH may be empty and BUILD can contain pretty much
+ // anything (including spaces).
+ //
+ // gcc A.B.C[ ...] {A, B, C, ...}
+ // clang A.B.C[( |-)...] {A, B, C, ...}
+ // clang-apple A.B[.C] ... {A, B, C, ...}
+ // icc A.B[.C.D] ... {A, B, C, D ...}
+ // msvc A.B.C[.D] {A, B, C, D}
+ //
+ // Note that the clang-apple version is a custom Apple version and does
+ // not correspond to the vanilla clang version.
+ //
+ struct compiler_version
+ {
+ std::string string;
+
+ // Currently all the compilers that we support have numeric MAJOR,
+ // MINOR, and PATCH components and it makes sense to represent them as
+ // integers for easy comparison. If we meet a compiler for which this
+ // doesn't hold, then we will probably just set these to 0 and let the
+ // user deal with the string representation.
+ //
+ uint64_t major;
+ uint64_t minor;
+ uint64_t patch;
+ std::string build;
+ };
+
+ // Compiler information.
+ //
+ // The signature is normally the -v/--version line that was used to guess
+ // the compiler id and its version.
+ //
+ // The checksum is used to detect compiler changes. It is calculated in a
+ // compiler-specific manner (usually the output of -v/--version) and is
+ // not bulletproof (e.g., it most likely won't detect that the underlying
+ // assembler or linker has changed). However, it should detect most
+ // common cases, such as an upgrade to a new version or a configuration
+ // change.
+ //
+ // Note that we assume the checksum incorporates the (default) target so
+ // that if the compiler changes but only in what it targets, then the
+ // checksum will still change. This is currently the case for all the
+ // compilers that we support.
+ //
+    // The target is the compiler's target architecture triplet. Note that
+ // unlike all the preceding fields, this one takes into account the
+ // compile options (e.g., -m32).
+ //
+    // The pattern is the toolchain program pattern that can sometimes be
+    // derived. For example, i686-w64-mingw32-*-4.9.
+    //
+    // The bin_pattern is the binutils program pattern that can sometimes be
+    // derived. For example, i686-w64-mingw32-*. If the
+ // pattern could not be derived, then it could contain a fallback search
+ // directory, in which case it will end with a directory separator but
+ // will not contain '*'.
+ //
+ struct compiler_info
+ {
+ process_path path;
+ compiler_id id;
+ compiler_class class_;
+ compiler_version version;
+ string signature;
+ string checksum;
+ string target;
+ string original_target; // As reported by the compiler.
+ string pattern;
+ string bin_pattern;
+
+ // Compiler runtime, C standard library, and language (e.g., C++)
+ // standard library.
+ //
+ // The runtime is the low-level compiler runtime library and its name is
+ // the library/project name. Current values are (but can also be some
+ // custom name specified with Clang's --rtlib):
+ //
+ // libgcc
+ // compiler-rt (clang)
+ // msvc
+ //
+ // The C standard library is normally the library/project name (e.g.,
+ // glibc, klibc, newlib, etc) but if there is none, then we fall back to
+ // the vendor name (e.g., freebsd, apple). Current values are:
+ //
+ // glibc
+ // msvc (msvcrt.lib/msvcrNNN.dll)
+ // freebsd
+ // apple
+ // newlib (also used by Cygwin)
+ // klibc
+ // bionic
+ // uclibc
+ // musl
+ // dietlibc
+ // other
+ // none
+ //
+ // The C++ standard library is normally the library/project name.
+ // Current values are:
+ //
+ // libstdc++
+ // libc++
+ // msvcp (msvcprt.lib/msvcpNNN.dll)
+ // other
+ // none
+ //
+ string runtime;
+ string c_stdlib;
+ string x_stdlib;
+ };
+
+ // In a sense this is analogous to the language standard which we handle
+ // via a virtual function in common. However, duplicating this hairy ball
+ // of fur in multiple places doesn't seem wise, especially considering
+ // that most of it will be the same, at least for C and C++.
+ //
+ const compiler_info&
+ guess (const char* xm, // Module (for variable names in diagnostics).
+ lang xl, // Language.
+ const path& xc, // Compiler path.
+ const string* xi, // Compiler id (optional).
+ const string* xv, // Compiler version (optional).
+ const string* xt, // Compiler target (optional).
+ const strings* c_poptions, const strings* x_poptions,
+ const strings* c_coptions, const strings* x_coptions,
+ const strings* c_loptions, const strings* x_loptions);
+
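+ // For example, a hypothetical invocation (with no user-specified id,
+ // version, target, or options):
+ //
+ // const compiler_info& ci (
+ //   guess ("cxx", lang::cxx, path ("g++"),
+ //          nullptr, nullptr, nullptr,
+ //          nullptr, nullptr, nullptr, nullptr, nullptr, nullptr));
+ //
+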
+ // Given a language, compiler id, and a pattern (which may be empty),
+ // return an appropriate default compiler path.
+ //
+ // For example, for (lang::cxx, gcc, *-4.9) we will get g++-4.9.
+ //
+ path
+ guess_default (lang, const string& cid, const string& pattern);
+ }
+}
+
+#endif // LIBBUILD2_CC_GUESS_HXX
diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx
new file mode 100644
index 0000000..f45a1bf
--- /dev/null
+++ b/libbuild2/cc/init.cxx
@@ -0,0 +1,493 @@
+// file : libbuild2/cc/init.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/init.hxx>
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/config/utility.hxx>
+
+#include <libbuild2/cc/target.hxx>
+#include <libbuild2/cc/utility.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Scope operation callback that cleans up module sidebuilds.
+ //
+ static target_state
+ clean_module_sidebuilds (action, const scope& rs, const dir&)
+ {
+ context& ctx (rs.ctx);
+
+ const dir_path& out_root (rs.out_path ());
+
+ dir_path d (out_root / rs.root_extra->build_dir / modules_sidebuild_dir);
+
+ if (exists (d))
+ {
+ if (rmdir_r (ctx, d))
+ {
+ // Clean up cc/ if it became empty.
+ //
+ d = out_root / rs.root_extra->build_dir / module_dir;
+ if (empty (d))
+ {
+ rmdir (ctx, d);
+
+ // And build/ if it also became empty (e.g., in case of a build
+ // with a transient configuration).
+ //
+ d = out_root / rs.root_extra->build_dir;
+ if (empty (d))
+ rmdir (ctx, d);
+ }
+
+ return target_state::changed;
+ }
+ }
+
+ return target_state::unchanged;
+ }
+
+ bool
+ core_vars_init (scope& rs,
+ scope&,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool first,
+ bool,
+ const variable_map&)
+ {
+ tracer trace ("cc::core_vars_init");
+ l5 ([&]{trace << "for " << rs;});
+
+ assert (first);
+
+ // Load bin.vars (we need its config.bin.target/pattern for hints).
+ //
+ if (!cast_false<bool> (rs["bin.vars.loaded"]))
+ load_module (rs, rs, "bin.vars", loc);
+
+ // Enter variables. Note: some overridable, some not.
+ //
+ auto& v (rs.ctx.var_pool.rw (rs));
+
+ auto v_t (variable_visibility::target);
+
+ v.insert<strings> ("config.cc.poptions", true);
+ v.insert<strings> ("config.cc.coptions", true);
+ v.insert<strings> ("config.cc.loptions", true);
+ v.insert<strings> ("config.cc.aoptions", true);
+ v.insert<strings> ("config.cc.libs", true);
+
+ v.insert<strings> ("cc.poptions");
+ v.insert<strings> ("cc.coptions");
+ v.insert<strings> ("cc.loptions");
+ v.insert<strings> ("cc.aoptions");
+ v.insert<strings> ("cc.libs");
+
+ v.insert<strings> ("cc.export.poptions");
+ v.insert<strings> ("cc.export.coptions");
+ v.insert<strings> ("cc.export.loptions");
+ v.insert<vector<name>> ("cc.export.libs");
+
+ // Hint variables (not overridable).
+ //
+ v.insert<string> ("config.cc.id");
+ v.insert<string> ("config.cc.hinter"); // Hinting module.
+ v.insert<string> ("config.cc.pattern");
+ v.insert<target_triplet> ("config.cc.target");
+
+ // Compiler runtime and C standard library.
+ //
+ v.insert<string> ("cc.runtime");
+ v.insert<string> ("cc.stdlib");
+
+ // Target type, for example, "C library" or "C++ library". Should be set
+ // on the target as a rule-specific variable by the matching rule to the
+ // name of the module (e.g., "c", "cxx"). Currently only set for
+ // libraries and is used to decide which *.libs to use during static
+ // linking.
+ //
+ // It can also be the special "cc" value which means a C-common library
+ // but the specific language is not known. Used in the import installed
+ // logic.
+ //
+ v.insert<string> ("cc.type", v_t);
+
+ // If set and is true, then this (imported) library has been found in a
+ // system library search directory.
+ //
+ v.insert<bool> ("cc.system", v_t);
+
+ // C++ module name. Set on the bmi*{} target as a rule-specific variable
+ // by the matching rule. Can also be set by the user (normally via the
+ // x.module_name alias) on the x_mod{} source.
+ //
+ v.insert<string> ("cc.module_name", v_t);
+
+ // Ability to disable using preprocessed output for compilation.
+ //
+ v.insert<bool> ("config.cc.reprocess", true);
+ v.insert<bool> ("cc.reprocess");
+
+ // Register scope operation callback.
+ //
+ // It feels natural to clean up sidebuilds as a post operation but that
+ // prevents the (otherwise-empty) out root directory from being cleaned
+ // up (via the standard fsdir{} chain).
+ //
+ rs.operation_callbacks.emplace (
+ perform_clean_id,
+ scope::operation_callback {&clean_module_sidebuilds, nullptr /*post*/});
+
+ return true;
+ }
+
+ bool
+ core_guess_init (scope& rs,
+ scope&,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool first,
+ bool,
+ const variable_map& h)
+ {
+ tracer trace ("cc::core_guess_init");
+ l5 ([&]{trace << "for " << rs;});
+
+ assert (first);
+
+ // Load cc.core.vars.
+ //
+ if (!cast_false<bool> (rs["cc.core.vars.loaded"]))
+ load_module (rs, rs, "cc.core.vars", loc);
+
+ // config.cc.{id,hinter}
+ //
+ {
+ // These values must be hinted.
+ //
+ rs.assign<string> ("cc.id") = cast<string> (h["config.cc.id"]);
+ rs.assign<string> ("cc.hinter") = cast<string> (h["config.cc.hinter"]);
+ }
+
+ // config.cc.target
+ //
+ {
+ // This value must be hinted.
+ //
+ const auto& t (cast<target_triplet> (h["config.cc.target"]));
+
+ // Also enter as cc.target.{cpu,vendor,system,version,class} for
+ // convenience of access.
+ //
+ rs.assign<string> ("cc.target.cpu") = t.cpu;
+ rs.assign<string> ("cc.target.vendor") = t.vendor;
+ rs.assign<string> ("cc.target.system") = t.system;
+ rs.assign<string> ("cc.target.version") = t.version;
+ rs.assign<string> ("cc.target.class") = t.class_;
+
+ rs.assign<target_triplet> ("cc.target") = t;
+ }
+
+ // config.cc.pattern
+ //
+ {
+ // This value could be hinted.
+ //
+ rs.assign<string> ("cc.pattern") =
+ cast_empty<string> (h["config.cc.pattern"]);
+ }
+
+ // cc.runtime
+ // cc.stdlib
+ //
+ rs.assign ("cc.runtime") = cast<string> (h["cc.runtime"]);
+ rs.assign ("cc.stdlib") = cast<string> (h["cc.stdlib"]);
+
+ return true;
+ }
+
+ bool
+ core_config_init (scope& rs,
+ scope&,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool first,
+ bool,
+ const variable_map& hints)
+ {
+ tracer trace ("cc::core_config_init");
+ l5 ([&]{trace << "for " << rs;});
+
+ assert (first);
+
+ // Load cc.core.guess.
+ //
+ if (!cast_false<bool> (rs["cc.core.guess.loaded"]))
+ load_module (rs, rs, "cc.core.guess", loc);
+
+ // Configure.
+ //
+
+ // Adjust module priority (compiler).
+ //
+ config::save_module (rs, "cc", 250);
+
+ // Note that we don't have a config report since it would just duplicate
+ // what has already been printed by the hinting module.
+
+ // config.cc.{p,c,l}options
+ // config.cc.libs
+ //
+ // @@ Same nonsense as in module.
+ //
+ //
+ rs.assign ("cc.poptions") += cast_null<strings> (
+ config::optional (rs, "config.cc.poptions"));
+
+ rs.assign ("cc.coptions") += cast_null<strings> (
+ config::optional (rs, "config.cc.coptions"));
+
+ rs.assign ("cc.loptions") += cast_null<strings> (
+ config::optional (rs, "config.cc.loptions"));
+
+ rs.assign ("cc.aoptions") += cast_null<strings> (
+ config::optional (rs, "config.cc.aoptions"));
+
+ rs.assign ("cc.libs") += cast_null<strings> (
+ config::optional (rs, "config.cc.libs"));
+
+ if (lookup l = config::omitted (rs, "config.cc.reprocess").first)
+ rs.assign ("cc.reprocess") = *l;
+
+ // Load the bin.config module.
+ //
+ if (!cast_false<bool> (rs["bin.config.loaded"]))
+ {
+ // Prepare configuration hints. They are only used on the first load
+ // of bin.config so we only populate them on our first load.
+ //
+ variable_map h (rs.ctx);
+
+ if (first)
+ {
+ // Note that all these variables have already been registered.
+ //
+ h.assign ("config.bin.target") =
+ cast<target_triplet> (rs["cc.target"]).string ();
+
+ if (auto l = hints["config.bin.pattern"])
+ h.assign ("config.bin.pattern") = cast<string> (l);
+ }
+
+ load_module (rs, rs, "bin.config", loc, false, h);
+ }
+
+ // Verify bin's target matches ours (we do it even if we loaded it
+ // ourselves since the target can come from the configuration and not
+ // our hint).
+ //
+ if (first)
+ {
+ const auto& ct (cast<target_triplet> (rs["cc.target"]));
+ const auto& bt (cast<target_triplet> (rs["bin.target"]));
+
+ if (bt != ct)
+ {
+ const auto& h (cast<string> (rs["cc.hinter"]));
+
+ fail (loc) << h << " and bin module target mismatch" <<
+ info << h << " target is " << ct <<
+ info << "bin target is " << bt;
+ }
+ }
+
+ // Load bin.*.config for bin.* modules we may need (see core_init()
+ // below).
+ //
+ const string& tsys (cast<string> (rs["cc.target.system"]));
+
+ if (!cast_false<bool> (rs["bin.ar.config.loaded"]))
+ load_module (rs, rs, "bin.ar.config", loc);
+
+ if (tsys == "win32-msvc")
+ {
+ if (!cast_false<bool> (rs["bin.ld.config.loaded"]))
+ load_module (rs, rs, "bin.ld.config", loc);
+ }
+
+ if (tsys == "mingw32")
+ {
+ if (!cast_false<bool> (rs["bin.rc.config.loaded"]))
+ load_module (rs, rs, "bin.rc.config", loc);
+ }
+
+ return true;
+ }
+
+ bool
+ core_init (scope& rs,
+ scope&,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool first,
+ bool,
+ const variable_map& hints)
+ {
+ tracer trace ("cc::core_init");
+ l5 ([&]{trace << "for " << rs;});
+
+ assert (first);
+
+ const string& tsys (cast<string> (rs["cc.target.system"]));
+
+ // Load cc.core.config.
+ //
+ if (!cast_false<bool> (rs["cc.core.config.loaded"]))
+ load_module (rs, rs, "cc.core.config", loc, false, hints);
+
+ // Load the bin module.
+ //
+ if (!cast_false<bool> (rs["bin.loaded"]))
+ load_module (rs, rs, "bin", loc);
+
+ // Load the bin.ar module.
+ //
+ if (!cast_false<bool> (rs["bin.ar.loaded"]))
+ load_module (rs, rs, "bin.ar", loc);
+
+ // For this target we link things directly with link.exe so load the
+ // bin.ld module.
+ //
+ if (tsys == "win32-msvc")
+ {
+ if (!cast_false<bool> (rs["bin.ld.loaded"]))
+ load_module (rs, rs, "bin.ld", loc);
+ }
+
+ // If our target is MinGW, then we will need the resource compiler
+ // (windres) in order to embed manifests into executables.
+ //
+ if (tsys == "mingw32")
+ {
+ if (!cast_false<bool> (rs["bin.rc.loaded"]))
+ load_module (rs, rs, "bin.rc", loc);
+ }
+
+ return true;
+ }
+
+ // The cc module is an "alias" for c and cxx. Its intended use is to make
+ // sure that the C/C++ configuration is captured in an amalgamation rather
+ // than subprojects.
+ //
+ static inline bool
+ init_alias (tracer& trace,
+ scope& rs,
+ scope& bs,
+ const char* m,
+ const char* c,
+ const char* c_loaded,
+ const char* cxx,
+ const char* cxx_loaded,
+ const location& loc,
+ const variable_map& hints)
+ {
+ l5 ([&]{trace << "for " << bs;});
+
+ // We only support root loading (which means there can only be one).
+ //
+ if (&rs != &bs)
+ fail (loc) << m << " module must be loaded in project root";
+
+ // We want to order the loading to match what user specified on the
+ // command line (config.c or config.cxx). This way the first loaded
+ // module (with user-specified config.*) will hint the compiler to the
+ // second.
+ //
+ bool lc (!cast_false<bool> (rs[c_loaded]));
+ bool lp (!cast_false<bool> (rs[cxx_loaded]));
+
+ // If none of them are already loaded, load c first only if config.c
+ // is specified.
+ //
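+ // For example (hypothetical), `b config.c=gcc` causes the c module to
+ // be loaded first so that its guesses are hinted to cxx.
+ //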
+ if (lc && lp && rs["config.c"])
+ {
+ load_module (rs, rs, c, loc, false, hints);
+ load_module (rs, rs, cxx, loc, false, hints);
+ }
+ else
+ {
+ if (lp) load_module (rs, rs, cxx, loc, false, hints);
+ if (lc) load_module (rs, rs, c, loc, false, hints);
+ }
+
+ return true;
+ }
+
+ bool
+ config_init (scope& rs,
+ scope& bs,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool,
+ bool,
+ const variable_map& hints)
+ {
+ tracer trace ("cc::config_init");
+ return init_alias (trace, rs, bs,
+ "cc.config",
+ "c.config", "c.config.loaded",
+ "cxx.config", "cxx.config.loaded",
+ loc, hints);
+ }
+
+ bool
+ init (scope& rs,
+ scope& bs,
+ const location& loc,
+ unique_ptr<module_base>&,
+ bool,
+ bool,
+ const variable_map& hints)
+ {
+ tracer trace ("cc::init");
+ return init_alias (trace, rs, bs,
+ "cc",
+ "c", "c.loaded",
+ "cxx", "cxx.loaded",
+ loc, hints);
+ }
+
+ static const module_functions mod_functions[] =
+ {
+ // NOTE: don't forget to also update the documentation in init.hxx if
+ // changing anything here.
+
+ {"cc.core.vars", nullptr, core_vars_init},
+ {"cc.core.guess", nullptr, core_guess_init},
+ {"cc.core.config", nullptr, core_config_init},
+ {"cc.core", nullptr, core_init},
+ {"cc.config", nullptr, config_init},
+ {"cc", nullptr, init},
+ {nullptr, nullptr, nullptr}
+ };
+
+ const module_functions*
+ build2_cc_load ()
+ {
+ return mod_functions;
+ }
+ }
+}
diff --git a/libbuild2/cc/init.hxx b/libbuild2/cc/init.hxx
new file mode 100644
index 0000000..b98e816
--- /dev/null
+++ b/libbuild2/cc/init.hxx
@@ -0,0 +1,36 @@
+// file : libbuild2/cc/init.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_INIT_HXX
+#define LIBBUILD2_CC_INIT_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/module.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Module `cc` does not require bootstrapping.
+ //
+ // Submodules:
+ //
+ // `cc.core.vars` -- registers some variables.
+ // `cc.core.guess` -- loads cc.core.vars and sets some variables.
+ // `cc.core.config` -- loads cc.core.guess and sets more variables.
+ // `cc.core` -- loads cc.core.config and registers target types and
+ // rules.
+ // `cc.config` -- loads {c,cxx}.config.
+ // `cc` -- loads c and cxx.
+ //
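+ // A project would typically load one of these from its root.build, for
+ // example (an illustrative sketch):
+ //
+ //   using cc # Load the c and cxx modules.
+ //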
+ extern "C" LIBBUILD2_CC_SYMEXPORT const module_functions*
+ build2_cc_load ();
+ }
+}
+
+#endif // LIBBUILD2_CC_INIT_HXX
diff --git a/libbuild2/cc/install-rule.cxx b/libbuild2/cc/install-rule.cxx
new file mode 100644
index 0000000..670757e
--- /dev/null
+++ b/libbuild2/cc/install-rule.cxx
@@ -0,0 +1,355 @@
+// file : libbuild2/cc/install-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/install-rule.hxx>
+
+#include <libbuild2/algorithm.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/utility.hxx>
+#include <libbuild2/cc/link-rule.hxx> // match()
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // install_rule
+ //
+ install_rule::
+ install_rule (data&& d, const link_rule& l)
+ : common (move (d)), link_ (l) {}
+
+ const target* install_rule::
+ filter (action a, const target& t, prerequisite_iterator& i) const
+ {
+ // NOTE: see libux_install_rule::filter() if changing anything here.
+
+ const prerequisite& p (i->prerequisite);
+
+ // If this is a shared library prerequisite, install it as long as it
+ // is in the same amalgamation as we are.
+ //
+ // Less obvious: we also want to install a static library prerequisite
+ // of a library (since it could be referenced from its .pc file, etc).
+ //
+ // Note: for now we assume these prerequisites never come from see-
+ // through groups.
+ //
+ // Note: we install ad hoc prerequisites by default.
+ //
+ otype ot (link_type (t).type);
+
+ bool st (t.is_a<exe> () || t.is_a<libs> ()); // Target needs shared.
+ bool at (t.is_a<liba> () || t.is_a<libs> ()); // Target needs static.
+
+ if ((st && (p.is_a<libx> () || p.is_a<libs> ())) ||
+ (at && (p.is_a<libx> () || p.is_a<liba> ())))
+ {
+ const target* pt (&search (t, p));
+
+ // If this is the lib{}/libu*{} group, pick a member which we would
+ // link. For libu*{} we want the "see through" logic.
+ //
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, link_info (t.base_scope (), ot));
+
+ // Note: not redundant since we are returning a member.
+ //
+ if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ()))
+ return pt->in (t.weak_scope ()) ? pt : nullptr;
+
+ // See through to libu*{} members. Note that we are always in the same
+ // project (and thus amalgamation).
+ //
+ if (pt->is_a<libux> ())
+ return pt;
+ }
+
+ // The rest of the tests only succeed if the base filter() succeeds.
+ //
+ const target* pt (file_rule::filter (a, t, p));
+ if (pt == nullptr)
+ return pt;
+
+ // Don't install executable's prerequisite headers and module
+ // interfaces.
+ //
+ // Note that if they come from a group, then we assume the entire
+ // group is not to be installed.
+ //
+ if (t.is_a<exe> ())
+ {
+ if (x_header (p))
+ pt = nullptr;
+ else if (p.type.see_through)
+ {
+ for (i.enter_group (); i.group (); )
+ {
+ if (x_header (*++i))
+ pt = nullptr;
+ }
+ }
+
+ if (pt == nullptr)
+ return pt;
+ }
+
+ // Here is a problem: if the user spells the obj*/bmi*{} targets
+ // explicitly, then the source files, including headers/modules, may be
+ // specified as prerequisites of those targets and not of this target.
+ // While this can be worked around for headers by also listing them as
+ // prerequisites of this target, this won't work for modules (since they
+ // are compiled). So what we are going to do here is detect bmi*{} and
+ // translate them to their mxx{} (this doesn't quite work for headers
+ // since there would normally be many of them).
+ //
+ // Note: for now we assume bmi*{} never come from see-through groups.
+ //
+ bool g (false);
+ if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi)))
+ {
+ if (g)
+ resolve_group (a, *pt);
+
+ for (prerequisite_member pm:
+ group_prerequisite_members (a, *pt, members_mode::maybe))
+ {
+ // This is tricky: we need to "look" inside groups for mxx{} but if
+ // found, remap to the group, not member.
+ //
+ if (pm.is_a (*x_mod))
+ {
+ pt = t.is_a<exe> ()
+ ? nullptr
+ : file_rule::filter (a, *pt, pm.prerequisite);
+ break;
+ }
+ }
+
+ if (pt == nullptr)
+ return pt;
+ }
+
+ return pt;
+ }
+
+ bool install_rule::
+ match (action a, target& t, const string& hint) const
+ {
+ // @@ How do we split the hint between the two?
+ //
+
+ // We only want to handle installation if we are also the ones building
+ // this target. So first run link's match().
+ //
+ return link_.match (a, t, hint) && file_rule::match (a, t, "");
+ }
+
+ recipe install_rule::
+ apply (action a, target& t) const
+ {
+ recipe r (file_rule::apply (a, t));
+
+ if (a.operation () == update_id)
+ {
+ // Signal to the link rule that this is update for install. And if the
+ // update has already been executed, verify it was done for install.
+ //
+ auto& md (t.data<link_rule::match_data> ());
+
+ if (md.for_install)
+ {
+ if (!*md.for_install)
+ fail << "target " << t << " already updated but not for install";
+ }
+ else
+ md.for_install = true;
+ }
+ else // install or uninstall
+ {
+ // Derive shared library paths and cache them in the target's aux
+ // storage if we are un/installing (used in the *_extra() functions
+ // below).
+ //
+ static_assert (sizeof (link_rule::libs_paths) <= target::data_size,
+ "insufficient space");
+
+ if (file* f = t.is_a<libs> ())
+ {
+ if (!f->path ().empty ()) // Not binless.
+ {
+ const string* p (cast_null<string> (t["bin.lib.prefix"]));
+ const string* s (cast_null<string> (t["bin.lib.suffix"]));
+ t.data (
+ link_.derive_libs_paths (*f,
+ p != nullptr ? p->c_str (): nullptr,
+ s != nullptr ? s->c_str (): nullptr));
+ }
+ }
+ }
+
+ return r;
+ }
+
+ bool install_rule::
+ install_extra (const file& t, const install_dir& id) const
+ {
+ bool r (false);
+
+ if (t.is_a<libs> ())
+ {
+ // Here we may have a bunch of symlinks that we need to install.
+ //
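+ // For example (hypothetical version 1.2.3):
+ //
+ //   libfoo.so     -> libfoo.so.1     (link -> soname)
+ //   libfoo.so.1   -> libfoo.so.1.2   (soname -> interm)
+ //   libfoo.so.1.2 -> libfoo.so.1.2.3 (interm -> real)
+ //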
+ const scope& rs (t.root_scope ());
+ auto& lp (t.data<link_rule::libs_paths> ());
+
+ auto ln = [&rs, &id] (const path& f, const path& l)
+ {
+ install_l (rs, id, f.leaf (), l.leaf (), 2 /* verbosity */);
+ return true;
+ };
+
+ const path& lk (lp.link);
+ const path& ld (lp.load);
+ const path& so (lp.soname);
+ const path& in (lp.interm);
+
+ const path* f (lp.real);
+
+ if (!in.empty ()) {r = ln (*f, in) || r; f = &in;}
+ if (!so.empty ()) {r = ln (*f, so) || r; f = &so;}
+ if (!ld.empty ()) {r = ln (*f, ld) || r; f = &ld;}
+ if (!lk.empty ()) {r = ln (*f, lk) || r; }
+ }
+
+ return r;
+ }
+
+ bool install_rule::
+ uninstall_extra (const file& t, const install_dir& id) const
+ {
+ bool r (false);
+
+ if (t.is_a<libs> ())
+ {
+ // Here we may have a bunch of symlinks that we need to uninstall.
+ //
+ const scope& rs (t.root_scope ());
+ auto& lp (t.data<link_rule::libs_paths> ());
+
+ auto rm = [&rs, &id] (const path& l)
+ {
+ return uninstall_f (rs, id, nullptr, l.leaf (), 2 /* verbosity */);
+ };
+
+ const path& lk (lp.link);
+ const path& ld (lp.load);
+ const path& so (lp.soname);
+ const path& in (lp.interm);
+
+ if (!lk.empty ()) r = rm (lk) || r;
+ if (!ld.empty ()) r = rm (ld) || r;
+ if (!so.empty ()) r = rm (so) || r;
+ if (!in.empty ()) r = rm (in) || r;
+ }
+
+ return r;
+ }
+
+ // libux_install_rule
+ //
+ libux_install_rule::
+ libux_install_rule (data&& d, const link_rule& l)
+ : common (move (d)), link_ (l) {}
+
+ const target* libux_install_rule::
+ filter (action a, const target& t, prerequisite_iterator& i) const
+ {
+ const prerequisite& p (i->prerequisite);
+
+ // The "see through" semantics that should be parallel to install_rule
+ // above. In particular, here we use libue/libua/libus{} as proxies for
+ // exe/liba/libs{} there.
+ //
+ otype ot (link_type (t).type);
+
+ bool st (t.is_a<libue> () || t.is_a<libus> ()); // Target needs shared.
+ bool at (t.is_a<libua> () || t.is_a<libus> ()); // Target needs static.
+
+ if ((st && (p.is_a<libx> () || p.is_a<libs> ())) ||
+ (at && (p.is_a<libx> () || p.is_a<liba> ())))
+ {
+ const target* pt (&search (t, p));
+
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, link_info (t.base_scope (), ot));
+
+ if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ()))
+ return pt->in (t.weak_scope ()) ? pt : nullptr;
+
+ if (pt->is_a<libux> ())
+ return pt;
+ }
+
+ const target* pt (install::file_rule::instance.filter (a, t, p));
+ if (pt == nullptr)
+ return pt;
+
+ if (t.is_a<libue> ())
+ {
+ if (x_header (p))
+ pt = nullptr;
+ else if (p.type.see_through)
+ {
+ for (i.enter_group (); i.group (); )
+ {
+ if (x_header (*++i))
+ pt = nullptr;
+ }
+ }
+
+ if (pt == nullptr)
+ return pt;
+ }
+
+ bool g (false);
+ if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi)))
+ {
+ if (g)
+ resolve_group (a, *pt);
+
+ for (prerequisite_member pm:
+ group_prerequisite_members (a, *pt, members_mode::maybe))
+ {
+ if (pm.is_a (*x_mod))
+ {
+ pt = t.is_a<libue> ()
+ ? nullptr
+ : install::file_rule::instance.filter (a, *pt, pm.prerequisite);
+ break;
+ }
+ }
+
+ if (pt == nullptr)
+ return pt;
+ }
+
+ return pt;
+ }
+
+ bool libux_install_rule::
+ match (action a, target& t, const string& hint) const
+ {
+ // We only want to handle installation if we are also the ones building
+ // this target. So first run link's match().
+ //
+ return link_.match (a, t, hint) && alias_rule::match (a, t, "");
+ }
+ }
+}
diff --git a/libbuild2/cc/install-rule.hxx b/libbuild2/cc/install-rule.hxx
new file mode 100644
index 0000000..6d7ceb8
--- /dev/null
+++ b/libbuild2/cc/install-rule.hxx
@@ -0,0 +1,82 @@
+// file : libbuild2/cc/install-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_INSTALL_RULE_HXX
+#define LIBBUILD2_CC_INSTALL_RULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/install/rule.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ class link_rule;
+
+ // Installation rule for exe{} and lib*{}. Here we do:
+ //
+ // 1. Signal to the link rule that this is update for install.
+ //
+ // 2. Custom filtering of prerequisites (e.g., headers of an exe{}).
+ //
+ // 3. Extra un/installation (e.g., libs{} symlinks).
+ //
+ class LIBBUILD2_CC_SYMEXPORT install_rule: public install::file_rule,
+ virtual common
+ {
+ public:
+ install_rule (data&&, const link_rule&);
+
+ virtual const target*
+ filter (action, const target&, prerequisite_iterator&) const override;
+
+ virtual bool
+ match (action, target&, const string&) const override;
+
+ virtual recipe
+ apply (action, target&) const override;
+
+ virtual bool
+ install_extra (const file&, const install_dir&) const override;
+
+ virtual bool
+ uninstall_extra (const file&, const install_dir&) const override;
+
+ private:
+ const link_rule& link_;
+ };
+
+ // Installation rule for libu*{}.
+ //
+ // While libu*{} members themselves are not installable, we need to see
+ // through them in case they depend on stuff that we need to install
+ // (e.g., headers). Note that we use the alias_rule as a base.
+ //
+ class LIBBUILD2_CC_SYMEXPORT libux_install_rule:
+ public install::alias_rule,
+ virtual common
+ {
+ public:
+ libux_install_rule (data&&, const link_rule&);
+
+ virtual const target*
+ filter (action, const target&, prerequisite_iterator&) const override;
+
+ virtual bool
+ match (action, target&, const string&) const override;
+
+ private:
+ const link_rule& link_;
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_INSTALL_RULE_HXX
diff --git a/libbuild2/cc/lexer+char-literal.test.testscript b/libbuild2/cc/lexer+char-literal.test.testscript
new file mode 100644
index 0000000..afd16dd
--- /dev/null
+++ b/libbuild2/cc/lexer+char-literal.test.testscript
@@ -0,0 +1,67 @@
+# file : libbuild2/cc/lexer+char-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test character literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+'a'
+'aa'
+'"'
+EOI
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L'a'
+U'a'
+u'a'
+u8'a'
+u8R'a'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+'u8R'
+<char literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+'a'x
+'a'_X123
+EOI
+<char literal>
+<char literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+'\''
+'\\'
+'\\\''
+'\n'
+U'\U0001f34c'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: unterminated
+:
+$* <"'a" 2>>EOE != 0
+stdin:1:1: error: unterminated character literal
+EOE
diff --git a/libbuild2/cc/lexer+comment.test.testscript b/libbuild2/cc/lexer+comment.test.testscript
new file mode 100644
index 0000000..bfcc440
--- /dev/null
+++ b/libbuild2/cc/lexer+comment.test.testscript
@@ -0,0 +1,88 @@
+# file : libbuild2/cc/lexer+comment.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test C and C++ comments.
+#
+
+: c-comment
+:
+$* <<EOI
+/* 'one' */
+/* "two" // three
+*/
+/**
+four
+// five */
+/**
+six /*
+*/
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+// 'one'
+// "two" // three
+// four /* five */
+EOI
+
+: commented-out
+:
+$* <<EOI >"';'"
+// /*
+;
+// */
+EOI
+
+: c-unterminated
+:
+$* <<EOI 2>>EOE != 0
+/*
+comment
+EOI
+stdin:1:2: error: unterminated comment
+EOE
+
+: cxx-unterminated
+:
+$* <<:EOI
+// comment
+EOI
+
+: in-char-literal
+:
+$* <<EOI >>EOO
+'//'
+'/*'*/
+EOI
+<char literal>
+<char literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-string-literal
+:
+$* <<EOI >>EOO
+"//foo"
+"/*"*/
+EOI
+<string literal>
+<string literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-raw-string-literal
+:
+$* <<EOI >>EOO
+R"X(
+// foo
+/* bar
+)X"*/
+EOI
+<string literal>
+<punctuation>
+<punctuation>
+EOO
diff --git a/libbuild2/cc/lexer+line.test.testscript b/libbuild2/cc/lexer+line.test.testscript
new file mode 100644
index 0000000..560c092
--- /dev/null
+++ b/libbuild2/cc/lexer+line.test.testscript
@@ -0,0 +1,67 @@
+# file : libbuild2/cc/lexer+line.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test line continuations.
+#
+
+: identifier
+:
+$* <<EOI >"'foo123'"
+fo\
+o\
+1\
+2\
+3
+EOI
+
+: punctuation
+:
+$* <<EOI >'<punctuation>'
+.\
+.\
+.
+EOI
+
+: c-comment
+:
+$* <<EOI
+/\
+*
+comment
+*\
+/\
+
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+/\
+/ comment\
+more\
+more
+EOI
+
+: other
+:
+$* <<EOI >>EOO
+\abc
+EOI
+<punctuation>
+'abc'
+EOO
+
+: multiple
+:
+$* <<EOI >>EOO
+\\
+EOI
+<punctuation>
+EOO
+
+: unterminated
+:
+$* <<:EOI >'<punctuation>'
+\
+EOI
diff --git a/libbuild2/cc/lexer+number.test.testscript b/libbuild2/cc/lexer+number.test.testscript
new file mode 100644
index 0000000..f361245
--- /dev/null
+++ b/libbuild2/cc/lexer+number.test.testscript
@@ -0,0 +1,48 @@
+# file : libbuild2/cc/lexer+number.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test numbers.
+#
+
+$* <'1' >'<number literal>'
+$* <'.1' >'<number literal>'
+$* <'1.' >'<number literal>'
+
+$* <'0b101' >'<number literal>'
+$* <'0123' >'<number literal>'
+$* <'0X12AB' >'<number literal>'
+
+$* <'1e10' >'<number literal>'
+$* <'1E+10' >'<number literal>'
+$* <'0x1.p10' >'<number literal>'
+$* <'0x1.P-10' >'<number literal>'
+
+$* <"123'456" >'<number literal>'
+$* <"0xff00'00ff" >'<number literal>'
+
+$* <'123f' >'<number literal>'
+$* <'123UL' >'<number literal>'
+$* <'123_X' >'<number literal>'
+
+: separate-punctuation
+:
+$* <'123;' >>EOO
+<number literal>
+';'
+EOO
+
+: separate-plus-minus
+:
+$* <'1.0_a+2.0' >>EOO
+<number literal>
+<punctuation>
+<number literal>
+EOO
+
+: separate-whitespace
+:
+$* <'123 abc' >>EOO
+<number literal>
+'abc'
+EOO
diff --git a/libbuild2/cc/lexer+preprocessor.test.testscript b/libbuild2/cc/lexer+preprocessor.test.testscript
new file mode 100644
index 0000000..e33eb90
--- /dev/null
+++ b/libbuild2/cc/lexer+preprocessor.test.testscript
@@ -0,0 +1,73 @@
+# file : libbuild2/cc/lexer+preprocessor.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test preprocessor lines.
+#
+
+: normal
+:
+$* <<EOI
+#pragma message("abc")
+EOI
+
+: multiline
+:
+$* <<EOI
+#pragma message \
+( \
+"abc" \
+)
+EOI
+
+: comment
+:
+$* <<EOI
+#pragma foo /*
+bar
+baz
+*/
+#pragma foo // bar baz
+EOI
+
+: line
+:
+$* -l <<EOI >>EOO
+;
+# 1 "test.cxx" 2
+;
+ ;
+# 4
+;
+#line 8 "z:\\tmp\\test.hxx"
+;
+#line 10
+;
+# 5 "test.cxx"
+;
+EOI
+';' stdin:1:1
+';' test.cxx:1:1
+';' test.cxx:2:3
+';' test.cxx:4:1
+';' z:\tmp\test.hxx:8:1
+';' z:\tmp\test.hxx:10:1
+';' test.cxx:5:1
+EOO
+
+: include
+:
+$* <<EOI 2>>EOE != 0
+#include <foo/bar>
+EOI
+stdin:1:1: error: unexpected #include directive
+EOE
+
+: nested
+:
+$* <<EOI >>EOO
+#define FOO(x) #y
+;
+EOI
+';'
+EOO
diff --git a/libbuild2/cc/lexer+raw-string-literal.test.testscript b/libbuild2/cc/lexer+raw-string-literal.test.testscript
new file mode 100644
index 0000000..93cddc1
--- /dev/null
+++ b/libbuild2/cc/lexer+raw-string-literal.test.testscript
@@ -0,0 +1,90 @@
+# file : libbuild2/cc/lexer+raw-string-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test raw string literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+R"()"
+R"(ab)"
+R"(a"b)"
+R"(a)b)"
+R"%(a%)b)%"
+R"X(a
+ b)X"
+R"X(a\
+ b)X"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+LR"(ab)"
+UR"(ab)"
+uR"(ab)"
+u8R"(ab)"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+R"(ab)"x
+R"(ab)"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+R"(\)"
+EOI
+<string literal>
+EOO
+
+: invalid-no-paren
+:
+$* <'R"a"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-paren
+:
+$* <'R")()("' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-paren
+:
+$* <'R"(abc"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-delimiter
+:
+$* <'R"X(abc)"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-quote
+:
+$* <'R"X(abc)X' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
diff --git a/libbuild2/cc/lexer+string-literal.test.testscript b/libbuild2/cc/lexer+string-literal.test.testscript
new file mode 100644
index 0000000..a2509c9
--- /dev/null
+++ b/libbuild2/cc/lexer+string-literal.test.testscript
@@ -0,0 +1,65 @@
+# file : libbuild2/cc/lexer+string-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test string literals (except raw).
+#
+
+: normal
+:
+$* <<EOI >>EOO
+"aa"
+"'"
+"a""b"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L"ab"
+U"ab"
+u"ab"
+u8"ab"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+"ab"x
+"ab"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+"\"\""
+"\\\\"
+"\\\"\\"
+"\n\t"
+U"a\U0001f34c"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: unterminated
+:
+$* <'"ab' 2>>EOE != 0
+stdin:1:1: error: unterminated string literal
+EOE
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx
new file mode 100644
index 0000000..6eba57e
--- /dev/null
+++ b/libbuild2/cc/lexer.cxx
@@ -0,0 +1,1129 @@
+// file : libbuild2/cc/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+// bit 0 - identifier character (_0-9A-Za-z).
+//
+static const uint8_t char_flags[256] =
+//0 1 2 3 4 5 6 7 8 9 A B C D E F
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 3
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 5
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 7
+
+ // 128-255
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
+};
+
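+// For example, the identifier scanning code below tests a character with
+// a lookup along these lines:
+//
+//   if (char_flags[static_cast<uint8_t> (c)] & 0x01) // Identifier char.
+//
+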
+// Diagnostics plumbing.
+//
+namespace butl // ADL
+{
+ inline build2::location
+ get_location (const butl::char_scanner::xchar& c, const void* data)
+ {
+ using namespace build2;
+
+ assert (data != nullptr); // E.g., must be &lexer::name_.
+ return location (static_cast<const path*> (data), c.line, c.column);
+ }
+}
+
+namespace build2
+{
+ namespace cc
+ {
+ auto lexer::
+ peek (bool e) -> xchar
+ {
+ if (unget_)
+ return ungetc_;
+
+ if (unpeek_)
+ return unpeekc_;
+
+ xchar c (base::peek ());
+
+ if (e && c == '\\')
+ {
+ get (c);
+ xchar p (base::peek ());
+
+ // Handle Windows CRLF sequence. Similar to char_scanner, we treat a
+ // single CR as if it was followed by LF and also collapse multiple
+ // CRs.
+ //
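+ // For example (illustration only), "\<CR><LF>", "\<CR><CR><LF>", and a
+ // lone "\<CR>" are all treated as a line continuation.
+ //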
+ while (p == '\r')
+ {
+ get (p);
+ p = base::peek ();
+
+ if (p == '\n')
+ break;
+
+ // Pretend '\n' was there and recurse.
+ //
+ if (p != '\r')
+ return peek (e);
+ }
+
+ if (p == '\n')
+ {
+ get (p);
+ return peek (e); // Recurse.
+ }
+
+ // Save in the unpeek buffer so that it is returned on the subsequent
+ // calls to peek() (until get()).
+ //
+ unpeek_ = true;
+ unpeekc_ = c;
+ }
+
+ return c;
+ }
+
+ inline auto lexer::
+ get (bool e) -> xchar
+ {
+ if (unget_)
+ {
+ unget_ = false;
+ return ungetc_;
+ }
+ else
+ {
+ xchar c (peek (e));
+ get (c);
+ return c;
+ }
+ }
+
+ inline void lexer::
+ get (const xchar& c)
+ {
+ // Increment the logical line similar to how base will increment the
+ // physical (the column counts are the same).
+ //
+ if (log_line_ && c == '\n' && !unget_)
+ ++*log_line_;
+
+ base::get (c);
+ }
+
+ inline auto lexer::
+ geth (bool e) -> xchar
+ {
+ xchar c (get (e));
+ cs_.append (c);
+ return c;
+ }
+
+ inline void lexer::
+ geth (const xchar& c)
+ {
+ get (c);
+ cs_.append (c);
+ }
+
+ using type = token_type;
+
+ void lexer::
+ next (token& t, xchar c, bool ignore_pp)
+ {
+ for (;; c = skip_spaces ())
+ {
+ t.file = log_file_;
+ t.line = log_line_ ? *log_line_ : c.line;
+ t.column = c.column;
+
+ if (eos (c))
+ {
+ t.type = type::eos;
+ return;
+ }
+
+ const location l (&name_, c.line, c.column);
+
+ // Hash the token's line. The reason is debug info. In fact, doing
+ // this will make quite a few "noop" changes (like adding a newline
+ // anywhere in the source) cause the checksum to change. But there
+ // doesn't seem to be any way around it: the case where we benefit
+ // from the precise change detection the most (development) is also
+ // where we will most likely have debug info enabled.
+ //
+ // Note that in order not to make this completely useless we don't
+ // hash the column. Even if it is part of the debug info, having it a
+ // bit off shouldn't cause any significant mis-positioning. We also
+ // don't hash the file path for each token, instead only hashing it
+ // when it changes with the #line directive (as well as in the
+ // constructor for the initial path).
+ //
+ cs_.append (t.line);
+ cs_.append (c);
+
+ switch (c)
+ {
+ // Preprocessor lines.
+ //
+ case '#':
+ {
+ // It is tempting to simply scan until the newline ignoring
+ // anything in between. However, these lines can start a
+ // multi-line C-style comment. So we have to tokenize them (and
+ // hash the data for each token).
+ //
+ // Note that this may not work for things like #error that can
+ // contain pretty much anything. Also note that lines that start
+ // with '#' can contain '#' further down. In this case we need to
+ // be careful not to recurse (and consume multiple newlines). Thus
+ // the ignore_pp flag.
+ //
+ // Finally, to support diagnostics properly we need to recognize
+ // #line directives.
+ //
+ if (ignore_pp)
+ {
+ for (bool first (true);;)
+ {
+ // Note that we keep using the passed token for buffers.
+ //
+ c = skip_spaces (false); // Stop at newline.
+
+ if (eos (c) || c == '\n')
+ break;
+
+ if (first)
+ {
+ first = false;
+
+ // Recognize #line and its shorthand version:
+ //
+ // #line <integer> [<string literal>] ...
+ // # <integer> [<string literal>] ...
+ //
+ // Also diagnose #include while at it.
+ //
+ if (!(c >= '0' && c <= '9'))
+ {
+ next (t, c, false);
+
+ if (t.type == type::identifier)
+ {
+ if (t.value == "include")
+ fail (l) << "unexpected #include directive";
+ else if (t.value != "line")
+ continue;
+ }
+ else
+ continue;
+
+ c = skip_spaces (false);
+
+ if (!(c >= '0' && c <= '9'))
+ fail (c) << "line number expected after #line directive";
+ }
+
+ // Ok, this is #line and next comes the line number.
+ //
+ line_directive (t, c);
+ continue; // Parse the tail, if any.
+ }
+
+ next (t, c, false);
+ }
+ break;
+ }
+ else
+ {
+ t.type = type::punctuation;
+ return;
+ }
+ }
+ // Single-letter punctuation.
+ //
+ case ';': t.type = type::semi; return;
+ case '{': t.type = type::lcbrace; return;
+ case '}': t.type = type::rcbrace; return;
+ // Other single-letter punctuation.
+ //
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case ',':
+ case '?':
+ case '~':
+ case '\\': t.type = type::punctuation; return;
+ // Potentially multi-letter punctuation.
+ //
+ case '.': // . .* .<N> ...
+ {
+ xchar p (peek ());
+
+ if (p == '*')
+ {
+ geth (p);
+ t.type = type::punctuation;
+ return;
+ }
+ else if (p >= '0' && p <= '9')
+ {
+ number_literal (t, c);
+ return;
+ }
+ else if (p == '.')
+ {
+ get (p);
+
+ xchar q (peek ());
+ if (q == '.')
+ {
+ cs_.append (p);
+
+ geth (q);
+ t.type = type::punctuation;
+ return;
+ }
+ unget (p);
+ // Fall through.
+ }
+
+ t.type = type::dot;
+ return;
+ }
+ case '=': // = ==
+ case '!': // ! !=
+ case '*': // * *=
+ case '/': // / /= (/* and // handled by skip_spaces() above)
+ case '%': // % %=
+ case '^': // ^ ^=
+ {
+ xchar p (peek ());
+
+ if (p == '=')
+ geth (p);
+
+ t.type = type::punctuation;
+ return;
+ }
+ case '<': // < <= << <<=
+ case '>': // > >= >> >>=
+ {
+ xchar p (peek ());
+
+ if (p == c)
+ {
+ geth (p);
+ if ((p = peek ()) == '=')
+ geth (p);
+ t.type = type::punctuation;
+ }
+ else if (p == '=')
+ {
+ geth (p);
+ t.type = type::punctuation;
+ }
+ else
+ t.type = (c == '<' ? type::less : type::greater);
+
+ return;
+ }
+ case '+': // + ++ +=
+ case '-': // - -- -= -> ->*
+ {
+ xchar p (peek ());
+
+ if (p == c || p == '=')
+ geth (p);
+ else if (c == '-' && p == '>')
+ {
+ geth (p);
+ if ((p = peek ()) == '*')
+ geth (p);
+ }
+
+ t.type = type::punctuation;
+ return;
+ }
+ case '&': // & && &=
+ case '|': // | || |=
+ {
+ xchar p (peek ());
+
+ if (p == c || p == '=')
+ geth (p);
+
+ t.type = type::punctuation;
+ return;
+ }
+ case ':': // : ::
+ {
+ xchar p (peek ());
+
+ if (p == ':')
+ geth (p);
+
+ t.type = type::punctuation;
+ return;
+ }
+ // Number (and also .<N> above).
+ //
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ {
+ number_literal (t, c);
+ return;
+ }
+ // Char/string literal, identifier, or other (\, $, @, `).
+ //
+ default:
+ {
+ bool raw (false); // Raw string literal.
+
+ // Note: known not to be a digit (see above).
+ //
+ if (char_flags[static_cast<uint8_t> (c)] & 0x01)
+ {
+ // This smells a little: we know skip_spaces() did not peek at
+ // the next character because this is not '/'. Which means the
+ // position in the stream must be that of this character + 1.
+ //
+ t.position = buf_->tellg () - 1;
+
+ string& id (t.value);
+ id = c;
+
+ while (char_flags[static_cast<uint8_t> (c = peek ())] & 0x01)
+ {
+ geth (c);
+ id += c;
+
+ // Direct buffer scan. Note that we always follow up with the
+ // normal peek() call which may load the next chunk, handle
+ // line continuations, etc. In other words, the end of the
+ // "raw" scan doesn't necessarily mean the end.
+ //
+ const char* b (gptr_);
+ const char* p (b);
+
+ for (const char* e (egptr_);
+ p != e && char_flags[static_cast<uint8_t> (*p)] & 0x01;
+ ++p) ;
+
+ // Unrolling this loop doesn't make a difference.
+ //
+ // for (const char* e (egptr_ - 4); p < e; p += 4)
+ // {
+ // uint8_t c;
+ //
+ // c = static_cast<uint8_t> (p[0]);
+ // if (!(char_flags[c] & 0x01)) break;
+ //
+ // c = static_cast<uint8_t> (p[1]);
+ // if (!(char_flags[c] & 0x01)) {p += 1; break;}
+ //
+ // c = static_cast<uint8_t> (p[2]);
+ // if (!(char_flags[c] & 0x01)) {p += 2; break;}
+ //
+ // c = static_cast<uint8_t> (p[3]);
+ // if (!(char_flags[c] & 0x01)) {p += 3; break;}
+ // }
+
+ size_t n (p - b);
+ id.append (b, n); cs_.append (b, n);
+ gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+ }
+
+ // If the following character is a quote, see if the identifier
+ // is one of the literal prefixes.
+ //
+ if (c == '\'' || c == '\"')
+ {
+ size_t n (id.size ()), i (0);
+ switch (id[0])
+ {
+ case 'u':
+ {
+ if (n > 1 && id[1] == '8')
+ ++i;
+ }
+ // Fall through.
+ case 'L':
+ case 'U':
+ {
+ ++i;
+
+ if (c == '\"' && n > i && id[i] == 'R')
+ {
+ ++i;
+ raw = true;
+ }
+ break;
+ }
+ case 'R':
+ {
+ if (c == '\"')
+ {
+ ++i;
+ raw = true;
+ }
+ break;
+ }
+ }
+
+ if (i == n) // All characters "consumed".
+ {
+ geth (c);
+ id.clear ();
+ }
+ }
+
+ if (!id.empty ())
+ {
+ t.type = type::identifier;
+ return;
+ }
+ }
+
+ switch (c)
+ {
+ case '\'':
+ {
+ char_literal (t, c);
+ return;
+ }
+ case '\"':
+ {
+ if (raw)
+ raw_string_literal (t, c);
+ else
+ string_literal (t, c);
+ return;
+ }
+ default:
+ {
+ t.type = type::other;
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void lexer::
+ number_literal (token& t, xchar c)
+ {
+ // note: c is hashed
+
+ // A number (integer or floating point literal) can:
+ //
+ // 1. Start with a dot (which must be followed by a digit, e.g., .123).
+ //
+ // 2. Have a radix prefix (0b101, 0123, 0X12AB).
+ //
+ // 3. Have an exponent (1e10, 0x1.p-10, 1.).
+ //
+ // 4. Have digits separated with ' (123'456, 0xff00'00ff).
+ //
+ // 5. End with a built-in or user-defined literal suffix (123f, 123UL, 123_X).
+ //
+ // Quoting from GCC's preprocessor documentation:
+ //
+ // "Formally preprocessing numbers begin with an optional period, a
+ // required decimal digit, and then continue with any sequence of
+ // letters, digits, underscores, periods, and exponents. Exponents are
+ // the two-character sequences 'e+', 'e-', 'E+', 'E-', 'p+', 'p-', 'P+',
+ // and 'P-'."
+ //
+ // So it looks like a "C++ number" is then any unseparated (with
+ // whitespace or punctuation) sequence of those plus '. The only mildly
+ // tricky part is then to recognize +/- as being part of the exponent.
+ //
+ while (!eos ((c = peek ())))
+ {
+ switch (c)
+ {
+ // All the whitespace, punctuation, and other characters that end
+ // the number.
+ //
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\r':
+ case '\f':
+ case '\v':
+
+ case '#':
+ case ';':
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case ',':
+ case '?':
+ case '~':
+ case '=':
+ case '!':
+ case '*':
+ case '/':
+ case '%':
+ case '^':
+ case '>':
+ case '<':
+ case '&':
+ case '|':
+ case ':':
+ case '+': // The exponent case is handled below.
+ case '-': // The exponent case is handled below.
+ case '"':
+ case '\\':
+
+ case '@':
+ case '$':
+ case '`':
+ break;
+
+ // Recognize +/- after the exponent.
+ //
+ case 'e':
+ case 'E':
+ case 'p':
+ case 'P':
+ {
+ geth (c);
+ c = peek ();
+ if (c == '+' || c == '-')
+ geth (c);
+ continue;
+ }
+
+ case '_':
+ case '.':
+ case '\'':
+ default: // Digits and letters.
+ {
+ geth (c);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ t.type = type::number;
+ }
+
+ void lexer::
+ char_literal (token& t, xchar c)
+ {
+ // note: c is hashed
+
+ const location l (&name_, c.line, c.column);
+
+ for (char p (c);;) // Previous character (see below).
+ {
+ c = geth ();
+
+ if (eos (c) || c == '\n')
+ fail (l) << "unterminated character literal";
+
+ if (c == '\'' && p != '\\')
+ break;
+
+ // Keep track of \\-escapings so we don't confuse them with \', as in
+ // '\\'.
+ //
+ p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
+ }
+
+ // See if we have a user-defined suffix (which is an identifier).
+ //
+ if ((c = peek ()) == '_' || alpha (c))
+ literal_suffix (c);
+
+ t.type = type::character;
+ }
+
+ void lexer::
+ string_literal (token& t, xchar c)
+ {
+ // note: c is hashed
+
+ const location l (&name_, c.line, c.column);
+
+ for (char p (c);;) // Previous character (see below).
+ {
+ c = geth ();
+
+ if (eos (c) || c == '\n')
+ fail (l) << "unterminated string literal";
+
+ if (c == '\"' && p != '\\')
+ break;
+
+ // Keep track of \\-escapings so we don't confuse them with \", as in
+ // "\\".
+ //
+ p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
+
+ // Direct buffer scan.
+ //
+ if (p != '\\')
+ {
+ const char* b (gptr_);
+ const char* e (egptr_);
+ const char* p (b);
+
+ for (char c;
+ p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
+ ++p) ;
+
+ size_t n (p - b);
+ cs_.append (b, n);
+ gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+ }
+ }
+
+ // See if we have a user-defined suffix (which is an identifier).
+ //
+ if ((c = peek ()) == '_' || alpha (c))
+ literal_suffix (c);
+
+ t.type = type::string;
+ }
+
+ void lexer::
+ raw_string_literal (token& t, xchar c)
+ {
+ // note: c is hashed
+
+ // The overall form is:
+ //
+ // R"<delimiter>(<raw_characters>)<delimiter>"
+ //
+ // Where <delimiter> is a potentially-empty character sequence made of
+ // any source character but parentheses, backslash and spaces. It can be
+ // at most 16 characters long.
+ //
+ // Note that the <raw_characters> are not processed in any way, not even
+ // for line continuations.
+ //
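+ // For example (a worked illustration), in R"X(a)b)X" the delimiter is
+ // X, the raw characters are a)b, and the literal ends at the first
+ // occurrence of )X".
+ //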
+ const location l (&name_, c.line, c.column);
+
+ // As a first step, parse the delimiter (including the opening paren).
+ //
+ string d (1, ')');
+
+ for (;;)
+ {
+ c = geth ();
+
+ if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
+ fail (l) << "invalid raw string literal";
+
+ if (c == '(')
+ break;
+
+ d += c;
+ }
+
+ d += '"';
+
+ // Now parse the raw characters while trying to match the closing
+ // delimiter.
+ //
+ for (size_t i (0);;) // Position to match in d.
+ {
+ c = geth (false); // No newline escaping.
+
+ if (eos (c)) // Note: newline is ok.
+ fail (l) << "invalid raw string literal";
+
+ if (c != d[i] && i != 0) // Restart from the beginning.
+ i = 0;
+
+ if (c == d[i])
+ {
+ if (++i == d.size ())
+ break;
+ }
+ }
+
+ // See if we have a user-defined suffix (which is an identifier).
+ //
+ if ((c = peek ()) == '_' || alpha (c))
+ literal_suffix (c);
+
+ t.type = type::string;
+ }
+
+ void lexer::
+ literal_suffix (xchar c)
+ {
+ // note: c is unhashed
+
+ // Parse a user-defined literal suffix identifier.
+ //
+ for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
+ }
+
+ void lexer::
+ line_directive (token& t, xchar c)
+ {
+ // enter: first digit of the line number
+ // leave: last character of the line number or file string
+ // note: c is unhashed
+
+ // If our number and string tokens contained the literal values, then we
+ // could have used that. However, we ignore the value (along with escape
+ // processing, etc.) for performance. Let's keep it that way and instead
+ // handle it ourselves.
+ //
+ // Note also that we are not hashing these at the character level,
+ // instead hashing the switch to a new file path below and leaving the
+ // line number to the token line hashing.
+ //
+ {
+ string& s (t.value);
+
+ for (s = c; (c = peek ()) >= '0' && c <= '9'; get (c))
+ s += c;
+
+ // The newline that ends the directive will increment the logical line
+ // so subtract one to compensate. Note: can't be 0 and shouldn't throw
+ // for valid lines.
+ //
+ log_line_ = stoull (s.c_str ()) - 1;
+ }
+
+ // See if we have the file.
+ //
+ c = skip_spaces (false);
+
+ if (c == '\"')
+ {
+ const location l (&name_, c.line, c.column);
+
+ // It is common to have a large number of #line directives that don't
+ // change the file (they seem to be used to track macro locations or
+ // some such). So we are going to optimize for this by comparing the
+ // current path to what's in #line.
+ //
+ string& s (tmp_file_);
+ s.clear ();
+
+ for (char p ('\0'); p != '\"'; ) // Previous character.
+ {
+ c = get ();
+
+ if (eos (c) || c == '\n')
+ fail (l) << "unterminated string literal";
+
+ // Handle escapes.
+ //
+ if (p == '\\')
+ {
+ p = '\0'; // Clear so we don't confuse \" and \\".
+
+ // We only handle what can reasonably be expected in a file name.
+ //
+ switch (c)
+ {
+ case '\\':
+ case '\'':
+ case '\"': break; // Add as is.
+ default:
+ fail (c) << "unsupported escape sequence in #line directive";
+ }
+ }
+ else
+ {
+ p = c;
+
+ switch (c)
+ {
+ case '\\':
+ case '\"': continue;
+ }
+ }
+
+ s += c;
+
+ // Direct buffer scan.
+ //
+ if (p != '\\')
+ {
+ const char* b (gptr_);
+ const char* e (egptr_);
+ const char* p (b);
+
+ for (char c;
+ p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
+ ++p) ;
+
+ size_t n (p - b);
+ s.append (b, n);
+ gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+ }
+ }
+
+ if (log_file_.string () == s)
+ return;
+
+ // Swap the two string buffers.
+ //
+ {
+ string r (move (log_file_).string ()); // Move string rep out.
+ r.swap (s);
+ log_file_ = path (move (r)); // Move back in.
+ }
+
+ // If the path is relative, then prefix it with the current working
+ // directory. Failing that, we will end up with different checksums for
+ // invocations from different directories.
+ //
+ // While this should work fine for normal cross-compilation, it's an
+ // entirely different story for the emulated case (e.g., msvc-linux
+ // where the preprocessed output contains absolute Windows paths). So
+ // we try to sense if things look fishy and leave the path alone.
+ //
+ // Also detect special names like <built-in> and <command-line>. Plus
+ // GCC sometimes adds what looks like working directory (has trailing
+ // slash). So ignore that as well.
+ //
+ // We have now switched to using absolute translation unit paths (because
+ // of __FILE__/assert(); see compile.cxx for details). But we might still
+ // need this logic when we try to calculate a location-independent hash
+ // for distributed compilation/caching. The idea is to only hash the part
+ // starting from the project root, which is immutable. Plus we will need
+ // -ffile-prefix-map to deal with __FILE__.
+ //
+ if (!log_file_.to_directory ())
+ cs_.append (log_file_.string ());
+#if 0
+ {
+ using tr = path::traits;
+ const string& f (log_file_.string ());
+
+ if (f.find (':') != string::npos ||
+ (f.front () == '<' && f.back () == '>') ||
+ log_file_.absolute ())
+ cs_.append (f);
+ else
+ {
+ // This gets complicated and slow: the path may contain '..' and
+ // '.' so strictly speaking we would need to normalize it.
+ // Instead, we are going to handle leading '..'s ourselves (the
+ // sane case) and ignore everything else (so if you have '..' or
+ // '.' somewhere in the middle, then things might not work
+ // optimally for you).
+ //
+ const string& d (work.string ());
+
+ // Iterate over leading '..' in f "popping" the corresponding
+ // number of trailing components from d.
+ //
+ size_t fp (0);
+ size_t dp (d.size () - 1);
+
+ for (size_t p;; )
+ {
+ // Note that in file we recognize any directory separator, not
+ // just of this platform (see note about emulation above).
+ //
+ if (f.compare (fp, 2, "..") != 0 ||
+ (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
+ (p = tr::rfind_separator (d, dp)) == string::npos)
+ break;
+
+ fp += 3;
+ dp = p - 1;
+ }
+
+ cs_.append (d.c_str (), dp + 1);
+ cs_.append (tr::directory_separator); // Canonical in work.
+ cs_.append (f.c_str () + fp);
+ }
+ }
+#endif
+ }
+ else
+ unget (c);
+ }
+
+ auto lexer::
+ skip_spaces (bool nl) -> xchar
+ {
+ xchar c (get ());
+
+ for (; !eos (c); c = get ())
+ {
+ switch (c)
+ {
+ case '\n':
+ if (!nl) break;
+ // Fall through.
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\f':
+ case '\v':
+ {
+ // Direct buffer scan.
+ //
+ const char* b (gptr_);
+ const char* e (egptr_);
+ const char* p (b);
+
+ for (char c;
+ p != e && ((c = *p) == ' ' || c == '\t');
+ ++p) ;
+
+ size_t n (p - b);
+ gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+
+ continue;
+ }
+ case '/':
+ {
+ xchar p (peek ());
+
+ // C++ comment.
+ //
+ if (p == '/')
+ {
+ get (p);
+
+ for (;;)
+ {
+ c = get ();
+ if (c == '\n' || eos (c))
+ break;
+
+ // Direct buffer scan.
+ //
+ const char* b (gptr_);
+ const char* e (egptr_);
+ const char* p (b);
+
+ for (char c;
+ p != e && (c = *p) != '\n' && c != '\\';
+ ++p) ;
+
+ size_t n (p - b);
+ gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+ }
+
+ if (!nl)
+ break;
+
+ continue;
+ }
+
+ // C comment.
+ //
+ if (p == '*')
+ {
+ get (p);
+
+ for (;;)
+ {
+ c = get ();
+
+ if (eos (c))
+ fail (p) << "unterminated comment";
+
+ if (c == '*' && (c = peek ()) == '/')
+ {
+ get (c);
+ break;
+ }
+
+ // Direct buffer scan.
+ //
+ const char* b (gptr_);
+ const char* e (egptr_);
+ const char* p (b);
+
+ for (char c;
+ p != e && (c = *p) != '*' && c != '\\';
+ ++p)
+ {
+ if (c == '\n')
+ {
+ if (log_line_) ++*log_line_;
+ ++line;
+ column = 1;
+ }
+ else
+ ++column;
+ }
+
+ gptr_ = p; buf_->gbump (static_cast<int> (p - b));
+ }
+ continue;
+ }
+ break;
+ }
+ }
+ break;
+ }
+
+ return c;
+ }
+
+ ostream&
+ operator<< (ostream& o, const token& t)
+ {
+ switch (t.type)
+ {
+ case type::dot: o << "'.'"; break;
+ case type::semi: o << "';'"; break;
+ case type::less: o << "'<'"; break;
+ case type::greater: o << "'>'"; break;
+ case type::lcbrace: o << "'{'"; break;
+ case type::rcbrace: o << "'}'"; break;
+ case type::punctuation: o << "<punctuation>"; break;
+
+ case type::identifier: o << '\'' << t.value << '\''; break;
+
+ case type::number: o << "<number literal>"; break;
+ case type::character: o << "<char literal>"; break;
+ case type::string: o << "<string literal>"; break;
+
+ case type::other: o << "<other>"; break;
+ case type::eos: o << "<end of file>"; break;
+ }
+
+ return o;
+ }
+ }
+}
diff --git a/libbuild2/cc/lexer.hxx b/libbuild2/cc/lexer.hxx
new file mode 100644
index 0000000..cb2b3a5
--- /dev/null
+++ b/libbuild2/cc/lexer.hxx
@@ -0,0 +1,190 @@
+// file : libbuild2/cc/lexer.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_LEXER_HXX
+#define LIBBUILD2_CC_LEXER_HXX
+
+#include <libbutl/sha256.mxx>
+#include <libbutl/char-scanner.mxx>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/diagnostics.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Preprocessor-level tokenization of C/C++ source. In other words, the
+ // sequence of tokens returned is similar to what a real C/C++ compiler
+ // would see from its preprocessor.
+ //
+ // The input is a (partially-)preprocessed translation unit that may still
+ // contain comments, line continuations, and preprocessor directives such
+    // contain comments, line continuations, and preprocessor directives
+    // such as #line and #pragma, but not #include (which is diagnosed).
+    // Currently, all preprocessor directives except #line are ignored and
+    // no values are saved from literals. The #line directive (and its
+    // shorthand notation) is recognized to provide the logical token
+    // location.
+    // While at it we also calculate the checksum of the input ignoring
+    // comments, whitespace, etc. This is used to detect changes that do
+    // not alter the resulting token stream.
+ //
+ enum class token_type
+ {
+ // NOTE: remember to update operator<<() if changing anything here!
+ //
+ eos,
+
+ dot, // .
+ semi, // ;
+ less, // <
+ greater, // >
+ lcbrace, // {
+ rcbrace, // }
+
+ punctuation, // Other punctuation.
+
+ identifier,
+
+ number, // Number literal.
+ character, // Char literal.
+ string, // String literal.
+
+ other // Other token.
+ };
+
+ struct token
+ {
+ token_type type = token_type::eos;
+ string value;
+
+ // Logical position.
+ //
+ path file;
+ uint64_t line = 0;
+ uint64_t column = 0;
+
+ // Physical position in the stream, currently only for identifiers.
+ //
+ uint64_t position = 0;
+ };
+
+ // Output the token value in a format suitable for diagnostics.
+ //
+ ostream&
+ operator<< (ostream&, const token&);
+
+ class lexer: protected butl::char_scanner
+ {
+ public:
+ lexer (ifdstream& is, const path& name)
+ : char_scanner (is, false),
+ name_ (name),
+ fail ("error", &name_),
+ log_file_ (name) {}
+
+ const path&
+ name () const {return name_;}
+
+ string
+ checksum () const {return cs_.string ();}
+
+ // Note that it is ok to call next() again after getting eos.
+ //
+ token
+ next ()
+ {
+ token t;
+ next (t, skip_spaces (), true);
+ return t;
+ }
+
+ // As above but reuse the token to avoid a (potential) memory
+ // allocation. Typical usage:
+ //
+ // for (token t; l.next (t) != token_type::eos; )
+ // ...
+ //
+ token_type
+ next (token& t)
+ {
+ next (t, skip_spaces (), true);
+ return t.type;
+ }
+
+ private:
+ void
+ next (token&, xchar, bool);
+
+ void
+ number_literal (token&, xchar);
+
+ void
+ char_literal (token&, xchar);
+
+ void
+ string_literal (token&, xchar);
+
+ void
+ raw_string_literal (token&, xchar);
+
+ void
+ literal_suffix (xchar);
+
+ void
+ line_directive (token&, xchar);
+
+ xchar
+ skip_spaces (bool newline = true);
+
+ // The char_scanner adaptation for newline escape sequence processing.
+      // It is enabled by default and only disabled in raw string literals.
+ //
+ private:
+ using base = char_scanner;
+
+ xchar
+ peek (bool escape = true);
+
+ xchar
+ get (bool escape = true);
+
+ void
+ get (const xchar& peeked);
+
+ // Hashing versions.
+ //
+ xchar
+ geth (bool escape = true);
+
+ void
+ geth (const xchar& peeked);
+
+ private:
+ const path name_;
+ const fail_mark fail;
+
+ // Logical file and line as set by the #line directives. Note that the
+      // lexer diagnostics still use the physical file/lines.
+ //
+ path log_file_;
+ optional<uint64_t> log_line_;
+
+ string tmp_file_;
+ sha256 cs_;
+ };
+
+ // Diagnostics plumbing.
+ //
+ inline location
+ get_location (const token& t, const void* = nullptr)
+ {
+ return location (&t.file, t.line, t.column);
+ }
+ }
+}
+
+#endif // LIBBUILD2_CC_LEXER_HXX
diff --git a/libbuild2/cc/lexer.test.cxx b/libbuild2/cc/lexer.test.cxx
new file mode 100644
index 0000000..0aeadba
--- /dev/null
+++ b/libbuild2/cc/lexer.test.cxx
@@ -0,0 +1,80 @@
+// file : libbuild2/cc/lexer.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Usage: argv[0] [-l] [<file>]
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ bool loc (false);
+ const char* file (nullptr);
+
+ for (int i (1); i != argc; ++i)
+ {
+ string a (argv[i]);
+
+ if (a == "-l")
+ loc = true;
+ else
+ {
+ file = argv[i];
+ break;
+ }
+ }
+
+ try
+ {
+ ifdstream is;
+ if (file != nullptr)
+ is.open (file);
+ else
+ {
+ file = "stdin";
+ is.open (fddup (stdin_fd ()));
+ }
+
+ lexer l (is, path (file));
+
+ // No use printing eos since we will either get it or loop forever.
+ //
+ for (token t; l.next (t) != token_type::eos; )
+ {
+ cout << t;
+
+ if (loc)
+ cout << ' ' << t.file << ':' << t.line << ':' << t.column;
+
+ cout << endl;
+ }
+ }
+ catch (const failed&)
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::cc::main (argc, argv);
+}
diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx
new file mode 100644
index 0000000..110a992
--- /dev/null
+++ b/libbuild2/cc/link-rule.cxx
@@ -0,0 +1,3043 @@
+// file : libbuild2/cc/link-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/link-rule.hxx>
+
+#include <map>
+#include <cstdlib> // exit()
+#include <cstring> // strlen()
+
+#include <libbutl/filesystem.mxx> // file_exists()
+
+#include <libbuild2/depdb.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/target.hxx> // c, pc*
+#include <libbuild2/cc/utility.hxx>
+
+using std::map;
+using std::exit;
+
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ link_rule::
+ link_rule (data&& d)
+ : common (move (d)),
+ rule_id (string (x) += ".link 1")
+ {
+ static_assert (sizeof (match_data) <= target::data_size,
+ "insufficient space");
+ }
+
+ link_rule::match_result link_rule::
+ match (action a,
+ const target& t,
+ const target* g,
+ otype ot,
+ bool library) const
+ {
+ // NOTE: the target may be a group (see utility library logic below).
+
+ match_result r;
+
+ // Scan prerequisites and see if we can work with what we've got. Note
+ // that X could be C (as in language). We handle this by always checking
+ // for X first.
+ //
+ // Note also that we treat bmi{} as obj{}. @@ MODHDR hbmi{}?
+ //
+ for (prerequisite_member p:
+ prerequisite_members (a, t, group_prerequisites (t, g)))
+ {
+ // If excluded or ad hoc, then don't factor it into our tests.
+ //
+ if (include (a, t, p) != include_type::normal)
+ continue;
+
+ if (p.is_a (x_src) ||
+ (x_mod != nullptr && p.is_a (*x_mod)) ||
+ // Header-only X library (or library with C source and X header).
+ (library && x_header (p, false /* c_hdr */)))
+ {
+ r.seen_x = r.seen_x || true;
+ }
+ else if (p.is_a<c> () ||
+ // Header-only C library.
+ (library && p.is_a<h> ()))
+ {
+ r.seen_c = r.seen_c || true;
+ }
+ else if (p.is_a<obj> () || p.is_a<bmi> ())
+ {
+ r.seen_obj = r.seen_obj || true;
+ }
+ else if (p.is_a<obje> () || p.is_a<bmie> ())
+ {
+ // We can make these "no-match" if/when there is a valid use case.
+ //
+ if (ot != otype::e)
+ fail << p.type ().name << "{} as prerequisite of " << t;
+
+ r.seen_obj = r.seen_obj || true;
+ }
+ else if (p.is_a<obja> () || p.is_a<bmia> ())
+ {
+ if (ot != otype::a)
+ fail << p.type ().name << "{} as prerequisite of " << t;
+
+ r.seen_obj = r.seen_obj || true;
+ }
+ else if (p.is_a<objs> () || p.is_a<bmis> ())
+ {
+ if (ot != otype::s)
+ fail << p.type ().name << "{} as prerequisite of " << t;
+
+ r.seen_obj = r.seen_obj || true;
+ }
+ else if (p.is_a<libul> () || p.is_a<libux> ())
+ {
+          // For a utility library we look at its prerequisites, recursively.
+ // Since these checks are not exactly light-weight, only do them if
+ // we haven't already seen any X prerequisites.
+ //
+ if (!r.seen_x)
+ {
+ // This is a bit iffy: in our model a rule can only search a
+ // target's prerequisites if it matches. But we don't yet know
+ // whether we match. However, it seems correct to assume that any
+ // rule-specific search will always resolve to an existing target
+ // if there is one. So perhaps it's time to relax this restriction
+ // a little? Note that this fits particularly well with what we
+ // doing here since if there is no existing target, then there can
+          // are doing here since if there is no existing target, then there can
+ //
+          // Note, however, that we cannot link up a prerequisite target
+          // member to its group since we are not matching this target. As a
+          // result we have to do all the steps except for setting t.group
+ // and pass both member and group (we also cannot query t.group
+ // since it's racy).
+ //
+ const target* pg (nullptr);
+ const target* pt (p.search_existing ());
+
+ if (p.is_a<libul> ())
+ {
+ if (pt != nullptr)
+ {
+ // If this is a group then try to pick (again, if exists) a
+ // suitable member. If it doesn't exist, then we will only be
+ // considering the group's prerequisites.
+ //
+ if (const target* pm =
+ link_member (pt->as<libul> (),
+ a,
+ linfo {ot, lorder::a /* unused */},
+ true /* existing */))
+ {
+ pg = pt;
+ pt = pm;
+ }
+ }
+ else
+ {
+ // It's possible we have no group but have a member so try
+ // that.
+ //
+ const target_type& tt (ot == otype::a ? libua::static_type :
+ ot == otype::s ? libus::static_type :
+ libue::static_type);
+
+ // We know this prerequisite member is a prerequisite since
+ // otherwise the above search would have returned the member
+ // target.
+ //
+ pt = search_existing (t.ctx, p.prerequisite.key (tt));
+ }
+ }
+ else if (!p.is_a<libue> ())
+ {
+ // See if we also/instead have a group.
+ //
+ pg = search_existing (t.ctx,
+ p.prerequisite.key (libul::static_type));
+
+ if (pt == nullptr)
+ swap (pt, pg);
+ }
+
+ if (pt != nullptr)
+ {
+ // If we are matching a target, use the original output type
+ // since that would be the member that we pick.
+ //
+ otype pot (pt->is_a<libul> () ? ot : link_type (*pt).type);
+ match_result pr (match (a, *pt, pg, pot, true /* lib */));
+
+ // Do we need to propagate any other seen_* values? Hm, that
+ // would in fact match with the "see-through" semantics of
+ // utility libraries we have in other places.
+ //
+ r.seen_x = pr.seen_x;
+ }
+ else
+ r.seen_lib = r.seen_lib || true; // Consider as just a library.
+ }
+ }
+ else if (p.is_a<lib> () ||
+ p.is_a<liba> () ||
+ p.is_a<libs> ())
+ {
+ r.seen_lib = r.seen_lib || true;
+ }
+ // Some other c-common header/source (say C++ in a C rule) other than
+        // a C header (we assume everyone can handle that).
+ //
+ else if (p.is_a<cc> () && !(x_header (p, true /* c_hdr */)))
+ {
+ r.seen_cc = true;
+ break;
+ }
+ }
+
+ return r;
+ }
+
+ bool link_rule::
+ match (action a, target& t, const string& hint) const
+ {
+ // NOTE: may be called multiple times and for both inner and outer
+ // operations (see the install rules).
+
+ tracer trace (x, "link_rule::match");
+
+ ltype lt (link_type (t));
+
+ // If this is a group member library, link-up to our group (this is the
+ // target group protocol which means this can be done whether we match
+ // or not).
+ //
+ // If we are called for the outer operation (see install rules), then
+ // use resolve_group() to delegate to inner.
+ //
+ if (lt.member_library ())
+ {
+ if (a.outer ())
+ resolve_group (a, t);
+ else if (t.group == nullptr)
+ t.group = &search (t,
+ lt.utility ? libul::static_type : lib::static_type,
+ t.dir, t.out, t.name);
+ }
+
+ match_result r (match (a, t, t.group, lt.type, lt.library ()));
+
+ // If this is some other c-common header/source (say C++ in a C rule),
+ // then we shouldn't try to handle that (it may need to be compiled,
+ // etc).
+ //
+ if (r.seen_cc)
+ {
+ l4 ([&]{trace << "non-" << x_lang << " prerequisite "
+ << "for target " << t;});
+ return false;
+ }
+
+ if (!(r.seen_x || r.seen_c || r.seen_obj || r.seen_lib))
+ {
+ l4 ([&]{trace << "no " << x_lang << ", C, or obj/lib prerequisite "
+ << "for target " << t;});
+ return false;
+ }
+
+ // We will only chain a C source if there is also an X source or we were
+ // explicitly told to.
+ //
+ if (r.seen_c && !r.seen_x && hint < x)
+ {
+ l4 ([&]{trace << "C prerequisite without " << x_lang << " or hint "
+ << "for target " << t;});
+ return false;
+ }
+
+ return true;
+ }
+
+ auto link_rule::
+ derive_libs_paths (file& t,
+ const char* pfx,
+ const char* sfx) const -> libs_paths
+ {
+ bool win (tclass == "windows");
+
+ // Get default prefix and extension.
+ //
+ const char* ext (nullptr);
+ if (win)
+ {
+ if (tsys == "mingw32")
+ {
+ if (pfx == nullptr)
+ pfx = "lib";
+ }
+
+ ext = "dll";
+ }
+ else
+ {
+ if (pfx == nullptr)
+ pfx = "lib";
+
+ if (tclass == "macos")
+ ext = "dylib";
+ else
+ ext = "so";
+ }
+
+ // First sort out which extension we are using.
+ //
+ const string& e (t.derive_extension (ext));
+
+ auto append_ext = [&e] (path& p)
+ {
+ if (!e.empty ())
+ {
+ p += '.';
+ p += e;
+ }
+ };
+
+ // See if we have the load suffix.
+ //
+ const string& ls (cast_empty<string> (t["bin.lib.load_suffix"]));
+
+ // Figure out the version.
+ //
+ string ver;
+      using version_map = map<string, string>;
+      if (const version_map* m = cast_null<version_map> (t["bin.lib.version"]))
+ {
+ // First look for the target system.
+ //
+ auto i (m->find (tsys));
+
+ // Then look for the target class.
+ //
+ if (i == m->end ())
+ i = m->find (tclass);
+
+        // Then look for the wildcard. Since it is highly unlikely one can have
+ // a version that will work across platforms, this is only useful to
+ // say "all others -- no version".
+ //
+ if (i == m->end ())
+ i = m->find ("*");
+
+ // At this stage the only platform-specific version we support is the
+ // "no version" override.
+ //
+ if (i != m->end () && !i->second.empty ())
+ fail << i->first << "-specific bin.lib.version not yet supported";
+
+ // Finally look for the platform-independent version.
+ //
+ if (i == m->end ())
+ i = m->find ("");
+
+ // If we didn't find anything, fail. If the bin.lib.version was
+ // specified, then it should explicitly handle all the targets.
+ //
+ if (i == m->end ())
+ fail << "no version for " << ctgt << " in bin.lib.version" <<
+          info << "consider adding " << tsys << "@<ver> or " << tclass
+ << "@<ver>";
+
+ ver = i->second;
+ }
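+
+      // To illustrate, a buildfile might specify a platform-independent
+      // version along these lines (hypothetical example):
+      //
+      //   lib{hello}: bin.lib.version = @-1.2 # produces libhello-1.2.so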
+
+ // Now determine the paths.
+ //
+ path lk, ld, so, in;
+
+ // We start with the basic path.
+ //
+ path b (t.dir);
+
+ if (pfx != nullptr && pfx[0] != '\0')
+ {
+ b /= pfx;
+ b += t.name;
+ }
+ else
+ b /= t.name;
+
+ if (sfx != nullptr && sfx[0] != '\0')
+ b += sfx;
+
+ // Clean pattern.
+ //
+ path cp (b);
+ cp += "?*"; // Don't match empty (like the libfoo.so symlink).
+ append_ext (cp);
+
+ // On Windows the real path is to libs{} and the link path is empty.
+ // Note that we still need to derive the import library path.
+ //
+ if (win)
+ {
+ // Usually on Windows with MSVC the import library is called the same
+ // as the DLL but with the .lib extension. Which means it clashes with
+ // the static library. Instead of decorating the static library name
+ // with ugly suffixes (as is customary), let's use the MinGW approach
+ // (one must admit it's quite elegant) and call it .dll.lib.
+ //
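+        // So, for example, hello.dll gets the import library
+        // hello.dll.lib (or libhello.dll.a for MinGW) while the static
+        // library keeps the plain hello.lib name.
+        //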
+ libi& i (*find_adhoc_member<libi> (t));
+
+ if (i.path ().empty ())
+ {
+ path ip (b);
+ append_ext (ip);
+ i.derive_path (move (ip), tsys == "mingw32" ? "a" : "lib");
+ }
+ }
+ // We will only need the link name if the following name differs.
+ //
+ else if (!ver.empty () || !ls.empty ())
+ {
+ lk = b;
+ append_ext (lk);
+ }
+
+ // See if we have the load suffix.
+ //
+ if (!ls.empty ())
+ {
+ b += ls;
+
+ // We will only need the load name if the following name differs.
+ //
+ if (!ver.empty ())
+ {
+ ld = b;
+ append_ext (ld);
+ }
+ }
+
+ if (!ver.empty ())
+ b += ver;
+
+ const path& re (t.derive_path (move (b)));
+
+ return libs_paths {
+ move (lk), move (ld), move (so), move (in), &re, move (cp)};
+ }
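+
+    // To illustrate the above, with a hypothetical version "-1.2" and load
+    // suffix "-load" on Linux we would end up with:
+    //
+    //   link: libhello.so
+    //   load: libhello-load.so
+    //   real: libhello-load-1.2.so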
+
+ // Look for binary-full utility library recursively until we hit a
+    // non-utility "barrier".
+ //
+ static bool
+ find_binfull (action a, const target& t, linfo li)
+ {
+ for (const target* pt: t.prerequisite_targets[a])
+ {
+ if (pt == nullptr || unmark (pt) != 0) // Called after pass 1 below.
+ continue;
+
+ const file* pf;
+
+ // If this is the libu*{} group, then pick the appropriate member.
+ //
+ if (const libul* ul = pt->is_a<libul> ())
+ {
+ pf = &link_member (*ul, a, li)->as<file> ();
+ }
+ else if ((pf = pt->is_a<libue> ()) ||
+ (pf = pt->is_a<libus> ()) ||
+ (pf = pt->is_a<libua> ()))
+ ;
+ else
+ continue;
+
+ if (!pf->path ().empty () || find_binfull (a, *pf, li))
+ return true;
+ }
+
+ return false;
+ };
+
+ recipe link_rule::
+ apply (action a, target& xt) const
+ {
+ tracer trace (x, "link_rule::apply");
+
+ file& t (xt.as<file> ());
+ context& ctx (t.ctx);
+
+ // Note that for_install is signalled by install_rule and therefore
+ // can only be relied upon during execute.
+ //
+ match_data& md (t.data (match_data ()));
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ ltype lt (link_type (t));
+ otype ot (lt.type);
+ linfo li (link_info (bs, ot));
+
+ // Set the library type (C, C++, etc) as rule-specific variable.
+ //
+ if (lt.library ())
+ t.state[a].assign (c_type) = string (x);
+
+ bool binless (lt.library ()); // Binary-less until proven otherwise.
+
+ // Inject dependency on the output directory. Note that we do it even
+ // for binless libraries since there could be other output (e.g., .pc
+ // files).
+ //
+ inject_fsdir (a, t);
+
+ // Process prerequisites, pass 1: search and match prerequisite
+ // libraries, search obj/bmi{} targets, and search targets we do rule
+ // chaining for.
+ //
+ // Also clear the binless flag if we see any source or object files.
+ // Note that if we don't see any this still doesn't mean the library is
+ // binless since it can depend on a binfull utility library. This we
+ // check below, after matching the libraries.
+ //
+ // We do libraries first in order to indicate that we will execute these
+ // targets before matching any of the obj/bmi{}. This makes it safe for
+ // compile::apply() to unmatch them and therefore not to hinder
+ // parallelism.
+ //
+ // We also create obj/bmi{} chain targets because we need to add
+ // (similar to lib{}) all the bmi{} as prerequisites to all the other
+ // obj/bmi{} that we are creating. Note that this doesn't mean that the
+ // compile rule will actually treat them all as prerequisite targets.
+ // Rather, they are used to resolve actual module imports. We don't
+ // really have to search obj{} targets here but it's the same code so we
+ // do it here to avoid duplication.
+ //
+ // Also, when cleaning, we ignore prerequisites that are not in the same
+ // or a subdirectory of our project root. Except for libraries: if we
+ // ignore them, then they won't be added to synthesized dependencies and
+ // this will break things if we do, say, update after clean in the same
+ // invocation. So for libraries we ignore them later, on pass 3.
+ //
+ optional<dir_paths> usr_lib_dirs; // Extract lazily.
+ compile_target_types tts (compile_types (ot));
+
+ auto skip = [&a, &rs] (const target* pt) -> bool
+ {
+ return a.operation () == clean_id && !pt->dir.sub (rs.out_path ());
+ };
+
+ auto& pts (t.prerequisite_targets[a]);
+ size_t start (pts.size ());
+
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ include_type pi (include (a, t, p));
+
+ // We pre-allocate a NULL slot for each (potential; see clean)
+ // prerequisite target.
+ //
+ pts.push_back (prerequisite_target (nullptr, pi));
+ const target*& pt (pts.back ());
+
+ if (pi != include_type::normal) // Skip excluded and ad hoc.
+ continue;
+
+ // Mark:
+ // 0 - lib
+ // 1 - src
+ // 2 - mod
+ // 3 - obj/bmi and also lib not to be cleaned
+ //
+ uint8_t m (0);
+
+ bool mod (x_mod != nullptr && p.is_a (*x_mod));
+
+ if (mod || p.is_a (x_src) || p.is_a<c> ())
+ {
+ binless = binless && false;
+
+ // Rule chaining, part 1.
+ //
+
+ // Which scope shall we use to resolve the root? Unlikely, but
+ // possible, the prerequisite is from a different project
+ // altogether. So we are going to use the target's project.
+ //
+
+ // If the source came from the lib{} group, then create the obj{}
+ // group and add the source as a prerequisite of the obj{} group,
+ // not the obj*{} member. This way we only need one prerequisite
+ // for, say, both liba{} and libs{}. The same goes for bmi{}.
+ //
+ bool group (!p.prerequisite.belongs (t)); // Group's prerequisite.
+
+ const target_type& rtt (mod
+ ? (group ? bmi::static_type : tts.bmi)
+ : (group ? obj::static_type : tts.obj));
+
+ const prerequisite_key& cp (p.key ()); // Source key.
+
+ // Come up with the obj*/bmi*{} target. The source prerequisite
+ // directory can be relative (to the scope) or absolute. If it is
+ // relative, then use it as is. If absolute, then translate it to
+ // the corresponding directory under out_root. While the source
+ // directory is most likely under src_root, it is also possible it
+ // is under out_root (e.g., generated source).
+ //
+ dir_path d;
+ {
+ const dir_path& cpd (*cp.tk.dir);
+
+ if (cpd.relative () || cpd.sub (rs.out_path ()))
+ d = cpd;
+ else
+ {
+ if (!cpd.sub (rs.src_path ()))
+ fail << "out of project prerequisite " << cp <<
+ info << "specify corresponding " << rtt.name << "{} "
+ << "target explicitly";
+
+ d = rs.out_path () / cpd.leaf (rs.src_path ());
+ }
+ }
+
+ // obj/bmi{} is always in the out tree. Note that currently it could
+ // be the group -- we will pick a member in part 2 below.
+ //
+ pt = &search (t, rtt, d, dir_path (), *cp.tk.name, nullptr, cp.scope);
+
+ // If we shouldn't clean obj{}, then it is fair to assume we
+ // shouldn't clean the source either (generated source will be in
+ // the same directory as obj{} and if not, well, go find yourself
+ // another build system ;-)).
+ //
+ if (skip (pt))
+ {
+ pt = nullptr;
+ continue;
+ }
+
+ m = mod ? 2 : 1;
+ }
+ else if (p.is_a<libx> () ||
+ p.is_a<liba> () ||
+ p.is_a<libs> () ||
+ p.is_a<libux> ())
+ {
+ // Handle imported libraries.
+ //
+ // Note that since the search is rule-specific, we don't cache the
+ // target in the prerequisite.
+ //
+ if (p.proj ())
+ pt = search_library (
+ a, sys_lib_dirs, usr_lib_dirs, p.prerequisite);
+
+ // The rest is the same basic logic as in search_and_match().
+ //
+ if (pt == nullptr)
+ pt = &p.search (t);
+
+ if (skip (pt))
+ m = 3; // Mark so it is not matched.
+
+ // If this is the lib{}/libu{} group, then pick the appropriate
+ // member.
+ //
+ if (const libx* l = pt->is_a<libx> ())
+ pt = link_member (*l, a, li);
+ }
+ else
+ {
+ // If this is the obj{} or bmi{} target group, then pick the
+ // appropriate member.
+ //
+ if (p.is_a<obj> ()) pt = &search (t, tts.obj, p.key ());
+ else if (p.is_a<bmi> ()) pt = &search (t, tts.bmi, p.key ());
+ //
+ // Windows module definition (.def). For other platforms (and for
+ // static libraries) treat it as an ordinary prerequisite.
+ //
+ else if (p.is_a<def> () && tclass == "windows" && ot != otype::a)
+ {
+ pt = &p.search (t);
+ }
+ //
+ // Something else. This could be something unrelated that the user
+ // tacked on (e.g., a doc{}). Or it could be some ad hoc input to
+ // the linker (say a linker script or some such).
+ //
+ else
+ {
+ if (!p.is_a<objx> () && !p.is_a<bmix> ())
+ {
+ // @@ Temporary hack until we get the default outer operation
+ // for update. This allows operations like test and install to
+ // skip such tacked on stuff.
+ //
+ // Note that ad hoc inputs have to be explicitly marked with the
+ // include=adhoc prerequisite-specific variable.
+ //
+ if (ctx.current_outer_oif != nullptr)
+ continue;
+ }
+
+ pt = &p.search (t);
+ }
+
+ if (skip (pt))
+ {
+ pt = nullptr;
+ continue;
+ }
+
+ // @@ MODHDR: hbmix{} has no objx{}
+ //
+ binless = binless && !(pt->is_a<objx> () || pt->is_a<bmix> ());
+
+ m = 3;
+ }
+
+ mark (pt, m);
+ }
+
+ // Match lib{} (the only unmarked) in parallel and wait for completion.
+ //
+ match_members (a, t, pts, start);
+
+ // Check if we have any binfull utility libraries.
+ //
+ binless = binless && !find_binfull (a, t, li);
+
+ // Now that we know for sure whether we are binless, derive file name(s)
+ // and add ad hoc group members. Note that for binless we still need the
+      // .pc member (whose name depends on the library prefix) so we take care
+ // to not derive the path for the library target itself inside.
+ //
+ {
+ const char* e (nullptr); // Extension.
+ const char* p (nullptr); // Prefix.
+ const char* s (nullptr); // Suffix.
+
+ if (lt.utility)
+ {
+ // These are all static libraries with names indicating the kind of
+ // object files they contain (similar to how we name object files
+ // themselves). We add the 'u' extension to avoid clashes with
+ // real libraries/import stubs.
+ //
+ // libue libhello.u.a hello.exe.u.lib
+ // libua libhello.a.u.a hello.lib.u.lib
+ // libus libhello.so.u.a hello.dll.u.lib hello.dylib.u.lib
+ //
+ // Note that we currently don't add bin.lib.{prefix,suffix} since
+ // these are not installed.
+ //
+ if (tsys == "win32-msvc")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe.u.lib"; break;
+ case otype::a: e = "lib.u.lib"; break;
+ case otype::s: e = "dll.u.lib"; break;
+ }
+ }
+ else
+ {
+ p = "lib";
+
+ if (tsys == "mingw32")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "exe.u.a"; break;
+ case otype::a: e = "a.u.a"; break;
+ case otype::s: e = "dll.u.a"; break;
+ }
+
+ }
+ else if (tsys == "darwin")
+ {
+ switch (ot)
+ {
+ case otype::e: e = "u.a"; break;
+ case otype::a: e = "a.u.a"; break;
+ case otype::s: e = "dylib.u.a"; break;
+ }
+ }
+ else
+ {
+ switch (ot)
+ {
+ case otype::e: e = "u.a"; break;
+ case otype::a: e = "a.u.a"; break;
+ case otype::s: e = "so.u.a"; break;
+ }
+ }
+ }
+
+ if (binless)
+ t.path (empty_path);
+ else
+ t.derive_path (e, p, s);
+ }
+ else
+ {
+ if (auto l = t[ot == otype::e ? "bin.exe.prefix" : "bin.lib.prefix"])
+ p = cast<string> (l).c_str ();
+ if (auto l = t[ot == otype::e ? "bin.exe.suffix" : "bin.lib.suffix"])
+ s = cast<string> (l).c_str ();
+
+ switch (ot)
+ {
+ case otype::e:
+ {
+ if (tclass == "windows")
+ e = "exe";
+ else
+ e = "";
+
+ t.derive_path (e, p, s);
+ break;
+ }
+ case otype::a:
+ {
+ if (tsys == "win32-msvc")
+ e = "lib";
+ else
+ {
+ if (p == nullptr) p = "lib";
+ e = "a";
+ }
+
+ if (binless)
+ t.path (empty_path);
+ else
+ t.derive_path (e, p, s);
+
+ break;
+ }
+ case otype::s:
+ {
+ if (binless)
+ t.path (empty_path);
+ else
+ {
+ // On Windows libs{} is an ad hoc group. The libs{} itself is
+ // the DLL and we add libi{} import library as its member.
+ //
+ if (tclass == "windows")
+ {
+ e = "dll";
+ add_adhoc_member<libi> (t);
+ }
+
+ md.libs_paths = derive_libs_paths (t, p, s);
+ }
+
+ break;
+ }
+ }
+
+ // Add VC's .pdb. Note that we are looking for the link.exe /DEBUG
+ // option.
+ //
+ if (!binless && ot != otype::a && tsys == "win32-msvc")
+ {
+ if (find_option ("/DEBUG", t, c_loptions, true) ||
+ find_option ("/DEBUG", t, x_loptions, true))
+ {
+ const target_type& tt (*bs.find_target_type ("pdb"));
+
+ // We call the target foo.{exe,dll}.pdb rather than just foo.pdb
+ // because we can have both foo.exe and foo.dll in the same
+ // directory.
+ //
+ file& pdb (add_adhoc_member<file> (t, tt, e));
+
+ // Note that the path is derived from the exe/dll path (so it
+ // will include the version in case of a dll).
+ //
+ if (pdb.path ().empty ())
+ pdb.derive_path (t.path (), "pdb");
+ }
+ }
+
+ // Add pkg-config's .pc file.
+ //
+ // Note that we do it regardless of whether we are installing or not
+ // for two reasons. Firstly, it is not easy to detect this situation
+ // here since the for_install hasn't yet been communicated by
+ // install_rule. Secondly, always having this member takes care of
+ // cleanup automagically. The actual generation happens in
+ // perform_update() below.
+ //
+ if (ot != otype::e)
+ {
+ file& pc (add_adhoc_member<file> (t,
+ (ot == otype::a
+ ? pca::static_type
+ : pcs::static_type)));
+
+ // Note that here we always use the lib name prefix, even on
+ // Windows with VC. The reason is the user needs a consistent name
+ // across platforms by which they can refer to the library. This
+ // is also the reason why we use the .static and .shared second-
+          // level extensions rather than .a/.lib and .so/.dylib/.dll.
+ //
+ if (pc.path ().empty ())
+ pc.derive_path (nullptr, (p == nullptr ? "lib" : p), s);
+ }
+
+ // Add the Windows rpath emulating assembly directory as fsdir{}.
+ //
+ // Currently this is used in the backlinking logic and in the future
+ // could also be used for clean (though there we may want to clean
+ // old assemblies).
+ //
+ if (ot == otype::e && tclass == "windows")
+ {
+ // Note that here we cannot determine whether we will actually
+ // need one (for_install, library timestamps are not available at
+ // this point to call windows_rpath_timestamp()). So we may add
+ // the ad hoc target but actually not produce the assembly. So
+          // whoever relies on this must check if the directory actually
+ // exists (windows_rpath_assembly() does take care to clean it up
+ // if not used).
+ //
+#ifdef _WIN32
+ target& dir =
+#endif
+ add_adhoc_member (t,
+ fsdir::static_type,
+ path_cast<dir_path> (t.path () + ".dlls"),
+ t.out,
+ string () /* name */);
+
+ // By default our backlinking logic will try to symlink the
+ // directory and it can even be done on Windows using junctions.
+ // The problem is the Windows DLL assembly "logic" refuses to
+ // recognize a junction as a valid assembly for some reason. So we
+ // are going to resort to copy-link (i.e., a real directory with a
+ // bunch of links).
+ //
+ // Interestingly, the directory symlink works just fine under
+ // Wine. So we only resort to copy-link'ing if we are running on
+ // Windows.
+ //
+#ifdef _WIN32
+ dir.state[a].assign (ctx.var_backlink) = "copy";
+#endif
+ }
+ }
+ }
+
+ // Process prerequisites, pass 2: finish rule chaining but don't start
+ // matching anything yet since that may trigger recursive matching of
+ // bmi{} targets we haven't completed yet. Hairy, I know.
+ //
+
+ // Parallel prerequisites/prerequisite_targets loop.
+ //
+ size_t i (start);
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target*& pt (pts[i].target);
+ uintptr_t& pd (pts[i++].data);
+
+ if (pt == nullptr)
+ continue;
+
+ // New mark:
+ // 1 - completion
+ // 2 - verification
+ //
+ uint8_t m (unmark (pt));
+
+ if (m == 3) // obj/bmi or lib not to be cleaned
+ {
+ m = 1; // Just completion.
+
+ // Note that if this is a library not to be cleaned, we keep it
+ // marked for completion (see the next phase).
+ }
+ else if (m == 1 || m == 2) // Source/module chain.
+ {
+ bool mod (m == 2);
+
+ m = 1;
+
+ const target& rt (*pt);
+ bool group (!p.prerequisite.belongs (t)); // Group's prerequisite.
+
+        // If we have created an obj/bmi{} target group, pick one of its
+ // members; the rest would be primarily concerned with it.
+ //
+ pt =
+ group
+ ? &search (t, (mod ? tts.bmi : tts.obj), rt.dir, rt.out, rt.name)
+ : &rt;
+
+ const target_type& rtt (mod
+ ? (group ? bmi::static_type : tts.bmi)
+ : (group ? obj::static_type : tts.obj));
+
+ // If this obj*{} already has prerequisites, then verify they are
+ // "compatible" with what we are doing here. Otherwise, synthesize
+ // the dependency. Note that we may also end up synthesizing with
+ // someone beating us to it. In this case also verify.
+ //
+ bool verify (true);
+
+ // Note that we cannot use has_group_prerequisites() since the
+ // target is not yet matched. So we check the group directly. Of
+ // course, all of this is racy (see below).
+ //
+ if (!pt->has_prerequisites () &&
+ (!group || !rt.has_prerequisites ()))
+ {
+ prerequisites ps {p.as_prerequisite ()}; // Source.
+
+ // Add our lib*{} (see the export.* machinery for details) and
+ // bmi*{} (both original and chained; see module search logic)
+ // prerequisites.
+ //
+          // Note that we don't resolve lib{} to liba{}/libs{} here,
+          // instead leaving it to whoever (e.g., the compile rule) will
+ // be needing *.export.*. One reason for doing it there is that
+ // the object target might be specified explicitly by the user
+ // in which case they will have to specify the set of lib{}
+ // prerequisites and it's much cleaner to do as lib{} rather
+ // than liba{}/libs{}.
+ //
+ // Initially, we were only adding imported libraries, but there
+ // is a problem with this approach: the non-imported library
+ // might depend on the imported one(s) which we will never "see"
+ // unless we start with this library.
+ //
+ // Note: have similar logic in make_module_sidebuild().
+ //
+ size_t j (start);
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target* pt (pts[j++]);
+
+ if (pt == nullptr) // Note: ad hoc is taken care of.
+ continue;
+
+ // NOTE: pt may be marked (even for a library -- see clean
+          // above). So watch out for a faux pas in this careful dance.
+ //
+ if (p.is_a<libx> () ||
+ p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> () ||
+ p.is_a<bmi> () || p.is_a (tts.bmi))
+ {
+ ps.push_back (p.as_prerequisite ());
+ }
+ else if (x_mod != nullptr && p.is_a (*x_mod)) // Chained module.
+ {
+ // Searched during pass 1 but can be NULL or marked.
+ //
+ if (pt != nullptr && i != j) // Don't add self (note: both +1).
+ {
+ // This is sticky: pt might have come before us and if it
+ // was a group, then we would have picked up a member. So
+ // here we may have to "unpick" it.
+ //
+ bool group (j < i && !p.prerequisite.belongs (t));
+
+ unmark (pt);
+ ps.push_back (prerequisite (group ? *pt->group : *pt));
+ }
+ }
+ }
+
+ // Note: adding to the group, not the member.
+ //
+ verify = !rt.prerequisites (move (ps));
+
+ // Recheck that the target still has no prerequisites. If that's
+ // no longer the case, then verify the result is compatible with
+ // what we need.
+ //
+ // Note that there are scenarios where we will not detect this or
+ // the detection will be racy. For example, thread 1 adds the
+ // prerequisite to the group and then thread 2, which doesn't use
+ // the group, adds the prerequisite to the member. This could be
+ // triggered by something like this (undetectable):
+ //
+ // lib{foo}: cxx{foo}
+ // exe{foo}: cxx{foo}
+ //
+ // Or this (detection is racy):
+ //
+ // lib{bar}: cxx{foo}
+ // liba{baz}: cxx{foo}
+ //
+ // The current feeling, however, is that in non-contrived cases
+ // (i.e., the source file is the same) this should be harmless.
+ //
+ if (!verify && group)
+ verify = pt->has_prerequisites ();
+ }
+
+ if (verify)
+ {
+ // This gets a bit tricky. We need to make sure the source files
+ // are the same which we can only do by comparing the targets to
+ // which they resolve. But we cannot search ot's prerequisites --
+          // which they resolve. But we cannot search pt's prerequisites --
+ // works out, then our next step is to match the obj*{} target. If
+ // things don't work out, then we fail, in which case searching
+ // and matching speculatively doesn't really hurt. So we start the
+ // async match here and finish this verification in the "harvest"
+ // loop below.
+ //
+ resolve_group (a, *pt); // Not matched yet so resolve group.
+
+ bool src (false);
+ for (prerequisite_member p1: group_prerequisite_members (a, *pt))
+ {
+ // Most of the time we will have just a single source so fast-
+ // path that case.
+ //
+ if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ())
+ {
+ src = true;
+ continue; // Check the rest of the prerequisites.
+ }
+
+ // Ignore some known target types (fsdir, headers, libraries,
+ // modules).
+ //
+ if (p1.is_a<fsdir> () ||
+ p1.is_a<libx> () ||
+ p1.is_a<liba> () || p1.is_a<libs> () || p1.is_a<libux> () ||
+ p1.is_a<bmi> () || p1.is_a<bmix> () ||
+ (p.is_a (mod ? *x_mod : x_src) && x_header (p1)) ||
+ (p.is_a<c> () && p1.is_a<h> ()))
+ continue;
+
+ fail << "synthesized dependency for prerequisite " << p
+ << " would be incompatible with existing target " << *pt <<
+ info << "unexpected existing prerequisite type " << p1 <<
+ info << "specify corresponding " << rtt.name << "{} "
+ << "dependency explicitly";
+ }
+
+ if (!src)
+ fail << "synthesized dependency for prerequisite " << p
+ << " would be incompatible with existing target " << *pt <<
+ info << "no existing c/" << x_name << " source prerequisite" <<
+ info << "specify corresponding " << rtt.name << "{} "
+ << "dependency explicitly";
+
+ m = 2; // Needs verification.
+ }
+ }
+ else // lib*{}
+ {
+ // If this is a static library, see if we need to link it whole.
+ // Note that we have to do it after match since we rely on the
+ // group link-up.
+ //
+ bool u;
+ if ((u = pt->is_a<libux> ()) || pt->is_a<liba> ())
+ {
+ const variable& var (ctx.var_pool["bin.whole"]); // @@ Cache.
+
+ // See the bin module for the lookup semantics discussion. Note
+ // that the variable is not overridable so we omit find_override()
+ // calls.
+ //
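+          // (The prerequisite-specific value, if any, takes precedence;
+          // for example, a buildfile could request whole-archive linking
+          // with something along these lines:
+          //
+          //   exe{hello}: liba{util}: bin.whole = true
+          //
+          // -- hypothetical names, for illustration only.)
+          //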
+ lookup l (p.prerequisite.vars[var]);
+
+ if (!l.defined ())
+ l = pt->find_original (var, true).first;
+
+ if (!l.defined ())
+ {
+ bool g (pt->group != nullptr);
+ l = bs.find_original (var,
+ &pt->type (),
+ &pt->name,
+ (g ? &pt->group->type () : nullptr),
+ (g ? &pt->group->name : nullptr)).first;
+ }
+
+ if (l ? cast<bool> (*l) : u)
+ pd |= lflag_whole;
+ }
+ }
+
+ mark (pt, m);
+ }
+
+ // Process prerequisites, pass 3: match everything and verify chains.
+ //
+
+ // Wait with unlocked phase to allow phase switching.
+ //
+ wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true);
+
+ i = start;
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ bool adhoc (pts[i].adhoc);
+ const target*& pt (pts[i++]);
+
+ uint8_t m;
+
+ if (pt == nullptr)
+ {
+          // Handle ad hoc prerequisites.
+ //
+ if (!adhoc)
+ continue;
+
+ pt = &p.search (t);
+ m = 1; // Mark for completion.
+ }
+ else if ((m = unmark (pt)) != 0)
+ {
+ // If this is a library not to be cleaned, we can finally blank it
+ // out.
+ //
+ if (skip (pt))
+ {
+ pt = nullptr;
+ continue;
+ }
+ }
+
+ match_async (a, *pt, ctx.count_busy (), t[a].task_count);
+ mark (pt, m);
+ }
+
+ wg.wait ();
+
+ // The "harvest" loop: finish matching the targets we have started. Note
+ // that we may have bailed out early (thus the parallel i/n for-loop).
+ //
+ i = start;
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target*& pt (pts[i++]);
+
+ // Skipped or not marked for completion.
+ //
+ uint8_t m;
+ if (pt == nullptr || (m = unmark (pt)) == 0)
+ continue;
+
+ build2::match (a, *pt);
+
+ // Nothing else to do if not marked for verification.
+ //
+ if (m == 1)
+ continue;
+
+ // Finish verifying the existing dependency (which is now matched)
+ // compared to what we would have synthesized.
+ //
+ bool mod (x_mod != nullptr && p.is_a (*x_mod));
+
+ // Note: group already resolved in the previous loop.
+
+ for (prerequisite_member p1: group_prerequisite_members (a, *pt))
+ {
+ if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ())
+ {
+ // Searching our own prerequisite is ok, p1 must already be
+ // resolved.
+ //
+ const target& tp (p.search (t));
+ const target& tp1 (p1.search (*pt));
+
+ if (&tp != &tp1)
+ {
+ bool group (!p.prerequisite.belongs (t));
+
+ const target_type& rtt (mod
+ ? (group ? bmi::static_type : tts.bmi)
+ : (group ? obj::static_type : tts.obj));
+
+ fail << "synthesized dependency for prerequisite " << p << " "
+ << "would be incompatible with existing target " << *pt <<
+ info << "existing prerequisite " << p1 << " does not match "
+ << p <<
+ info << p1 << " resolves to target " << tp1 <<
+ info << p << " resolves to target " << tp <<
+ info << "specify corresponding " << rtt.name << "{} "
+ << "dependency explicitly";
+ }
+
+ break;
+ }
+ }
+ }
+
+ md.binless = binless;
+ md.start = start;
+
+ switch (a)
+ {
+ case perform_update_id: return [this] (action a, const target& t)
+ {
+ return perform_update (a, t);
+ };
+ case perform_clean_id: return [this] (action a, const target& t)
+ {
+ return perform_clean (a, t);
+ };
+ default: return noop_recipe; // Configure update.
+ }
+ }
+
+ void link_rule::
+ append_libraries (strings& args,
+ const file& l, bool la, lflags lf,
+ const scope& bs, action a, linfo li) const
+ {
+ struct data
+ {
+ strings& args;
+ const file& l;
+ action a;
+ linfo li;
+ compile_target_types tts;
+ } d {args, l, a, li, compile_types (li.type)};
+
+ auto imp = [] (const file&, bool la)
+ {
+ return la;
+ };
+
+ auto lib = [&d, this] (const file* const* lc,
+ const string& p,
+ lflags f,
+ bool)
+ {
+ const file* l (lc != nullptr ? *lc : nullptr);
+
+ if (l == nullptr)
+ {
+ // Don't try to link a library (whether -lfoo or foo.lib) to a
+ // static library.
+ //
+ if (d.li.type != otype::a)
+ d.args.push_back (p);
+ }
+ else
+ {
+ bool lu (l->is_a<libux> ());
+
+ // The utility/non-utility case is tricky. Consider these two
+ // scenarios:
+ //
+ // exe -> (libu1-e -> libu1-e) -> (liba) -> libu-a -> (liba1)
+ // exe -> (liba) -> libu1-a -> libu1-a -> (liba1) -> libu-a1
+ //
+ // Libraries that should be linked are in '()'. That is, we need to
+ // link the initial sequence of utility libraries and then, after
+ // encountering a first non-utility, only link non-utilities
+ // (because they already contain their utility's object files).
+ //
+ if (lu)
+ {
+ for (ptrdiff_t i (-1); lc[i] != nullptr; --i)
+ if (!lc[i]->is_a<libux> ())
+ return;
+ }
+
+ if (d.li.type == otype::a)
+ {
+ // Linking a utility library to a static library.
+ //
+ // Note that utility library prerequisites of utility libraries
+ // are automatically handled by process_libraries(). So all we
+ // have to do is implement the "thin archive" logic.
+ //
+ // We may also end up trying to link a non-utility library to a
+ // static library via a utility library (direct linking is taken
+ // care of by perform_update()). So we cut it off here.
+ //
+ if (!lu)
+ return;
+
+ if (l->mtime () == timestamp_unreal) // Binless.
+ return;
+
+ for (const target* pt: l->prerequisite_targets[d.a])
+ {
+ if (pt == nullptr)
+ continue;
+
+ if (modules)
+ {
+ if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{}
+ pt = find_adhoc_member (*pt, d.tts.obj);
+ }
+
+ // We could have dependency diamonds with utility libraries.
+ // Repeats will be handled by the linker (in fact, it could be
+ // required to repeat them to satisfy all the symbols) but here
+ // we have to suppress duplicates ourselves.
+ //
+ if (const file* f = pt->is_a<objx> ())
+ {
+ string p (relative (f->path ()).string ());
+ if (find (d.args.begin (), d.args.end (), p) == d.args.end ())
+ d.args.push_back (move (p));
+ }
+ }
+ }
+ else
+ {
+ // Linking a library to a shared library or executable.
+ //
+
+ if (l->mtime () == timestamp_unreal) // Binless.
+ return;
+
+ // On Windows a shared library is a DLL with the import library as
+ // an ad hoc group member. MinGW though can link directly to DLLs
+ // (see search_library() for details).
+ //
+ if (tclass == "windows" && l->is_a<libs> ())
+ {
+ if (const libi* li = find_adhoc_member<libi> (*l))
+ l = li;
+ }
+
+ string p (relative (l->path ()).string ());
+
+ if (f & lflag_whole)
+ {
+ if (tsys == "win32-msvc")
+ {
+ p.insert (0, "/WHOLEARCHIVE:"); // Only available from VC14U2.
+ }
+ else if (tsys == "darwin")
+ {
+ p.insert (0, "-Wl,-force_load,");
+ }
+ else
+ {
+ d.args.push_back ("-Wl,--whole-archive");
+ d.args.push_back (move (p));
+ d.args.push_back ("-Wl,--no-whole-archive");
+ return;
+ }
+ }
+
+ d.args.push_back (move (p));
+ }
+ }
+ };
+
+ auto opt = [&d, this] (const file& l,
+ const string& t,
+ bool com,
+ bool exp)
+ {
+ // Don't try to pass any loptions when linking a static library.
+ //
+ if (d.li.type == otype::a)
+ return;
+
+ // If we need an interface value, then use the group (lib{}).
+ //
+ if (const target* g = exp && l.is_a<libs> () ? l.group : &l)
+ {
+ const variable& var (
+ com
+ ? (exp ? c_export_loptions : c_loptions)
+ : (t == x
+ ? (exp ? x_export_loptions : x_loptions)
+ : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")]));
+
+ append_options (d.args, *g, var);
+ }
+ };
+
+ process_libraries (
+ a, bs, li, sys_lib_dirs, l, la, lf, imp, lib, opt, true);
+ }
+
+ void link_rule::
+ hash_libraries (sha256& cs,
+ bool& update, timestamp mt,
+ const file& l, bool la, lflags lf,
+ const scope& bs, action a, linfo li) const
+ {
+ struct data
+ {
+ sha256& cs;
+ const dir_path& out_root;
+ bool& update;
+ timestamp mt;
+ linfo li;
+ } d {cs, bs.root_scope ()->out_path (), update, mt, li};
+
+ auto imp = [] (const file&, bool la)
+ {
+ return la;
+ };
+
+ auto lib = [&d, this] (const file* const* lc,
+ const string& p,
+ lflags f,
+ bool)
+ {
+ const file* l (lc != nullptr ? *lc : nullptr);
+
+ if (l == nullptr)
+ {
+ if (d.li.type != otype::a)
+ d.cs.append (p);
+ }
+ else
+ {
+ bool lu (l->is_a<libux> ());
+
+ if (lu)
+ {
+ for (ptrdiff_t i (-1); lc[i] != nullptr; --i)
+ if (!lc[i]->is_a<libux> ())
+ return;
+ }
+
+ // We also don't need to do anything special for linking a utility
+ // library to a static library. If any of its object files (or the
+ // set of its object files) changes, then the library will have to
+ // be updated as well. In other words, we use the library timestamp
+          // as a proxy for all of its members' timestamps.
+ //
+          // We do need to cut off the static-to-static linking, just as in
+ // append_libraries().
+ //
+ if (d.li.type == otype::a && !lu)
+ return;
+
+ if (l->mtime () == timestamp_unreal) // Binless.
+ return;
+
+ // Check if this library renders us out of date.
+ //
+ d.update = d.update || l->newer (d.mt);
+
+ // On Windows a shared library is a DLL with the import library as
+ // an ad hoc group member. MinGW though can link directly to DLLs
+ // (see search_library() for details).
+ //
+ if (tclass == "windows" && l->is_a<libs> ())
+ {
+ if (const libi* li = find_adhoc_member<libi> (*l))
+ l = li;
+ }
+
+ d.cs.append (f);
+ hash_path (d.cs, l->path (), d.out_root);
+ }
+ };
+
+ auto opt = [&d, this] (const file& l,
+ const string& t,
+ bool com,
+ bool exp)
+ {
+ if (d.li.type == otype::a)
+ return;
+
+ if (const target* g = exp && l.is_a<libs> () ? l.group : &l)
+ {
+ const variable& var (
+ com
+ ? (exp ? c_export_loptions : c_loptions)
+ : (t == x
+ ? (exp ? x_export_loptions : x_loptions)
+ : l.ctx.var_pool[t + (exp ? ".export.loptions" : ".loptions")]));
+
+ hash_options (d.cs, *g, var);
+ }
+ };
+
+ process_libraries (
+ a, bs, li, sys_lib_dirs, l, la, lf, imp, lib, opt, true);
+ }
+
+ void link_rule::
+ rpath_libraries (strings& args,
+ const target& t,
+ const scope& bs,
+ action a,
+ linfo li,
+ bool link) const
+ {
+ // Use -rpath-link only on targets that support it (Linux, *BSD). Note
+ // that we don't really need it for top-level libraries.
+ //
+ if (link)
+ {
+ if (tclass != "linux" && tclass != "bsd")
+ return;
+ }
+
+ auto imp = [link] (const file& l, bool la)
+ {
+ // If we are not rpath-link'ing, then we only need to rpath interface
+        // libraries (they will include rpath's for their implementations).
+ // Otherwise, we have to do this recursively. In both cases we also
+ // want to see through utility libraries.
+ //
+ // The rpath-link part is tricky: ideally we would like to get only
+ // implementations and only of shared libraries. We are not interested
+ // in interfaces because we are linking their libraries explicitly.
+ // However, in our model there is no such thing as "implementation
+ // only"; it is either interface or interface and implementation. So
+ // we are going to rpath-link all of them which should be harmless
+ // except for some noise on the command line.
+        //
+ return (link ? !la : false) || l.is_a<libux> ();
+ };
+
+ // Package the data to keep within the 2-pointer small std::function
+ // optimization limit.
+ //
+ struct
+ {
+ strings& args;
+ bool link;
+ } d {args, link};
+
+ auto lib = [&d, this] (const file* const* lc,
+ const string& f,
+ lflags,
+ bool sys)
+ {
+ const file* l (lc != nullptr ? *lc : nullptr);
+
+ // We don't rpath system libraries. Why, you may ask? There are many
+ // good reasons and I have them written on a napkin somewhere...
+ //
+ if (sys)
+ return;
+
+ if (l != nullptr)
+ {
+ if (!l->is_a<libs> ())
+ return;
+
+ if (l->mtime () == timestamp_unreal) // Binless.
+ return;
+ }
+ else
+ {
+ // This is an absolute path and we need to decide whether it is
+ // a shared or static library. Doesn't seem there is anything
+ // better than checking for a platform-specific extension (maybe
+ // we should cache it somewhere).
+ //
+ size_t p (path::traits_type::find_extension (f));
+
+ if (p == string::npos)
+ return;
+
+ ++p; // Skip dot.
+
+ bool c (true);
+ const char* e;
+
+ if (tclass == "windows") {e = "dll"; c = false;}
+ else if (tsys == "darwin") e = "dylib";
+ else e = "so";
+
+ if ((c
+ ? f.compare (p, string::npos, e)
+ : casecmp (f.c_str () + p, e)) != 0)
+ return;
+ }
+
+ // Ok, if we are here then it means we have a non-system, shared
+ // library and its absolute path is in f.
+ //
+ string o (d.link ? "-Wl,-rpath-link," : "-Wl,-rpath,");
+
+ size_t p (path::traits_type::rfind_separator (f));
+ assert (p != string::npos);
+
+ o.append (f, 0, (p != 0 ? p : 1)); // Don't include trailing slash.
+ d.args.push_back (move (o));
+ };
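+
+      // For example, a non-system /usr/local/lib/libfoo.so dependency
+      // results in -Wl,-rpath,/usr/local/lib (or the -rpath-link variant).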
+
+ // In case we don't have the "small function object" optimization.
+ //
+ const function<bool (const file&, bool)> impf (imp);
+ const function<
+ void (const file* const*, const string&, lflags, bool)> libf (lib);
+
+ for (const prerequisite_target& pt: t.prerequisite_targets[a])
+ {
+ if (pt == nullptr)
+ continue;
+
+ bool la;
+ const file* f;
+
+ if ((la = (f = pt->is_a<liba> ())) ||
+ (la = (f = pt->is_a<libux> ())) ||
+ ( f = pt->is_a<libs> ()))
+ {
+ if (!link && !la)
+ {
+ // Top-level shared library dependency.
+ //
+ if (!f->path ().empty ()) // Not binless.
+ {
+ // It is either matched or imported so should be a cc library.
+ //
+ if (!cast_false<bool> (f->vars[c_system]))
+ args.push_back (
+ "-Wl,-rpath," + f->path ().directory ().string ());
+ }
+ }
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ *f, la, pt.data,
+ impf, libf, nullptr);
+ }
+ }
+ }
+
+ // Filter link.exe noise (msvc.cxx).
+ //
+ void
+ msvc_filter_link (ifdstream&, const file&, otype);
+
+ // Translate target CPU to the link.exe/lib.exe /MACHINE option.
+ //
+ const char*
+ msvc_machine (const string& cpu); // msvc.cxx
+
+ target_state link_rule::
+ perform_update (action a, const target& xt) const
+ {
+ tracer trace (x, "link_rule::perform_update");
+
+ const file& t (xt.as<file> ());
+ const path& tp (t.path ());
+
+ context& ctx (t.ctx);
+
+ const scope& bs (t.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ match_data& md (t.data<match_data> ());
+
+ // Unless the outer install rule signalled that this is update for
+ // install, signal back that we've performed plain update.
+ //
+ if (!md.for_install)
+ md.for_install = false;
+
+ bool for_install (*md.for_install);
+
+ ltype lt (link_type (t));
+ otype ot (lt.type);
+ linfo li (link_info (bs, ot));
+ compile_target_types tts (compile_types (ot));
+
+ bool binless (md.binless);
+ assert (ot != otype::e || !binless); // Sanity check.
+
+ // Determine if we are out-of-date.
+ //
+ bool update (false);
+ bool scratch (false);
+ timestamp mt (binless ? timestamp_unreal : t.load_mtime ());
+
+ // Update prerequisites. We determine if any relevant non-ad hoc ones
+ // render us out-of-date manually below.
+ //
+ // Note that execute_prerequisites() blanks out all the ad hoc
+ // prerequisites so we don't need to worry about them from now on.
+ //
+ target_state ts;
+
+ if (optional<target_state> s =
+ execute_prerequisites (a,
+ t,
+ mt,
+ [] (const target&, size_t) {return false;}))
+ ts = *s;
+ else
+ {
+ // An ad hoc prerequisite renders us out-of-date. Let's update from
+ // scratch for good measure.
+ //
+ scratch = update = true;
+ ts = target_state::changed;
+ }
+
+ // Check for the for_install variable on each prerequisite and blank out
+ // those that don't match. Note that we have to do it after updating
+ // prerequisites to keep the dependency counts straight.
+ //
+ if (const variable* var_fi = ctx.var_pool.find ("for_install"))
+ {
+ // Parallel prerequisites/prerequisite_targets loop.
+ //
+ size_t i (md.start);
+ for (prerequisite_member p: group_prerequisite_members (a, t))
+ {
+ const target*& pt (t.prerequisite_targets[a][i++]);
+
+ if (pt == nullptr)
+ continue;
+
+ if (lookup l = p.prerequisite.vars[var_fi])
+ {
+ if (cast<bool> (l) != for_install)
+ {
+ l5 ([&]{trace << "excluding " << *pt << " due to for_install";});
+ pt = nullptr;
+ }
+ }
+ }
+ }
+
+ // (Re)generate pkg-config's .pc file. While the target itself might be
+ // up-to-date from a previous run, there is no guarantee that .pc exists
+      // or is up-to-date. So to keep things simple we just regenerate it
+ // unconditionally.
+ //
+      // Also, if you are wondering why we don't just always produce this
+      // .pc, install or no install, the reason is that unless and until we
+      // are updating for install, we have no idea where things will be
+      // installed.
+ //
+ if (for_install && lt.library () && !lt.utility)
+ pkgconfig_save (a, t, lt.static_library (), binless);
+
+ // If we have no binary to build then we are done.
+ //
+ if (binless)
+ {
+ t.mtime (timestamp_unreal);
+ return ts;
+ }
+
+ // Open the dependency database (do it before messing with Windows
+ // manifests to diagnose missing output directory).
+ //
+ depdb dd (tp + ".d");
+
+ // If targeting Windows, take care of the manifest.
+ //
+ path manifest; // Manifest itself (msvc) or compiled object file.
+ timestamp rpath_timestamp = timestamp_nonexistent; // DLLs timestamp.
+
+ if (lt.executable () && tclass == "windows")
+ {
+ // First determine if we need to add our rpath emulating assembly. The
+ // assembly itself is generated later, after updating the target. Omit
+ // it if we are updating for install.
+ //
+ if (!for_install && cast_true<bool> (t["bin.rpath.auto"]))
+ rpath_timestamp = windows_rpath_timestamp (t, bs, a, li);
+
+ auto p (windows_manifest (t, rpath_timestamp != timestamp_nonexistent));
+ path& mf (p.first);
+ timestamp mf_mt (p.second);
+
+ if (tsys == "mingw32")
+ {
+ // Compile the manifest into the object file with windres. While we
+ // are going to synthesize an .rc file to pipe to windres' stdin, we
+ // will still use .manifest to check if everything is up-to-date.
+ //
+ manifest = mf + ".o";
+
+ if (mf_mt == timestamp_nonexistent || mf_mt > mtime (manifest))
+ {
+ path of (relative (manifest));
+
+ const process_path& rc (cast<process_path> (rs["bin.rc.path"]));
+
+          // @@ Would be good to add this to depdb (e.g., rc changes).
+ //
+ const char* args[] = {
+ rc.recall_string (),
+ "--input-format=rc",
+ "--output-format=coff",
+ "-o", of.string ().c_str (),
+ nullptr};
+
+ if (verb >= 3)
+ print_process (args);
+
+ if (!ctx.dry_run)
+ {
+ auto_rmfile rm (of);
+
+ try
+ {
+ process pr (rc, args, -1);
+
+ try
+ {
+ ofdstream os (move (pr.out_fd));
+
+ // 1 is resource ID, 24 is RT_MANIFEST. We also need to
+ // escape Windows path backslashes.
+ //
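+                  // For example, for a hypothetical manifest path
+                  // C:\p\foo.exe.manifest the synthesized content would
+                  // look along these lines:
+                  //
+                  //   1 24 "C:\\p\\foo.exe.manifest"
+                  //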
+ os << "1 24 \"";
+
+ const string& s (mf.string ());
+ for (size_t i (0), j;; i = j + 1)
+ {
+ j = s.find ('\\', i);
+ os.write (s.c_str () + i,
+ (j == string::npos ? s.size () : j) - i);
+
+ if (j == string::npos)
+ break;
+
+ os.write ("\\\\", 2);
+ }
+
+ os << "\"" << endl;
+
+ os.close ();
+ rm.cancel ();
+ }
+ catch (const io_error& e)
+ {
+ if (pr.wait ()) // Ignore if child failed.
+ fail << "unable to pipe resource file to " << args[0]
+ << ": " << e;
+ }
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+ }
+
+ update = true; // Manifest changed, force update.
+ }
+ }
+ else
+ {
+ manifest = move (mf); // Save for link.exe's /MANIFESTINPUT.
+
+ if (mf_mt == timestamp_nonexistent || mf_mt > mt)
+ update = true; // Manifest changed, force update.
+ }
+ }
+
+ // Check/update the dependency database.
+ //
+ // First should come the rule name/version.
+ //
+ if (dd.expect (rule_id) != nullptr)
+ l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+ lookup ranlib;
+
+ // Then the linker checksum (ar/ranlib or the compiler).
+ //
+ if (lt.static_library ())
+ {
+ ranlib = rs["bin.ranlib.path"];
+
+ const char* rl (
+ ranlib
+ ? cast<string> (rs["bin.ranlib.checksum"]).c_str ()
+ : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+
+ if (dd.expect (cast<string> (rs["bin.ar.checksum"])) != nullptr)
+ l4 ([&]{trace << "ar mismatch forcing update of " << t;});
+
+ if (dd.expect (rl) != nullptr)
+ l4 ([&]{trace << "ranlib mismatch forcing update of " << t;});
+ }
+ else
+ {
+ // For VC we use link.exe directly.
+ //
+ const string& cs (
+ cast<string> (
+ rs[tsys == "win32-msvc"
+ ? ctx.var_pool["bin.ld.checksum"]
+ : x_checksum]));
+
+ if (dd.expect (cs) != nullptr)
+ l4 ([&]{trace << "linker mismatch forcing update of " << t;});
+ }
+
+ // Next check the target. While it might be incorporated into the linker
+ // checksum, it also might not (e.g., VC link.exe).
+ //
+ if (dd.expect (ctgt.string ()) != nullptr)
+ l4 ([&]{trace << "target mismatch forcing update of " << t;});
+
+ // Start building the command line. While we don't yet know whether we
+ // will really need it, we need to hash it to find out. So the options
+ // are to either replicate the exact process twice, first for hashing
+ // then for building or to go ahead and start building and hash the
+ // result. The first approach is probably more efficient while the
+    // second is simpler. Let's go with the simpler one for now (actually
+    // it's kind of a hybrid).
+ //
+ cstrings args {nullptr}; // Reserve one for config.bin.ar/config.x.
+
+ // Storage.
+ //
+ string arg1, arg2;
+ strings sargs;
+
+ if (lt.static_library ())
+ {
+ if (tsys == "win32-msvc")
+ {
+ // lib.exe has /LIBPATH but it's not clear/documented what it's used
+ // for. Perhaps for link-time code generation (/LTCG)? If that's the
+ // case, then we may need to pass *.loptions.
+ //
+ args.push_back ("/NOLOGO");
+
+ // Add /MACHINE.
+ //
+ args.push_back (msvc_machine (cast<string> (rs[x_target_cpu])));
+ }
+ else
+ {
+ // If the user asked for ranlib, don't try to do its function with
+ // -s. Some ar implementations (e.g., the LLVM one) don't support
+ // leading '-'.
+ //
+ arg1 = ranlib ? "rc" : "rcs";
+
+ // For utility libraries use thin archives if possible.
+ //
+ // Thin archives are supported by GNU ar since binutils 2.19.1 and
+ // LLVM ar since LLVM 3.8.0. Note that strictly speaking thin
+ // archives also have to be supported by the linker but it is
+ // probably safe to assume that the two came from the same version
+ // of binutils/LLVM.
+ //
+ if (lt.utility)
+ {
+ const string& id (cast<string> (rs["bin.ar.id"]));
+
+ for (bool g (id == "gnu"); g || id == "llvm"; ) // Breakout loop.
+ {
+ auto mj (cast<uint64_t> (rs["bin.ar.version.major"]));
+ if (mj < (g ? 2 : 3)) break;
+ if (mj == (g ? 2 : 3))
+ {
+ auto mi (cast<uint64_t> (rs["bin.ar.version.minor"]));
+ if (mi < (g ? 18 : 8)) break;
+ if (mi == 18 && g)
+ {
+ auto pa (cast<uint64_t> (rs["bin.ar.version.patch"]));
+ if (pa < 1) break;
+ }
+ }
+
+ arg1 += 'T';
+ break;
+ }
+ }
+
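+        // So, for example, with a sufficiently recent GNU ar and no
+        // explicit ranlib, a utility library would be archived along
+        // these lines (a sketch; names hypothetical):
+        //
+        //   ar rcsT libfoo.a foo.o bar.o
+        //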
+ args.push_back (arg1.c_str ());
+ }
+
+ append_options (args, t, c_aoptions);
+ append_options (args, t, x_aoptions);
+ }
+ else
+ {
+ if (tsys == "win32-msvc")
+ {
+ // We are using link.exe directly so don't pass the compiler
+ // options.
+ }
+ else
+ {
+ append_options (args, t, c_coptions);
+ append_options (args, t, x_coptions);
+ append_options (args, tstd);
+ }
+
+ append_options (args, t, c_loptions);
+ append_options (args, t, x_loptions);
+
+ // Extra system library dirs (last).
+ //
+ // @@ /LIBPATH:<path>, not /LIBPATH <path>
+ //
+ assert (sys_lib_dirs_extra <= sys_lib_dirs.size ());
+ append_option_values (
+ args,
+ cclass == compiler_class::msvc ? "/LIBPATH:" : "-L",
+ sys_lib_dirs.begin () + sys_lib_dirs_extra, sys_lib_dirs.end (),
+ [] (const dir_path& d) {return d.string ().c_str ();});
+
+ // Handle soname/rpath.
+ //
+ if (tclass == "windows")
+ {
+ // Limited emulation for Windows with no support for user-defined
+ // rpath/rpath-link.
+ //
+ lookup l;
+
+ if ((l = t["bin.rpath"]) && !l->empty ())
+ fail << ctgt << " does not support rpath";
+
+ if ((l = t["bin.rpath_link"]) && !l->empty ())
+ fail << ctgt << " does not support rpath-link";
+ }
+ else
+ {
+ // Set soname.
+ //
+ if (lt.shared_library ())
+ {
+ const libs_paths& paths (md.libs_paths);
+ const string& leaf (paths.effect_soname ().leaf ().string ());
+
+ if (tclass == "macos")
+ {
+ // With Mac OS 10.5 (Leopard) Apple finally caved in and gave us
+ // a way to emulate vanilla -rpath.
+ //
+ // It may seem natural to do something different on update for
+ // install. However, if we don't make it @rpath, then the user
+ // won't be able to use config.bin.rpath for installed libraries.
+ //
+ arg1 = "-install_name";
+ arg2 = "@rpath/" + leaf;
+ }
+ else
+ arg1 = "-Wl,-soname," + leaf;
+
+ if (!arg1.empty ())
+ args.push_back (arg1.c_str ());
+
+ if (!arg2.empty ())
+ args.push_back (arg2.c_str ());
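+
+          // For example, for a hypothetical libfoo.so.1.2.3 with soname
+          // libfoo.so.1 the above adds -Wl,-soname,libfoo.so.1 (or, on
+          // Mac OS, something like -install_name @rpath/libfoo.1.dylib).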
+ }
+
+ // Add rpaths. We used to first add the ones specified by the user
+ // so that they take precedence. But that caused problems if we have
+ // old versions of the libraries sitting in the rpath location
+ // (e.g., installed libraries). And if you think about this, it's
+ // probably correct to prefer libraries that we explicitly imported
+ // to the ones found via rpath.
+ //
+ // Note also that if this is update for install, then we don't add
+ // rpath of the imported libraries (i.e., we assume they are also
+ // installed). But we add -rpath-link for some platforms.
+ //
+ if (cast_true<bool> (t[for_install
+ ? "bin.rpath_link.auto"
+ : "bin.rpath.auto"]))
+ rpath_libraries (sargs, t, bs, a, li, for_install /* link */);
+
+ lookup l;
+
+ if ((l = t["bin.rpath"]) && !l->empty ())
+ for (const dir_path& p: cast<dir_paths> (l))
+ sargs.push_back ("-Wl,-rpath," + p.string ());
+
+ if ((l = t["bin.rpath_link"]) && !l->empty ())
+ {
+ // Only certain targets support -rpath-link (Linux, *BSD).
+ //
+ if (tclass != "linux" && tclass != "bsd")
+ fail << ctgt << " does not support rpath-link";
+
+ for (const dir_path& p: cast<dir_paths> (l))
+ sargs.push_back ("-Wl,-rpath-link," + p.string ());
+ }
+ }
+ }
+
+ // All the options should now be in. Hash them and compare with the db.
+ //
+ {
+ sha256 cs;
+
+ for (size_t i (1); i != args.size (); ++i)
+ cs.append (args[i]);
+
+ for (size_t i (0); i != sargs.size (); ++i)
+ cs.append (sargs[i]);
+
+ // @@ Note that we don't hash output options so if one of the ad hoc
+ // members that we manage gets renamed, we will miss a rebuild.
+
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "options mismatch forcing update of " << t;});
+ }
+
+ // Finally, hash and compare the list of input files.
+ //
+ // Should we capture actual file names or their checksum? The only good
+ // reason for capturing actual files is diagnostics: we will be able to
+ // pinpoint exactly what is causing the update. On the other hand, the
+ // checksum is faster and simpler. And we like simple.
+ //
+ const file* def (nullptr); // Cached if present.
+ {
+ sha256 cs;
+
+ for (const prerequisite_target& p: t.prerequisite_targets[a])
+ {
+ const target* pt (p.target);
+
+ if (pt == nullptr)
+ continue;
+
+ // If this is bmi*{}, then obj*{} is its ad hoc member.
+ //
+ if (modules)
+ {
+ if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{}
+ pt = find_adhoc_member (*pt, tts.obj);
+ }
+
+ const file* f;
+ bool la (false), ls (false);
+
+ // We link utility libraries to everything except other utility
+ // libraries. In case of linking to liba{} we follow the "thin
+ // archive" lead and "see through" to their object file
+ // prerequisites (recursively, until we encounter a non-utility).
+ //
+ if ((f = pt->is_a<objx> ()) ||
+ (!lt.utility &&
+ (la = (f = pt->is_a<libux> ()))) ||
+ (!lt.static_library () &&
+ ((la = (f = pt->is_a<liba> ())) ||
+ (ls = (f = pt->is_a<libs> ())))))
+ {
+ // Link all the dependent interface libraries (shared) or interface
+ // and implementation (static), recursively.
+ //
+ // Also check if any of them render us out of date. The tricky
+ // case is, say, a utility library (static) that depends on a
+ // shared library. When the shared library is updated, there is no
+ // reason to re-archive the utility but those who link the utility
+ // have to "see through" the changes in the shared library.
+ //
+ if (la || ls)
+ {
+ hash_libraries (cs, update, mt, *f, la, p.data, bs, a, li);
+ f = nullptr; // Timestamp checked by hash_libraries().
+ }
+ else
+ hash_path (cs, f->path (), rs.out_path ());
+ }
+ else if ((f = pt->is_a<bin::def> ()))
+ {
+ if (tclass == "windows" && !lt.static_library ())
+ {
+ // At least link.exe only allows a single .def file.
+ //
+ if (def != nullptr)
+ fail << "multiple module definition files specified for " << t;
+
+ hash_path (cs, f->path (), rs.out_path ());
+ def = f;
+ }
+ else
+ f = nullptr; // Not an input.
+ }
+ else
+ f = pt->is_a<exe> (); // Consider executable mtime (e.g., linker).
+
+ // Check if this input renders us out of date.
+ //
+ if (f != nullptr)
+ update = update || f->newer (mt);
+ }
+
+    // Treat the manifest as an input for both MinGW and VC (mtime checked
+    // above).
+ //
+ if (!manifest.empty ())
+ hash_path (cs, manifest, rs.out_path ());
+
+ // Treat *.libs variable values as inputs, not options.
+ //
+ if (!lt.static_library ())
+ {
+ hash_options (cs, t, c_libs);
+ hash_options (cs, t, x_libs);
+ }
+
+ if (dd.expect (cs.string ()) != nullptr)
+ l4 ([&]{trace << "file set mismatch forcing update of " << t;});
+ }
+
+ // If any of the above checks resulted in a mismatch (different linker,
+ // options or input file set), or if the database is newer than the
+ // target (interrupted update) then force the target update. Also note
+ // this situation in the "from scratch" flag.
+ //
+ if (dd.writing () || dd.mtime > mt)
+ scratch = update = true;
+
+ dd.close ();
+
+ // If nothing changed, then we are done.
+ //
+ if (!update)
+ return ts;
+
+ // Ok, so we are updating. Finish building the command line.
+ //
+ string in, out, out1, out2, out3; // Storage.
+
+ // Translate paths to relative (to working directory) ones. This results
+ // in easier to read diagnostics.
+ //
+ path relt (relative (tp));
+
+ const process_path* ld (nullptr);
+ if (lt.static_library ())
+ {
+ ld = &cast<process_path> (rs["bin.ar.path"]);
+
+ if (tsys == "win32-msvc")
+ {
+ out = "/OUT:" + relt.string ();
+ args.push_back (out.c_str ());
+ }
+ else
+ args.push_back (relt.string ().c_str ());
+ }
+ else
+ {
+ // The options are usually similar enough to handle executables
+ // and shared libraries together.
+ //
+ if (tsys == "win32-msvc")
+ {
+ // Using link.exe directly.
+ //
+ ld = &cast<process_path> (rs["bin.ld.path"]);
+ args.push_back ("/NOLOGO");
+
+ if (ot == otype::s)
+ args.push_back ("/DLL");
+
+ // Add /MACHINE.
+ //
+ args.push_back (msvc_machine (cast<string> (rs[x_target_cpu])));
+
+ // Unless explicitly enabled with /INCREMENTAL, disable incremental
+ // linking (it is implicitly enabled if /DEBUG is specified). The
+ // reason is the .ilk file: its name cannot be changed and if we
+ // have, say, foo.exe and foo.dll, then they will end up stomping on
+ // each other's .ilk's.
+ //
+ // So the idea is to disable it by default but let the user request
+ // it explicitly if they are sure their project doesn't suffer from
+ // the above issue. We can also have something like 'incremental'
+ // config initializer keyword for this.
+ //
+ // It might also be a good idea to ask Microsoft to add an option.
+ //
+ if (!find_option ("/INCREMENTAL", args, true))
+ args.push_back ("/INCREMENTAL:NO");
+
+ if (ctype == compiler_type::clang)
+ {
+ // According to Clang's MSVC.cpp, we shall link libcmt.lib (static
+ // multi-threaded runtime) unless -nostdlib or -nostartfiles is
+ // specified.
+ //
+ if (!find_options ({"-nostdlib", "-nostartfiles"}, t, c_coptions) &&
+ !find_options ({"-nostdlib", "-nostartfiles"}, t, x_coptions))
+ args.push_back ("/DEFAULTLIB:libcmt.lib");
+ }
+
+ // If you look at the list of libraries Visual Studio links by
+ // default, it includes everything and a couple of kitchen sinks
+ // (winspool32.lib, ole32.lib, odbc32.lib, etc) while we want to
+ // keep our low-level build as pure as possible. However, there seem
+ // to be fairly essential libraries that are not linked by link.exe
+ // by default (use /VERBOSE:LIB to see the list). For example, MinGW
+ // by default links advapi32, shell32, user32, and kernel32. And so
+ // we follow suit and make sure those are linked. advapi32 and
+ // kernel32 are already on the default list and we only need to add
+ // the other two.
+ //
+ // The way we are going to do it is via the /DEFAULTLIB option
+ // rather than specifying the libraries as normal inputs (as VS
+ // does). This way the user can override our actions with the
+ // /NODEFAULTLIB option.
+ //
+ args.push_back ("/DEFAULTLIB:shell32.lib");
+ args.push_back ("/DEFAULTLIB:user32.lib");
+
+ // Take care of the manifest (will be empty for the DLL).
+ //
+ if (!manifest.empty ())
+ {
+ out3 = "/MANIFESTINPUT:";
+ out3 += relative (manifest).string ();
+ args.push_back ("/MANIFEST:EMBED");
+ args.push_back (out3.c_str ());
+ }
+
+ if (def != nullptr)
+ {
+ in = "/DEF:" + relative (def->path ()).string ();
+ args.push_back (in.c_str ());
+ }
+
+ if (ot == otype::s)
+ {
+ // On Windows libs{} is the DLL and an ad hoc group member is the
+ // import library.
+ //
+ // This will also create the .exp export file. Its name will be
+ // derived from the import library by changing the extension.
+ // Lucky for us -- there is no option to name it.
+ //
+ const file& imp (*find_adhoc_member<libi> (t));
+
+ out2 = "/IMPLIB:";
+ out2 += relative (imp.path ()).string ();
+ args.push_back (out2.c_str ());
+ }
+
+ // If we have /DEBUG then name the .pdb file. It is an ad hoc group
+ // member.
+ //
+ if (find_option ("/DEBUG", args, true))
+ {
+ const file& pdb (
+ *find_adhoc_member<file> (t, *bs.find_target_type ("pdb")));
+
+ out1 = "/PDB:";
+ out1 += relative (pdb.path ()).string ();
+ args.push_back (out1.c_str ());
+ }
+
+ // @@ An executable can have an import library and VS seems to
+ // always name it. I wonder what would trigger its generation?
+ // Could it be the presence of export symbols? Yes, link.exe will
+ // generate the import library iff there are exported symbols.
+ // Which means there could be a DLL without an import library
+ // (which we currently don't handle very well).
+ //
+ out = "/OUT:" + relt.string ();
+ args.push_back (out.c_str ());
+ }
+ else
+ {
+ switch (cclass)
+ {
+ case compiler_class::gcc:
+ {
+ ld = &cpath;
+
+ // Add the option that triggers building a shared library and
+ // take care of any extras (e.g., import library).
+ //
+ if (ot == otype::s)
+ {
+ if (tclass == "macos")
+ args.push_back ("-dynamiclib");
+ else
+ args.push_back ("-shared");
+
+ if (tsys == "mingw32")
+ {
+ // On Windows libs{} is the DLL and an ad hoc group member
+ // is the import library.
+ //
+ const file& imp (*find_adhoc_member<libi> (t));
+ out = "-Wl,--out-implib=" + relative (imp.path ()).string ();
+ args.push_back (out.c_str ());
+ }
+ }
+
+ args.push_back ("-o");
+ args.push_back (relt.string ().c_str ());
+
+ // For MinGW the .def file is just another input.
+ //
+ if (def != nullptr)
+ {
+ in = relative (def->path ()).string ();
+ args.push_back (in.c_str ());
+ }
+
+ break;
+ }
+ case compiler_class::msvc: assert (false);
+ }
+ }
+ }
+
+ args[0] = ld->recall_string ();
+
+ // Append input files noticing the position of the first.
+ //
+#ifdef _WIN32
+ size_t args_input (args.size ());
+#endif
+
+ // The same logic as during hashing above. See also a similar loop
+ // inside append_libraries().
+ //
+ for (const prerequisite_target& p: t.prerequisite_targets[a])
+ {
+ const target* pt (p.target);
+
+ if (pt == nullptr)
+ continue;
+
+ if (modules)
+ {
+ if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{}
+ pt = find_adhoc_member (*pt, tts.obj);
+ }
+
+ const file* f;
+ bool la (false), ls (false);
+
+ if ((f = pt->is_a<objx> ()) ||
+ (!lt.utility &&
+ (la = (f = pt->is_a<libux> ()))) ||
+ (!lt.static_library () &&
+ ((la = (f = pt->is_a<liba> ())) ||
+ (ls = (f = pt->is_a<libs> ())))))
+ {
+ if (la || ls)
+ append_libraries (sargs, *f, la, p.data, bs, a, li);
+ else
+ sargs.push_back (relative (f->path ()).string ()); // string()&&
+ }
+ }
+
+ // For MinGW manifest is an object file.
+ //
+ if (!manifest.empty () && tsys == "mingw32")
+ sargs.push_back (relative (manifest).string ());
+
+ // Shallow-copy sargs to args. Why not do it as we go along pushing into
+ // sargs? Because of potential reallocations in sargs.
+ //
+ for (const string& a: sargs)
+ args.push_back (a.c_str ());
+
+ if (!lt.static_library ())
+ {
+ append_options (args, t, c_libs);
+ append_options (args, t, x_libs);
+ }
+
+ args.push_back (nullptr);
+
+ // Cleanup old (versioned) libraries. Let's do it even for dry-run to
+ // keep things simple.
+ //
+ if (lt.shared_library ())
+ {
+ const libs_paths& paths (md.libs_paths);
+ const path& p (paths.clean);
+
+ if (!p.empty ())
+ try
+ {
+ if (verb >= 4) // Seeing this with -V doesn't really add any value.
+ text << "rm " << p;
+
+ auto rm = [&paths, this] (path&& m, const string&, bool interm)
+ {
+ if (!interm)
+ {
+ // Filter out paths that have one of the current paths as a
+ // prefix.
+ //
+ auto test = [&m] (const path& p)
+ {
+ const string& s (p.string ());
+ return s.empty () || m.string ().compare (0, s.size (), s) != 0;
+ };
+
+ if (test (*paths.real) &&
+ test ( paths.interm) &&
+ test ( paths.soname) &&
+ test ( paths.load) &&
+ test ( paths.link))
+ {
+ try_rmfile (m);
+ try_rmfile (m + ".d");
+
+ if (tsys == "win32-msvc")
+ {
+ try_rmfile (m.base () += ".ilk");
+ try_rmfile (m += ".pdb");
+ }
+ }
+ }
+ return true;
+ };
+
+ // Note: doesn't follow symlinks.
+ //
+ path_search (p, rm, dir_path () /* start */, path_match_flags::none);
+ }
+ catch (const system_error&) {} // Ignore errors.
+ }
+ else if (lt.static_library ())
+ {
+ // We use relative paths to the object files which means we may end
+    // up with different ones depending on CWD and some implementations
+    // treat them as different archive members. So remove the file to
+    // be sure. Note that we ignore errors, leaving it to the archiver
+    // to complain.
+ //
+ if (mt != timestamp_nonexistent)
+ try_rmfile (relt, true);
+ }
+
+ if (verb == 1)
+ text << (lt.static_library () ? "ar " : "ld ") << t;
+ else if (verb == 2)
+ print_process (args);
+
+ // Do any necessary fixups to the command line to make it runnable.
+ //
+ // Notice the split in the diagnostics: at verbosity level 1 we print
+ // the "logical" command line while at level 2 and above -- what we are
+ // actually executing.
+ //
+ // On Windows we need to deal with the command line length limit. The
+ // best workaround seems to be passing (part of) the command line in an
+ // "options file" ("response file" in Microsoft's terminology). Both
+  // Microsoft's link.exe/lib.exe as well as GNU g++.exe/ar.exe support
+ // the same @<file> notation (and with a compatible subset of the
+ // content format; see below). Note also that GCC is smart enough to use
+ // an options file to call the underlying linker if we called it with
+ // @<file>. We will also assume that any other linker that we might be
+ // using supports this notation.
+ //
+ // Note that this is a limitation of the host platform, not the target
+ // (and Wine, where these lines are a bit blurred, does not have this
+ // length limitation).
+ //
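+  // For example, instead of passing thousands of input files on the
+  // command line directly, the invocation would become something along
+  // these lines (a sketch; names hypothetical):
+  //
+  //   link.exe /NOLOGO /OUT:foo.exe @foo.exe.t
+  //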
+#ifdef _WIN32
+ auto_rmfile trm;
+ string targ;
+ {
+ // Calculate the would-be command line length similar to how process'
+ // implementation does it.
+ //
+ auto quote = [s = string ()] (const char* a) mutable -> const char*
+ {
+ return process::quote_argument (a, s);
+ };
+
+ size_t n (0);
+ for (const char* a: args)
+ {
+ if (a != nullptr)
+ {
+ if (n != 0)
+ n++; // For the space separator.
+
+ n += strlen (quote (a));
+ }
+ }
+
+ if (n > 32766) // 32768 - "Unicode terminating null character".
+ {
+ // Use the .t extension (for "temporary").
+ //
+ const path& f ((trm = auto_rmfile (relt + ".t")).path);
+
+ try
+ {
+ ofdstream ofs (f);
+
+ // Both Microsoft and GNU support a space-separated list of
+ // potentially-quoted arguments. GNU also supports backslash-
+ // escaping (whether Microsoft supports it is unclear; but it
+ // definitely doesn't need it for backslashes themselves, for
+ // example, in paths).
+ //
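+          // So a GNU-style response file line could look along these
+          // lines (paths hypothetical; note the escaped backslashes and
+          // the quoted argument containing a space):
+          //
+          //   foo.o "C:\\tmp\\my lib\\bar.a" baz.o
+          //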
+ bool e (tsys != "win32-msvc"); // Assume GNU if not MSVC.
+ string b;
+
+ for (size_t i (args_input), n (args.size () - 1); i != n; ++i)
+ {
+ const char* a (args[i]);
+
+ if (e) // We will most likely have backslashes so just do it.
+ {
+ for (b.clear (); *a != '\0'; ++a)
+ {
+ if (*a != '\\')
+ b += *a;
+ else
+ b += "\\\\";
+ }
+
+ a = b.c_str ();
+ }
+
+ ofs << (i != args_input ? " " : "") << quote (a);
+ }
+
+ ofs << '\n';
+ ofs.close ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write " << f << ": " << e;
+ }
+
+ // Replace input arguments with @file.
+ //
+ targ = '@' + f.string ();
+ args.resize (args_input);
+ args.push_back (targ.c_str());
+ args.push_back (nullptr);
+
+ //@@ TODO: leave .t file if linker failed and verb > 2?
+ }
+ }
+#endif
+
+ if (verb > 2)
+ print_process (args);
+
+ // Remove the target file if any of the subsequent (after the linker)
+ // actions fail or if the linker fails but does not clean up its mess
+ // (like link.exe). If we don't do that, then we will end up with a
+ // broken build that is up-to-date.
+ //
+ auto_rmfile rm;
+
+ if (!ctx.dry_run)
+ {
+ rm = auto_rmfile (relt);
+
+ try
+ {
+ // VC tools (both lib.exe and link.exe) send diagnostics to stdout.
+ // Also, link.exe likes to print various gratuitous messages. So for
+ // link.exe we redirect stdout to a pipe, filter that noise out, and
+ // send the rest to stderr.
+ //
+ // For lib.exe (and any other insane linker that may try to pull off
+ // something like this) we are going to redirect stdout to stderr.
+ // For sane compilers this should be harmless.
+ //
+ bool filter (tsys == "win32-msvc" && !lt.static_library ());
+
+ process pr (*ld, args.data (), 0, (filter ? -1 : 2));
+
+ if (filter)
+ {
+ try
+ {
+ ifdstream is (
+ move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+
+ msvc_filter_link (is, t, ot);
+
+ // If anything remains in the stream, send it all to stderr.
+ // Note that the eof check is important: if the stream is at
+ // eof, this and all subsequent writes to the diagnostics stream
+ // will fail (and you won't see a thing).
+ //
+ if (is.peek () != ifdstream::traits_type::eof ())
+ diag_stream_lock () << is.rdbuf ();
+
+ is.close ();
+ }
+ catch (const io_error&) {} // Assume exits with error.
+ }
+
+ run_finish (args, pr);
+ }
+ catch (const process_error& e)
+ {
+ error << "unable to execute " << args[0] << ": " << e;
+
+ // In a multi-threaded program that fork()'ed but did not exec(), it
+ // is unwise to try to do any kind of cleanup (like unwinding the
+ // stack and running destructors).
+ //
+ if (e.child)
+ {
+ rm.cancel ();
+#ifdef _WIN32
+ trm.cancel ();
+#endif
+ exit (1);
+ }
+
+ throw failed ();
+ }
+
+ // VC link.exe creates an import library and .exp file for an
+ // executable if any of its object files export any symbols (think a
+ // unit test linking libus{}). And, no, there is no way to suppress
+ // it. Well, there is a way: create a .def file with an empty EXPORTS
+ // section, pass it to lib.exe to create a dummy .exp (and .lib), and
+ // then pass this empty .exp to link.exe. Wanna go this way? Didn't
+ // think so. Having no way to disable this, the next simplest thing
+ // seems to be just cleaning the mess up.
+ //
+ // Note also that if at some point we decide to support such "shared
+ // executables" (-rdynamic, etc), then it will probably have to be a
+ // different target type (exes{}?) since it will need a different set
+ // of object files (-fPIC so probably objs{}), etc.
+ //
+ if (lt.executable () && tsys == "win32-msvc")
+ {
+ path b (relt.base ());
+ try_rmfile (b + ".lib", true /* ignore_errors */);
+ try_rmfile (b + ".exp", true /* ignore_errors */);
+ }
+ }
+
+ if (ranlib)
+ {
+ const process_path& rl (cast<process_path> (ranlib));
+
+ const char* args[] = {
+ rl.recall_string (),
+ relt.string ().c_str (),
+ nullptr};
+
+ if (verb >= 2)
+ print_process (args);
+
+ if (!ctx.dry_run)
+ run (rl, args);
+ }
+
+ // For Windows generate (or clean up) rpath-emulating assembly.
+ //
+ if (tclass == "windows")
+ {
+ if (lt.executable ())
+ windows_rpath_assembly (t, bs, a, li,
+ cast<string> (rs[x_target_cpu]),
+ rpath_timestamp,
+ scratch);
+ }
+
+ if (lt.shared_library ())
+ {
+ // For shared libraries we may need to create a bunch of symlinks (or
+    // fall back to hardlinks/copies on Windows).
+ //
+ auto ln = [&ctx] (const path& f, const path& l)
+ {
+ if (verb >= 3)
+ text << "ln -sf " << f << ' ' << l;
+
+ if (ctx.dry_run)
+ return;
+
+ try
+ {
+ try
+ {
+ // The -f part.
+ //
+ if (file_exists (l, false /* follow_symlinks */))
+ try_rmfile (l);
+
+ mkanylink (f, l, true /* copy */, true /* relative */);
+ }
+ catch (system_error& e)
+ {
+ throw pair<entry_type, system_error> (entry_type::symlink,
+ move (e));
+ }
+ }
+ catch (const pair<entry_type, system_error>& e)
+ {
+ const char* w (e.first == entry_type::regular ? "copy" :
+ e.first == entry_type::symlink ? "symlink" :
+ e.first == entry_type::other ? "hardlink" :
+ nullptr);
+
+ fail << "unable to make " << w << ' ' << l << ": " << e.second;
+ }
+ };
+
+ const libs_paths& paths (md.libs_paths);
+
+ const path& lk (paths.link);
+ const path& ld (paths.load);
+ const path& so (paths.soname);
+ const path& in (paths.interm);
+
+ const path* f (paths.real);
+
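+      // For example, for a hypothetical real path libfoo.so.1.2.3 with
+      // soname libfoo.so.1 this could produce the symlink chain:
+      //
+      //   libfoo.so -> libfoo.so.1 -> libfoo.so.1.2.3
+      //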
+ if (!in.empty ()) {ln (*f, in); f = &in;}
+ if (!so.empty ()) {ln (*f, so); f = &so;}
+ if (!ld.empty ()) {ln (*f, ld); f = &ld;}
+ if (!lk.empty ()) {ln (*f, lk);}
+ }
+ else if (lt.static_library ())
+ {
+ // Apple ar (from cctools) for some reason truncates fractional
+    // seconds when running on APFS (HFS has one-second resolution so it's
+ // not an issue there). This can lead to object files being newer than
+ // the archive, which is naturally bad news. Filed as bug 49604334,
+ // reportedly fixed in Xcode 11 beta 5.
+ //
+ // Note that this block is not inside #ifdef __APPLE__ because we
+ // could be cross-compiling, theoretically. We also make sure we use
+ // Apple's ar (which is (un)recognized as 'generic') instead of, say,
+ // llvm-ar.
+ //
+ if (tsys == "darwin" && cast<string> (rs["bin.ar.id"]) == "generic")
+ {
+ if (!ctx.dry_run)
+ touch (ctx, tp, false /* create */, verb_never);
+ }
+ }
+
+ if (!ctx.dry_run)
+ {
+ rm.cancel ();
+ dd.check_mtime (tp);
+ }
+
+ // Should we go to the filesystem and get the new mtime? We know the
+ // file has been modified, so instead just use the current clock time.
+ // It has the advantage of having the subseconds precision. Plus, in
+ // case of dry-run, the file won't be modified.
+ //
+ t.mtime (system_clock::now ());
+ return target_state::changed;
+ }
+
+ target_state link_rule::
+ perform_clean (action a, const target& xt) const
+ {
+ const file& t (xt.as<file> ());
+
+ ltype lt (link_type (t));
+ const match_data& md (t.data<match_data> ());
+
+ clean_extras extras;
+ clean_adhoc_extras adhoc_extras;
+
+ if (md.binless)
+      ; // Clean prerequisites/members.
+ else
+ {
+ if (tclass != "windows")
+ ; // Everything is the default.
+ else if (tsys == "mingw32")
+ {
+ if (lt.executable ())
+ {
+ extras = {".d", ".dlls/", ".manifest.o", ".manifest"};
+ }
+
+ // For shared and static library it's the default.
+ }
+ else
+ {
+ // Assuming MSVC or alike.
+ //
+ if (lt.executable ())
+ {
+ // Clean up .ilk in case the user enabled incremental linking
+ // (notice that the .ilk extension replaces .exe).
+ //
+ extras = {".d", ".dlls/", ".manifest", "-.ilk"};
+ }
+ else if (lt.shared_library ())
+ {
+ // Clean up .ilk and .exp.
+ //
+ // Note that .exp is based on the .lib, not .dll name. And with
+ // versioning their bases may not be the same.
+ //
+ extras = {".d", "-.ilk"};
+ adhoc_extras.push_back ({libi::static_type, {"-.exp"}});
+ }
+
+ // For static library it's the default.
+ }
+
+ if (extras.empty ())
+ extras = {".d"}; // Default.
+
+#ifdef _WIN32
+ extras.push_back (".t"); // Options file.
+#endif
+ // For shared libraries we may have a bunch of symlinks that we need
+ // to remove.
+ //
+ if (lt.shared_library ())
+ {
+ const libs_paths& lp (md.libs_paths);
+
+ auto add = [&extras] (const path& p)
+ {
+ if (!p.empty ())
+ extras.push_back (p.string ().c_str ());
+ };
+
+ add (lp.link);
+ add (lp.load);
+ add (lp.soname);
+ add (lp.interm);
+ }
+ }
+
+ return perform_clean_extra (a, t, extras, adhoc_extras);
+ }
+ }
+}
diff --git a/libbuild2/cc/link-rule.hxx b/libbuild2/cc/link-rule.hxx
new file mode 100644
index 0000000..2a296a7
--- /dev/null
+++ b/libbuild2/cc/link-rule.hxx
@@ -0,0 +1,188 @@
+// file : libbuild2/cc/link-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_LINK_RULE_HXX
+#define LIBBUILD2_CC_LINK_RULE_HXX
+
+#include <set>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/rule.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ class LIBBUILD2_CC_SYMEXPORT link_rule: public rule, virtual common
+ {
+ public:
+ link_rule (data&&);
+
+ struct match_result
+ {
+ bool seen_x = false;
+ bool seen_c = false;
+ bool seen_cc = false;
+ bool seen_obj = false;
+ bool seen_lib = false;
+ };
+
+ match_result
+ match (action, const target&, const target*, otype, bool) const;
+
+ virtual bool
+ match (action, target&, const string&) const override;
+
+ virtual recipe
+ apply (action, target&) const override;
+
+ target_state
+ perform_update (action, const target&) const;
+
+ target_state
+ perform_clean (action, const target&) const;
+
+ private:
+ friend class install_rule;
+ friend class libux_install_rule;
+
+ // Shared library paths.
+ //
+ struct libs_paths
+ {
+ // If any (except real) is empty, then it is the same as the next
+ // one. Except for load and intermediate, for which empty indicates
+ // that it is not used.
+ //
+ // Note that the paths must form a "hierarchy" with subsequent paths
+ // adding extra information as suffixes. This is relied upon by the
+ // clean pattern (see below).
+ //
+ // The libs{} path is always the real path. On Windows what we link
+ // to is the import library and the link path is empty.
+ //
+ path link; // What we link: libfoo.so
+ path load; // What we load (with dlopen() or similar)
+ path soname; // SONAME: libfoo-1.so, libfoo.so.1
+ path interm; // Intermediate: libfoo.so.1.2
+ const path* real; // Real: libfoo.so.1.2.3
+
+ inline const path&
+ effect_link () const {return link.empty () ? effect_soname () : link;}
+
+ inline const path&
+ effect_soname () const {return soname.empty () ? *real : soname;}
+
+ // Cleanup pattern used to remove previous versions. If empty, no
+ // cleanup is performed. The above (current) names are automatically
+ // filtered out.
+ //
+ path clean;
+ };
+
+ libs_paths
+ derive_libs_paths (file&, const char*, const char*) const;
+
+ struct match_data
+ {
+ // The "for install" condition is signalled to us by install_rule when
+ // it is matched for the update operation. It also verifies that if we
+ // have already been executed, then it was for install.
+ //
+ // This has an interesting implication: it means that this rule cannot
+ // be used to update targets during match. Specifically, we cannot be
+ // executed for group resolution purposes (not a problem) nor as part
+ // of the generated source update. The latter case can be a problem:
+ // imagine a code generator that itself may need to be updated before
+ // it can be used to re-generate some out-of-date source code. As an
+ // aside, note that even if we were somehow able to communicate the
+ // "for install" in this case, the result of such an update may not
+ // actually be "usable" (e.g., not runnable because of the missing
+ // rpaths). There is another prominent case where the result may not
+ // be usable: cross-compilation.
+ //
+ // So the current (admittedly fuzzy) thinking is that a project shall
+ // not try to use its own build for update since it may not be usable
+ // (because of cross-compilations, being "for install", etc). Instead,
+ // it should rely on another, "usable" build of itself (this, BTW, is
+ // related to bpkg's build-time vs run-time dependencies).
+ //
+ optional<bool> for_install;
+
+ bool binless; // Binary-less library.
+ size_t start; // Parallel prerequisites/prerequisite_targets start.
+
+ link_rule::libs_paths libs_paths;
+ };
+
+ // Library handling.
+ //
+ void
+ append_libraries (strings&,
+ const file&, bool, lflags,
+ const scope&, action, linfo) const;
+
+ void
+ hash_libraries (sha256&,
+ bool&, timestamp,
+ const file&, bool, lflags,
+ const scope&, action, linfo) const;
+
+ void
+ rpath_libraries (strings&,
+ const target&,
+ const scope&, action, linfo,
+ bool) const;
+
+ // Windows rpath emulation (windows-rpath.cxx).
+ //
+ struct windows_dll
+ {
+ const string& dll;
+ const string* pdb; // NULL if none.
+ string pdb_storage;
+
+ bool operator< (const windows_dll& y) const {return dll < y.dll;}
+ };
+
+ using windows_dlls = std::set<windows_dll>;
+
+ timestamp
+ windows_rpath_timestamp (const file&,
+ const scope&,
+ action, linfo) const;
+
+ windows_dlls
+ windows_rpath_dlls (const file&, const scope&, action, linfo) const;
+
+ void
+ windows_rpath_assembly (const file&, const scope&, action, linfo,
+ const string&,
+ timestamp,
+ bool) const;
+
+ // Windows-specific (windows-manifest.cxx).
+ //
+ pair<path, timestamp>
+ windows_manifest (const file&, bool rpath_assembly) const;
+
+ // pkg-config's .pc file generation (pkgconfig.cxx).
+ //
+ void
+ pkgconfig_save (action, const file&, bool, bool) const;
+
+ private:
+ const string rule_id;
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_LINK_RULE_HXX
diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx
new file mode 100644
index 0000000..3113b5c
--- /dev/null
+++ b/libbuild2/cc/module.cxx
@@ -0,0 +1,781 @@
+// file : libbuild2/cc/module.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/module.hxx>
+
+#include <iomanip> // left, setw()
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/target.hxx> // pc*
+
+#include <libbuild2/config/utility.hxx>
+#include <libbuild2/install/utility.hxx>
+
+#include <libbuild2/cc/guess.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ void config_module::
+ guess (scope& rs, const location& loc, const variable_map&)
+ {
+ tracer trace (x, "guess_init");
+
+ bool cc_loaded (cast_false<bool> (rs["cc.core.guess.loaded"]));
+
+ // Adjust module priority (compiler). Also order cc module before us
+ // (we don't want to use priorities for that in case someone manages
+ // to slot in-between).
+ //
+ if (!cc_loaded)
+ config::save_module (rs, "cc", 250);
+
+ config::save_module (rs, x, 250);
+
+ auto& vp (rs.ctx.var_pool.rw (rs));
+
+ // Must already exist.
+ //
+ const variable& config_c_poptions (vp["config.cc.poptions"]);
+ const variable& config_c_coptions (vp["config.cc.coptions"]);
+ const variable& config_c_loptions (vp["config.cc.loptions"]);
+
+ // config.x
+ //
+
+ // Normally we will have a persistent configuration and computing the
+ // default value every time will be a waste. So try without a default
+ // first.
+ //
+ auto p (config::omitted (rs, config_x));
+
+ if (!p.first)
+ {
+        // If there is a config.x value for one of the modules that can give
+        // us a toolchain hint, load its .guess module. This makes sure that
+        // the order in which we load the modules is unimportant and that the
+        // user can specify the toolchain using any of the config.x values.
+ //
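+        // For example, if this is the c module and the user only
+        // specified config.cxx (say, config.cxx=clang++), we would load
+        // cxx.guess to establish the toolchain (a sketch; assumes cxx is
+        // among this module's hinters).
+        //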
+ if (!cc_loaded)
+ {
+ for (const char* const* pm (x_hinters); *pm != nullptr; ++pm)
+ {
+ string m (*pm);
+
+ // Must be the same as in module's init().
+ //
+ const variable& v (vp.insert<path> ("config." + m, true));
+
+ if (rs[v].defined ())
+ {
+ load_module (rs, rs, m + ".guess", loc);
+ cc_loaded = true;
+ break;
+ }
+ }
+ }
+
+ // If cc.core.config is already loaded then use its toolchain id and
+ // (optional) pattern to guess an appropriate default (e.g., for {gcc,
+ // *-4.9} we will get g++-4.9).
+ //
+ path d;
+
+ if (cc_loaded)
+ d = guess_default (x_lang,
+ cast<string> (rs["cc.id"]),
+ cast<string> (rs["cc.pattern"]));
+ else
+ {
+ d = path (x_default);
+
+ if (d.empty ())
+ fail << "not built with default " << x_lang << " compiler" <<
+ info << "use config." << x << " to specify";
+ }
+
+ // If this value was hinted, save it as commented out so that if the
+ // user changes the source of the pattern, this one will get updated
+ // as well.
+ //
+ p = config::required (rs,
+ config_x,
+ d,
+ false,
+ cc_loaded ? config::save_commented : 0);
+ }
+
+ // Figure out which compiler we are dealing with, its target, etc.
+ //
+ ci_ = &build2::cc::guess (
+ x,
+ x_lang,
+ cast<path> (*p.first),
+ cast_null<string> (config::omitted (rs, config_x_id).first),
+ cast_null<string> (config::omitted (rs, config_x_version).first),
+ cast_null<string> (config::omitted (rs, config_x_target).first),
+ cast_null<strings> (rs[config_c_poptions]),
+ cast_null<strings> (rs[config_x_poptions]),
+ cast_null<strings> (rs[config_c_coptions]),
+ cast_null<strings> (rs[config_x_coptions]),
+ cast_null<strings> (rs[config_c_loptions]),
+ cast_null<strings> (rs[config_x_loptions]));
+
+ const compiler_info& ci (*ci_);
+
+ // Split/canonicalize the target. First see if the user asked us to
+ // use config.sub.
+ //
+ target_triplet tt;
+ {
+ string ct;
+
+ if (config_sub)
+ {
+ ct = run<string> (3,
+ *config_sub,
+ ci.target.c_str (),
+ [] (string& l, bool) {return move (l);});
+ l5 ([&]{trace << "config.sub target: '" << ct << "'";});
+ }
+
+ try
+ {
+ tt = target_triplet (ct.empty () ? ci.target : ct);
+ l5 ([&]{trace << "canonical target: '" << tt.string () << "'; "
+ << "class: " << tt.class_;});
+ }
+ catch (const invalid_argument& e)
+ {
+ // This is where we suggest that the user specifies --config-sub to
+ // help us out.
+ //
+ fail << "unable to parse " << x_lang << " compiler target '"
+ << ci.target << "': " << e <<
+ info << "consider using the --config-sub option";
+ }
+ }
+
+ // Assign values to variables that describe the compiler.
+ //
+ rs.assign (x_id) = ci.id.string ();
+ rs.assign (x_id_type) = to_string (ci.id.type);
+ rs.assign (x_id_variant) = ci.id.variant;
+
+ rs.assign (x_class) = to_string (ci.class_);
+
+ rs.assign (x_version) = ci.version.string;
+ rs.assign (x_version_major) = ci.version.major;
+ rs.assign (x_version_minor) = ci.version.minor;
+ rs.assign (x_version_patch) = ci.version.patch;
+ rs.assign (x_version_build) = ci.version.build;
+
+ // Also enter as x.target.{cpu,vendor,system,version,class} for
+ // convenience of access.
+ //
+ rs.assign (x_target_cpu) = tt.cpu;
+ rs.assign (x_target_vendor) = tt.vendor;
+ rs.assign (x_target_system) = tt.system;
+ rs.assign (x_target_version) = tt.version;
+ rs.assign (x_target_class) = tt.class_;
+
+ rs.assign (x_target) = move (tt);
+
+ rs.assign (x_pattern) = ci.pattern;
+
+ if (!x_stdlib.alias (c_stdlib))
+ rs.assign (x_stdlib) = ci.x_stdlib;
+
+ new_ = p.second;
+
+ // Load cc.core.guess.
+ //
+ if (!cc_loaded)
+ {
+ // Prepare configuration hints.
+ //
+ variable_map h (rs.ctx);
+
+ // Note that all these variables have already been registered.
+ //
+ h.assign ("config.cc.id") = cast<string> (rs[x_id]);
+ h.assign ("config.cc.hinter") = string (x);
+ h.assign ("config.cc.target") = cast<target_triplet> (rs[x_target]);
+
+ if (!ci.pattern.empty ())
+ h.assign ("config.cc.pattern") = ci.pattern;
+
+ h.assign (c_runtime) = ci.runtime;
+ h.assign (c_stdlib) = ci.c_stdlib;
+
+ load_module (rs, rs, "cc.core.guess", loc, false, h);
+ }
+ else
+ {
+ // If cc.core.guess is already loaded, verify its configuration
+ // matched ours since it could have been loaded by another c-family
+ // module.
+ //
+ const auto& h (cast<string> (rs["cc.hinter"]));
+
+ auto check = [&loc, &h, this] (const auto& cv,
+ const auto& xv,
+ const char* what,
+ bool error = true)
+ {
+ if (cv != xv)
+ {
+ diag_record dr (error ? fail (loc) : warn (loc));
+
+ dr << h << " and " << x << " module " << what << " mismatch" <<
+ info << h << " is '" << cv << "'" <<
+ info << x << " is '" << xv << "'" <<
+ info << "consider explicitly specifying config." << h
+ << " and config." << x;
+ }
+ };
+
+ check (cast<string> (rs["cc.id"]),
+ cast<string> (rs[x_id]),
+ "toolchain");
+
+ // We used to not require that patterns match assuming that if the
+ // toolchain id and target are the same, then where exactly the tools
+ // come from doesn't really matter. But in most cases it will be the
+      // g++-7 vs gcc kind of mistake. So now we warn since even if
+ // intentional, it is still probably a bad idea.
+ //
+ check (cast<string> (rs["cc.pattern"]),
+ cast<string> (rs[x_pattern]),
+ "toolchain pattern",
+ false);
+
+ check (cast<target_triplet> (rs["cc.target"]),
+ cast<target_triplet> (rs[x_target]),
+ "target");
+
+ check (cast<string> (rs["cc.runtime"]),
+ ci.runtime,
+ "runtime");
+
+ check (cast<string> (rs["cc.stdlib"]),
+ ci.c_stdlib,
+ "c standard library");
+ }
+ }
+
+#ifndef _WIN32
+ static const dir_path usr_inc ("/usr/include");
+ static const dir_path usr_loc_lib ("/usr/local/lib");
+ static const dir_path usr_loc_inc ("/usr/local/include");
+# ifdef __APPLE__
+ static const dir_path a_usr_inc (
+ "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include");
+# endif
+#endif
+
+ void config_module::
+ init (scope& rs, const location& loc, const variable_map&)
+ {
+ tracer trace (x, "config_init");
+
+ const compiler_info& ci (*ci_);
+ const target_triplet& tt (cast<target_triplet> (rs[x_target]));
+
+ // config.x.std overrides x.std
+ //
+ {
+ lookup l (config::omitted (rs, config_x_std).first);
+
+ const string* v;
+ if (l.defined ())
+ {
+ v = cast_null<string> (l);
+ rs.assign (x_std) = v;
+ }
+ else
+ v = cast_null<string> (rs[x_std]);
+
+ // Translate x_std value (if any) to the compiler option(s) (if any).
+ //
+ tstd = translate_std (ci, rs, v);
+ }
+
+ // Extract system header/library search paths from the compiler and
+ // determine if we need any additional search paths.
+ //
+ dir_paths lib_dirs;
+ dir_paths inc_dirs;
+
+ switch (ci.class_)
+ {
+ case compiler_class::gcc:
+ {
+ lib_dirs = gcc_library_search_paths (ci.path, rs);
+ inc_dirs = gcc_header_search_paths (ci.path, rs);
+ break;
+ }
+ case compiler_class::msvc:
+ {
+ lib_dirs = msvc_library_search_paths (ci.path, rs);
+ inc_dirs = msvc_header_search_paths (ci.path, rs);
+ break;
+ }
+ }
+
+ sys_lib_dirs_extra = lib_dirs.size ();
+ sys_inc_dirs_extra = inc_dirs.size ();
+
+#ifndef _WIN32
+ // Add /usr/local/{include,lib}. We definitely shouldn't do this if we
+ // are cross-compiling. But even if the build and target are the same,
+ // it's possible the compiler uses some carefully crafted sysroot and by
+    // adding /usr/local/* we will just mess things up. So the heuristic
+    // that we will use is this: if the compiler's system include directories
+    // contain /usr[/local]/include then we add /usr/local/*.
+ //
+    // Note that, similar to GCC, we also check for the directory existence.
+    // Failing that, we can end up with some bizarre yo-yo'ing cases where
+ // uninstall removes the directories which in turn triggers a rebuild
+ // on the next invocation.
+ //
+ {
+ auto& is (inc_dirs);
+ auto& ls (lib_dirs);
+
+ bool ui (find (is.begin (), is.end (), usr_inc) != is.end ());
+ bool uli (find (is.begin (), is.end (), usr_loc_inc) != is.end ());
+
+#ifdef __APPLE__
+ // On Mac OS starting from 10.14 there is no longer /usr/include.
+ // Instead we get the following:
+ //
+ // Homebrew GCC 9:
+ //
+ // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include
+ //
+ // Apple Clang 10.0.1:
+ //
+ // /Library/Developer/CommandLineTools/usr/include
+ // /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include
+ //
+ // What exactly all this means is anyone's guess, of course. So for
+ // now we will assume that anything that is or resolves (like that
+ // MacOSX10.14.sdk symlink) to:
+ //
+ // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include
+ //
+ // Is Apple's /usr/include.
+ //
+ if (!ui && !uli)
+ {
+ for (const dir_path& d: inc_dirs)
+ {
+ // Both Clang and GCC skip non-existent paths but let's handle
+ // (and ignore) directories that cause any errors, for good
+ // measure.
+ //
+ try
+ {
+ if (d == a_usr_inc || dir_path (d).realize () == a_usr_inc)
+ {
+ ui = true;
+ break;
+ }
+ }
+ catch (...) {}
+ }
+ }
+#endif
+ if (ui || uli)
+ {
+ bool ull (find (ls.begin (), ls.end (), usr_loc_lib) != ls.end ());
+
+ // Many platforms don't search in /usr/local/lib by default (but do
+ // for headers in /usr/local/include). So add it as the last option.
+ //
+ if (!ull && exists (usr_loc_lib, true /* ignore_error */))
+ ls.push_back (usr_loc_lib);
+
+ // FreeBSD is at least consistent: it searches in neither. Quoting
+ // its wiki: "FreeBSD can't even find libraries that it installed."
+ // So let's help it a bit.
+ //
+ if (!uli && exists (usr_loc_inc, true /* ignore_error */))
+ is.push_back (usr_loc_inc);
+ }
+ }
+#endif
+
+ // If this is a new value (e.g., we are configuring), then print the
+ // report at verbosity level 2 and up (-v).
+ //
+ if (verb >= (new_ ? 2 : 3))
+ {
+ diag_record dr (text);
+
+ {
+ dr << x << ' ' << project (rs) << '@' << rs << '\n'
+ << " " << left << setw (11) << x << ci.path << '\n'
+ << " id " << ci.id << '\n'
+ << " version " << ci.version.string << '\n'
+ << " major " << ci.version.major << '\n'
+ << " minor " << ci.version.minor << '\n'
+ << " patch " << ci.version.patch << '\n';
+ }
+
+ if (!ci.version.build.empty ())
+ {
+ dr << " build " << ci.version.build << '\n';
+ }
+
+ {
+ const string& ct (tt.string ()); // Canonical target.
+
+ dr << " signature " << ci.signature << '\n'
+ << " checksum " << ci.checksum << '\n'
+ << " target " << ct;
+
+ if (ct != ci.original_target)
+ dr << " (" << ci.original_target << ")";
+
+ dr << "\n runtime " << ci.runtime
+ << "\n stdlib " << ci.x_stdlib;
+
+ if (!x_stdlib.alias (c_stdlib))
+ dr << "\n c stdlib " << ci.c_stdlib;
+ }
+
+ if (!tstd.empty ())
+ {
+ dr << "\n std "; // One less space.
+ for (const string& o: tstd) dr << ' ' << o;
+ }
+
+ if (!ci.pattern.empty ()) // Note: bin_pattern printed by bin
+ {
+ dr << "\n pattern " << ci.pattern;
+ }
+
+ if (verb >= 3 && !inc_dirs.empty ())
+ {
+ dr << "\n inc dirs";
+ for (size_t i (0); i != inc_dirs.size (); ++i)
+ {
+ if (i == sys_inc_dirs_extra)
+ dr << "\n --";
+ dr << "\n " << inc_dirs[i];
+ }
+ }
+
+ if (verb >= 3 && !lib_dirs.empty ())
+ {
+ dr << "\n lib dirs";
+ for (size_t i (0); i != lib_dirs.size (); ++i)
+ {
+ if (i == sys_lib_dirs_extra)
+ dr << "\n --";
+ dr << "\n " << lib_dirs[i];
+ }
+ }
+ }
+
+ rs.assign (x_path) = process_path (ci.path, false /* init */);
+ rs.assign (x_sys_lib_dirs) = move (lib_dirs);
+ rs.assign (x_sys_inc_dirs) = move (inc_dirs);
+
+ rs.assign (x_signature) = ci.signature;
+ rs.assign (x_checksum) = ci.checksum;
+
+ // config.x.{p,c,l}options
+ // config.x.libs
+ //
+ // These are optional. We also merge them into the corresponding
+ // x.* variables.
+ //
+ // The merging part gets a bit tricky if this module has already
+ // been loaded in one of the outer scopes. By doing the straight
+ // append we would just be repeating the same options over and
+ // over. So what we are going to do is only append to a value if
+ // it came from this scope. Then the usage for merging becomes:
+ //
+ // x.coptions = <overridable options> # Note: '='.
+ // using x
+ // x.coptions += <overriding options> # Note: '+='.
+ //
+ rs.assign (x_poptions) += cast_null<strings> (
+ config::optional (rs, config_x_poptions));
+
+ rs.assign (x_coptions) += cast_null<strings> (
+ config::optional (rs, config_x_coptions));
+
+ rs.assign (x_loptions) += cast_null<strings> (
+ config::optional (rs, config_x_loptions));
+
+ rs.assign (x_aoptions) += cast_null<strings> (
+ config::optional (rs, config_x_aoptions));
+
+ rs.assign (x_libs) += cast_null<strings> (
+ config::optional (rs, config_x_libs));
+
+ // config.x.importable_header
+ //
+ // It's still fuzzy whether specifying (or maybe tweaking) this list in
+    // the configuration will be a common thing to do, so for now we use
+    // omitted. It's also probably too early to think about whether we
+    // should have the cc.* version and what the semantics should be.
+ //
+ if (x_importable_headers != nullptr)
+ {
+ lookup l (config::omitted (rs, *config_x_importable_headers).first);
+
+ // @@ MODHDR: if(modules) ?
+ //
+ rs.assign (x_importable_headers) += cast_null<strings> (l);
+ }
+
+ // Load cc.core.config.
+ //
+ if (!cast_false<bool> (rs["cc.core.config.loaded"]))
+ {
+ variable_map h (rs.ctx);
+
+ if (!ci.bin_pattern.empty ())
+ h.assign ("config.bin.pattern") = ci.bin_pattern;
+
+ load_module (rs, rs, "cc.core.config", loc, false, h);
+ }
+ }
+
+ void module::
+ init (scope& rs, const location& loc, const variable_map&)
+ {
+ tracer trace (x, "init");
+
+ // Load cc.core. Besides other things, this will load bin (core) plus
+ // extra bin.* modules we may need.
+ //
+ if (!cast_false<bool> (rs["cc.core.loaded"]))
+ load_module (rs, rs, "cc.core", loc);
+
+ // Process, sort, and cache (in this->import_hdr) importable headers.
+ // Keep the cache NULL if unused or empty.
+ //
+ // @@ MODHDR TODO: support exclusions entries (e.g., -<stdio.h>)?
+ //
+ if (modules && x_importable_headers != nullptr)
+ {
+ strings* ih (cast_null<strings> (rs.assign (x_importable_headers)));
+
+ if (ih != nullptr && !ih->empty ())
+ {
+ // Translate <>-style header names to absolute paths using the
+ // compiler's include search paths. Otherwise complete and normalize
+ // since when searching in this list we always use the absolute and
+ // normalized header target path.
+ //
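+        // For example, <stdio.h> would be translated to something along
+        // the lines of /usr/include/stdio.h, assuming it is found in one
+        // of the compiler's system header search paths.
+        //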
+ for (string& h: *ih)
+ {
+ if (h.empty ())
+ continue;
+
+ path f;
+ if (h.front () == '<' && h.back () == '>')
+ {
+ h.pop_back ();
+ h.erase (0, 1);
+
+ for (const dir_path& d: sys_inc_dirs)
+ {
+ if (file_exists ((f = d, f /= h),
+ true /* follow_symlinks */,
+ true /* ignore_errors */))
+ goto found;
+ }
+
+ // What should we do if not found? While we can fail, this could
+ // be too drastic if, for example, the header is "optional" and
+ // may or may not be present/used. So for now let's restore the
+ // original form to aid debugging (it can't possibly match any
+ // absolute path).
+ //
+ h.insert (0, 1, '<');
+ h.push_back ('>');
+ continue;
+
+ found:
+ ; // Fall through.
+ }
+ else
+ {
+ f = path (move (h));
+
+ if (f.relative ())
+ f.complete ();
+ }
+
+ // @@ MODHDR: should we use the more elaborate but robust
+          //            normalize/realize scheme so that we get the same
+ // path? Feels right.
+ f.normalize ();
+ h = move (f).string ();
+ }
+
+ sort (ih->begin (), ih->end ());
+ import_hdr = ih;
+ }
+ }
+
+ // Register target types and configure their "installability".
+ //
+ bool install_loaded (cast_false<bool> (rs["install.loaded"]));
+
+ {
+ using namespace install;
+
+ rs.insert_target_type (x_src);
+
+ auto insert_hdr = [&rs, install_loaded] (const target_type& tt)
+ {
+ rs.insert_target_type (tt);
+
+ // Install headers into install.include.
+ //
+ if (install_loaded)
+ install_path (rs, tt, dir_path ("include"));
+ };
+
+ // Note: module (x_mod) is in x_hdr.
+ //
+ for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht)
+ insert_hdr (**ht);
+
+ // Also register the C header for C-derived languages.
+ //
+ if (*x_hdr != &h::static_type)
+ insert_hdr (h::static_type);
+
+ rs.insert_target_type<pca> ();
+ rs.insert_target_type<pcs> ();
+
+ if (install_loaded)
+ install_path<pc> (rs, dir_path ("pkgconfig"));
+ }
+
+ // Register rules.
+ //
+ {
+ using namespace bin;
+
+ auto& r (rs.rules);
+
+ // We register for configure so that we detect unresolved imports
+        // during configuration rather than later, e.g., during update.
+ //
+ const compile_rule& cr (*this);
+ const link_rule& lr (*this);
+
+ r.insert<obje> (perform_update_id, x_compile, cr);
+ r.insert<obje> (perform_clean_id, x_compile, cr);
+ r.insert<obje> (configure_update_id, x_compile, cr);
+
+ r.insert<obja> (perform_update_id, x_compile, cr);
+ r.insert<obja> (perform_clean_id, x_compile, cr);
+ r.insert<obja> (configure_update_id, x_compile, cr);
+
+ r.insert<objs> (perform_update_id, x_compile, cr);
+ r.insert<objs> (perform_clean_id, x_compile, cr);
+ r.insert<objs> (configure_update_id, x_compile, cr);
+
+ if (modules)
+ {
+ r.insert<bmie> (perform_update_id, x_compile, cr);
+ r.insert<bmie> (perform_clean_id, x_compile, cr);
+ r.insert<bmie> (configure_update_id, x_compile, cr);
+
+ r.insert<hbmie> (perform_update_id, x_compile, cr);
+ r.insert<hbmie> (perform_clean_id, x_compile, cr);
+ r.insert<hbmie> (configure_update_id, x_compile, cr);
+
+ r.insert<bmia> (perform_update_id, x_compile, cr);
+ r.insert<bmia> (perform_clean_id, x_compile, cr);
+ r.insert<bmia> (configure_update_id, x_compile, cr);
+
+ r.insert<hbmia> (perform_update_id, x_compile, cr);
+ r.insert<hbmia> (perform_clean_id, x_compile, cr);
+ r.insert<hbmia> (configure_update_id, x_compile, cr);
+
+ r.insert<bmis> (perform_update_id, x_compile, cr);
+ r.insert<bmis> (perform_clean_id, x_compile, cr);
+ r.insert<bmis> (configure_update_id, x_compile, cr);
+
+ r.insert<hbmis> (perform_update_id, x_compile, cr);
+ r.insert<hbmis> (perform_clean_id, x_compile, cr);
+ r.insert<hbmis> (configure_update_id, x_compile, cr);
+ }
+
+ r.insert<libue> (perform_update_id, x_link, lr);
+ r.insert<libue> (perform_clean_id, x_link, lr);
+ r.insert<libue> (configure_update_id, x_link, lr);
+
+ r.insert<libua> (perform_update_id, x_link, lr);
+ r.insert<libua> (perform_clean_id, x_link, lr);
+ r.insert<libua> (configure_update_id, x_link, lr);
+
+ r.insert<libus> (perform_update_id, x_link, lr);
+ r.insert<libus> (perform_clean_id, x_link, lr);
+ r.insert<libus> (configure_update_id, x_link, lr);
+
+ r.insert<exe> (perform_update_id, x_link, lr);
+ r.insert<exe> (perform_clean_id, x_link, lr);
+ r.insert<exe> (configure_update_id, x_link, lr);
+
+ r.insert<liba> (perform_update_id, x_link, lr);
+ r.insert<liba> (perform_clean_id, x_link, lr);
+ r.insert<liba> (configure_update_id, x_link, lr);
+
+ r.insert<libs> (perform_update_id, x_link, lr);
+ r.insert<libs> (perform_clean_id, x_link, lr);
+ r.insert<libs> (configure_update_id, x_link, lr);
+
+ // Note that while libu*{} are not installable, we need to see through
+ // them in case they depend on stuff that we need to install (see the
+ // install rule implementations for details).
+ //
+ if (install_loaded)
+ {
+ const install_rule& ir (*this);
+
+ r.insert<exe> (perform_install_id, x_install, ir);
+ r.insert<exe> (perform_uninstall_id, x_uninstall, ir);
+
+ r.insert<liba> (perform_install_id, x_install, ir);
+ r.insert<liba> (perform_uninstall_id, x_uninstall, ir);
+
+ r.insert<libs> (perform_install_id, x_install, ir);
+ r.insert<libs> (perform_uninstall_id, x_uninstall, ir);
+
+ const libux_install_rule& lr (*this);
+
+ r.insert<libue> (perform_install_id, x_install, lr);
+ r.insert<libue> (perform_uninstall_id, x_uninstall, lr);
+
+ r.insert<libua> (perform_install_id, x_install, lr);
+ r.insert<libua> (perform_uninstall_id, x_uninstall, lr);
+
+ r.insert<libus> (perform_install_id, x_install, lr);
+ r.insert<libus> (perform_uninstall_id, x_uninstall, lr);
+ }
+ }
+ }
+ }
+}
diff --git a/libbuild2/cc/module.hxx b/libbuild2/cc/module.hxx
new file mode 100644
index 0000000..43670c3
--- /dev/null
+++ b/libbuild2/cc/module.hxx
@@ -0,0 +1,103 @@
+// file : libbuild2/cc/module.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_MODULE_HXX
+#define LIBBUILD2_CC_MODULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/module.hxx>
+#include <libbuild2/variable.hxx>
+
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/compile-rule.hxx>
+#include <libbuild2/cc/link-rule.hxx>
+#include <libbuild2/cc/install-rule.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ struct compiler_info;
+
+ class LIBBUILD2_CC_SYMEXPORT config_module: public module_base,
+ public virtual config_data
+ {
+ public:
+ explicit
+ config_module (config_data&& d) : config_data (move (d)) {}
+
+ // We split the configuration process into two parts: guessing the
+ // compiler information and the actual configuration. This allows one to
+ // adjust the configuration (say, the standard or enabled experimental
+ // features) based on the compiler information by first loading the
+ // guess module.
+ //
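+ // For example (an illustrative sketch, not a prescribed setup), a
+ // project's root.build could do:
+ //
+ //   using cxx.guess
+ //
+ //   if ($cxx.id == 'gcc')
+ //     cxx.std = experimental
+ //
+ //   using cxx
+ //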
+ void
+ guess (scope&, const location&, const variable_map&);
+
+ void
+ init (scope&, const location&, const variable_map&);
+
+ // Translate the x.std value (if any) to the standard-selecting
+ // option(s) (if any). May also check/set x.features.* variables on the
+ // root scope.
+ //
+ virtual strings
+ translate_std (const compiler_info&, scope&, const string*) const = 0;
+
+ strings tstd;
+ size_t sys_lib_dirs_extra; // First extra path (size if none).
+ size_t sys_inc_dirs_extra; // First extra path (size if none).
+
+ const compiler_info* ci_;
+
+ private:
+ // Defined in gcc.cxx.
+ //
+ dir_paths
+ gcc_header_search_paths (const process_path&, scope&) const;
+
+ dir_paths
+ gcc_library_search_paths (const process_path&, scope&) const;
+
+ // Defined in msvc.cxx.
+ //
+ dir_paths
+ msvc_header_search_paths (const process_path&, scope&) const;
+
+ dir_paths
+ msvc_library_search_paths (const process_path&, scope&) const;
+
+ private:
+ bool new_; // See guess() and init() for details.
+ };
+
+ class LIBBUILD2_CC_SYMEXPORT module: public module_base,
+ public virtual common,
+ link_rule,
+ compile_rule,
+ install_rule,
+ libux_install_rule
+ {
+ public:
+ explicit
+ module (data&& d)
+ : common (move (d)),
+ link_rule (move (d)),
+ compile_rule (move (d)),
+ install_rule (move (d), *this),
+ libux_install_rule (move (d), *this) {}
+
+ void
+ init (scope&, const location&, const variable_map&);
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_MODULE_HXX
diff --git a/libbuild2/cc/msvc.cxx b/libbuild2/cc/msvc.cxx
new file mode 100644
index 0000000..d802b98
--- /dev/null
+++ b/libbuild2/cc/msvc.cxx
@@ -0,0 +1,502 @@
+// file : libbuild2/cc/msvc.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cstring> // strcmp()
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+#include <libbuild2/cc/common.hxx>
+#include <libbuild2/cc/module.hxx>
+
+using std::strcmp;
+
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ // Translate the target triplet CPU to lib.exe/link.exe /MACHINE option.
+ //
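+ // For example, "x86_64" translates to "/MACHINE:x64" and "i686" to
+ // "/MACHINE:x86".
+ //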
+ const char*
+ msvc_machine (const string& cpu)
+ {
+ const char* m (cpu == "i386" || cpu == "i686" ? "/MACHINE:x86" :
+ cpu == "x86_64" ? "/MACHINE:x64" :
+ cpu == "arm" ? "/MACHINE:ARM" :
+ cpu == "arm64" ? "/MACHINE:ARM64" :
+ nullptr);
+
+ if (m == nullptr)
+ fail << "unable to translate CPU " << cpu << " to /MACHINE";
+
+ return m;
+ }
+
+ // Sanitize cl.exe options.
+ //
+ void
+ msvc_sanitize_cl (cstrings& args)
+ {
+ // VC is trying to be "helpful" and warn about one command line option
+ // overriding another. For example:
+ //
+ // cl : Command line warning D9025 : overriding '/W1' with '/W2'
+ //
+ // So we have to sanitize the command line and suppress duplicates of
+ // certain options.
+ //
+ // Note also that it is theoretically possible we will treat an option's
+ // argument as an option. Oh, well, nobody is perfect in the Microsoft
+ // land.
+
+ // We want to keep the last option seen at the position (relative to
+ // other options) at which it was encountered. If we were to iterate
+ // forward and keep positions of the encountered options, then we would
+ // have had to adjust some of them once we remove a duplicate. So instead
+ // we are going to iterate backwards, in which case we don't even need to
+ // keep positions, just flags. Note that args[0] is cl.exe itself, in
+ // which we are conveniently not interested.
+ //
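+ // As a minimal illustration (hypothetical option values), the argument
+ // sequence
+ //
+ //   cl.exe /W1 /O2 /W4
+ //
+ // is reduced to
+ //
+ //   cl.exe /O2 /W4
+ //
+ // That is, only the last warning-level option survives, at its original
+ // position relative to the other options.
+ //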
+ bool W (false); // /WN /Wall /w
+
+ for (size_t i (args.size () - 1); i != 0; --i)
+ {
+ auto erase = [&args, &i] ()
+ {
+ args.erase (args.begin () + i);
+ };
+
+ const char* a (args[i]);
+
+ if (*a != '/' && *a != '-') // Not an option.
+ continue;
+
+ ++a;
+
+ // /WN /Wall /w
+ //
+ if ((a[0] == 'W' && digit (a[1]) && a[2] == '\0') || // WN
+ (a[0] == 'W' && strcmp (a + 1, "all") == 0) || // Wall
+ (a[0] == 'w' && a[1] == '\0')) // w
+ {
+ if (W)
+ erase ();
+ else
+ W = true;
+ }
+ }
+ }
+
+ // Sense whether this is a diagnostics line, returning the position of
+ // the NNNN code in XNNNN and npos otherwise.
+ //
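+ // For example, given the line
+ //
+ //   cl : Command line warning D9025 : overriding '/W3' with '/W4'
+ //
+ // a call with f='D' would return the position of "9025".
+ //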
+ size_t
+ msvc_sense_diag (const string& l, char f)
+ {
+ size_t p (l.find (':'));
+
+ // Note that while the C-numbers seem to all be in the ' CNNNN:' form,
+ // the D ones can be ' DNNNN :', for example:
+ //
+ // cl : Command line warning D9025 : overriding '/W3' with '/W4'
+ //
+ for (size_t n (l.size ());
+ p != string::npos;
+ p = ++p != n ? l.find_first_of (": ", p) : string::npos)
+ {
+ if (p > 5 &&
+ l[p - 6] == ' ' &&
+ l[p - 5] == f &&
+ digit (l[p - 4]) &&
+ digit (l[p - 3]) &&
+ digit (l[p - 2]) &&
+ digit (l[p - 1]))
+ {
+ p -= 4; // Start of the error code.
+ break;
+ }
+ }
+
+ return p;
+ }
+
+ // Filter cl.exe and link.exe noise.
+ //
+ void
+ msvc_filter_cl (ifdstream& is, const path& src)
+ {
+ // While it appears VC always prints the source name (even if the
+ // file does not exist), let's do a sanity check. Also handle the
+ // command line errors/warnings which come before the file name.
+ //
+ for (string l; !eof (getline (is, l)); )
+ {
+ if (l != src.leaf ().string ())
+ {
+ diag_stream_lock () << l << endl;
+
+ if (msvc_sense_diag (l, 'D') != string::npos)
+ continue;
+ }
+
+ break;
+ }
+ }
+
+ void
+ msvc_filter_link (ifdstream& is, const file& t, otype lt)
+ {
+ // Filter lines until we encounter something we don't recognize. We also
+ // have to assume the messages can be translated.
+ //
+ for (string l; getline (is, l); )
+ {
+ // " Creating library foo\foo.dll.lib and object foo\foo.dll.exp"
+ //
+ // This can also appear when linking executables if any of the object
+ // files export any symbols.
+ //
+ if (l.compare (0, 3, " ") == 0)
+ {
+ // Use the actual import library name if this is a library (since we
+ // override this name) and the executable name otherwise (by default
+ // .lib/.exp are named by replacing the .exe extension).
+ //
+ path i (
+ lt == otype::s
+ ? find_adhoc_member<libi> (t)->path ().leaf ()
+ : t.path ().leaf ().base () + ".lib");
+
+ if (l.find (i.string ()) != string::npos &&
+ l.find (i.base ().string () + ".exp") != string::npos)
+ continue;
+ }
+
+ // /INCREMENTAL causes the linker to sometimes issue messages but now I
+ // can't quite reproduce it.
+ //
+
+ diag_stream_lock () << l << endl;
+ break;
+ }
+ }
+
+ // Extract system header search paths from MSVC.
+ //
+ dir_paths config_module::
+ msvc_header_search_paths (const process_path&, scope&) const
+ {
+ // The compiler doesn't seem to have any built-in paths and all of them
+ // come from the INCLUDE environment variable.
+
+ // @@ VC: how are we going to do this? E.g., cl-14 does this internally.
+ // cl.exe /Be prints INCLUDE.
+ //
+ // Should we actually bother? INCLUDE is normally used for system
+ // headers and it's highly unlikely we will see an imported library
+ // that lists one of those directories in pkg-config Cflags value.
+ // Let's wait and see.
+ //
+ return dir_paths ();
+ }
+
+ // Extract system library search paths from MSVC.
+ //
+ dir_paths config_module::
+ msvc_library_search_paths (const process_path&, scope&) const
+ {
+ // The linker doesn't seem to have any built-in paths and all of them
+ // come from the LIB environment variable.
+
+ // @@ VC: how are we going to do this? E.g., cl-14 does this internally.
+ // cl.exe /Be prints LIB.
+ //
+ // Should we actually bother? LIB is normally used for system
+ // libraries and it's highly unlikely we will see an explicit import
+ // for a library from one of those directories. Let's wait and see.
+ //
+ return dir_paths ();
+ }
+
+ // Inspect the file and determine if it is a static or an import library.
+ // Return otype::e if it is neither (which we quietly ignore).
+ //
+ static otype
+ library_type (const process_path& ld, const path& l)
+ {
+ // There are several reasonably reliable methods to tell whether it is a
+ // static or import library. One is lib.exe /LIST -- if there aren't any
+ // .obj members, then it is most likely an import library (it can also
+ // be an empty static library in which case there won't be any members).
+ // For an import library /LIST will print a bunch of .dll members.
+ //
+ // Another approach is dumpbin.exe (link.exe /DUMP) with /ARCHIVEMEMBERS
+ // (similar to /LIST) and /LINKERMEMBER (looking for __impl__ symbols or
+ // _IMPORT_DESCRIPTOR_).
+ //
+ // Note also, that apparently it is possible to have a hybrid library.
+ //
+ // While the lib.exe approach is probably the simplest, the problem is
+ // it will require us loading the bin.ar module even if we are not
+ // building any static libraries. On the other hand, if we are searching
+ // for libraries then we have bin.ld. So we will use the link.exe /DUMP
+ // /ARCHIVEMEMBERS.
+ //
+ const char* args[] = {ld.recall_string (),
+ "/DUMP", // Must come first.
+ "/NOLOGO",
+ "/ARCHIVEMEMBERS",
+ l.string ().c_str (),
+ nullptr};
+
+ if (verb >= 3)
+ print_process (args);
+
+ // Link.exe seems to always dump everything to stdout but just in case
+ // redirect stderr to stdout.
+ //
+ process pr (run_start (ld,
+ args,
+ 0 /* stdin */,
+ -1 /* stdout */,
+ false /* error */));
+
+ bool obj (false), dll (false);
+ string s;
+
+ try
+ {
+ ifdstream is (
+ move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+
+ while (getline (is, s))
+ {
+ // Detect the one error we should let through.
+ //
+ if (s.compare (0, 18, "unable to execute ") == 0)
+ break;
+
+ // The lines we are interested in seem to have this form (though
+ // presumably the "Archive member name at" part can be translated):
+ //
+ // Archive member name at 746: [...]hello.dll[/][ ]*
+ // Archive member name at 8C70: [...]hello.lib.obj[/][ ]*
+ //
+ size_t n (s.size ());
+
+ for (; n != 0 && s[n - 1] == ' '; --n) ; // Skip trailing spaces.
+
+ if (n >= 7) // At least ": X.obj" or ": X.dll".
+ {
+ --n;
+
+ if (s[n] == '/') // Skip trailing slash if one is there.
+ --n;
+
+ n -= 3; // Beginning of extension.
+
+ if (s[n] == '.')
+ {
+ // Make sure there is ": ".
+ //
+ size_t p (s.rfind (':', n - 1));
+
+ if (p != string::npos && s[p + 1] == ' ')
+ {
+ const char* e (s.c_str () + n + 1);
+
+ if (casecmp (e, "obj", 3) == 0)
+ obj = true;
+
+ if (casecmp (e, "dll", 3) == 0)
+ dll = true;
+ }
+ }
+ }
+ }
+ }
+ catch (const io_error&)
+ {
+ // Presumably the child process failed. Let run_finish() deal with
+ // that.
+ }
+
+ if (!run_finish (args, pr, false, s))
+ return otype::e;
+
+ if (obj && dll)
+ {
+ warn << l << " looks like hybrid static/import library, ignoring";
+ return otype::e;
+ }
+
+ if (!obj && !dll)
+ {
+ warn << l << " looks like empty static or import library, ignoring";
+ return otype::e;
+ }
+
+ return obj ? otype::a : otype::s;
+ }
+
+ template <typename T>
+ static T*
+ msvc_search_library (const process_path& ld,
+ const dir_path& d,
+ const prerequisite_key& p,
+ otype lt,
+ const char* pfx,
+ const char* sfx,
+ bool exist,
+ tracer& trace)
+ {
+ // Pretty similar logic to search_library().
+ //
+ assert (p.scope != nullptr);
+
+ const optional<string>& ext (p.tk.ext);
+ const string& name (*p.tk.name);
+
+ // Assemble the file path.
+ //
+ path f (d);
+
+ if (*pfx != '\0')
+ {
+ f /= pfx;
+ f += name;
+ }
+ else
+ f /= name;
+
+ if (*sfx != '\0')
+ f += sfx;
+
+ const string& e (!ext || p.is_a<lib> () // Only for liba/libs.
+ ? string ("lib")
+ : *ext);
+
+ if (!e.empty ())
+ {
+ f += '.';
+ f += e;
+ }
+
+ // Check if the file exists and is of the expected type.
+ //
+ timestamp mt (mtime (f));
+
+ if (mt != timestamp_nonexistent && library_type (ld, f) == lt)
+ {
+ // Enter the target.
+ //
+ T* t;
+ common::insert_library (p.scope->ctx, t, name, d, e, exist, trace);
+
+ t->mtime (mt);
+ t->path (move (f));
+
+ return t;
+ }
+
+ return nullptr;
+ }
+
+ liba* common::
+ msvc_search_static (const process_path& ld,
+ const dir_path& d,
+ const prerequisite_key& p,
+ bool exist) const
+ {
+ tracer trace (x, "msvc_search_static");
+
+ liba* r (nullptr);
+
+ auto search = [&r, &ld, &d, &p, exist, &trace] (
+ const char* pf, const char* sf) -> bool
+ {
+ r = msvc_search_library<liba> (
+ ld, d, p, otype::a, pf, sf, exist, trace);
+ return r != nullptr;
+ };
+
+ // Try:
+ // foo.lib
+ // libfoo.lib
+ // foolib.lib
+ // foo_static.lib
+ //
+ return
+ search ("", "") ||
+ search ("lib", "") ||
+ search ("", "lib") ||
+ search ("", "_static") ? r : nullptr;
+ }
+
+ libs* common::
+ msvc_search_shared (const process_path& ld,
+ const dir_path& d,
+ const prerequisite_key& pk,
+ bool exist) const
+ {
+ tracer trace (x, "msvc_search_shared");
+
+ assert (pk.scope != nullptr);
+
+ libs* s (nullptr);
+
+ auto search = [&s, &ld, &d, &pk, exist, &trace] (
+ const char* pf, const char* sf) -> bool
+ {
+ if (libi* i = msvc_search_library<libi> (
+ ld, d, pk, otype::s, pf, sf, exist, trace))
+ {
+ ulock l (
+ insert_library (
+ pk.scope->ctx, s, *pk.tk.name, d, nullopt, exist, trace));
+
+ if (!exist)
+ {
+ if (l.owns_lock ())
+ {
+ s->member = i; // We are first.
+ l.unlock ();
+ }
+ else
+ assert (find_adhoc_member<libi> (*s) == i);
+
+ // Presumably there is a DLL somewhere, we just don't know where.
+ //
+ s->mtime (i->mtime ());
+ s->path (path ());
+ }
+ }
+
+ return s != nullptr;
+ };
+
+ // Try:
+ // foo.lib
+ // libfoo.lib
+ // foodll.lib
+ //
+ return
+ search ("", "") ||
+ search ("lib", "") ||
+ search ("", "dll") ? s : nullptr;
+ }
+ }
+}
diff --git a/libbuild2/cc/parser+module.test.testscript b/libbuild2/cc/parser+module.test.testscript
new file mode 100644
index 0000000..d51ac0a
--- /dev/null
+++ b/libbuild2/cc/parser+module.test.testscript
@@ -0,0 +1,147 @@
+# file : libbuild2/cc/parser+module.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test C++ module constructs.
+#
+
+# NOTE: currently header unit imports don't produce anything.
+#
+
+: import
+:
+$* <<EOI >>EOI
+import foo;
+import foo.bar;
+import foo.bar.baz;
+EOI
+
+: import-header
+:
+$* <<EOI
+import "foo.h";
+import <sys/foo.h>;
+__import "/usr/include/stdio.h";
+EOI
+
+: module-implementation
+:
+$* <<EOI >>EOI
+module foo;
+EOI
+
+: module-interface
+:
+$* <<EOI >>EOI
+export module foo;
+EOI
+
+: export-imported
+:
+$* <<EOI >>EOO
+export import foo;
+export import "foo.h";
+export import <sys/foo.h>;
+EOI
+export import foo;
+EOO
+
+: non-module
+:
+$* <<EOI
+#pragma import module foo;
+#pragma export module foo;
+#pragma module foo;
+export namespace bar {int fox ();}
+EOI
+
+: attribute
+:
+$* <<EOI >>EOO
+import foo [[export({import})]];
+import "foo.h" [[export({import})]];
+module bar [[module({module})]];
+EOI
+import foo;
+module bar;
+EOO
+
+: import-duplicate
+:
+$* <<EOI >>EOO
+import foo;
+import bar.baz;
+import foo;
+import bar . baz;
+EOI
+import foo;
+import bar.baz;
+EOO
+
+: brace-missing
+:
+$* <<EOI 2>>EOE != 0
+export
+{
+ class foo
+ {
+ //};
+ module foo;
+}
+EOI
+stdin:8:1: error: {}-imbalance detected
+EOE
+
+: brace-stray
+:
+$* <<EOI 2>>EOE != 0
+export
+{
+ class foo
+ {
+ };}
+}
+module foo;
+EOI
+stdin:6:1: error: {}-imbalance detected
+EOE
+
+: import-missing-name
+:
+$* <<EOI 2>>EOE != 0
+import ;
+EOI
+stdin:1:8: error: module or header name expected instead of ';'
+EOE
+
+: module-missing-name
+:
+$* <<EOI 2>>EOE != 0
+module ;
+EOI
+stdin:1:1: error: module declaration expected after leading module marker
+EOE
+
+: import-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+import foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE
+
+: module-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+export module foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE
+
+: import-missing-header
+:
+$* <<EOI 2>>EOE != 0
+import <foo.h;
+EOI
+stdin:2:1: error: closing '>' expected after header name
+EOE
diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx
new file mode 100644
index 0000000..179043e
--- /dev/null
+++ b/libbuild2/cc/parser.cxx
@@ -0,0 +1,263 @@
+// file : libbuild2/cc/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/parser.hxx>
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ using type = token_type;
+
+ unit parser::
+ parse (ifdstream& is, const path& name)
+ {
+ lexer l (is, name);
+ l_ = &l;
+
+ unit u;
+ u_ = &u;
+
+ // If the source has errors then we want the compiler to issue the
+ // diagnostics. However, the errors could just as likely be because we
+ // are mis-parsing things. Initially, as a middle ground, we were going
+ // to issue warnings. But the problem with this approach is that they
+ // are easy to miss. So for now we fail. And it turns out we don't
+ // mis-parse much.
+ //
+ size_t bb (0); // {}-balance.
+
+ token t;
+ for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; )
+ {
+ // Break to stop, continue to continue, set n to false if the
+ // next token has already been extracted.
+ //
+ n = true;
+
+ switch (t.type)
+ {
+ case type::lcbrace:
+ {
+ ++bb;
+ continue;
+ }
+ case type::rcbrace:
+ {
+ if (bb-- == 0)
+ break; // Imbalance.
+
+ continue;
+ }
+ case type::identifier:
+ {
+ // Constructs we need to recognize:
+ //
+ // module ;
+ // [export] import <module-name> [<attributes>] ;
+ // [export] import <header-name> [<attributes>] ;
+ // [export] module <module-name> [<attributes>] ;
+ //
+ // Additionally, when include is translated to an import, it's
+ // normally replaced with the special __import keyword since it
+ // may appear in C context.
+ //
+ const string& id (t.value);
+
+ if (bb == 0)
+ {
+ if (id == "import" || id == "__import")
+ {
+ parse_import (t, false);
+ }
+ else if (id == "module")
+ {
+ parse_module (t, false);
+ }
+ else if (id == "export")
+ {
+ if (l_->next (t) == type::identifier)
+ {
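+ // Note: id is a reference to t.value and so at this point refers
+ // to the newly-extracted identifier.
+ //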
+ if (id == "module") parse_module (t, true);
+ else if (id == "import") parse_import (t, true);
+ else n = false; // Something else (e.g., export namespace).
+ }
+ else
+ n = false;
+ }
+ }
+ continue;
+ }
+ default: continue;
+ }
+
+ break;
+ }
+
+ if (bb != 0)
+ /*warn*/ fail (t) << "{}-imbalance detected";
+
+ if (module_marker_ && u.module_info.name.empty ())
+ fail (*module_marker_) << "module declaration expected after "
+ << "leading module marker";
+
+ checksum = l.checksum ();
+ return u;
+ }
+
+ void parser::
+ parse_import (token& t, bool ex)
+ {
+ // enter: import keyword
+ // leave: semi
+
+ string un;
+ unit_type ut;
+ switch (l_->next (t)) // Start of module/header name.
+ {
+ case type::less:
+ case type::string:
+ {
+ un = parse_header_name (t);
+ ut = unit_type::module_header;
+ break;
+ }
+ case type::identifier:
+ {
+ un = parse_module_name (t);
+ ut = unit_type::module_iface;
+ break;
+ }
+ default:
+ fail (t) << "module or header name expected instead of " << t << endf;
+ }
+
+ // Should be {}-balanced.
+ //
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ // For now we skip header units (see a comment on module type/info
+ // string serialization in compile rule for details). Note that
+ // currently parse_header_name() always returns an empty name.
+ //
+ if (ut == unit_type::module_header)
+ return;
+
+ // Ignore duplicates. We don't expect a large number of (direct)
+ // imports so vector/linear search is probably more efficient than a
+ // set.
+ //
+ auto& is (u_->module_info.imports);
+
+ auto i (find_if (is.begin (), is.end (),
+ [&un] (const module_import& i)
+ {
+ return i.name == un;
+ }));
+
+ if (i == is.end ())
+ is.push_back (module_import {ut, move (un), ex, 0});
+ else
+ i->exported = i->exported || ex;
+ }
+
+ void parser::
+ parse_module (token& t, bool ex)
+ {
+ // enter: module keyword
+ // leave: semi
+
+ location l (get_location (t));
+
+ l_->next (t);
+
+ // Handle the leading 'module;' marker (p0713).
+ //
+ // Note that we don't bother diagnosing invalid/duplicate markers
+ // leaving that to the compiler.
+ //
+ if (!ex && t.type == type::semi)
+ {
+ module_marker_ = move (l);
+ return;
+ }
+
+ // Otherwise it should be the start of the module name.
+ //
+ string n (parse_module_name (t));
+
+ // Should be {}-balanced.
+ //
+ for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+ if (t.type != type::semi)
+ fail (t) << "';' expected instead of " << t;
+
+ if (!u_->module_info.name.empty ())
+ fail (l) << "multiple module declarations";
+
+ u_->type = ex ? unit_type::module_iface : unit_type::module_impl;
+ u_->module_info.name = move (n);
+ }
+
+ string parser::
+ parse_module_name (token& t)
+ {
+ // enter: first token of module name
+ // leave: token after module name
+
+ string n;
+
+ // <identifier>[ . <identifier>]*
+ //
+ for (;; l_->next (t))
+ {
+ if (t.type != type::identifier)
+ fail (t) << "module name expected instead of " << t;
+
+ n += t.value;
+
+ if (l_->next (t) != type::dot)
+ break;
+
+ n += '.';
+ }
+
+ return n;
+ }
+
+ string parser::
+ parse_header_name (token& t)
+ {
+ // enter: first token of module name, either string or less
+ // leave: token after module name
+
+ string n;
+
+ // NOTE: actual name is a TODO if/when we need it.
+ //
+ if (t.type == type::string)
+ /*n = move (t.value)*/;
+ else
+ {
+ while (l_->next (t) != type::greater)
+ {
+ if (t.type == type::eos)
+ fail (t) << "closing '>' expected after header name" << endf;
+ }
+ }
+
+ l_->next (t);
+ return n;
+ }
+ }
+}
diff --git a/libbuild2/cc/parser.hxx b/libbuild2/cc/parser.hxx
new file mode 100644
index 0000000..324b62a
--- /dev/null
+++ b/libbuild2/cc/parser.hxx
@@ -0,0 +1,55 @@
+// file : libbuild2/cc/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_PARSER_HXX
+#define LIBBUILD2_CC_PARSER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Extract translation unit information from a preprocessed C/C++ source.
+ //
+ struct token;
+ class lexer;
+
+ class parser
+ {
+ public:
+ unit
+ parse (ifdstream&, const path& name);
+
+ private:
+ void
+ parse_import (token&, bool);
+
+ void
+ parse_module (token&, bool);
+
+ string
+ parse_module_name (token&);
+
+ string
+ parse_header_name (token&);
+
+ public:
+ string checksum; // Translation unit checksum.
+
+ private:
+ lexer* l_;
+ unit* u_;
+
+ optional<location> module_marker_;
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_PARSER_HXX
diff --git a/libbuild2/cc/parser.test.cxx b/libbuild2/cc/parser.test.cxx
new file mode 100644
index 0000000..82c68d1
--- /dev/null
+++ b/libbuild2/cc/parser.test.cxx
@@ -0,0 +1,67 @@
+// file : libbuild2/cc/parser.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/parser.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Usage: argv[0] [<file>]
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ try
+ {
+ const char* file;
+
+ ifdstream is;
+ if (argc > 1)
+ {
+ file = argv[1];
+ is.open (file);
+ }
+ else
+ {
+ file = "stdin";
+ is.open (fddup (stdin_fd ()));
+ }
+
+ parser p;
+ unit u (p.parse (is, path (file)));
+ unit_type ut (u.type);
+
+ for (const module_import& m: u.module_info.imports)
+ cout << (m.exported ? "export " : "")
+ << "import " << m.name << ';' << endl;
+
+ if (ut == unit_type::module_iface || ut == unit_type::module_impl)
+ cout << (ut == unit_type::module_iface ? "export " : "")
+ << "module " << u.module_info.name << ';' << endl;
+ }
+ catch (const failed&)
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::cc::main (argc, argv);
+}
diff --git a/libbuild2/cc/pkgconfig.cxx b/libbuild2/cc/pkgconfig.cxx
new file mode 100644
index 0000000..0669b02
--- /dev/null
+++ b/libbuild2/cc/pkgconfig.cxx
@@ -0,0 +1,1550 @@
+// file : libbuild2/cc/pkgconfig.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+// In order not to complicate the bootstrap procedure with libpkgconf
+// building, we exclude functionality that involves reading .pc files.
+//
+#ifndef BUILD2_BOOTSTRAP
+# include <libpkgconf/libpkgconf.h>
+#endif
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/install/utility.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/target.hxx> // pc
+#include <libbuild2/cc/utility.hxx>
+
+#include <libbuild2/cc/common.hxx>
+#include <libbuild2/cc/compile-rule.hxx>
+#include <libbuild2/cc/link-rule.hxx>
+
+#ifndef BUILD2_BOOTSTRAP
+
+// Note that the libpkgconf library doesn't provide the version macro that we
+// could use to compile the code conditionally against different API versions.
+// Thus, we need to sense the pkgconf_client_new() function signature
+// ourselves to call it properly.
+//
+namespace details
+{
+ void*
+ pkgconf_cross_personality_default (); // Never called.
+}
+
+using namespace details;
+
+template <typename H>
+static inline pkgconf_client_t*
+call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*),
+ H error_handler,
+ void* error_handler_data)
+{
+ return f (error_handler, error_handler_data);
+}
+
+template <typename H, typename P>
+static inline pkgconf_client_t*
+call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*, P),
+ H error_handler,
+ void* error_handler_data)
+{
+ return f (error_handler,
+ error_handler_data,
+ ::pkgconf_cross_personality_default ());
+}
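+
+// For example, with an older libpkgconf where pkgconf_client_new() takes
+// just the error handler and its data, template argument deduction selects
+// the first overload above; with a newer one that also takes a personality
+// argument, the second overload is selected and the default cross
+// personality is passed.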
+
+#endif
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+#ifndef BUILD2_BOOTSTRAP
+
+ // Load package information from a .pc file. Filter out the -I/-L options
+ // that refer to system directories.
+ //
+ // Note that the prerequisite package .pc files search order is as follows:
+ //
+ // - in directory of the specified file
+ // - in pc_dirs directories (in the natural order)
+ //
+ class pkgconf
+ {
+ public:
+ using path_type = build2::path;
+
+ path_type path;
+
+ public:
+ explicit
+ pkgconf (path_type,
+ const dir_paths& pc_dirs,
+ const dir_paths& sys_lib_dirs,
+ const dir_paths& sys_inc_dirs);
+
+ // Create a special empty object. Querying package information on such
+ // an object is illegal.
+ //
+ pkgconf () = default;
+
+ ~pkgconf ();
+
+ // Movable-only type.
+ //
+ pkgconf (pkgconf&& p)
+ : path (move (p.path)),
+ client_ (p.client_),
+ pkg_ (p.pkg_)
+ {
+ p.client_ = nullptr;
+ p.pkg_ = nullptr;
+ }
+
+ pkgconf&
+ operator= (pkgconf&& p)
+ {
+ if (this != &p)
+ {
+ this->~pkgconf ();
+ new (this) pkgconf (move (p)); // Assume noexcept move-construction.
+ }
+ return *this;
+ }
+
+ pkgconf (const pkgconf&) = delete;
+ pkgconf& operator= (const pkgconf&) = delete;
+
+ strings
+ cflags (bool stat) const;
+
+ strings
+ libs (bool stat) const;
+
+ string
+ variable (const char*) const;
+
+ string
+ variable (const string& s) const {return variable (s.c_str ());}
+
+ private:
+ // Keep them as raw pointers so as not to deal with the API
+ // thread-unsafety in deleters and to avoid introducing additional
+ // mutex locks.
+ //
+ pkgconf_client_t* client_ = nullptr;
+ pkgconf_pkg_t* pkg_ = nullptr;
+ };
+
+ // Currently the library is not thread-safe, even on the pkgconf_client_t
+ // level (see issue #128 for details).
+ //
+ // @@ An update: it seems that the obvious thread-safety issues have been
+ //    fixed. However, let's keep the mutex locking for now so as not to
+ //    introduce potential issues before we make sure there are no others.
+ //
+ static mutex pkgconf_mutex;
+
+ // The package dependency traversal depth limit.
+ //
+ static const int pkgconf_max_depth = 100;
+
+ // Normally the error_handler() callback can be called multiple times to
+ // report a single error (once per message line), to produce a multi-line
+ // message like this:
+ //
+ // Package foo was not found in the pkg-config search path.\n
+ // Perhaps you should add the directory containing `foo.pc'\n
+ // to the PKG_CONFIG_PATH environment variable\n
+ // Package 'foo', required by 'bar', not found\n
+ //
+ // For the above example the callback will be called 4 times. To suppress
+ // all the junk we will use PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS to get just:
+ //
+ // Package 'foo', required by 'bar', not found\n
+ //
+ static const int pkgconf_flags = PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS;
+
+ static bool
+ pkgconf_error_handler (const char* msg, const pkgconf_client_t*, const void*)
+ {
+ error << runtime_error (msg); // Sanitize the message.
+ return true;
+ }
+
+ // Deleters. Note that they are thread-safe.
+ //
+ struct fragments_deleter
+ {
+ void operator() (pkgconf_list_t* f) const {pkgconf_fragment_free (f);}
+ };
+
+ // Convert fragments to strings. Skip the -I/-L options that refer to system
+ // directories.
+ //
+ static strings
+ to_strings (const pkgconf_list_t& frags,
+ char type,
+ const pkgconf_list_t& sysdirs)
+ {
+ assert (type == 'I' || type == 'L');
+
+ strings r;
+
+ auto add = [&r] (const pkgconf_fragment_t* frag)
+ {
+ string s;
+ if (frag->type != '\0')
+ {
+ s += '-';
+ s += frag->type;
+ }
+
+ s += frag->data;
+ r.push_back (move (s));
+ };
+
+ // Option that is separated from its value, for example:
+ //
+ // -I /usr/lib
+ //
+ const pkgconf_fragment_t* opt (nullptr);
+
+ pkgconf_node_t* node;
+ PKGCONF_FOREACH_LIST_ENTRY (frags.head, node)
+ {
+ auto frag (static_cast<const pkgconf_fragment_t*> (node->data));
+
+ // Add the separated option and directory, unless the latter is a system
+ // one.
+ //
+ if (opt != nullptr)
+ {
+ // Note that we should restore the directory path that was
+ // (mis)interpreted as an option, for example:
+ //
+ // -I -Ifoo
+ //
+ // In the above example option '-I' is followed by directory '-Ifoo',
+ // which is represented by libpkgconf library as fragment 'foo' with
+ // type 'I'.
+ //
+ if (!pkgconf_path_match_list (
+ frag->type == '\0'
+ ? frag->data
+ : (string ({'-', frag->type}) + frag->data).c_str (),
+ &sysdirs))
+ {
+ add (opt);
+ add (frag);
+ }
+
+ opt = nullptr;
+ continue;
+ }
+
+ // Skip the -I/-L option if it refers to a system directory.
+ //
+ if (frag->type == type)
+ {
+ // The option is separated from its value, which will (presumably) follow.
+ //
+ if (*frag->data == '\0')
+ {
+ opt = frag;
+ continue;
+ }
+
+ if (pkgconf_path_match_list (frag->data, &sysdirs))
+ continue;
+ }
+
+ add (frag);
+ }
+
+ if (opt != nullptr) // Add the dangling option.
+ add (opt);
+
+ return r;
+ }
+
+ // Note that some libpkgconf functions can potentially return NULL,
+ // failing to allocate the required memory block. However, we will not
+ // check the returned values for NULL since the library itself doesn't do
+ // so prior to filling the allocated structures, which would make such a
+ // complication on our side useless. Also, for some functions the NULL
+ // result has a special semantics, for example "not found".
+ //
+ pkgconf::
+ pkgconf (path_type p,
+ const dir_paths& pc_dirs,
+ const dir_paths& sys_lib_dirs,
+ const dir_paths& sys_inc_dirs)
+ : path (move (p))
+ {
+ auto add_dirs = [] (pkgconf_list_t& dir_list,
+ const dir_paths& dirs,
+ bool suppress_dups,
+ bool cleanup = false)
+ {
+ if (cleanup)
+ {
+ pkgconf_path_free (&dir_list);
+ dir_list = PKGCONF_LIST_INITIALIZER;
+ }
+
+ for (const auto& d: dirs)
+ pkgconf_path_add (d.string ().c_str (), &dir_list, suppress_dups);
+ };
+
+ mlock l (pkgconf_mutex);
+
+ // Initialize the client handle.
+ //
+ unique_ptr<pkgconf_client_t, void (*) (pkgconf_client_t*)> c (
+ call_pkgconf_client_new (&pkgconf_client_new,
+ pkgconf_error_handler,
+ nullptr /* handler_data */),
+ [] (pkgconf_client_t* c) {pkgconf_client_free (c);});
+
+ pkgconf_client_set_flags (c.get (), pkgconf_flags);
+
+ // Note that the system header and library directory lists are
+ // automatically pre-filled by the pkgconf_client_new() call (see above).
+ // We will re-create these lists from scratch.
+ //
+ add_dirs (c->filter_libdirs,
+ sys_lib_dirs,
+ false /* suppress_dups */,
+ true /* cleanup */);
+
+ add_dirs (c->filter_includedirs,
+ sys_inc_dirs,
+ false /* suppress_dups */,
+ true /* cleanup */);
+
+ // Note that the loaded file directory is added to the (yet empty) search
+ // list. Also note that loading of the prerequisite packages is delayed
+ // until flags retrieval, and their file directories are not added to the
+ // search list.
+ //
+ pkg_ = pkgconf_pkg_find (c.get (), path.string ().c_str ());
+
+ if (pkg_ == nullptr)
+ fail << "package '" << path << "' not found or invalid";
+
+ // Add the .pc file search directories.
+ //
+ assert (c->dir_list.length == 1); // Package file directory (see above).
+ add_dirs (c->dir_list, pc_dirs, true /* suppress_dups */);
+
+ client_ = c.release ();
+ }
+
+ pkgconf::
+ ~pkgconf ()
+ {
+ if (client_ != nullptr) // Not empty.
+ {
+ assert (pkg_ != nullptr);
+
+ mlock l (pkgconf_mutex);
+ pkgconf_pkg_unref (client_, pkg_);
+ pkgconf_client_free (client_);
+ }
+ }
+
+ strings pkgconf::
+ cflags (bool stat) const
+ {
+ assert (client_ != nullptr); // Must not be empty.
+
+ mlock l (pkgconf_mutex);
+
+ pkgconf_client_set_flags (
+ client_,
+ pkgconf_flags |
+
+ // Walk through the private package dependencies (Requires.private)
+ // besides the public ones while collecting the flags. Note that we do
+ // this for both static and shared linking.
+ //
+ PKGCONF_PKG_PKGF_SEARCH_PRIVATE |
+
+ // Collect flags from Cflags.private besides those from Cflags for the
+ // static linking.
+ //
+ (stat
+ ? PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS
+ : 0));
+
+ pkgconf_list_t f = PKGCONF_LIST_INITIALIZER; // Aggregate initialization.
+ int e (pkgconf_pkg_cflags (client_, pkg_, &f, pkgconf_max_depth));
+
+ if (e != PKGCONF_PKG_ERRF_OK)
+ throw failed (); // Assume the diagnostics is issued.
+
+ unique_ptr<pkgconf_list_t, fragments_deleter> fd (&f); // Auto-deleter.
+ return to_strings (f, 'I', client_->filter_includedirs);
+ }
+
+ strings pkgconf::
+ libs (bool stat) const
+ {
+ assert (client_ != nullptr); // Must not be empty.
+
+ mlock l (pkgconf_mutex);
+
+ pkgconf_client_set_flags (
+ client_,
+ pkgconf_flags |
+
+ // Additionally collect flags from the private dependency packages
+ // (see above) and from the Libs.private value for the static linking.
+ //
+ (stat
+ ? PKGCONF_PKG_PKGF_SEARCH_PRIVATE |
+ PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS
+ : 0));
+
+ pkgconf_list_t f = PKGCONF_LIST_INITIALIZER; // Aggregate initialization.
+ int e (pkgconf_pkg_libs (client_, pkg_, &f, pkgconf_max_depth));
+
+ if (e != PKGCONF_PKG_ERRF_OK)
+ throw failed (); // Assume the diagnostics is issued.
+
+ unique_ptr<pkgconf_list_t, fragments_deleter> fd (&f); // Auto-deleter.
+ return to_strings (f, 'L', client_->filter_libdirs);
+ }
+
+ string pkgconf::
+ variable (const char* name) const
+ {
+ assert (client_ != nullptr); // Must not be empty.
+
+ mlock l (pkgconf_mutex);
+ const char* r (pkgconf_tuple_find (client_, &pkg_->vars, name));
+ return r != nullptr ? string (r) : string ();
+ }
+
+#endif
+
+ namespace cc
+ {
+ using namespace bin;
+
+ // In pkg-config backslashes, spaces, etc. are escaped with a backslash.
+ //
+ static string
+ escape (const string& s)
+ {
+ string r;
+
+ for (size_t p (0);;)
+ {
+ size_t sp (s.find_first_of ("\\ ", p));
+
+ if (sp != string::npos)
+ {
+ r.append (s, p, sp - p);
+ r += '\\';
+ r += s[sp];
+ p = sp + 1;
+ }
+ else
+ {
+ r.append (s, p, sp);
+ break;
+ }
+ }
+
+ return r;
+ }
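+
+ // For example, the string
+ //
+ //   C:\Program Files\foo
+ //
+ // is escaped as
+ //
+ //   C:\\Program\ Files\\foo
+ //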
+
+ // Try to find a .pc file in the pkgconfig/ subdirectory of libd, trying
+ // several names derived from stem. If not found, return false. If found,
+ // load poptions, loptions, libs, and modules, set the corresponding
+ // *.export.* variables, add prerequisites to the targets, and return true.
+ // Note that we assume the targets are locked so that all of this is
+ // MT-safe.
+ //
+ // System library search paths (those extracted from the compiler) are
+ // passed in top_sysd while the user-provided (via -L) in top_usrd.
+ //
+ // Note that scope and link order should be "top-level" from the
+ // search_library() POV.
+ //
+ // Also note that the bootstrapped version of build2 will not search for
+ // .pc files, always returning false (see above for the reasoning).
+ //
+#ifndef BUILD2_BOOTSTRAP
+
+ // Iterate over pkgconf directories that correspond to the specified
+ // library directory, passing them to the callback function for as long as
+ // it returns false (not found). Return true if the callback returned
+ // true.
+ //
+ bool common::
+ pkgconfig_search (const dir_path& d, const pkgconfig_callback& f) const
+ {
+ dir_path pd (d);
+
+ // First always check the pkgconfig/ subdirectory in this library
+ // directory. Even on platforms where this is not the canonical place,
+ // .pc files of autotools-based packages installed by the user often
+ // still end up there.
+ //
+ if (exists (pd /= "pkgconfig") && f (move (pd)))
+ return true;
+
+ // Platform-specific locations.
+ //
+ if (tsys == "freebsd")
+ {
+ // On FreeBSD .pc files go to libdata/pkgconfig/, not lib/pkgconfig/.
+ //
+ (((pd = d) /= "..") /= "libdata") /= "pkgconfig";
+
+ if (exists (pd) && f (move (pd)))
+ return true;
+ }
+
+ return false;
+ }
+
+ // Search for the .pc files in the pkgconf directories that correspond to
+ // the specified library directory. If found, return static (first) and
+ // shared (second) library .pc files. If common is false, then only
+ // consider our .static/.shared files.
+ //
+ pair<path, path> common::
+ pkgconfig_search (const dir_path& libd,
+ const optional<project_name>& proj,
+ const string& stem,
+ bool common) const
+ {
+ // When it comes to looking for .pc files we have to decide where to
+ // search (which directory(ies)) as well as what to search for (which
+ // names). Suffix is our ".shared" or ".static" extension.
+ //
+ auto search_dir = [&proj, &stem] (const dir_path& dir,
+ const string& sfx) -> path
+ {
+ path f;
+
+ // See if there is a corresponding .pc file. About half of them are
+ // called foo.pc and half libfoo.pc (and one of the pkg-config's
+ // authors suggests that some of you should call yours foolib.pc, just
+ // to keep things interesting, you know).
+ //
+ // Given the (general) import in the form <proj>%lib{<stem>}, we will
+ // first try lib<stem>.pc, then <stem>.pc. Maybe it also makes sense
+ // to try <proj>.pc, just in case. Though, according to pkg-config
+ // docs, the .pc file should correspond to a library, not project. But
+ // then you get something like zlib which calls it zlib.pc. So let's
+ // just do it.
+ //
+ f = dir;
+ f /= "lib";
+ f += stem;
+ f += sfx;
+ f += ".pc";
+ if (exists (f))
+ return f;
+
+ f = dir;
+ f /= stem;
+ f += sfx;
+ f += ".pc";
+ if (exists (f))
+ return f;
+
+ if (proj)
+ {
+ f = dir;
+ f /= proj->string ();
+ f += sfx;
+ f += ".pc";
+ if (exists (f))
+ return f;
+ }
+
+ return path ();
+ };
+
+ // Return true (and so stop the iteration) if a .pc file is found.
+ //
+ // Note that we rely on the "small function object" optimization here.
+ //
+ struct data
+ {
+ path a;
+ path s;
+ bool common;
+ } d {path (), path (), common};
+
+ auto check = [&d, &search_dir] (dir_path&& p) -> bool
+ {
+ // First look for static/shared-specific files.
+ //
+ d.a = search_dir (p, ".static");
+ d.s = search_dir (p, ".shared");
+
+ if (!d.a.empty () || !d.s.empty ())
+ return true;
+
+ // Then the common.
+ //
+ if (d.common)
+ d.a = d.s = search_dir (p, "");
+
+ return !d.a.empty ();
+ };
+
+ pair<path, path> r;
+
+ if (pkgconfig_search (libd, check))
+ {
+ r.first = move (d.a);
+ r.second = move (d.s);
+ }
+
+ return r;
+ }
+
+ bool common::
+ pkgconfig_load (action a,
+ const scope& s,
+ lib& lt,
+ liba* at,
+ libs* st,
+ const optional<project_name>& proj,
+ const string& stem,
+ const dir_path& libd,
+ const dir_paths& top_sysd,
+ const dir_paths& top_usrd) const
+ {
+ assert (at != nullptr || st != nullptr);
+
+ pair<path, path> p (
+ pkgconfig_search (libd, proj, stem, true /* common */));
+
+ if (p.first.empty () && p.second.empty ())
+ return false;
+
+ pkgconfig_load (a, s, lt, at, st, p, libd, top_sysd, top_usrd);
+ return true;
+ }
+
+ void common::
+ pkgconfig_load (action a,
+ const scope& s,
+ lib& lt,
+ liba* at,
+ libs* st,
+ const pair<path, path>& paths,
+ const dir_path& libd,
+ const dir_paths& top_sysd,
+ const dir_paths& top_usrd) const
+ {
+ tracer trace (x, "pkgconfig_load");
+
+ assert (at != nullptr || st != nullptr);
+
+ const path& ap (paths.first);
+ const path& sp (paths.second);
+
+ assert (!ap.empty () || !sp.empty ());
+
+ // Extract --cflags and set them as lib?{}:export.poptions. Note that we
+ // still pass --static in case this is pkgconf which has Cflags.private.
+ //
+ auto parse_cflags = [&trace, this] (target& t,
+ const pkgconf& pc,
+ bool la)
+ {
+ strings pops;
+
+ bool arg (false);
+ for (auto& o: pc.cflags (la))
+ {
+ if (arg)
+ {
+ // Can only be an argument for -I, -D, -U options.
+ //
+ pops.push_back (move (o));
+ arg = false;
+ continue;
+ }
+
+ size_t n (o.size ());
+
+ // We only keep -I, -D and -U.
+ //
+ if (n >= 2 &&
+ o[0] == '-' &&
+ (o[1] == 'I' || o[1] == 'D' || o[1] == 'U'))
+ {
+ pops.push_back (move (o));
+ arg = (n == 2);
+ continue;
+ }
+
+ l4 ([&]{trace << "ignoring " << pc.path << " --cflags option "
+ << o;});
+ }
+
+ if (arg)
+ fail << "argument expected after " << pops.back () <<
+ info << "while parsing pkg-config --cflags " << pc.path;
+
+ if (!pops.empty ())
+ {
+ auto p (t.vars.insert (c_export_poptions));
+
+ // The only way we could already have this value is if this same
+ // library was also imported as a project (as opposed to installed).
+ // Unlikely but possible. In this case the values were set by the
+ // export stub and we shouldn't touch them.
+ //
+ if (p.second)
+ p.first.get () = move (pops);
+ }
+ };
+
+ // Parse --libs into loptions/libs (interface and implementation). If
+ // ps is not NULL, add each resolves library target as a prerequisite.
+ //
+ auto parse_libs = [a, &s, top_sysd, this] (target& t,
+ bool binless,
+ const pkgconf& pc,
+ bool la,
+ prerequisites* ps)
+ {
+ strings lops;
+ vector<name> libs;
+
+ // Normally we will have zero or more -L's followed by one or more
+ // -l's, with the first one being the library itself, unless the
+ // library is binless. But sometimes we may have other linker options,
+ // for example, -Wl,... or -pthread. It's probably a bad idea to
+ // ignore them. Also, theoretically, we could have just the library
+ // name/path.
+ //
+ // The tricky part, of course, is to know whether what follows after
+ // an option we don't recognize is its argument or another option or
+ // library. What we do at the moment is stop recognizing just library
+ // names (without -l) after seeing an unknown option.
+ //
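+ // For example (a hypothetical libfoo), a --libs query might produce
+ //
+ //   -L/usr/local/lib -lfoo -lm
+ //
+ // where -lfoo is the library itself and -lm is an interface dependency
+ // that we will leave as -l (see below).
+ //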
+ bool arg (false), first (true), known (true), have_L (false);
+ for (auto& o: pc.libs (la))
+ {
+ if (arg)
+ {
+ // Can only be an argument for an loption.
+ //
+ lops.push_back (move (o));
+ arg = false;
+ continue;
+ }
+
+ size_t n (o.size ());
+
+ // See if this is -L.
+ //
+ if (n >= 2 && o[0] == '-' && o[1] == 'L')
+ {
+ have_L = true;
+ lops.push_back (move (o));
+ arg = (n == 2);
+ continue;
+ }
+
+ // See if that's -l or just the library name/path.
+ //
+ if ((known && o[0] != '-') ||
+ (n > 2 && o[0] == '-' && o[1] == 'l'))
+ {
+ // Unless binless, the first one is the library itself, which we
+ // skip. Note that we don't verify this and theoretically it could
+ // be some other library, but we haven't encountered such a beast
+ // yet.
+ //
+ if (first)
+ {
+ first = false;
+
+ if (!binless)
+ continue;
+ }
+
+ // @@ If for some reason this is the library itself (doesn't go
+ // first or libpkgconf parsed libs in some bizarre way) we will
+ // hang trying to lock its target inside search_library() (or
+ // fail an assertion if run serially) as by now it is already
+ // locked. To be safe we probably shouldn't rely on the position
+ // and filter out all occurrences of the library itself (by
+ // name?) and complain if none were encountered.
+ //
+ libs.push_back (name (move (o)));
+ continue;
+ }
+
+ // Otherwise we assume it is some other loption.
+ //
+ known = false;
+ lops.push_back (move (o));
+ }
+
+ if (arg)
+ fail << "argument expected after " << lops.back () <<
+ info << "while parsing pkg-config --libs " << pc.path;
+
+ // Space-separated list of escaped library flags.
+ //
+ auto lflags = [&pc, la] () -> string
+ {
+ string r;
+ for (const auto& o: pc.libs (la))
+ {
+ if (!r.empty ())
+ r += ' ';
+ r += escape (o);
+ }
+ return r;
+ };
+
+ if (first && !binless)
+ fail << "library expected in '" << lflags () << "'" <<
+ info << "while parsing pkg-config --libs " << pc.path;
+
+ // Resolve -lfoo into the library file path using our import installed
+ // machinery (i.e., we are going to call search_library() that will
+ // probably call us again, and so on).
+ //
+ // The reason we do it is the link order. For general libraries it
+ // shouldn't matter if we imported them via an export stub, direct
+ // import installed, or via a .pc file (which we could have generated
+ // from the export stub). The exception is "runtime libraries" (which
+ // are really the extension of libc) such as -lm, -ldl, -lpthread,
+ // etc. Those we will detect and leave as -l*.
+ //
+ // If we managed to resolve all the -l's (sans runtime), then we can
+ // omit -L's for nice and tidy command line.
+ //
+ bool all (true);
+ optional<dir_paths> usrd; // Populate lazily.
+
+ for (name& n: libs)
+ {
+ string& l (n.value);
+
+ // These ones are common/standard/POSIX.
+ //
+ if (l[0] != '-' || // e.g., shell32.lib
+ l == "-lm" ||
+ l == "-ldl" ||
+ l == "-lrt" ||
+ l == "-lpthread")
+ continue;
+
+ // Note: these lists are most likely incomplete.
+ //
+ if (tclass == "linux")
+ {
+ // Some extras from libc (see libc6-dev) and other places.
+ //
+ if (l == "-lanl" ||
+ l == "-lcrypt" ||
+ l == "-lnsl" ||
+ l == "-lresolv" ||
+ l == "-lgcc")
+ continue;
+ }
+ else if (tclass == "macos")
+ {
+ if (l == "-lSystem")
+ continue;
+ }
+
+ // Prepare user search paths by entering the -L paths from the .pc
+ // file.
+ //
+ if (have_L && !usrd)
+ {
+ usrd = dir_paths ();
+
+ for (auto i (lops.begin ()); i != lops.end (); ++i)
+ {
+ const string& o (*i);
+
+ if (o.size () >= 2 && o[0] == '-' && o[1] == 'L')
+ {
+ string p;
+
+ if (o.size () == 2)
+ p = *++i; // We've verified it's there.
+ else
+ p = string (o, 2);
+
+ dir_path d (move (p));
+
+ if (d.relative ())
+ fail << "relative -L directory in '" << lflags () << "'" <<
+ info << "while parsing pkg-config --libs " << pc.path;
+
+ usrd->push_back (move (d));
+ }
+ }
+ }
+
+ // @@ OUT: for now we assume out is undetermined, just like in
+ // resolve_library().
+ //
+ dir_path out;
+ string name (l, 2); // Sans -l.
+
+ prerequisite_key pk {
+ nullopt, {&lib::static_type, &out, &out, &name, nullopt}, &s};
+
+ if (const target* lt = search_library (a, top_sysd, usrd, pk))
+ {
+ // We used to pick a member but that doesn't seem right since the
+ // same target could be used with different link orders.
+ //
+ n.dir = lt->dir;
+ n.type = lib::static_type.name;
+ n.value = lt->name;
+
+ if (ps != nullptr)
+ ps->push_back (prerequisite (*lt));
+ }
+ else
+ // If we couldn't find the library, then leave it as -l.
+ //
+ all = false;
+ }
+
+ // If all the -l's resolved and there were no other options, then drop
+ // all the -L's. If we have unknown options, then leave them in to be
+ // safe.
+ //
+ if (all && known)
+ lops.clear ();
+
+ if (!lops.empty ())
+ {
+ if (cclass == compiler_class::msvc)
+ {
+ // Translate -L to /LIBPATH.
+ //
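+ // For example, both -L/opt/lib and the separated form -L /opt/lib
+ // become /LIBPATH:/opt/lib.
+ //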
+ for (auto i (lops.begin ()); i != lops.end (); )
+ {
+ string& o (*i);
+ size_t n (o.size ());
+
+ if (n >= 2 && o[0] == '-' && o[1] == 'L')
+ {
+ o.replace (0, 2, "/LIBPATH:");
+
+ if (n == 2)
+ {
+ o += *++i; // We've verified it's there.
+ i = lops.erase (i);
+ continue;
+ }
+ }
+
+ ++i;
+ }
+ }
+
+ auto p (t.vars.insert (c_export_loptions));
+
+ if (p.second)
+ p.first.get () = move (lops);
+ }
+
+ // Set even if empty (export override).
+ //
+ {
+ auto p (t.vars.insert (c_export_libs));
+
+ if (p.second)
+ p.first.get () = move (libs);
+ }
+ };
+
+ // On Windows pkg-config will escape backslashes in paths. In fact, it
+ // may escape things even on non-Windows platforms, for example,
+ // spaces. So we use a slightly modified version of next_word().
+ //
+ auto next = [] (const string& s, size_t& b, size_t& e) -> string
+ {
+ string r;
+ size_t n (s.size ());
+
+ if (b != e)
+ b = e;
+
+ // Skip leading delimiters.
+ //
+ for (; b != n && s[b] == ' '; ++b) ;
+
+ if (b == n)
+ {
+ e = n;
+ return r;
+ }
+
+ // Find first trailing delimiter while taking care of escapes.
+ //
+ r = s[b];
+ for (e = b + 1; e != n && s[e] != ' '; ++e)
+ {
+ if (s[e] == '\\')
+ {
+ if (++e == n)
+ fail << "dangling escape in pkg-config output '" << s << "'";
+ }
+
+ r += s[e];
+ }
+
+ return r;
+ };
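+
+ // For example, the string
+ //
+ //   foo\ bar baz
+ //
+ // is split into the two words "foo bar" and "baz".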
+
+ // Parse modules and add them to the prerequisites.
+ //
+ auto parse_modules = [&trace, &next, &s, this]
+ (const pkgconf& pc, prerequisites& ps)
+ {
+ string mstr (pc.variable ("cxx_modules"));
+
+ string m;
+ for (size_t b (0), e (0); !(m = next (mstr, b, e)).empty (); )
+ {
+ // The format is <name>=<path>.
+ //
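+ // For example (a hypothetical module foo.core):
+ //
+ //   foo.core=/usr/include/foo/foo-core.mxx
+ //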
+ size_t p (m.find ('='));
+ if (p == string::npos ||
+ p == 0 || // Empty name.
+ p == m.size () - 1) // Empty path.
+ fail << "invalid module information in '" << mstr << "'" <<
+ info << "while parsing pkg-config --variable=cxx_modules "
+ << pc.path;
+
+ string mn (m, 0, p);
+ path mp (m, p + 1, string::npos);
+ path mf (mp.leaf ());
+
+ // Extract module properties, if any.
+ //
+ string pp (pc.variable ("cxx_module_preprocessed." + mn));
+ string se (pc.variable ("cxx_module_symexport." + mn));
+
+ // For now there are only C++ modules.
+ //
+ auto tl (
+ s.ctx.targets.insert_locked (
+ *x_mod,
+ mp.directory (),
+ dir_path (),
+ mf.base ().string (),
+ mf.extension (),
+ true, // Implied.
+ trace));
+
+ target& mt (tl.first);
+
+ // If the target already exists, then setting its variables is not
+ // MT-safe. So currently we only do it if we have the lock (and thus
+ // nobody can see this target yet) assuming that this has already
+ // been done otherwise.
+ //
+ // @@ This is not quite correct, though: this target could already
+ // exist but for a "different purpose" (e.g., it could be used as
+ // a header).
+ //
+ // @@ Could setting it in the rule-specific vars help? (But we
+ // are not matching a rule for it.) Note that we are setting
+ // it on the module source, not bmi*{}! So rule-specific vars
+ // don't seem to be the answer here.
+ //
+ if (tl.second.owns_lock ())
+ {
+ mt.vars.assign (c_module_name) = move (mn);
+
+ // Set module properties. Note that if unspecified we should still
+ // set them to their default values since the hosting project may
+ // have them set to an incompatible value.
+ //
+ {
+ value& v (mt.vars.assign (x_preprocessed)); // NULL
+ if (!pp.empty ()) v = move (pp);
+ }
+
+ {
+ mt.vars.assign (x_symexport) = (se == "true");
+ }
+
+ tl.second.unlock ();
+ }
+
+ ps.push_back (prerequisite (mt));
+ }
+ };
+
+ // For now we only populate prerequisites for lib{}. To do it for
+ // liba{} would require weeding out duplicates that are already in
+ // lib{}.
+ //
+ prerequisites prs;
+
+ pkgconf apc;
+ pkgconf spc;
+
+ // Create the .pc files search directory list.
+ //
+ dir_paths pc_dirs;
+
+ // Note that we rely on the "small function object" optimization here.
+ //
+ auto add_pc_dir = [&pc_dirs] (dir_path&& d) -> bool
+ {
+ pc_dirs.emplace_back (move (d));
+ return false;
+ };
+
+ pkgconfig_search (libd, add_pc_dir);
+ for (const dir_path& d: top_usrd) pkgconfig_search (d, add_pc_dir);
+ for (const dir_path& d: top_sysd) pkgconfig_search (d, add_pc_dir);
+
+ bool pa (at != nullptr && !ap.empty ());
+ if (pa || sp.empty ())
+ apc = pkgconf (ap, pc_dirs, sys_lib_dirs, sys_inc_dirs);
+
+ bool ps (st != nullptr && !sp.empty ());
+ if (ps || ap.empty ())
+ spc = pkgconf (sp, pc_dirs, sys_lib_dirs, sys_inc_dirs);
+
+ // Sort out the interface dependencies (which we are setting on lib{}).
+ // If we have the shared .pc variant, then we use that. Otherwise --
+ // static but extract without the --static option (see also the saving
+ // logic).
+ //
+ pkgconf& ipc (ps ? spc : apc); // Interface package info.
+
+ parse_libs (
+ lt,
+ (ps ? st->mtime () : at->mtime ()) == timestamp_unreal /* binless */,
+ ipc,
+ false,
+ &prs);
+
+ if (pa)
+ {
+ parse_cflags (*at, apc, true);
+ parse_libs (*at, at->path ().empty (), apc, true, nullptr);
+ }
+
+ if (ps)
+ parse_cflags (*st, spc, false);
+
+ // For now we assume static and shared variants export the same set of
+ // modules. While technically possible, having a different set will
+ // most likely lead to all sorts of trouble (at least for installed
+ // libraries) and life is short.
+ //
+ if (modules)
+ parse_modules (ipc, prs);
+
+ assert (!lt.has_prerequisites ());
+ if (!prs.empty ())
+ lt.prerequisites (move (prs));
+
+ // Bless the library group with a "trust me it exists" timestamp.
+ // Failing that, if we add it as a prerequisite (like we do above),
+ // the fallback file rule won't match.
+ //
+ lt.mtime (mtime (ipc.path));
+ }
+
+#else
+
+ pair<path, path> common::
+ pkgconfig_search (const dir_path&,
+ const optional<project_name>&,
+ const string&,
+ bool) const
+ {
+ return pair<path, path> ();
+ }
+
+ bool common::
+ pkgconfig_load (action,
+ const scope&,
+ lib&,
+ liba*,
+ libs*,
+ const optional<project_name>&,
+ const string&,
+ const dir_path&,
+ const dir_paths&,
+ const dir_paths&) const
+ {
+ return false;
+ }
+
+ void common::
+ pkgconfig_load (action,
+ const scope&,
+ lib&,
+ liba*,
+ libs*,
+ const pair<path, path>&,
+ const dir_path&,
+ const dir_paths&,
+ const dir_paths&) const
+ {
+ assert (false); // Should never be called.
+ }
+
+#endif
+
+ void link_rule::
+ pkgconfig_save (action a, const file& l, bool la, bool binless) const
+ {
+ tracer trace (x, "pkgconfig_save");
+
+ context& ctx (l.ctx);
+
+ const scope& bs (l.base_scope ());
+ const scope& rs (*bs.root_scope ());
+
+ auto* t (find_adhoc_member<pc> (l));
+ assert (t != nullptr);
+
+ // By default we assume things go into install.{include, lib}.
+ //
+ using install::resolve_dir;
+
+ dir_path idir (resolve_dir (l, cast<dir_path> (l["install.include"])));
+ dir_path ldir (resolve_dir (l, cast<dir_path> (l["install.lib"])));
+
+ const path& p (t->path ());
+
+ if (verb >= 2)
+ text << "cat >" << p;
+
+ if (ctx.dry_run)
+ return;
+
+ auto_rmfile arm (p);
+
+ try
+ {
+ ofdstream os (p);
+
+ {
+ const project_name& n (project (rs));
+
+ if (n.empty ())
+ fail << "no project name in " << rs;
+
+ lookup vl (rs.vars[ctx.var_version]);
+ if (!vl)
+ fail << "no version variable in project " << n <<
+ info << "while generating " << p;
+
+ const string& v (cast<string> (vl));
+
+ os << "Name: " << n << endl;
+ os << "Version: " << v << endl;
+
+      // This one is required, so make something up if unspecified.
+ //
+ os << "Description: ";
+ if (const string* s = cast_null<string> (rs[ctx.var_project_summary]))
+ os << *s << endl;
+ else
+ os << n << ' ' << v << endl;
+
+ if (const string* u = cast_null<string> (rs[ctx.var_project_url]))
+ os << "URL: " << *u << endl;
+ }
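+
+      // At this point the generated file would start along these lines
+      // (hypothetical project name and version):
+      //
+      //   Name: libhello
+      //   Version: 1.2.3
+      //   Description: libhello 1.2.3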
+
+ auto save_poptions = [&l, &os] (const variable& var)
+ {
+ if (const strings* v = cast_null<strings> (l[var]))
+ {
+ for (auto i (v->begin ()); i != v->end (); ++i)
+ {
+ const string& o (*i);
+ size_t n (o.size ());
+
+ // Filter out -I (both -I<dir> and -I <dir> forms).
+ //
+ if (n >= 2 && o[0] == '-' && o[1] == 'I')
+ {
+ if (n == 2)
+ ++i;
+
+ continue;
+ }
+
+ os << ' ' << escape (o);
+ }
+ }
+ };
+
+ // Given a library save its -l-style library name.
+ //
+ auto save_library = [&os, this] (const file& l)
+ {
+        // If available (it may not be, in the case of import-installed
+        // libraries), use the .pc file name to derive the -l library name
+        // (in the case of a shared library, l.path() may contain the
+        // version).
+ //
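+        // For example (hypothetical names), a shared library built as
+        // libhello.so.1.2.3 with the .pc file libhello.pc would yield
+        // -lhello.
+        //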
+ string n;
+
+ auto strip_lib = [&n] ()
+ {
+ if (n.size () > 3 &&
+ path::traits_type::compare (n.c_str (), 3, "lib", 3) == 0)
+ n.erase (0, 3);
+ };
+
+ if (auto* t = find_adhoc_member<pc> (l))
+ {
+          // We also want to strip the lib prefix unless it is part of the
+          // target name, while keeping any custom library prefix/suffix.
+ //
+ n = t->path ().leaf ().base ().base ().string ();
+
+ if (path::traits_type::compare (n.c_str (), n.size (),
+ l.name.c_str (), l.name.size ()) != 0)
+ strip_lib ();
+ }
+ else
+ {
+ // Derive -l-name from the file name in a fuzzy, platform-specific
+ // manner.
+ //
+ n = l.path ().leaf ().base ().string ();
+
+ if (cclass != compiler_class::msvc)
+ strip_lib ();
+ }
+
+ os << " -l" << n;
+ };
+
+ // @@ TODO: support whole archive?
+ //
+
+ // Cflags.
+ //
+ os << "Cflags:";
+ os << " -I" << escape (idir.string ());
+ save_poptions (c_export_poptions);
+ save_poptions (x_export_poptions);
+ os << endl;
+
+ // Libs.
+ //
+ // While we generate split shared/static .pc files, in case of static
+ // we still want to sort things out into Libs/Libs.private. This is
+ // necessary to distinguish between interface and implementation
+ // dependencies if we don't have the shared variant (see the load
+ // logic for details).
+ //
+ //@@ TODO: would be nice to weed out duplicates. But is it always
+ // safe? Think linking archives: will have to keep duplicates in
+ // the second position, not first. Gets even trickier with
+ // Libs.private split.
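+      //
+      // To illustrate (hypothetical libraries), a liba{hello} with an
+      // interface dependency on libformat and an implementation dependency
+      // on libimpl could produce something along these lines:
+      //
+      //   Libs: -L/usr/local/lib -lhello -lformat
+      //   Libs.private: -limpl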
+ //
+ {
+ os << "Libs:";
+
+      // While we don't need it for a binless library itself, it may be
+ // necessary to resolve its binfull dependencies.
+ //
+ os << " -L" << escape (ldir.string ());
+
+ // Now process ourselves as if we were being linked to something (so
+ // pretty similar to link_rule::append_libraries()).
+ //
+ bool priv (false);
+ auto imp = [&priv] (const file&, bool la) {return priv && la;};
+
+ auto lib = [&os, &save_library] (const file* const* c,
+ const string& p,
+ lflags,
+ bool)
+ {
+ const file* l (c != nullptr ? *c : nullptr);
+
+ if (l != nullptr)
+ {
+ if (l->is_a<libs> () || l->is_a<liba> ()) // See through libux.
+ save_library (*l);
+ }
+ else
+ os << ' ' << p; // Something "system'y", pass as is.
+ };
+
+ auto opt = [] (const file&,
+ const string&,
+ bool, bool)
+ {
+ //@@ TODO: should we filter -L similar to -I?
+ //@@ TODO: how will the Libs/Libs.private work?
+ //@@ TODO: remember to use escape()
+
+ /*
+ // If we need an interface value, then use the group (lib{}).
+ //
+ if (const target* g = exp && l.is_a<libs> () ? l.group : &l)
+ {
+ const variable& var (
+ com
+ ? (exp ? c_export_loptions : c_loptions)
+ : (t == x
+ ? (exp ? x_export_loptions : x_loptions)
+ : var_pool[t + (exp ? ".export.loptions" : ".loptions")]));
+
+ append_options (args, *g, var);
+ }
+ */
+ };
+
+ // Pretend we are linking an executable using what would be normal,
+ // system-default link order.
+ //
+ linfo li {otype::e, la ? lorder::a_s : lorder::s_a};
+
+ process_libraries (a, bs, li, sys_lib_dirs,
+ l, la, 0, // Link flags.
+ imp, lib, opt, !binless);
+ os << endl;
+
+ if (la)
+ {
+ os << "Libs.private:";
+
+ priv = true;
+ process_libraries (a, bs, li, sys_lib_dirs,
+ l, la, 0, // Link flags.
+ imp, lib, opt, false);
+ os << endl;
+ }
+ }
+
+ // If we have modules, list them in the modules variable. We also save
+ // some extra info about them (yes, the rabbit hole runs deep). This
+    // code is pretty similar to compile_rule::search_modules().
+ //
+ if (modules)
+ {
+ struct module
+ {
+ string name;
+ path file;
+
+ string pp;
+ bool symexport;
+ };
+ vector<module> modules;
+
+ for (const target* pt: l.prerequisite_targets[a])
+ {
+ // @@ UTL: we need to (recursively) see through libu*{} (and
+ // also in search_modules()).
+ //
+ if (pt != nullptr && pt->is_a<bmix> ())
+ {
+ // What we have is a binary module interface. What we need is
+ // a module interface source it was built from. We assume it's
+ // the first mxx{} target that we see.
+ //
+ const target* mt (nullptr);
+ for (const target* t: pt->prerequisite_targets[a])
+ {
+ if ((mt = t->is_a (*x_mod)))
+ break;
+ }
+
+ // Can/should there be a bmi{} without mxx{}? Can't think of a
+ // reason.
+ //
+ assert (mt != nullptr);
+
+ path p (install::resolve_file (mt->as<file> ()));
+
+ if (p.empty ()) // Not installed.
+ continue;
+
+ string pp;
+ if (const string* v = cast_null<string> ((*mt)[x_preprocessed]))
+ pp = *v;
+
+ modules.push_back (
+ module {
+ cast<string> (pt->state[a].vars[c_module_name]),
+ move (p),
+ move (pp),
+ symexport
+ });
+ }
+ }
+
+ if (!modules.empty ())
+ {
+ os << endl
+ << "cxx_modules =";
+
+ // Module names shouldn't require escaping.
+ //
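+        // The result would be along these lines (hypothetical module name
+        // and installation path):
+        //
+        //   cxx_modules = hello.core=/usr/include/hello/core.mxx
+        //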
+ for (const module& m: modules)
+ os << ' ' << m.name << '=' << escape (m.file.string ());
+
+ os << endl;
+
+ // Module-specific properties. The format is:
+ //
+ // <lang>_module_<property>.<module> = <value>
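+        //
+        // For example (hypothetical module name):
+        //
+        //   cxx_module_preprocessed.hello.core = all
+        //   cxx_module_symexport.hello.core = true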
+ //
+ for (const module& m: modules)
+ {
+ if (!m.pp.empty ())
+ os << "cxx_module_preprocessed." << m.name << " = " << m.pp
+ << endl;
+
+ if (m.symexport)
+ os << "cxx_module_symexport." << m.name << " = true" << endl;
+ }
+ }
+ }
+
+ os.close ();
+ arm.cancel ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write " << p << ": " << e;
+ }
+ }
+ }
+}
diff --git a/libbuild2/cc/target.cxx b/libbuild2/cc/target.cxx
new file mode 100644
index 0000000..a438898
--- /dev/null
+++ b/libbuild2/cc/target.cxx
@@ -0,0 +1,101 @@
+// file : libbuild2/cc/target.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/target.hxx>
+
+#include <libbuild2/context.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ const target_type cc::static_type
+ {
+ "cc",
+ &file::static_type,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ &target_search,
+ false
+ };
+
+ extern const char h_ext_def[] = "h";
+
+ const target_type h::static_type
+ {
+ "h",
+ &cc::static_type,
+ &target_factory<h>,
+ nullptr, /* fixed_extension */
+ &target_extension_var<h_ext_def>,
+ &target_pattern_var<h_ext_def>,
+ nullptr,
+ &file_search,
+ false
+ };
+
+ extern const char c_ext_def[] = "c";
+
+ const target_type c::static_type
+ {
+ "c",
+ &cc::static_type,
+ &target_factory<c>,
+ nullptr, /* fixed_extension */
+ &target_extension_var<c_ext_def>,
+ &target_pattern_var<c_ext_def>,
+ nullptr,
+ &file_search,
+ false
+ };
+
+ const target_type pc::static_type
+ {
+ "pc",
+ &file::static_type,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ &target_search,
+ false
+ };
+
+ extern const char pca_ext[] = "static.pc"; // VC14 rejects constexpr.
+
+ const target_type pca::static_type
+ {
+ "pca",
+ &pc::static_type,
+ &target_factory<pca>,
+ &target_extension_fix<pca_ext>,
+ nullptr, /* default_extension */
+ &target_pattern_fix<pca_ext>,
+ &target_print_0_ext_verb, // Fixed extension, no use printing.
+ &file_search,
+ false
+ };
+
+ extern const char pcs_ext[] = "shared.pc"; // VC14 rejects constexpr.
+
+ const target_type pcs::static_type
+ {
+ "pcs",
+ &pc::static_type,
+ &target_factory<pcs>,
+ &target_extension_fix<pcs_ext>,
+ nullptr, /* default_extension */
+ &target_pattern_fix<pcs_ext>,
+ &target_print_0_ext_verb, // Fixed extension, no use printing.
+ &file_search,
+ false
+ };
+ }
+}
diff --git a/libbuild2/cc/target.hxx b/libbuild2/cc/target.hxx
new file mode 100644
index 0000000..885bf68
--- /dev/null
+++ b/libbuild2/cc/target.hxx
@@ -0,0 +1,96 @@
+// file : libbuild2/cc/target.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_TARGET_HXX
+#define LIBBUILD2_CC_TARGET_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // This is an abstract base target for all c-common header/source files.
+ // We use this arrangement during rule matching to detect "unknown" (to
+ // this rule) source/header files that it cannot handle but should not
+ // ignore either. For example, a C link rule that sees a C++ source file.
+ //
+ class LIBBUILD2_CC_SYMEXPORT cc: public file
+ {
+ public:
+ using file::file;
+
+ public:
+ static const target_type static_type;
+ virtual const target_type& dynamic_type () const = 0;
+ };
+
+ // There is hardly a c-family compilation without a C header inclusion.
+ // As a result, this target type is registered for any c-family module.
+ //
+ class LIBBUILD2_CC_SYMEXPORT h: public cc
+ {
+ public:
+ using cc::cc;
+
+ public:
+ static const target_type static_type;
+ virtual const target_type& dynamic_type () const {return static_type;}
+ };
+
+ // This one we define in cc but the target type is only registered by the
+ // c module. This way we can implement rule chaining without jumping
+ // through too many hoops (like resolving target type dynamically) but
+ // also without relaxing things too much (i.e., the user still won't be
+ // able to refer to c{} without loading the c module).
+ //
+ class LIBBUILD2_CC_SYMEXPORT c: public cc
+ {
+ public:
+ using cc::cc;
+
+ public:
+ static const target_type static_type;
+ virtual const target_type& dynamic_type () const {return static_type;}
+ };
+
+ // pkg-config file targets.
+ //
+ class LIBBUILD2_CC_SYMEXPORT pc: public file
+ {
+ public:
+ using file::file;
+
+ public:
+ static const target_type static_type;
+ };
+
+ class LIBBUILD2_CC_SYMEXPORT pca: public pc // .static.pc
+ {
+ public:
+ using pc::pc;
+
+ public:
+ static const target_type static_type;
+ virtual const target_type& dynamic_type () const {return static_type;}
+ };
+
+ class LIBBUILD2_CC_SYMEXPORT pcs: public pc // .shared.pc
+ {
+ public:
+ using pc::pc;
+
+ public:
+ static const target_type static_type;
+ virtual const target_type& dynamic_type () const {return static_type;}
+ };
+ }
+}
+
+#endif // LIBBUILD2_CC_TARGET_HXX
diff --git a/libbuild2/cc/types.hxx b/libbuild2/cc/types.hxx
new file mode 100644
index 0000000..280dcbf
--- /dev/null
+++ b/libbuild2/cc/types.hxx
@@ -0,0 +1,116 @@
+// file : libbuild2/cc/types.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_TYPES_HXX
+#define LIBBUILD2_CC_TYPES_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target-type.hxx>
+
+namespace build2
+{
+ namespace cc
+ {
+ // Translation unit information.
+ //
+ // We use absolute and normalized header path as the header unit module
+ // name.
+ //
+ // Note that our terminology doesn't exactly align with the (current)
+ // standard where a header unit is not a module (that is, you either
+ // import a "module [interface translation unit]" or a "[synthesized]
+ // header [translation] unit"). On the other hand, lots of the underlying
+ // mechanics suggest that a header unit is module-like; they end up having
+ // BMIs (which stand for "binary module interface"), etc. In a sense, a
+ // header unit is an "interface unit" for (a part of) the global module
+ // (maybe a partition).
+ //
+ enum class unit_type
+ {
+ non_modular,
+ module_iface,
+ module_impl,
+ module_header
+ };
+
+ struct module_import
+ {
+ unit_type type; // Either module_iface or module_header.
+ string name;
+ bool exported; // True if re-exported (export import M;).
+ size_t score; // Match score (see compile::search_modules()).
+ };
+
+ using module_imports = vector<module_import>;
+
+ struct module_info
+ {
+ string name; // Empty if non-modular.
+ module_imports imports; // Imported modules.
+ };
+
+ struct unit
+ {
+ unit_type type = unit_type::non_modular;
+ build2::cc::module_info module_info;
+ };
+
+ // Compiler language.
+ //
+ enum class lang {c, cxx};
+
+ inline ostream&
+ operator<< (ostream& os, lang l)
+ {
+ return os << (l == lang::c ? "C" : "C++");
+ }
+
+ // Compile/link output type (executable, static, or shared).
+ //
+ enum class otype {e, a, s};
+
+ struct ltype
+ {
+ otype type;
+ bool utility; // True for utility libraries.
+
+ bool executable () const {return type == otype::e && !utility;}
+ bool library () const {return type != otype::e || utility;}
+ bool static_library () const {return type == otype::a || utility;}
+ bool shared_library () const {return type == otype::s && !utility;}
+ bool member_library () const {return type != otype::e;}
+ };
+
+ // Compile target types.
+ //
+ struct compile_target_types
+ {
+ const target_type& obj;
+ const target_type& bmi;
+ const target_type& hbmi;
+ };
+
+ // Library link order.
+ //
+ enum class lorder {a, s, a_s, s_a};
+
+ // Link information: output type and link order.
+ //
+ struct linfo
+ {
+ otype type;
+ lorder order;
+ };
+
+ // Prerequisite link flags.
+ //
+ using lflags = uintptr_t; // To match prerequisite_target::data.
+
+    const lflags lflag_whole = 0x00000001U; // Link whole liba{}/libu*{}.
+ }
+}
+
+#endif // LIBBUILD2_CC_TYPES_HXX
diff --git a/libbuild2/cc/utility.cxx b/libbuild2/cc/utility.cxx
new file mode 100644
index 0000000..07f3b2e
--- /dev/null
+++ b/libbuild2/cc/utility.cxx
@@ -0,0 +1,114 @@
+// file : libbuild2/cc/utility.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/utility.hxx>
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx> // search()
+
+#include <libbuild2/bin/rule.hxx>
+#include <libbuild2/bin/target.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ using namespace bin;
+
+ const dir_path module_dir ("cc");
+ const dir_path modules_sidebuild_dir (dir_path (module_dir) /= "modules");
+
+ lorder
+ link_order (const scope& bs, otype ot)
+ {
+ // Initialize to suppress 'may be used uninitialized' warning produced
+ // by MinGW GCC 5.4.0.
+ //
+ const char* var (nullptr);
+
+ switch (ot)
+ {
+ case otype::e: var = "bin.exe.lib"; break;
+ case otype::a: var = "bin.liba.lib"; break;
+ case otype::s: var = "bin.libs.lib"; break;
+ }
+
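+      // For example, bin.exe.lib = shared static maps to lorder::s_a
+      // (prefer shared, fall back to static).
+      //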
+ const auto& v (cast<strings> (bs[var]));
+ return v[0] == "shared"
+ ? v.size () > 1 && v[1] == "static" ? lorder::s_a : lorder::s
+ : v.size () > 1 && v[1] == "shared" ? lorder::a_s : lorder::a;
+ }
+
+ const target*
+ link_member (const bin::libx& x, action a, linfo li, bool exist)
+ {
+ if (x.is_a<libul> ())
+ {
+ // For libul{} that is linked to an executable the member choice
+ // should be dictated by the members of lib{} this libul{} is
+ // "primarily" for. If both are being built, then it seems natural to
+        // prefer static over shared since it could be faster (though no
+        // doubt someone will want this configurable).
+ //
+ if (li.type == otype::e)
+ {
+          // Utility libraries are project-local, which means the primary
+          // target should be in the same project as us.
+ //
+ li.type = lib_rule::build_members (x.root_scope ()).a
+ ? otype::a
+ : otype::s;
+ }
+
+ const target_type& tt (li.type == otype::a
+ ? libua::static_type
+ : libus::static_type);
+
+ // Called by the compile rule during execute.
+ //
+ return x.ctx.phase == run_phase::match && !exist
+ ? &search (x, tt, x.dir, x.out, x.name)
+ : search_existing (x.ctx, tt, x.dir, x.out, x.name);
+ }
+ else
+ {
+ assert (!exist);
+
+ const lib& l (x.as<lib> ());
+
+ // Make sure group members are resolved.
+ //
+ group_view gv (resolve_members (a, l));
+ assert (gv.members != nullptr);
+
+ lorder lo (li.order);
+
+ bool ls (true);
+ switch (lo)
+ {
+ case lorder::a:
+ case lorder::a_s:
+ ls = false; // Fall through.
+ case lorder::s:
+ case lorder::s_a:
+ {
+ if (ls ? l.s == nullptr : l.a == nullptr)
+ {
+ if (lo == lorder::a_s || lo == lorder::s_a)
+ ls = !ls;
+ else
+ fail << (ls ? "shared" : "static") << " variant of " << l
+ << " is not available";
+ }
+ }
+ }
+
+ return ls ? static_cast<const target*> (l.s) : l.a;
+ }
+ }
+ }
+}
diff --git a/libbuild2/cc/utility.hxx b/libbuild2/cc/utility.hxx
new file mode 100644
index 0000000..3ee07bd
--- /dev/null
+++ b/libbuild2/cc/utility.hxx
@@ -0,0 +1,73 @@
+// file : libbuild2/cc/utility.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_UTILITY_HXX
+#define LIBBUILD2_CC_UTILITY_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+ struct variable;
+
+ namespace cc
+ {
+ // To form the complete path do:
+ //
+ // root.out_path () / root.root_extra->build_dir / module_dir
+ //
+ extern const dir_path module_dir; // cc/
+ extern const dir_path modules_sidebuild_dir; // cc/modules/
+
+ // Compile output type.
+ //
+ otype
+ compile_type (const target&, unit_type);
+
+ compile_target_types
+ compile_types (otype);
+
+ // Link output type.
+ //
+ ltype
+ link_type (const target&);
+
+ // Library link order.
+ //
+    // The reason we pass the scope and not the target is that this function
+    // is called not only for exe/lib but also for obj as part of the library
+ // meta-information protocol implementation. Normally the bin.*.lib values
+    // will be project-wide. With this scheme they can be customized on a
+    // per-directory basis but not per-target, which means all exe/lib in the
+    // same directory have to have the same link order.
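+    //
+    // For example (hypothetical override), a buildfile could set:
+    //
+    //   bin.exe.lib = static shared
+    //
+    // to have executables in that directory prefer static libraries.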
+ //
+ lorder
+ link_order (const scope& base, otype);
+
+ inline linfo
+ link_info (const scope& base, otype ot)
+ {
+ return linfo {ot, link_order (base, ot)};
+ }
+
+ // Given the link order return the library member to link. That is, liba{}
+ // or libs{} for lib{} and libua{} or libus{} for libul{}.
+ //
+ // If existing is true, then only return the member target if it exists
+ // (currently only used and supported for utility libraries).
+ //
+ const target*
+ link_member (const bin::libx&, action, linfo, bool existing = false);
+ }
+}
+
+#include <libbuild2/cc/utility.ixx>
+
+#endif // LIBBUILD2_CC_UTILITY_HXX
diff --git a/libbuild2/cc/utility.ixx b/libbuild2/cc/utility.ixx
new file mode 100644
index 0000000..1509bf2
--- /dev/null
+++ b/libbuild2/cc/utility.ixx
@@ -0,0 +1,73 @@
+// file : libbuild2/cc/utility.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ namespace cc
+ {
+ inline otype
+ compile_type (const target& t, unit_type u)
+ {
+ using namespace bin;
+
+ auto test = [&t, u] (const auto& h, const auto& i, const auto& o)
+ {
+ return t.is_a (u == unit_type::module_header ? h :
+ u == unit_type::module_iface ? i :
+ o);
+ };
+
+ return
+ test (hbmie::static_type, bmie::static_type, obje::static_type) ? otype::e :
+ test (hbmia::static_type, bmia::static_type, obja::static_type) ? otype::a :
+ otype::s;
+ }
+
+ inline ltype
+ link_type (const target& t)
+ {
+ using namespace bin;
+
+ bool u (false);
+ otype o (
+ t.is_a<exe> () || (u = t.is_a<libue> ()) ? otype::e :
+ t.is_a<liba> () || (u = t.is_a<libua> ()) ? otype::a :
+ t.is_a<libs> () || (u = t.is_a<libus> ()) ? otype::s :
+ static_cast<otype> (0xFF));
+
+ return ltype {o, u};
+ }
+
+ inline compile_target_types
+ compile_types (otype t)
+ {
+ using namespace bin;
+
+ const target_type* o (nullptr);
+ const target_type* i (nullptr);
+ const target_type* h (nullptr);
+
+ switch (t)
+ {
+ case otype::e:
+ o = &obje::static_type;
+ i = &bmie::static_type;
+ h = &hbmie::static_type;
+ break;
+ case otype::a:
+ o = &obja::static_type;
+ i = &bmia::static_type;
+ h = &hbmia::static_type;
+ break;
+ case otype::s:
+ o = &objs::static_type;
+ i = &bmis::static_type;
+ h = &hbmis::static_type;
+ break;
+ }
+
+ return compile_target_types {*o, *i, *h};
+ }
+ }
+}
diff --git a/libbuild2/cc/windows-manifest.cxx b/libbuild2/cc/windows-manifest.cxx
new file mode 100644
index 0000000..8d67f0c
--- /dev/null
+++ b/libbuild2/cc/windows-manifest.cxx
@@ -0,0 +1,143 @@
+// file : libbuild2/cc/windows-manifest.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/link-rule.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Translate the compiler target CPU value to the processorArchitecture
+ // attribute value.
+ //
+ const char*
+ windows_manifest_arch (const string& tcpu)
+ {
+ const char* pa (tcpu == "i386" || tcpu == "i686" ? "x86" :
+ tcpu == "x86_64" ? "amd64" :
+ nullptr);
+
+ if (pa == nullptr)
+ fail << "unable to translate CPU " << tcpu << " to manifest "
+ << "processor architecture";
+
+ return pa;
+ }
+
+ // Generate a Windows manifest and if necessary create/update the manifest
+ // file corresponding to the exe{} target. Return the manifest file path
+ // and its timestamp if unchanged or timestamp_nonexistent otherwise.
+ //
+ pair<path, timestamp> link_rule::
+ windows_manifest (const file& t, bool rpath_assembly) const
+ {
+ tracer trace (x, "link_rule::windows_manifest");
+
+ const scope& rs (t.root_scope ());
+
+ const char* pa (windows_manifest_arch (cast<string> (rs[x_target_cpu])));
+
+ string m;
+
+ m += "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n";
+ m += "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n";
+ m += " manifestVersion='1.0'>\n";
+
+ // Program name, version, etc.
+ //
+ string name (t.path ().leaf ().string ());
+
+ m += " <assemblyIdentity name='"; m += name; m += "'\n";
+ m += " type='win32'\n";
+ m += " processorArchitecture='"; m += pa; m += "'\n";
+ m += " version='0.0.0.0'/>\n";
+
+ // Our rpath-emulating assembly.
+ //
+ if (rpath_assembly)
+ {
+ m += " <dependency>\n";
+ m += " <dependentAssembly>\n";
+ m += " <assemblyIdentity name='"; m += name; m += ".dlls'\n";
+ m += " type='win32'\n";
+ m += " processorArchitecture='"; m += pa; m += "'\n";
+ m += " language='*'\n";
+ m += " version='0.0.0.0'/>\n";
+ m += " </dependentAssembly>\n";
+ m += " </dependency>\n";
+ }
+
+ // UAC information. Without it Windows will try to guess, which, as you
+ // can imagine, doesn't end well.
+ //
+ m += " <trustInfo xmlns='urn:schemas-microsoft-com:asm.v3'>\n";
+ m += " <security>\n";
+ m += " <requestedPrivileges>\n";
+ m += " <requestedExecutionLevel level='asInvoker' uiAccess='false'/>\n";
+ m += " </requestedPrivileges>\n";
+ m += " </security>\n";
+ m += " </trustInfo>\n";
+
+ m += "</assembly>\n";
+
+ // If the manifest file exists, compare to its content. If nothing
+ // changed (common case), then we can avoid any further updates.
+ //
+ // The potentially faster alternative would be to hash it and store an
+ // entry in depdb. This, however, gets a bit complicated since we will
+ // need to avoid a race between the depdb and .manifest updates.
+ //
+ path mf (t.path () + ".manifest");
+
+ timestamp mt (mtime (mf));
+
+ if (mt != timestamp_nonexistent)
+ {
+ try
+ {
+ ifdstream is (mf);
+ if (is.read_text () == m)
+ return make_pair (move (mf), mt);
+ }
+ catch (const io_error&)
+ {
+ // Whatever the reason we failed for, let's rewrite the file.
+ }
+ }
+
+ if (verb >= 3)
+ text << "cat >" << mf;
+
+ if (!t.ctx.dry_run)
+ {
+ auto_rmfile rm (mf);
+
+ try
+ {
+ ofdstream os (mf);
+ os << m;
+ os.close ();
+ rm.cancel ();
+        }
+ catch (const io_error& e)
+ {
+ fail << "unable to write to " << mf << ": " << e;
+ }
+ }
+
+ return make_pair (move (mf), timestamp_nonexistent);
+ }
+ }
+}
diff --git a/libbuild2/cc/windows-rpath.cxx b/libbuild2/cc/windows-rpath.cxx
new file mode 100644
index 0000000..5583315
--- /dev/null
+++ b/libbuild2/cc/windows-rpath.cxx
@@ -0,0 +1,400 @@
+// file : libbuild2/cc/windows-rpath.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <errno.h> // E*
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/link-rule.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Provide limited emulation of the rpath functionality on Windows using a
+ // side-by-side assembly. In a nutshell, the idea is to create an assembly
+ // with links to all the prerequisite DLLs.
+ //
+ // Note that currently our assemblies contain all the DLLs that the
+ // executable depends on, recursively. The alternative approach could be
+ // to also create assemblies for DLLs. This appears to be possible (but we
+ // will have to use the resource ID 2 for such a manifest). And it will
+ // probably be necessary for DLLs that are loaded dynamically with
+ // LoadLibrary(). The tricky part is how such nested assemblies will be
+ // found. Since we are effectively (from the loader's point of view)
+ // copying the DLLs, we will also have to copy their assemblies (because
+ // the loader looks for them in the same directory as the DLL). It's not
+ // clear how well such nested assemblies are supported (e.g., in Wine).
+ //
+    // What if the DLL is in the same directory as the executable? Will it
+    // still be found even if there is an assembly? In any case, handling it
+    // like any other DLL won't hurt us much.
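+    //
+    // To illustrate (hypothetical names), for an executable hello.exe that
+    // depends on libgreet.dll, the resulting layout would be along these
+    // lines:
+    //
+    //   hello.exe
+    //   hello.exe.dlls/
+    //     hello.exe.dlls.manifest
+    //     libgreet.dll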
+ //
+ using namespace bin;
+
+ // Return the greatest (newest) timestamp of all the DLLs that we will be
+ // adding to the assembly or timestamp_nonexistent if there aren't any.
+ //
+ timestamp link_rule::
+ windows_rpath_timestamp (const file& t,
+ const scope& bs,
+ action a,
+ linfo li) const
+ {
+ timestamp r (timestamp_nonexistent);
+
+ // We need to collect all the DLLs, so go into implementation of both
+ // shared and static (in case they depend on shared).
+ //
+ auto imp = [] (const file&, bool) {return true;};
+
+ auto lib = [&r] (const file* const* lc,
+ const string& f,
+ lflags,
+ bool sys)
+ {
+ const file* l (lc != nullptr ? *lc : nullptr);
+
+ // We don't rpath system libraries.
+ //
+ if (sys)
+ return;
+
+ // Skip static libraries.
+ //
+ if (l != nullptr)
+ {
+ // This can be an "undiscovered" DLL (see search_library()).
+ //
+ if (!l->is_a<libs> () || l->path ().empty ()) // Also covers binless.
+ return;
+ }
+ else
+ {
+ // This is an absolute path and we need to decide whether it is
+ // a shared or static library.
+ //
+ // @@ This is so broken: we don't link to DLLs, we link to .lib or
+ // .dll.a! Should we even bother? Maybe only for "our" DLLs
+ // (.dll.lib/.dll.a)? But the DLL can also be in a different
+ // directory (lib/../bin).
+ //
+ // Though this can happen on MinGW with direct DLL link...
+ //
+ size_t p (path::traits_type::find_extension (f));
+
+ if (p == string::npos || casecmp (f.c_str () + p + 1, "dll") != 0)
+ return;
+ }
+
+ // Ok, this is a DLL.
+ //
+ timestamp t (l != nullptr
+ ? l->load_mtime ()
+ : mtime (f.c_str ()));
+
+ if (t > r)
+ r = t;
+ };
+
+ for (const prerequisite_target& pt: t.prerequisite_targets[a])
+ {
+ if (pt == nullptr || pt.adhoc)
+ continue;
+
+ bool la;
+ const file* f;
+
+ if ((la = (f = pt->is_a<liba> ())) ||
+ (la = (f = pt->is_a<libux> ())) || // See through.
+ ( f = pt->is_a<libs> ()))
+ process_libraries (a, bs, li, sys_lib_dirs,
+ *f, la, pt.data,
+ imp, lib, nullptr, true);
+ }
+
+ return r;
+ }
+
+ // Like *_timestamp() but actually collect the DLLs (and weed out the
+ // duplicates).
+ //
+ auto link_rule::
+ windows_rpath_dlls (const file& t,
+ const scope& bs,
+ action a,
+ linfo li) const -> windows_dlls
+ {
+ windows_dlls r;
+
+ auto imp = [] (const file&, bool) {return true;};
+
+ auto lib = [&r, &bs] (const file* const* lc,
+ const string& f,
+ lflags,
+ bool sys)
+ {
+ const file* l (lc != nullptr ? *lc : nullptr);
+
+ if (sys)
+ return;
+
+ if (l != nullptr)
+ {
+ if (l->is_a<libs> () && !l->path ().empty ()) // Also covers binless.
+ {
+ // Get .pdb if there is one.
+ //
+ const target_type* tt (bs.find_target_type ("pdb"));
+ const target* pdb (tt != nullptr
+ ? find_adhoc_member (*l, *tt)
+ : nullptr);
+ r.insert (
+ windows_dll {
+ f,
+ pdb != nullptr ? &pdb->as<file> ().path ().string () : nullptr,
+ string ()
+ });
+ }
+ }
+ else
+ {
+ size_t p (path::traits_type::find_extension (f));
+
+ if (p != string::npos && casecmp (f.c_str () + p + 1, "dll") == 0)
+ {
+ // See if we can find a corresponding .pdb.
+ //
+ windows_dll wd {f, nullptr, string ()};
+ string& pdb (wd.pdb_storage);
+
+ // First try "our" naming: foo.dll.pdb.
+ //
+ pdb = f;
+ pdb += ".pdb";
+
+ if (!exists (path (pdb)))
+ {
+ // Then try the usual naming: foo.pdb.
+ //
+ pdb.assign (f, 0, p);
+ pdb += ".pdb";
+
+ if (!exists (path (pdb)))
+ pdb.clear ();
+ }
+
+ if (!pdb.empty ())
+ wd.pdb = &pdb;
+
+ r.insert (move (wd));
+ }
+ }
+ };
+
+ for (const prerequisite_target& pt: t.prerequisite_targets[a])
+ {
+ if (pt == nullptr || pt.adhoc)
+ continue;
+
+ bool la;
+ const file* f;
+
+ if ((la = (f = pt->is_a<liba> ())) ||
+ (la = (f = pt->is_a<libux> ())) || // See through.
+ ( f = pt->is_a<libs> ()))
+ process_libraries (a, bs, li, sys_lib_dirs,
+ *f, la, pt.data,
+ imp, lib, nullptr, true);
+ }
+
+ return r;
+ }
+
+ const char*
+ windows_manifest_arch (const string& tcpu); // windows-manifest.cxx
+
+ // The ts argument should be the DLLs timestamp returned by *_timestamp().
+ //
+ // The scratch argument should be true if the DLL set has changed and we
+ // need to regenerate everything from scratch. Otherwise, we try to avoid
+ // unnecessary work by comparing the DLLs timestamp against the assembly
+ // manifest file.
+ //
+ void link_rule::
+ windows_rpath_assembly (const file& t,
+ const scope& bs,
+ action a,
+ linfo li,
+ const string& tcpu,
+ timestamp ts,
+ bool scratch) const
+ {
+ // Assembly paths and name.
+ //
+ dir_path ad (path_cast<dir_path> (t.path () + ".dlls"));
+ string an (ad.leaf ().string ());
+ path am (ad / path (an + ".manifest"));
+
+ // First check if we actually need to do anything. Since most of the
+ // time we won't, we don't want to combine it with the *_dlls() call
+ // below which allocates memory, etc.
+ //
+ if (!scratch)
+ {
+ // The corner case here is when _timestamp() returns nonexistent
+ // signalling that there aren't any DLLs but the assembly manifest
+ // file exists. This, however, can only happen if we somehow managed
+ // to transition from the "have DLLs" state to "no DLLs" without going
+ // through the "from scratch" update. Actually this can happen when
+ // switching to update-for-install.
+ //
+ if (ts != timestamp_nonexistent && ts <= mtime (am))
+ return;
+ }
+
+ // Next collect the set of DLLs that will be in our assembly. We need to
+ // do this recursively which means we may end up with duplicates. Also,
+ // it is possible that there aren't/no longer are any DLLs which means
+ // we just need to clean things up.
+ //
+ bool empty (ts == timestamp_nonexistent);
+
+ windows_dlls dlls;
+ if (!empty)
+ dlls = windows_rpath_dlls (t, bs, a, li);
+
+ // Clean the assembly directory and make sure it exists. Maybe it would
+ // have been faster to overwrite the existing manifest rather than
+ // removing the old one and creating a new one. But this is definitely
+ // simpler.
+ //
+ {
+ rmdir_status s (rmdir_r (t.ctx, ad, empty, 3));
+
+ if (empty)
+ return;
+
+ if (s == rmdir_status::not_exist)
+ mkdir (ad, 3);
+ }
+
+ // Symlink or copy the DLLs.
+ //
+ {
+ const scope& as (t.weak_scope ()); // Amalgamation.
+
+ auto link = [&as] (const path& f, const path& l)
+ {
+ auto print = [&f, &l] (const char* cmd)
+ {
+ if (verb >= 3)
+ text << cmd << ' ' << f << ' ' << l;
+ };
+
+ // First we try to create a symlink. If that fails (e.g., "Windows
+ // happens"), then we resort to hard links. If that doesn't work
+ // out either (e.g., not on the same filesystem), then we fall back
+ // to copies.
+ //
+ // For the symlink use a relative target path if both paths are part
+ // of the same amalgamation. This way if the amalgamation is moved
+ // as a whole, the links will remain valid.
+ //
+ try
+ {
+ switch (mkanylink (f, l,
+ true /* copy */,
+ f.sub (as.out_path ()) /* relative */))
+ {
+ case entry_type::regular: print ("cp"); break;
+ case entry_type::symlink: print ("ln -s"); break;
+ case entry_type::other: print ("ln"); break;
+ default: assert (false);
+ }
+ }
+ catch (const pair<entry_type, system_error>& e)
+ {
+ const char* w (nullptr);
+ switch (e.first)
+ {
+ case entry_type::regular: print ("cp"); w = "copy"; break;
+ case entry_type::symlink: print ("ln -s"); w = "symlink"; break;
+ case entry_type::other: print ("ln"); w = "hardlink"; break;
+ default: assert (false);
+ }
+
+ fail << "unable to make " << w << ' ' << l << ": " << e.second;
+ }
+ };
+
+ for (const windows_dll& wd: dlls)
+ {
+ //@@ Would be nice to avoid copying. Perhaps reuse buffers
+ // by adding path::assign() and traits::leaf().
+ //
+ path dp (wd.dll); // DLL path.
+ path dn (dp.leaf ()); // DLL name.
+
+ link (dp, ad / dn);
+
+ // Link .pdb if there is one.
+ //
+ if (wd.pdb != nullptr)
+ {
+ path pp (*wd.pdb);
+ link (pp, ad / pp.leaf ());
+ }
+ }
+ }
+
+ if (verb >= 3)
+ text << "cat >" << am;
+
+ if (t.ctx.dry_run)
+ return;
+
+ auto_rmfile rm (am);
+
+ try
+ {
+ ofdstream os (am);
+
+ const char* pa (windows_manifest_arch (tcpu));
+
+ os << "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n"
+ << "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n"
+ << " manifestVersion='1.0'>\n"
+ << " <assemblyIdentity name='" << an << "'\n"
+ << " type='win32'\n"
+ << " processorArchitecture='" << pa << "'\n"
+ << " version='0.0.0.0'/>\n";
+
+ for (const windows_dll& wd: dlls)
+ os << " <file name='" << path (wd.dll).leaf () << "'/>\n";
+
+ os << "</assembly>\n";
+
+ os.close ();
+ rm.cancel ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to write to " << am << ": " << e;
+ }
+ }
+ }
+}
diff --git a/libbuild2/module.cxx b/libbuild2/module.cxx
index bb7c61d..4e7080a 100644
--- a/libbuild2/module.cxx
+++ b/libbuild2/module.cxx
@@ -39,6 +39,7 @@ namespace build2
static const char* bundled_modules[] = {
"bash",
"bin",
+ "cc",
"in",
"version"
};