From 4bdf53837e010073de802070d4e6087410662d3e Mon Sep 17 00:00:00 2001
From: Karen Arutyunov <karen@codesynthesis.com>
Date: Sat, 24 Aug 2019 17:41:30 +0300
Subject: Move cc build system module to separate library

---
 libbuild2/cc/buildfile                             |   74 +
 libbuild2/cc/common.cxx                            | 1031 ++++
 libbuild2/cc/common.hxx                            |  358 ++
 libbuild2/cc/compile-rule.cxx                      | 6098 ++++++++++++++++++++
 libbuild2/cc/compile-rule.hxx                      |  189 +
 libbuild2/cc/export.hxx                            |   38 +
 libbuild2/cc/gcc.cxx                               |  263 +
 libbuild2/cc/guess.cxx                             | 1892 ++++++
 libbuild2/cc/guess.hxx                             |  246 +
 libbuild2/cc/init.cxx                              |  493 ++
 libbuild2/cc/init.hxx                              |   36 +
 libbuild2/cc/install-rule.cxx                      |  355 ++
 libbuild2/cc/install-rule.hxx                      |   82 +
 libbuild2/cc/lexer+char-literal.test.testscript    |   67 +
 libbuild2/cc/lexer+comment.test.testscript         |   88 +
 libbuild2/cc/lexer+line.test.testscript            |   67 +
 libbuild2/cc/lexer+number.test.testscript          |   48 +
 libbuild2/cc/lexer+preprocessor.test.testscript    |   73 +
 .../cc/lexer+raw-string-literal.test.testscript    |   90 +
 libbuild2/cc/lexer+string-literal.test.testscript  |   65 +
 libbuild2/cc/lexer.cxx                             | 1129 ++++
 libbuild2/cc/lexer.hxx                             |  190 +
 libbuild2/cc/lexer.test.cxx                        |   80 +
 libbuild2/cc/link-rule.cxx                         | 3043 ++++++++++
 libbuild2/cc/link-rule.hxx                         |  188 +
 libbuild2/cc/module.cxx                            |  781 +++
 libbuild2/cc/module.hxx                            |  103 +
 libbuild2/cc/msvc.cxx                              |  502 ++
 libbuild2/cc/parser+module.test.testscript         |  147 +
 libbuild2/cc/parser.cxx                            |  263 +
 libbuild2/cc/parser.hxx                            |   55 +
 libbuild2/cc/parser.test.cxx                       |   67 +
 libbuild2/cc/pkgconfig.cxx                         | 1550 +++++
 libbuild2/cc/target.cxx                            |  101 +
 libbuild2/cc/target.hxx                            |   96 +
 libbuild2/cc/types.hxx                             |  116 +
 libbuild2/cc/utility.cxx                           |  114 +
 libbuild2/cc/utility.hxx                           |   73 +
 libbuild2/cc/utility.ixx                           |   73 +
 libbuild2/cc/windows-manifest.cxx                  |  143 +
 libbuild2/cc/windows-rpath.cxx                     |  400 ++
 41 files changed, 20867 insertions(+)
 create mode 100644 libbuild2/cc/buildfile
 create mode 100644 libbuild2/cc/common.cxx
 create mode 100644 libbuild2/cc/common.hxx
 create mode 100644 libbuild2/cc/compile-rule.cxx
 create mode 100644 libbuild2/cc/compile-rule.hxx
 create mode 100644 libbuild2/cc/export.hxx
 create mode 100644 libbuild2/cc/gcc.cxx
 create mode 100644 libbuild2/cc/guess.cxx
 create mode 100644 libbuild2/cc/guess.hxx
 create mode 100644 libbuild2/cc/init.cxx
 create mode 100644 libbuild2/cc/init.hxx
 create mode 100644 libbuild2/cc/install-rule.cxx
 create mode 100644 libbuild2/cc/install-rule.hxx
 create mode 100644 libbuild2/cc/lexer+char-literal.test.testscript
 create mode 100644 libbuild2/cc/lexer+comment.test.testscript
 create mode 100644 libbuild2/cc/lexer+line.test.testscript
 create mode 100644 libbuild2/cc/lexer+number.test.testscript
 create mode 100644 libbuild2/cc/lexer+preprocessor.test.testscript
 create mode 100644 libbuild2/cc/lexer+raw-string-literal.test.testscript
 create mode 100644 libbuild2/cc/lexer+string-literal.test.testscript
 create mode 100644 libbuild2/cc/lexer.cxx
 create mode 100644 libbuild2/cc/lexer.hxx
 create mode 100644 libbuild2/cc/lexer.test.cxx
 create mode 100644 libbuild2/cc/link-rule.cxx
 create mode 100644 libbuild2/cc/link-rule.hxx
 create mode 100644 libbuild2/cc/module.cxx
 create mode 100644 libbuild2/cc/module.hxx
 create mode 100644 libbuild2/cc/msvc.cxx
 create mode 100644 libbuild2/cc/parser+module.test.testscript
 create mode 100644 libbuild2/cc/parser.cxx
 create mode 100644 libbuild2/cc/parser.hxx
 create mode 100644 libbuild2/cc/parser.test.cxx
 create mode 100644 libbuild2/cc/pkgconfig.cxx
 create mode 100644 libbuild2/cc/target.cxx
 create mode 100644 libbuild2/cc/target.hxx
 create mode 100644 libbuild2/cc/types.hxx
 create mode 100644 libbuild2/cc/utility.cxx
 create mode 100644 libbuild2/cc/utility.hxx
 create mode 100644 libbuild2/cc/utility.ixx
 create mode 100644 libbuild2/cc/windows-manifest.cxx
 create mode 100644 libbuild2/cc/windows-rpath.cxx

(limited to 'libbuild2/cc')

diff --git a/libbuild2/cc/buildfile b/libbuild2/cc/buildfile
new file mode 100644
index 0000000..5b3d8eb
--- /dev/null
+++ b/libbuild2/cc/buildfile
@@ -0,0 +1,74 @@
+# file      : libbuild2/cc/buildfile
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+import int_libs = libbutl%lib{butl}
+import imp_libs = libpkgconf%lib{pkgconf}
+
+include ../
+int_libs += ../lib{build2}
+
+include ../bin/
+int_libs += ../bin/lib{build2-bin}
+
+./: lib{build2-cc}: libul{build2-cc}: {hxx ixx txx cxx}{** -**.test...} \
+  $imp_libs $int_libs
+
+# Unit tests (an executable for each *.test.cxx source).
+#
+exe{*.test}:
+{
+  test = true
+  install = false
+}
+
+for t: cxx{**.test...}
+{
+  d = $directory($t)
+  n = $name($t)...
+  b = $path.base($name($t))
+
+  ./: $d/exe{$n}: $t $d/{hxx ixx txx}{+$n} $d/testscript{+$n +$b+*.test...}
+  $d/exe{$n}: libul{build2-cc}: bin.whole = false
+}
+
+# Build options (macros distinguishing the static and shared builds).
+#
+obja{*}: cxx.poptions += -DLIBBUILD2_CC_STATIC_BUILD
+objs{*}: cxx.poptions += -DLIBBUILD2_CC_SHARED_BUILD
+
+# Export options (include paths, macros, and interface libraries).
+#
+lib{build2-cc}:
+{
+  cxx.export.poptions = "-I$out_root" "-I$src_root"
+  cxx.export.libs = $int_libs
+}
+
+liba{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_STATIC
+libs{build2-cc}: cxx.export.poptions += -DLIBBUILD2_CC_SHARED
+
+# For pre-releases use the complete version to make sure they cannot be used
+# in place of another pre-release or the final version. See the version module
+# for details on the version.* variable values.
+#
+# And because this is a build system module, we also embed the same value as
+# the interface version (note that we cannot use build.version.interface for
+# bundled modules because we could be built with a different version of the
+# build system).
+#
+ver = ($version.pre_release    \
+       ? "$version.project_id" \
+       : "$version.major.$version.minor")
+
+lib{build2-cc}: bin.lib.version = @"-$ver"
+libs{build2-cc}: bin.lib.load_suffix = "-$ver"
+
+# Install into the libbuild2/cc/ subdirectory of, say, /usr/include/
+# recreating subdirectories.
+#
+{hxx ixx txx}{*}:
+{
+  install         = include/libbuild2/cc/
+  install.subdirs = true
+}
diff --git a/libbuild2/cc/common.cxx b/libbuild2/cc/common.cxx
new file mode 100644
index 0000000..bfcb00c
--- /dev/null
+++ b/libbuild2/cc/common.cxx
@@ -0,0 +1,1031 @@
+// file      : libbuild2/cc/common.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/file.hxx>        // import()
+#include <libbuild2/scope.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/utility.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    // Recursively process prerequisite libraries. If proc_impl returns false,
+    // then only process interface (*.export.libs), otherwise -- interface and
+    // implementation (prerequisite and from *.libs, unless overridden).
+    //
+    // Note that here we assume that an interface library is also an
+    // implementation (since we don't use *.export.libs in static link). We
+    // currently have this restriction to make sure the target in
+    // *.export.libs is up-to-date (which will happen automatically if it is
+    // listed as a prerequisite of this library).
+    //
+    // Storing a reference to library path in proc_lib is legal (it comes
+    // either from the target's path or from one of the *.libs variables
+    // neither of which should change on this run).
+    //
+    // Note that the order of processing is:
+    //
+    // 1. options
+    // 2. lib itself (if self is true)
+    // 3. dependency libs (prerequisite_targets, left to right, depth-first)
+    // 4. dependency libs (*.libs variables).
+    //
+    // The first argument to proc_lib is a pointer to the last element of an
+    // array that contains the current library dependency chain all the way to
+    // the library passed to process_libraries(). The first element of this
+    // array is NULL.
+    //
+    void common::
+    process_libraries (
+      action a,
+      const scope& top_bs,
+      linfo top_li,
+      const dir_paths& top_sysd,
+      const file& l,
+      bool la,
+      lflags lf,
+      const function<bool (const file&,
+                           bool la)>& proc_impl, // Implementation?
+      const function<void (const file* const*,   // Can be NULL.
+                           const string& path,   // Library path.
+                           lflags,               // Link flags.
+                           bool sys)>& proc_lib, // True if system library.
+      const function<void (const file&,
+                           const string& type,   // cc.type
+                           bool com,             // cc. or x.
+                           bool exp)>& proc_opt, // *.export.
+      bool self /*= false*/,                     // Call proc_lib on l?
+      small_vector<const file*, 16>* chain) const
+    {
+      small_vector<const file*, 16> chain_storage;
+      if (chain == nullptr)
+      {
+        chain = &chain_storage;
+        chain->push_back (nullptr);
+      }
+
+      // See what type of library this is (C, C++, etc). Use it to decide
+      // which x.libs variable name to use. If it's unknown, then we only
+      // look into prerequisites. Note: lookup starting from rule-specific
+      // variables (target should already be matched).
+      //
+      const string* t (cast_null<string> (l.state[a][c_type]));
+
+      bool impl (proc_impl && proc_impl (l, la));
+      bool cc (false), same (false);
+
+      auto& vp (top_bs.ctx.var_pool);
+      lookup c_e_libs;
+      lookup x_e_libs;
+
+      if (t != nullptr)
+      {
+        cc = *t == "cc";
+        same = !cc && *t == x;
+
+        // The explicit export override should be set on the liba/libs{}
+        // target itself. Note also that we only check for *.libs. If one
+        // doesn't have any libraries but needs to set, say, *.loptions, then
+        // *.libs should be set to NULL or empty (this is why we check for
+        // the result being defined).
+        //
+        if (impl)
+          c_e_libs = l.vars[c_export_libs]; // Override.
+        else if (l.group != nullptr) // lib{} group.
+          c_e_libs = l.group->vars[c_export_libs];
+
+        if (!cc)
+        {
+          const variable& var (same
+                               ? x_export_libs
+                               : vp[*t + ".export.libs"]);
+
+          if (impl)
+            x_e_libs = l.vars[var]; // Override.
+          else if (l.group != nullptr) // lib{} group.
+            x_e_libs = l.group->vars[var];
+        }
+
+        // Process options first.
+        //
+        if (proc_opt)
+        {
+          // If all we know is it's a C-common library, then in both cases we
+          // only look for cc.export.*.
+          //
+          if (cc)
+            proc_opt (l, *t, true, true);
+          else
+          {
+            if (impl)
+            {
+              // Interface and implementation: as discussed above, we can have
+              // two situations: overridden export or default export.
+              //
+              if (c_e_libs.defined () || x_e_libs.defined ())
+              {
+                // NOTE: should this not be from l.vars rather than l? Or
+                // perhaps we can assume non-common values will be set on
+                // libs{}/liba{}.
+                //
+                proc_opt (l, *t, true, true);
+                proc_opt (l, *t, false, true);
+              }
+              else
+              {
+                // For default export we use the same options as were used to
+                // build the library.
+                //
+                proc_opt (l, *t, true, false);
+                proc_opt (l, *t, false, false);
+              }
+            }
+            else
+            {
+              // Interface: only add *.export.* (interface dependencies).
+              //
+              proc_opt (l, *t, true, true);
+              proc_opt (l, *t, false, true);
+            }
+          }
+        }
+      }
+
+      // Determine if an absolute path is to a system library. Note that
+      // we assume both paths to be normalized.
+      //
+      auto sys = [] (const dir_paths& sysd, const string& p) -> bool
+      {
+        size_t pn (p.size ());
+
+        for (const dir_path& d: sysd)
+        {
+          const string& ds (d.string ()); // Can be "/", otherwise no slash.
+          size_t dn (ds.size ());
+
+          if (pn > dn &&
+              p.compare (0, dn, ds) == 0 &&
+              (path::traits_type::is_separator (ds[dn - 1]) ||
+               path::traits_type::is_separator (p[dn])))
+            return true;
+        }
+
+        return false;
+      };
+
+      // Next process the library itself if requested.
+      //
+      if (self && proc_lib)
+      {
+        chain->push_back (&l);
+
+        // Note that while normally the path is assigned, in case of an import
+        // stub the path to the DLL may not be known and so the path will be
+        // empty (but proc_lib() will use the import stub).
+        //
+        const path& p (l.path ());
+
+        bool s (t != nullptr // If cc library (matched or imported).
+                ? cast_false<bool> (l.vars[c_system])
+                : !p.empty () && sys (top_sysd, p.string ()));
+
+        proc_lib (&chain->back (), p.string (), lf, s);
+      }
+
+      const scope& bs (t == nullptr || cc ? top_bs : l.base_scope ());
+      optional<linfo> li;                        // Calculate lazily.
+      const dir_paths* sysd (nullptr);           // Resolve lazily.
+
+      // Find system search directories corresponding to this library, i.e.,
+      // from its project and for its type (C, C++, etc).
+      //
+      auto find_sysd = [&top_sysd, t, cc, same, &bs, &sysd, this] ()
+      {
+        // Use the search dirs corresponding to this library scope/type.
+        //
+        sysd = (t == nullptr || cc)
+        ? &top_sysd // Imported library, use importer's sysd.
+        : &cast<dir_paths> (
+          bs.root_scope ()->vars[same
+                                 ? x_sys_lib_dirs
+                                 : bs.ctx.var_pool[*t + ".sys_lib_dirs"]]);
+      };
+
+      auto find_linfo = [top_li, t, cc, &bs, &l, &li] ()
+      {
+        li = (t == nullptr || cc)
+        ? top_li
+        : link_info (bs, link_type (l).type);
+      };
+
+      // Only go into prerequisites (implementation) if instructed and we are
+      // not using explicit export. Otherwise, interface dependencies come
+      // from the lib{}:*.export.libs below.
+      //
+      if (impl && !c_e_libs.defined () && !x_e_libs.defined ())
+      {
+        for (const prerequisite_target& pt: l.prerequisite_targets[a])
+        {
+          // Note: adhoc prerequisites are not part of the library meta-
+          // information protocol.
+          //
+          if (pt == nullptr || pt.adhoc)
+            continue;
+
+          bool la;
+          const file* f;
+
+          if ((la = (f = pt->is_a<liba>  ())) ||
+              (la = (f = pt->is_a<libux> ())) ||
+              (      f = pt->is_a<libs>  ()))
+          {
+            if (sysd == nullptr) find_sysd ();
+            if (!li) find_linfo ();
+
+            process_libraries (a, bs, *li, *sysd,
+                               *f, la, pt.data,
+                               proc_impl, proc_lib, proc_opt, true, chain);
+          }
+        }
+      }
+
+      // Process libraries (recursively) from *.export.libs (of type names)
+      // handling import, etc.
+      //
+      // If it is not a C-common library, then it probably doesn't have any of
+      // the *.libs.
+      //
+      if (t != nullptr)
+      {
+        optional<dir_paths> usrd; // Extract lazily.
+
+        // Determine if a "simple path" is a system library.
+        //
+        auto sys_simple = [&sysd, &sys, &find_sysd] (const string& p) -> bool
+        {
+          bool s (!path::traits_type::absolute (p));
+
+          if (!s)
+          {
+            if (sysd == nullptr) find_sysd ();
+
+            s = sys (*sysd, p);
+          }
+
+          return s;
+        };
+
+        auto proc_int = [&l,
+                         &proc_impl, &proc_lib, &proc_opt, chain,
+                         &sysd, &usrd,
+                         &find_sysd, &find_linfo, &sys_simple,
+                         &bs, a, &li, this] (const lookup& lu)
+        {
+          const vector<name>* ns (cast_null<vector<name>> (lu));
+          if (ns == nullptr || ns->empty ())
+            return;
+
+          for (const name& n: *ns)
+          {
+            if (n.simple ())
+            {
+              // This is something like -lpthread or shell32.lib so should be
+              // a valid path. But it can also be an absolute library path
+              // (e.g., something that may come from our .static/shared.pc
+              // files).
+              //
+              if (proc_lib)
+                proc_lib (nullptr, n.value, 0, sys_simple (n.value));
+            }
+            else
+            {
+              // This is a potentially project-qualified target.
+              //
+              if (sysd == nullptr) find_sysd ();
+              if (!li) find_linfo ();
+
+              const file& t (resolve_library (a, bs, n, *li, *sysd, usrd));
+
+              if (proc_lib)
+              {
+                // This can happen if the target is mentioned in *.export.libs
+                // (i.e., it is an interface dependency) but not in the
+                // library's prerequisites (i.e., it is not an implementation
+                // dependency).
+                //
+                // Note that we used to just check for path being assigned but
+                // on Windows import-installed DLLs may legally have empty
+                // paths.
+                //
+                if (t.mtime () == timestamp_unknown)
+                  fail << "interface dependency " << t << " is out of date" <<
+                    info << "mentioned in *.export.libs of target " << l <<
+                    info << "is it a prerequisite of " << l << "?";
+              }
+
+              // Process it recursively.
+              //
+              // @@ Where can we get the link flags? Should we try to find
+              //    them in the library's prerequisites? What about installed
+              //    stuff?
+              //
+              process_libraries (a, bs, *li, *sysd,
+                                 t, t.is_a<liba> () || t.is_a<libux> (), 0,
+                                 proc_impl, proc_lib, proc_opt, true, chain);
+            }
+          }
+        };
+
+        // Process libraries from *.libs (of type strings).
+        //
+        auto proc_imp = [&proc_lib, &sys_simple] (const lookup& lu)
+        {
+          const strings* ns (cast_null<strings> (lu));
+          if (ns == nullptr || ns->empty ())
+            return;
+
+          for (const string& n: *ns)
+          {
+            // This is something like -lpthread or shell32.lib so should be a
+            // valid path.
+            //
+            proc_lib (nullptr, n, 0, sys_simple (n));
+          }
+        };
+
+        // Note: the same structure as when processing options above.
+        //
+        // If all we know is it's a C-common library, then in both cases we
+        // only look for cc.export.libs.
+        //
+        if (cc)
+        {
+          if (c_e_libs) proc_int (c_e_libs);
+        }
+        else
+        {
+          if (impl)
+          {
+            // Interface and implementation: as discussed above, we can have
+            // two situations: overridden export or default export.
+            //
+            if (c_e_libs.defined () || x_e_libs.defined ())
+            {
+              if (c_e_libs) proc_int (c_e_libs);
+              if (x_e_libs) proc_int (x_e_libs);
+            }
+            else
+            {
+              // For default export we use the same options/libs as were used
+              // to build the library. Since libraries in (non-export) *.libs
+              // are not targets, we don't need to recurse.
+              //
+              if (proc_lib)
+              {
+                proc_imp (l[c_libs]);
+                proc_imp (l[same ? x_libs : vp[*t + ".libs"]]);
+              }
+            }
+          }
+          else
+          {
+            // Interface: only add *.export.* (interface dependencies).
+            //
+            if (c_e_libs) proc_int (c_e_libs);
+            if (x_e_libs) proc_int (x_e_libs);
+          }
+        }
+      }
+
+      // Remove this library from the chain.
+      //
+      if (self && proc_lib)
+        chain->pop_back ();
+    }
+
+    // The name can be an absolute or relative target name (for example,
+    // /tmp/libfoo/lib{foo} or ../libfoo/lib{foo}) or a project-qualified
+    // relative target name (e.g., libfoo%lib{foo}).
+    //
+    // Note that in case of the relative target that comes from export.libs,
+    // the resolution happens relative to the base scope of the target from
+    // which this export.libs came, which is exactly what we want.
+    //
+    // Note that the scope, search paths, and the link order should all be
+    // derived from the library target that mentioned this name. This way we
+    // will select exactly the same target as the library's matched rule and
+    // that's the only way to guarantee it will be up-to-date.
+    //
+    const file& common::
+    resolve_library (action a,
+                     const scope& s,
+                     name n,
+                     linfo li,
+                     const dir_paths& sysd,
+                     optional<dir_paths>& usrd) const
+    {
+      if (n.type != "lib" && n.type != "liba" && n.type != "libs")
+        fail << "target name " << n << " is not a library";
+
+      const target* xt (nullptr);
+
+      if (!n.qualified ())
+      {
+        // Search for an existing target with this name "as if" it were a
+        // prerequisite.
+        //
+        xt = search_existing (n, s);
+
+        if (xt == nullptr)
+          fail << "unable to find library " << n;
+      }
+      else
+      {
+        // Project-qualified name: this is an import.
+        //
+        auto rp (s.find_target_type (n, location ())); // Note: changes name.
+        const target_type* tt (rp.first);
+        optional<string>& ext (rp.second);
+
+        if (tt == nullptr)
+          fail << "unknown target type '" << n.type << "' in library " << n;
+
+        // @@ OUT: for now we assume out is undetermined, just like in
+        // search (name, scope).
+        //
+        dir_path out;
+
+        prerequisite_key pk {n.proj, {tt, &n.dir, &out, &n.value, ext}, &s};
+        xt = search_library_existing (a, sysd, usrd, pk);
+
+        if (xt == nullptr)
+        {
+          if (n.qualified ())
+            xt = import_existing (s.ctx, pk);
+        }
+
+        if (xt == nullptr)
+          fail << "unable to find library " << pk;
+      }
+
+      // If this is lib{}/libu*{}, pick the appropriate member.
+      //
+      if (const libx* l = xt->is_a<libx> ())
+        xt = link_member (*l, a, li); // Pick lib*{e,a,s}{}.
+
+      return xt->as<file> ();
+    }
+
+    // Insert a target of type T (storing it in r), asserting that it already
+    // exists if exist is true. Return the lock from the insertion.
+    //
+    template <typename T>
+    ulock common::
+    insert_library (context& ctx,
+                    T*& r,
+                    const string& name,
+                    const dir_path& d,
+                    optional<string> ext,
+                    bool exist,
+                    tracer& trace)
+    {
+      auto p (ctx.targets.insert_locked (T::static_type,
+                                         d,
+                                         dir_path (),
+                                         name,
+                                         move (ext),
+                                         true, // Implied.
+                                         trace));
+
+      assert (!exist || !p.second.owns_lock ());
+      r = &p.first.template as<T> ();
+      return move (p.second);
+    }
+
+    // Note that pk's scope should not be NULL (even if dir is absolute).
+    //
+    target* common::
+    search_library (action act,
+                    const dir_paths& sysd,
+                    optional<dir_paths>& usrd,
+                    const prerequisite_key& p,
+                    bool exist) const
+    {
+      tracer trace (x, "search_library");
+
+      assert (p.scope != nullptr);
+
+      // @@ This is hairy enough to warrant a separate implementation for
+      //    Windows.
+
+      // Note: since we are searching for a (presumably) installed library,
+      // utility libraries do not apply.
+      //
+      bool l (p.is_a<lib> ());
+      const optional<string>& ext (l ? nullopt : p.tk.ext); // Only liba/libs.
+
+      // First figure out what we need to search for.
+      //
+      const string& name (*p.tk.name);
+
+      // liba
+      //
+      path an;
+      optional<string> ae;
+
+      if (l || p.is_a<liba> ())
+      {
+        // We are trying to find a library in the search paths extracted from
+        // the compiler. It would only be natural if we used the library
+        // prefix/extension that correspond to this compiler and/or its
+        // target.
+        //
+        // Unlike MinGW, VC's .lib/.dll.lib naming is by no means standard and
+        // we might need to search for other names. In fact, there is no
+        // reliable way to guess from the file name what kind of library it
+        // is, static or import and we will have to do deep inspection of such
+        // alternative names. However, if we did find .dll.lib, then we can
+        // assume that .lib is the static library without any deep inspection
+        // overhead.
+        //
+        const char* e ("");
+
+        if (tsys == "win32-msvc")
+        {
+          an = path (name);
+          e = "lib";
+        }
+        else
+        {
+          an = path ("lib" + name);
+          e = "a";
+        }
+
+        ae = ext ? ext : string (e);
+        if (!ae->empty ())
+        {
+          an += '.';
+          an += *ae;
+        }
+      }
+
+      // libs
+      //
+      path sn;
+      optional<string> se;
+
+      if (l || p.is_a<libs> ())
+      {
+        const char* e ("");
+
+        if (tsys == "win32-msvc")
+        {
+          sn = path (name);
+          e = "dll.lib";
+        }
+        else
+        {
+          sn = path ("lib" + name);
+
+          if      (tsys == "darwin")  e = "dylib";
+          else if (tsys == "mingw32") e = "dll.a"; // See search code below.
+          else                        e = "so";
+        }
+
+        se = ext ? ext : string (e);
+        if (!se->empty ())
+        {
+          sn += '.';
+          sn += *se;
+        }
+      }
+
+      // Now search.
+      //
+      liba* a (nullptr);
+      libs* s (nullptr);
+
+      pair<path, path> pc; // pkg-config .pc file paths.
+      path f;              // Reuse the buffer.
+
+      auto search =[&a, &s, &pc,
+                    &an, &ae,
+                    &sn, &se,
+                    &name, ext,
+                    &p, &f, exist, &trace, this] (const dir_path& d) -> bool
+      {
+        context& ctx (p.scope->ctx);
+
+        timestamp mt;
+
+        // libs
+        //
+        // Look for the shared library first. The order is important for VC:
+        // only if we found .dll.lib can we safely assume that just .lib is a
+        // static library.
+        //
+        if (!sn.empty ())
+        {
+          f = d;
+          f /= sn;
+          mt = mtime (f);
+
+          if (mt != timestamp_nonexistent)
+          {
+            // On Windows what we found is the import library which we need
+            // to make the first ad hoc member of libs{}.
+            //
+            if (tclass == "windows")
+            {
+              libi* i (nullptr);
+              insert_library (ctx, i, name, d, se, exist, trace);
+
+              ulock l (
+                insert_library (ctx, s, name, d, nullopt, exist, trace));
+
+              if (!exist)
+              {
+                if (l.owns_lock ())
+                {
+                  s->member = i; // We are first.
+                  l.unlock ();
+                }
+                else
+                  assert (find_adhoc_member<libi> (*s) == i);
+
+                i->mtime (mt);
+                i->path (move (f));
+
+                // Presumably there is a DLL somewhere, we just don't know
+                // where (and it's possible we might have to look for one if we
+                // decide we need to do rpath emulation for installed
+                // libraries as well). We will represent this as empty path
+                // but valid timestamp (aka "trust me, it's there").
+                //
+                s->mtime (mt);
+                s->path (empty_path);
+              }
+            }
+            else
+            {
+              insert_library (ctx, s, name, d, se, exist, trace);
+
+              s->mtime (mt);
+              s->path (move (f));
+            }
+          }
+          else if (!ext && tsys == "mingw32")
+          {
+            // Above we searched for the import library (.dll.a) but if it's
+            // not found, then we also search for the .dll (unless the
+            // extension was specified explicitly) since we can link to it
+            // directly. Note also that the resulting libs{} would end up
+            // being the .dll.
+            //
+            se = string ("dll");
+            f = f.base (); // Remove .a from .dll.a.
+            mt = mtime (f);
+
+            if (mt != timestamp_nonexistent)
+            {
+              insert_library (ctx, s, name, d, se, exist, trace);
+
+              s->mtime (mt);
+              s->path (move (f));
+            }
+          }
+        }
+
+        // liba
+        //
+        // If we didn't find .dll.lib then we cannot assume .lib is static.
+        //
+        if (!an.empty () && (s != nullptr || tsys != "win32-msvc"))
+        {
+          f = d;
+          f /= an;
+
+          if ((mt = mtime (f)) != timestamp_nonexistent)
+          {
+            // Enter the target. Note that because the search paths are
+            // normalized, the result is automatically normalized as well.
+            //
+            // Note that this target is outside any project which we treat
+            // as out trees.
+            //
+            insert_library (ctx, a, name, d, ae, exist, trace);
+            a->mtime (mt);
+            a->path (move (f));
+          }
+        }
+
+        // Alternative search for VC.
+        //
+        if (tsys == "win32-msvc")
+        {
+          const scope& rs (*p.scope->root_scope ());
+          const process_path& ld (cast<process_path> (rs["bin.ld.path"]));
+
+          if (s == nullptr && !sn.empty ())
+            s = msvc_search_shared (ld, d, p, exist);
+
+          if (a == nullptr && !an.empty ())
+            a = msvc_search_static (ld, d, p, exist);
+        }
+
+        // Look for binary-less libraries via pkg-config .pc files. Note that
+        // it is possible we have already found one of them as binfull but the
+        // other is binless.
+        //
+        {
+          bool na (a == nullptr && !an.empty ()); // Need static.
+          bool ns (s == nullptr && !sn.empty ()); // Need shared.
+
+          if (na || ns)
+          {
+            // Only consider the common .pc file if we can be sure there
+            // is no binfull variant.
+            //
+            pair<path, path> r (
+              pkgconfig_search (d, p.proj, name, na && ns /* common */));
+
+            if (na && !r.first.empty ())
+            {
+              insert_library (ctx, a, name, d, nullopt, exist, trace);
+              a->mtime (timestamp_unreal);
+              a->path (empty_path);
+            }
+
+            if (ns && !r.second.empty ())
+            {
+              insert_library (ctx, s, name, d, nullopt, exist, trace);
+              s->mtime (timestamp_unreal);
+              s->path (empty_path);
+            }
+
+            // Only keep these .pc paths if we found anything via them.
+            //
+            if ((na && a != nullptr) || (ns && s != nullptr))
+              pc = move (r);
+          }
+        }
+
+        return a != nullptr || s != nullptr;
+      };
+
+      // First try user directories (i.e., -L).
+      //
+      bool sys (false);
+
+      if (!usrd)
+        usrd = extract_library_dirs (*p.scope);
+
+      const dir_path* pd (nullptr);
+      for (const dir_path& d: *usrd)
+      {
+        if (search (d))
+        {
+          pd = &d;
+          break;
+        }
+      }
+
+      // Next try system directories (i.e., those extracted from the compiler).
+      //
+      if (pd == nullptr)
+      {
+        for (const dir_path& d: sysd)
+        {
+          if (search (d))
+          {
+            pd = &d;
+            break;
+          }
+        }
+
+        sys = true;
+      }
+
+      if (pd == nullptr)
+        return nullptr;
+
+      // Enter (or find) the lib{} target group.
+      //
+      lib* lt;
+      insert_library (
+        p.scope->ctx, lt, name, *pd, l ? p.tk.ext : nullopt, exist, trace);
+
+      // Result.
+      //
+      target* r (l ? lt : (p.is_a<liba> () ? static_cast<target*> (a) : s));
+
+      // Assume the rest is already done if existing.
+      //
+      if (exist)
+        return r;
+
+      // If we cannot acquire the lock then this means the target has already
+      // been matched (though not clear by whom) and we assume all of this
+      // has already been done.
+      //
+      target_lock ll (lock (act, *lt));
+
+      // Set lib{} group members to indicate what's available. Note that we
+      // must be careful here since it's possible we have already imported some
+      // of its members.
+      //
+      if (ll)
+      {
+        if (a != nullptr) lt->a = a;
+        if (s != nullptr) lt->s = s;
+      }
+
+      target_lock al (a != nullptr ? lock (act, *a) : target_lock ());
+      target_lock sl (s != nullptr ? lock (act, *s) : target_lock ());
+
+      if (!al) a = nullptr;
+      if (!sl) s = nullptr;
+
+      if (a != nullptr) a->group = lt;
+      if (s != nullptr) s->group = lt;
+
+      // Mark as a "cc" library (unless already marked) and set the system
+      // flag.
+      //
+      auto mark_cc = [sys, this] (target& t) -> bool
+      {
+        auto p (t.vars.insert (c_type));
+
+        if (p.second)
+        {
+          p.first.get () = string ("cc");
+
+          if (sys)
+            t.vars.assign (c_system) = true;
+        }
+
+        return p.second;
+      };
+
+      // If the library already has cc.type, then assume it was either
+      // already imported or was matched by a rule.
+      //
+      if (a != nullptr && !mark_cc (*a)) a = nullptr;
+      if (s != nullptr && !mark_cc (*s)) s = nullptr;
+
+      // Add the "using static/shared library" macro (used, for example, to
+      // handle DLL export). The absence of either of these macros would
+      // mean some other build system that cannot distinguish between the
+      // two (and no pkg-config information).
+      //
+      auto add_macro = [this] (target& t, const char* suffix)
+      {
+        // If there is already a value (either in cc.export or x.export),
+        // don't add anything: we don't want to be accumulating defines nor
+        // messing with custom values. And if we are adding, then use the
+        // generic cc.export.
+        //
+        // The only way we could already have this value is if this same
+        // library was also imported as a project (as opposed to installed).
+        // Unlikely but possible. In this case the values were set by the
+        // export stub and we shouldn't touch them.
+        //
+        if (!t.vars[x_export_poptions])
+        {
+          auto p (t.vars.insert (c_export_poptions));
+
+          if (p.second)
+          {
+            // The "standard" macro name will be LIB<NAME>_{STATIC,SHARED},
+            // where <name> is the target name. Here we want to strike a
+            // balance between being unique and not too noisy.
+            //
+            string d ("-DLIB");
+
+            d += sanitize_identifier (
+              ucase (const_cast<const string&> (t.name)));
+
+            d += '_';
+            d += suffix;
+
+            strings o;
+            o.push_back (move (d));
+            p.first.get () = move (o);
+          }
+        }
+      };
+
+      if (ll && (a != nullptr || s != nullptr))
+      {
+        // Try to extract library information from pkg-config. We only add the
+        // default macro if we could not extract more precise information. The
+        // idea is that in .pc files that we generate, we copy those macros
+        // (or custom ones) from *.export.poptions.
+        //
+        if (pc.first.empty () && pc.second.empty ())
+        {
+          if (!pkgconfig_load (act, *p.scope,
+                               *lt, a, s,
+                               p.proj, name,
+                               *pd, sysd, *usrd))
+          {
+            if (a != nullptr) add_macro (*a, "STATIC");
+            if (s != nullptr) add_macro (*s, "SHARED");
+          }
+        }
+        else
+          pkgconfig_load (act, *p.scope, *lt, a, s, pc, *pd, sysd, *usrd);
+      }
+
+      // If we have the lock (meaning this is the first time), set the
+      // target's recipe to noop. Failing that we will keep re-locking it,
+      // updating its members, etc.
+      //
+      if (al) match_recipe (al, noop_recipe);
+      if (sl) match_recipe (sl, noop_recipe);
+      if (ll) match_recipe (ll, noop_recipe);
+
+      return r;
+    }
+
+    dir_paths common::
+    extract_library_dirs (const scope& bs) const
+    {
+      dir_paths r;
+
+      // Extract user-supplied search paths (-L, /LIBPATH) from val into r.
+      //
+      auto extract = [&bs, &r, this] (const value& val, const variable& var)
+      {
+        const auto& v (cast<strings> (val));
+
+        for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+        {
+          const string& o (*i);
+
+          dir_path d;
+
+          try
+          {
+            if (cclass == compiler_class::msvc)
+            {
+              // /LIBPATH:<dir> (case-insensitive).
+              //
+              if ((o[0] == '/' || o[0] == '-') &&
+                  casecmp (o.c_str () + 1, "LIBPATH:", 8) == 0)
+                d = dir_path (o, 9, string::npos); // Skip "/LIBPATH:".
+              else
+                continue;
+            }
+            else
+            {
+              // -L can either be in the "-L<dir>" or "-L <dir>" form.
+              //
+              if (o == "-L")
+              {
+                if (++i == e)
+                  break; // Dangling -L: let the compiler complain.
+
+                d = dir_path (*i); // Directory is the next option.
+              }
+              else if (o.compare (0, 2, "-L") == 0)
+                d = dir_path (o, 2, string::npos); // Skip "-L".
+              else
+                continue;
+            }
+          }
+          catch (const invalid_path& e)
+          {
+            fail << "invalid directory '" << e.path << "'"
+                 << " in option '" << o << "'" // "-L" in the "-L <dir>" form.
+                 << " in variable " << var
+                 << " for scope " << bs;
+          }
+
+          // Ignore relative paths. Or maybe we should warn?
+          //
+          if (!d.relative ())
+            r.push_back (move (d));
+        }
+      };
+
+      if (auto l = bs[c_loptions]) extract (*l, c_loptions); // cc.loptions
+      if (auto l = bs[x_loptions]) extract (*l, x_loptions); // x.loptions
+
+      return r;
+    }
+  }
+}
diff --git a/libbuild2/cc/common.hxx b/libbuild2/cc/common.hxx
new file mode 100644
index 0000000..31219a3
--- /dev/null
+++ b/libbuild2/cc/common.hxx
@@ -0,0 +1,358 @@
+// file      : libbuild2/cc/common.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_COMMON_HXX
+#define LIBBUILD2_CC_COMMON_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/guess.hxx>  // compiler_id
+#include <libbuild2/cc/target.hxx> // h{}
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Data entries that define a concrete c-family module (e.g., c or cxx).
+    // These classes are used as virtual bases by the rules as well as the
+    // modules. This way the member variables can be referenced as is, without
+    // any extra decorations (in other words, it is a bunch of data members
+    // that can be shared between several classes/instances).
+    //
+    struct config_data
+    {
+      lang x_lang;
+
+      const char* x;         // Module name ("c", "cxx").
+      const char* x_name;    // Compiler name ("c", "c++").
+      const char* x_default; // Compiler default ("gcc", "g++").
+      const char* x_pext;    // Preprocessed source extension (".i", ".ii").
+
+      // Array of modules that can hint us at the toolchain, terminated with
+      // NULL.
+      //
+      const char* const* x_hinters;
+
+      const variable& config_x;
+      const variable& config_x_id;      // <type>[-<variant>]
+      const variable& config_x_version;
+      const variable& config_x_target;
+      const variable& config_x_std;
+      const variable& config_x_poptions;
+      const variable& config_x_coptions;
+      const variable& config_x_loptions;
+      const variable& config_x_aoptions;
+      const variable& config_x_libs;
+      const variable* config_x_importable_headers;
+
+      const variable& x_path;         // Compiler process path.
+      const variable& x_sys_lib_dirs; // System library search directories.
+      const variable& x_sys_inc_dirs; // System header search directories.
+
+      const variable& x_std;
+      const variable& x_poptions;
+      const variable& x_coptions;
+      const variable& x_loptions;
+      const variable& x_aoptions;
+      const variable& x_libs;
+      const variable* x_importable_headers;
+
+      const variable& c_poptions; // cc.*
+      const variable& c_coptions;
+      const variable& c_loptions;
+      const variable& c_aoptions;
+      const variable& c_libs;
+
+      const variable& x_export_poptions;
+      const variable& x_export_coptions;
+      const variable& x_export_loptions;
+      const variable& x_export_libs;
+
+      const variable& c_export_poptions; // cc.export.*
+      const variable& c_export_coptions;
+      const variable& c_export_loptions;
+      const variable& c_export_libs;
+
+      const variable& x_stdlib;       // x.stdlib
+
+      const variable& c_runtime;      // cc.runtime
+      const variable& c_stdlib;       // cc.stdlib
+
+      const variable& c_type;         // cc.type
+      const variable& c_system;       // cc.system
+      const variable& c_module_name;  // cc.module_name
+      const variable& c_reprocess;    // cc.reprocess
+
+      const variable& x_preprocessed; // x.preprocessed
+      const variable* x_symexport;    // x.features.symexport
+
+      const variable& x_id;
+      const variable& x_id_type;
+      const variable& x_id_variant;
+
+      const variable& x_class;
+
+      const variable& x_version;
+      const variable& x_version_major;
+      const variable& x_version_minor;
+      const variable& x_version_patch;
+      const variable& x_version_build;
+
+      const variable& x_signature;
+      const variable& x_checksum;
+
+      const variable& x_pattern;
+
+      const variable& x_target;
+      const variable& x_target_cpu;
+      const variable& x_target_vendor;
+      const variable& x_target_system;
+      const variable& x_target_version;
+      const variable& x_target_class;
+    };
+
+    struct data: config_data
+    {
+      const char* x_compile; // Rule names.
+      const char* x_link;
+      const char* x_install;
+      const char* x_uninstall;
+
+      // Cached values for some commonly-used variables/values.
+      //
+
+      compiler_type ctype;          // x.id.type
+      const string& cvariant;       // x.id.variant
+      compiler_class cclass;        // x.class
+      uint64_t cmaj;                // x.version.major
+      uint64_t cmin;                // x.version.minor
+      const process_path& cpath;    // x.path
+
+      const target_triplet& ctgt;   // x.target
+      const string& tsys;           // x.target.system
+      const string& tclass;         // x.target.class
+
+      const strings& tstd;          // Translated x_std value (options).
+
+      bool modules;                 // x.features.modules
+      bool symexport;               // x.features.symexport
+
+      const strings* import_hdr;    // x.importable_headers (NULL if unused/empty).
+
+      const dir_paths& sys_lib_dirs; // x.sys_lib_dirs
+      const dir_paths& sys_inc_dirs; // x.sys_inc_dirs
+
+      size_t sys_lib_dirs_extra;     // First extra path (size if none).
+      size_t sys_inc_dirs_extra;     // First extra path (size if none).
+
+      const target_type& x_src; // Source target type (c{}, cxx{}).
+      const target_type* x_mod; // Module target type (mxx{}), if any.
+
+      // Array of target types that are considered the X-language headers
+      // (excluding h{} except for C). Keep them in the most likely to appear
+      // order with the "real header" first and terminated with NULL.
+      //
+      const target_type* const* x_hdr;
+
+      template <typename T>
+      bool
+      x_header (const T& t, bool c_hdr = true) const // c_hdr: also accept h{}.
+      {
+        for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht)
+          if (t.is_a (**ht))
+            return true;
+
+        return c_hdr && t.is_a (h::static_type);
+      }
+
+      // Array of target types that can be #include'd. Used to reverse-lookup
+      // extensions to target types. Keep them in the most likely to appear
+      // order and terminate with NULL.
+      //
+      const target_type* const* x_inc;
+
+      // Aggregate-like constructor with from-base support.
+      //
+      data (const config_data& cd,
+            const char* compile,
+            const char* link,
+            const char* install,
+            const char* uninstall,
+            compiler_type ct,
+            const string& cv,
+            compiler_class cl,
+            uint64_t mj, uint64_t mi,
+            const process_path& path,
+            const target_triplet& tgt,
+            const strings& std,
+            bool fm,
+            bool fs,
+            const dir_paths& sld,
+            const dir_paths& sid,
+            size_t sle,
+            size_t sie,
+            const target_type& src,
+            const target_type* mod,
+            const target_type* const* hdr,
+            const target_type* const* inc)
+          : config_data (cd),
+            x_compile (compile),
+            x_link (link),
+            x_install (install),
+            x_uninstall (uninstall),
+            ctype (ct), cvariant (cv), cclass (cl),
+            cmaj (mj), cmin (mi),
+            cpath (path),
+            ctgt (tgt), tsys (ctgt.system), tclass (ctgt.class_),
+            tstd (std),
+            modules (fm),
+            symexport (fs),
+            import_hdr (nullptr), // Not a ctor argument; starts as NULL.
+            sys_lib_dirs (sld), sys_inc_dirs (sid),
+            sys_lib_dirs_extra (sle), sys_inc_dirs_extra (sie),
+            x_src (src), x_mod (mod), x_hdr (hdr), x_inc (inc) {}
+    };
+
+    class LIBBUILD2_CC_SYMEXPORT common: public data
+    {
+    public:
+      common (data&& d): data (move (d)) {}
+
+      // Library handling.
+      //
+    public:
+      void
+      process_libraries (
+        action,
+        const scope&,
+        linfo,
+        const dir_paths&,
+        const file&,
+        bool,
+        lflags,
+        const function<bool (const file&, bool)>&,
+        const function<void (const file* const*, const string&, lflags, bool)>&,
+        const function<void (const file&, const string&, bool, bool)>&,
+        bool = false,
+        small_vector<const file*, 16>* = nullptr) const;
+
+      const target*
+      search_library (action a,
+                      const dir_paths& sysd,
+                      optional<dir_paths>& usrd,
+                      const prerequisite& p) const
+      {
+        const target* r (p.target.load (memory_order_consume));
+
+        if (r == nullptr)
+        {
+          if ((r = search_library (a, sysd, usrd, p.key ())) != nullptr)
+          {
+            const target* e (nullptr);
+            if (!p.target.compare_exchange_strong (
+                  e, r,
+                  memory_order_release,
+                  memory_order_consume))
+              assert (e == r); // Already set by someone else; must agree.
+          }
+        }
+
+        return r;
+      }
+
+    public:
+      const file&
+      resolve_library (action,
+                       const scope&,
+                       name,
+                       linfo,
+                       const dir_paths&,
+                       optional<dir_paths>&) const;
+
+      template <typename T>
+      static ulock
+      insert_library (context&,
+                      T*&,
+                      const string&,
+                      const dir_path&,
+                      optional<string>,
+                      bool,
+                      tracer&);
+
+      target*
+      search_library (action,
+                      const dir_paths&,
+                      optional<dir_paths>&,
+                      const prerequisite_key&,
+                      bool existing = false) const;
+
+      const target*
+      search_library_existing (action a,
+                               const dir_paths& sysd,
+                               optional<dir_paths>& usrd,
+                               const prerequisite_key& pk) const
+      {
+        return search_library (a, sysd, usrd, pk, true); // existing = true
+      }
+
+      dir_paths
+      extract_library_dirs (const scope&) const;
+
+      // Alternative search logic for VC (msvc.cxx).
+      //
+      bin::liba*
+      msvc_search_static (const process_path&,
+                          const dir_path&,
+                          const prerequisite_key&,
+                          bool existing) const;
+
+      bin::libs*
+      msvc_search_shared (const process_path&,
+                          const dir_path&,
+                          const prerequisite_key&,
+                          bool existing) const;
+
+      // The pkg-config file searching and loading (pkgconfig.cxx).
+      //
+      using pkgconfig_callback = function<bool (dir_path&& d)>;
+
+      bool
+      pkgconfig_search (const dir_path&, const pkgconfig_callback&) const;
+
+      pair<path, path>
+      pkgconfig_search (const dir_path&,
+                        const optional<project_name>&,
+                        const string&,
+                        bool) const;
+
+      void
+      pkgconfig_load (action, const scope&,
+                      bin::lib&, bin::liba*, bin::libs*,
+                      const pair<path, path>&,
+                      const dir_path&,
+                      const dir_paths&,
+                      const dir_paths&) const;
+
+      bool
+      pkgconfig_load (action, const scope&,
+                      bin::lib&, bin::liba*, bin::libs*,
+                      const optional<project_name>&,
+                      const string&,
+                      const dir_path&,
+                      const dir_paths&,
+                      const dir_paths&) const;
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_COMMON_HXX
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
new file mode 100644
index 0000000..8cebef0
--- /dev/null
+++ b/libbuild2/cc/compile-rule.cxx
@@ -0,0 +1,6098 @@
+// file      : libbuild2/cc/compile-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/compile-rule.hxx>
+
+#include <cstdlib>  // exit()
+#include <cstring>  // strlen(), strchr()
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/depdb.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>  // mtime()
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/config/utility.hxx> // create_project()
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/parser.hxx>
+#include <libbuild2/cc/target.hxx>  // h
+#include <libbuild2/cc/module.hxx>
+#include <libbuild2/cc/utility.hxx>
+
+using std::exit;
+using std::strlen;
+
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    // Module type/info string serialization.
+    //
+    // The string representation is a space-separated list of module names
+    // or quoted paths for header units with the following rules:
+    //
+    // 1. If this is a module unit, then the first name is the module name
+    //    itself followed by either '!' for an interface or header unit and
+    //    by '+' for an implementation unit.
+    //
+    // 2. If an imported module is re-exported, then the module name is
+    //    followed by '*'.
+    //
+    // For example:
+    //
+    // foo! foo.core* foo.base* foo.impl
+    // foo.base+ foo.impl
+    // foo.base foo.impl
+    // "/usr/include/stdio.h"!
+    // "/usr/include/stdio.h"! "/usr/include/stddef.h"
+    //
+    // NOTE: currently we omit the imported header units since we have no need
+    //       for this information (everything is handled by the mapper). Plus,
+    //       resolving an import declaration to an absolute path would require
+    //       some effort.
+    //
+    static string
+    to_string (unit_type ut, const module_info& mi)
+    {
+      string s;
+
+      if (ut != unit_type::non_modular)
+      {
+        if (ut == unit_type::module_header) s += '"';
+        s += mi.name;
+        if (ut == unit_type::module_header) s += '"';
+
+        s += (ut == unit_type::module_impl ? '+' : '!');
+      }
+
+      for (const module_import& i: mi.imports)
+      {
+        if (!s.empty ())
+          s += ' ';
+
+        if (i.type == unit_type::module_header) s += '"';
+        s += i.name;
+        if (i.type == unit_type::module_header) s += '"';
+
+        if (i.exported)
+          s += '*';
+      }
+
+      return s;
+    }
+
+    static pair<unit_type, module_info>
+    to_module_info (const string& s) // Inverse of to_string() above.
+    {
+      unit_type ut (unit_type::non_modular);
+      module_info mi;
+
+      for (size_t b (0), e (0), n (s.size ()), m; e < n; )
+      {
+        // Let's handle paths with spaces seeing that we already quote them.
+        //
+        char d (s[b = e] == '"' ? '"' : ' ');
+
+        if ((m = next_word (s, n, b, e, d)) == 0)
+          break;
+
+        char c (d == ' '  ? s[e - 1] : // Before delimiter.
+                e + 1 < n ? s[e + 1] : // After delimiter.
+                '\0');
+
+        switch (c)
+        {
+        case '!':
+        case '+':
+        case '*': break;
+        default:  c = '\0';
+        }
+
+        string w (s, b, m - (d == ' ' && c != '\0' ? 1 : 0));
+
+        unit_type t (c == '+' ? unit_type::module_impl  :
+                     d == ' ' ? unit_type::module_iface :
+                     unit_type::module_header);
+
+        if (c == '!' || c == '+')
+        {
+          ut = t;
+          mi.name = move (w);
+        }
+        else
+          mi.imports.push_back (module_import {t, move (w), c == '*', 0});
+
+        // Skip to the next word (quote, any marker, and space or just space).
+        //
+        e += (d == '"' ? (c != '\0' ? 3 : 2) : 1);
+      }
+
+      return pair<unit_type, module_info> (move (ut), move (mi));
+    }
+
+    // preprocessed (compared by underlying uint8_t value)
+    //
+    template <typename T>
+    inline bool
+    operator< (preprocessed l, T r) // Template because of VC14 bug.
+    {
+      return static_cast<uint8_t> (l) < static_cast<uint8_t> (r);
+    }
+
+    preprocessed
+    to_preprocessed (const string& s) // Throws invalid_argument.
+    {
+      if (s == "none")     return preprocessed::none;
+      if (s == "includes") return preprocessed::includes;
+      if (s == "modules")  return preprocessed::modules;
+      if (s == "all")      return preprocessed::all;
+      throw invalid_argument ("invalid preprocessed value '" + s + "'");
+    }
+
+    struct compile_rule::match_data
+    {
+      explicit
+      match_data (unit_type t, const prerequisite_member& s)
+          : type (t), src (s) {}
+
+      unit_type type;                       // Refined in apply().
+      preprocessed pp = preprocessed::none;
+      bool symexport = false;               // Target uses __symexport.
+      bool touch = false;                   // Target needs to be touched.
+      timestamp mt = timestamp_unknown;     // Target timestamp.
+      prerequisite_member src;              // Source prerequisite.
+      auto_rmfile psrc;                     // Preprocessed source, if any.
+      path dd;                              // Dependency database path.
+      size_t headers = 0;                   // Number of imported header units.
+      module_positions modules = {0, 0, 0}; // Positions of imported modules.
+    };
+
+    compile_rule::
+    compile_rule (data&& d)
+        : common (move (d)),
+          rule_id (string (x) += ".compile 4")
+    {
+      static_assert (sizeof (match_data) <= target::data_size,
+                     "insufficient space"); // For t.data() in match().
+    }
+
+    size_t compile_rule::
+    append_lang_options (cstrings& args, const match_data& md) const
+    {
+      size_t r (args.size ()); // To return the number of args added.
+
+      // Normally there will be one or two options/arguments.
+      //
+      const char* o1 (nullptr);
+      const char* o2 (nullptr);
+
+      switch (cclass)
+      {
+      case compiler_class::msvc:
+        {
+          switch (x_lang)
+          {
+          case lang::c:   o1 = "/TC"; break;
+          case lang::cxx: o1 = "/TP"; break;
+          }
+          break;
+        }
+      case compiler_class::gcc:
+        {
+          // For GCC we ignore the preprocessed value since it is handled via
+          // -fpreprocessed -fdirectives-only.
+          //
+          // Clang has *-cpp-output (but not c++-module-cpp-output) and they
+          // handle comments and line continuations. However, currently this
+          // is only by accident since these modes are essentially equivalent
+          // to their cpp-output-less versions.
+          //
+          switch (md.type)
+          {
+          case unit_type::non_modular:
+          case unit_type::module_impl:
+            {
+              o1 = "-x";
+              switch (x_lang)
+              {
+              case lang::c:   o2 = "c";   break;
+              case lang::cxx: o2 = "c++"; break;
+              }
+              break;
+            }
+          case unit_type::module_iface:
+          case unit_type::module_header:
+            {
+              // Here things get rather compiler-specific. We also assume
+              // the language is C++.
+              //
+              bool h (md.type == unit_type::module_header);
+
+              //@@ MODHDR TODO: should we try to distinguish c-header vs
+              //   c++-header based on the source target type?
+
+              switch (ctype)
+              {
+              case compiler_type::gcc:
+                {
+                  // In GCC compiling a header unit requires -fmodule-header
+                  // in addition to -x c/c++-header. Probably because relying
+                  // on just -x would be ambiguous with its PCH support.
+                  //
+                  if (h)
+                    args.push_back ("-fmodule-header");
+
+                  o1 = "-x";
+                  o2 = h ? "c++-header" : "c++";
+                  break;
+                }
+              case compiler_type::clang:
+                {
+                  o1 = "-x";
+                  o2 =  h ? "c++-header" : "c++-module";
+                  break;
+                }
+              default:
+                  assert (false);
+              }
+              break;
+            }
+          }
+          break;
+        }
+      }
+
+      if (o1 != nullptr) args.push_back (o1);
+      if (o2 != nullptr) args.push_back (o2);
+
+      return args.size () - r;
+    }
+
+    inline void compile_rule::
+    append_symexport_options (cstrings& args, const target& t) const
+    {
+      // With VC if a BMI is compiled with dllexport, then when such BMI is
+      // imported, it is auto-magically treated as dllimport. Let's hope
+      // other compilers follow suit.
+      //
+      args.push_back (t.is_a<bmis> () && tclass == "windows"
+                      ? "-D__symexport=__declspec(dllexport)"
+                      : "-D__symexport="); // Otherwise expands to nothing.
+    }
+
+    bool compile_rule::
+    match (action a, target& t, const string&) const
+    {
+      tracer trace (x, "compile_rule::match");
+
+      // Note: unit type will be refined in apply().
+      //
+      unit_type ut (t.is_a<hbmix> () ? unit_type::module_header :
+                    t.is_a<bmix> ()  ? unit_type::module_iface  :
+                    unit_type::non_modular);
+
+      // Link-up to our group (this is the obj/bmi{} target group protocol
+      // which means this can be done whether we match or not).
+      //
+      if (t.group == nullptr)
+        t.group = &search (t,
+                           (ut == unit_type::module_header ? hbmi::static_type:
+                            ut == unit_type::module_iface  ? bmi::static_type :
+                            obj::static_type),
+                           t.dir, t.out, t.name);
+
+      // See if we have a source file. Iterate in reverse so that a source
+      // file specified for a member overrides the one specified for the
+      // group. Also "see through" groups.
+      //
+      for (prerequisite_member p: reverse_group_prerequisite_members (a, t))
+      {
+        // If excluded or ad hoc, then don't factor it into our tests.
+        //
+        if (include (a, t, p) != include_type::normal)
+          continue;
+
+        // For a header unit check the "real header" (first in x_hdr) plus h{}.
+        //
+        if (ut == unit_type::module_header ? p.is_a (**x_hdr) || p.is_a<h> () :
+            ut == unit_type::module_iface  ? p.is_a (*x_mod)                  :
+            p.is_a (x_src))
+        {
+          // Save in the target's auxiliary storage.
+          //
+          t.data (match_data (ut, p));
+          return true;
+        }
+      }
+
+      l4 ([&]{trace << "no " << x_lang << " source file for target " << t;});
+      return false;
+    }
+
+    // Append to args the library options from a pair of *.export.* variables
+    // (first one is cc.export.*) recursively, prerequisite libraries first.
+    // See hash_lib_options() for the version that hashes them instead.
+    void compile_rule::
+    append_lib_options (const scope& bs,
+                        cstrings& args,
+                        action a,
+                        const target& t,
+                        linfo li) const
+    {
+      // See through utility libraries.
+      //
+      auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+      auto opt = [&args, this] (
+        const file& l, const string& t, bool com, bool exp)
+      {
+        // Note that in our model *.export.poptions are always "interface",
+        // even if set on liba{}/libs{}, unlike loptions.
+        //
+        if (!exp) // Ignore libux.
+          return;
+
+        const variable& var (
+          com
+          ? c_export_poptions // Presumably com means "common" (cc.*).
+          : (t == x
+             ? x_export_poptions // t is x: use our own variable.
+             : l.ctx.var_pool[t + ".export.poptions"])); // Lookup by name.
+
+        append_options (args, l, var);
+      };
+
+      // In case we don't have the "small function object" optimization.
+      //
+      const function<bool (const file&, bool)> impf (imp);
+      const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+          continue;
+
+        // Should be already searched and matched for libraries.
+        //
+        if (const target* pt = p.load ())
+        {
+          if (const libx* l = pt->is_a<libx> ())
+            pt = link_member (*l, a, li); // Replace the group with a member.
+
+          bool la; // True for liba{} and libux{}, false for libs{}.
+          if (!((la = pt->is_a<liba> ())  ||
+                (la = pt->is_a<libux> ()) ||
+                pt->is_a<libs> ()))
+            continue; // Not a library.
+
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             pt->as<file> (), la, 0, // Hack: lflags unused.
+                             impf, nullptr, optf);
+        }
+      }
+    }
+
+    void compile_rule:: // As append_lib_options() but hashes into cs.
+    hash_lib_options (const scope& bs,
+                      sha256& cs,
+                      action a,
+                      const target& t,
+                      linfo li) const
+    {
+      auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+      auto opt = [&cs, this] (
+        const file& l, const string& t, bool com, bool exp)
+      {
+        if (!exp) // Same early return as in append_lib_options().
+          return;
+
+        const variable& var (
+          com
+          ? c_export_poptions // Presumably com means "common" (cc.*).
+          : (t == x
+             ? x_export_poptions // t is x: use our own variable.
+             : l.ctx.var_pool[t + ".export.poptions"])); // Lookup by name.
+
+        hash_options (cs, l, var);
+      };
+
+      // The same logic as in append_lib_options().
+      //
+      const function<bool (const file&, bool)> impf (imp);
+      const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+          continue;
+
+        if (const target* pt = p.load ())
+        {
+          if (const libx* l = pt->is_a<libx> ())
+            pt = link_member (*l, a, li); // Replace the group with a member.
+
+          bool la; // True for liba{} and libux{}, false for libs{}.
+          if (!((la = pt->is_a<liba> ())  ||
+                (la = pt->is_a<libux> ()) ||
+                pt->is_a<libs> ()))
+            continue; // Not a library.
+
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             pt->as<file> (), la, 0, // Hack: lflags unused.
+                             impf, nullptr, optf);
+        }
+      }
+    }
+
+    // Append to m library prefixes based on the *.export.poptions variables
+    // recursively, prerequisite libraries first. The traversal is the same as
+    // in append_lib_options() except that append_prefixes() is called.
+    void compile_rule::
+    append_lib_prefixes (const scope& bs,
+                         prefix_map& m,
+                         action a,
+                         target& t,
+                         linfo li) const
+    {
+      auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();};
+
+      auto opt = [&m, this] (
+        const file& l, const string& t, bool com, bool exp)
+      {
+        if (!exp) // Same early return as in append_lib_options().
+          return;
+
+        const variable& var (
+          com
+          ? c_export_poptions // Presumably com means "common" (cc.*).
+          : (t == x
+             ? x_export_poptions // t is x: use our own variable.
+             : l.ctx.var_pool[t + ".export.poptions"])); // Lookup by name.
+
+        append_prefixes (m, l, var);
+      };
+
+      // The same logic as in append_lib_options().
+      //
+      const function<bool (const file&, bool)> impf (imp);
+      const function<void (const file&, const string&, bool, bool)> optf (opt);
+
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+          continue;
+
+        if (const target* pt = p.load ())
+        {
+          if (const libx* l = pt->is_a<libx> ())
+            pt = link_member (*l, a, li); // Replace the group with a member.
+
+          bool la; // True for liba{} and libux{}, false for libs{}.
+          if (!((la = pt->is_a<liba> ())  ||
+                (la = pt->is_a<libux> ()) ||
+                pt->is_a<libs> ()))
+            continue; // Not a library.
+
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             pt->as<file> (), la, 0, // Hack: lflags unused.
+                             impf, nullptr, optf);
+        }
+      }
+    }
+
+    // Update the target during the match phase. Return true if it has changed
+    // or if the passed timestamp is not timestamp_unknown and is older than
+    // the target (the timestamp is ignored unless t is a path_target).
+    //
+    // This function is used to make sure header dependencies are up to date.
+    //
+    // There would normally be a lot of headers for every source file (think
+    // all the system headers) and just calling execute_direct() on all of
+    // them can get expensive. At the same time, most of these headers are
+    // existing files that we will never be updating (again, system headers,
+    // for example) and the rule that will match them is the fallback
+    // file_rule. That rule has an optimization: it returns noop_recipe (which
+    // causes the target state to be automatically set to unchanged) if the
+    // file is known to be up to date. So we do the update "smartly".
+    //
+    static bool
+    update (tracer& trace, action a, const target& t, timestamp ts)
+    {
+      const path_target* pt (t.is_a<path_target> ());
+
+      if (pt == nullptr)
+        ts = timestamp_unknown; // No mtime to compare against.
+
+      target_state os (t.matched_state (a)); // State before any execution.
+
+      if (os == target_state::unchanged)
+      {
+        if (ts == timestamp_unknown)
+          return false;
+        else
+        {
+          // We expect the timestamp to be known (i.e., existing file).
+          //
+          timestamp mt (pt->mtime ());
+          assert (mt != timestamp_unknown);
+          return mt > ts; // True if the target is newer than ts.
+        }
+      }
+      else
+      {
+        // We only want to return true if our call to execute() actually
+        // caused an update. In particular, the target could already have been
+        // in target_state::changed because of a dependency extraction run for
+        // some other source file.
+        //
+        // @@ MT perf: so we are going to switch the phase and execute for
+        //    any generated header.
+        //
+        phase_switch ps (t.ctx, run_phase::execute);
+        target_state ns (execute_direct (a, t)); // State after execution.
+
+        if (ns != os && ns != target_state::unchanged)
+        {
+          l6 ([&]{trace << "updated " << t
+                        << "; old state " << os
+                        << "; new state " << ns;});
+          return true;
+        }
+        else
+          return ts != timestamp_unknown ? pt->newer (ts) : false;
+      }
+    }
+
+    recipe compile_rule:: // Match prerequisites, extract deps, pick a recipe.
+    apply (action a, target& xt) const
+    {
+      tracer trace (x, "compile_rule::apply");
+
+      file& t (xt.as<file> ()); // Either obj*{} or bmi*{}.
+
+      match_data& md (t.data<match_data> ()); // Saved by match().
+
+      context& ctx (t.ctx);
+
+      // Note: until refined below, non-BMI-generating translation unit is
+      // assumed non-modular.
+      //
+      unit_type ut (md.type);
+
+      const scope& bs (t.base_scope ());
+      const scope& rs (*bs.root_scope ());
+
+      otype ot (compile_type (t, ut));
+      linfo li (link_info (bs, ot)); // Link info for selecting libraries.
+      compile_target_types tts (compile_types (ot));
+
+      // Derive file name from target name.
+      //
+      string e; // Primary target extension (module or object).
+      {
+        const char* o ("o"); // Object extension (.o or .obj).
+
+        if (tsys == "win32-msvc")
+        {
+          switch (ot)
+          {
+          case otype::e: e = "exe."; break;
+          case otype::a: e = "lib."; break;
+          case otype::s: e = "dll."; break;
+          }
+          o = "obj";
+        }
+        else if (tsys == "mingw32")
+        {
+          switch (ot)
+          {
+          case otype::e: e = "exe."; break;
+          case otype::a: e = "a.";   break;
+          case otype::s: e = "dll."; break;
+          }
+        }
+        else if (tsys == "darwin")
+        {
+          switch (ot)
+          {
+          case otype::e: e = "";       break;
+          case otype::a: e = "a.";     break;
+          case otype::s: e = "dylib."; break;
+          }
+        }
+        else
+        {
+          switch (ot)
+          {
+          case otype::e: e = "";    break;
+          case otype::a: e = "a.";  break;
+          case otype::s: e = "so."; break;
+          }
+        }
+
+        switch (ctype) // Append the compiler-specific BMI or object suffix.
+        {
+        case compiler_type::gcc:
+          {
+            e += (ut != unit_type::non_modular ? "gcm" : o);
+            break;
+          }
+        case compiler_type::clang:
+          {
+            e += (ut != unit_type::non_modular ? "pcm" : o);
+            break;
+          }
+        case compiler_type::msvc:
+          {
+            e += (ut != unit_type::non_modular ? "ifc" : o);
+            break;
+          }
+        case compiler_type::icc:
+          {
+            assert (ut == unit_type::non_modular);
+            e += o;
+          }
+        }
+
+        // If we are compiling a module, then the obj*{} is an ad hoc member
+        // of bmi*{}. For now neither GCC nor Clang produce an object file
+        // for a header unit (but something tells me this is going to change).
+        //
+        if (ut == unit_type::module_iface)
+        {
+          // The module interface unit can be the same as an implementation
+          // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could
+          // collide. So we add the module extension to the target name.
+          //
+          file& obj (add_adhoc_member<file> (t, tts.obj, e.c_str ()));
+
+          if (obj.path ().empty ())
+            obj.derive_path (o);
+        }
+      }
+
+      const path& tp (t.derive_path (e.c_str ()));
+
+      // Inject dependency on the output directory.
+      //
+      const fsdir* dir (inject_fsdir (a, t));
+
+      // Match all the existing prerequisites. The injection code takes care
+      // of the ones it is adding.
+      //
+      // When cleaning, ignore prerequisites that are not in the same or a
+      // subdirectory of our project root.
+      //
+      auto& pts (t.prerequisite_targets[a]);
+      optional<dir_paths> usr_lib_dirs; // Extract lazily.
+
+      // Start asynchronous matching of prerequisites. Wait with unlocked
+      // phase to allow phase switching.
+      //
+      wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true);
+
+      size_t start (pts.size ()); // Index of the first to be added.
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        const target* pt (nullptr);
+        include_type  pi (include (a, t, p));
+
+        if (!pi)
+          continue;
+
+        // A dependency on a library is there so that we can get its
+        // *.export.poptions, modules, etc. This is the library
+        // meta-information protocol. See also append_lib_options().
+        //
+        if (pi == include_type::normal &&
+            (p.is_a<libx> () ||
+             p.is_a<liba> () ||
+             p.is_a<libs> () ||
+             p.is_a<libux> ()))
+        {
+          if (a.operation () == update_id)
+          {
+            // Handle (phase two) imported libraries. We know that for such
+            // libraries we don't need to do match() in order to get options
+            // (if any, they would be set by search_library()).
+            //
+            if (p.proj ())
+            {
+              if (search_library (a,
+                                  sys_lib_dirs,
+                                  usr_lib_dirs,
+                                  p.prerequisite) != nullptr)
+                continue;
+            }
+
+            pt = &p.search (t);
+
+            if (const libx* l = pt->is_a<libx> ())
+              pt = link_member (*l, a, li);
+          }
+          else
+            continue; // Libraries are only matched for update.
+        }
+        //
+        // For modules we pick only what we import which is done below so
+        // skip it here. One corner case is clean: we assume that someone
+        // else (normally library/executable) also depends on it and will
+        // clean it up.
+        //
+        else if (pi == include_type::normal &&
+                 (p.is_a<bmi> ()  || p.is_a (tts.bmi) ||
+                  p.is_a<hbmi> () || p.is_a (tts.hbmi)))
+          continue;
+        else
+        {
+          pt = &p.search (t);
+
+          if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ()))
+            continue;
+        }
+
+        match_async (a, *pt, ctx.count_busy (), t[a].task_count);
+        pts.push_back (prerequisite_target (pt, pi));
+      }
+
+      wg.wait ();
+
+      // Finish matching all the targets that we have started.
+      //
+      for (size_t i (start), n (pts.size ()); i != n; ++i)
+      {
+        const target*& pt (pts[i]);
+
+        // Making sure a library is updated before us will only restrict
+        // parallelism. But we do need to match it in order to get its imports
+        // resolved and prerequisite_targets populated. So we match it but
+        // then unmatch if it is safe. And thanks to the two-pass prerequisite
+        // match in link::apply() it will be safe unless someone is building
+        // an obj?{} target directly.
+        //
+        if (build2::match (
+              a,
+              *pt,
+              pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()
+              ? unmatch::safe
+              : unmatch::none))
+          pt = nullptr; // Ignore in execute.
+      }
+
+      // Inject additional prerequisites. We only do it when performing update
+      // since chances are we will have to update some of our prerequisites in
+      // the process (auto-generated source code, header units).
+      //
+      if (a == perform_update_id)
+      {
+        // The cached prerequisite target should be the same as what is in
+        // t.prerequisite_targets since we used standard search() and match()
+        // above.
+        //
+        const file& src (*md.src.search (t).is_a<file> ());
+
+        // Figure out if __symexport is used. While normally it is specified
+        // on the project root (which we cached), it can be overridden with
+        // a target-specific value for installed modules (which we sidebuild
+        // as part of our project).
+        //
+        // @@ MODHDR MSVC: are we going to do the same for header units? I
+        //    guess we will figure it out when MSVC supports header units.
+        //    Also see hashing below.
+        //
+        if (ut == unit_type::module_iface)
+        {
+          lookup l (src.vars[x_symexport]);
+          md.symexport = l ? cast<bool> (l) : symexport;
+        }
+
+        // Make sure the output directory exists.
+        //
+        // Is this the right thing to do? It does smell a bit, but then we do
+        // worse things in inject_prerequisites() below. There is also no way
+        // to postpone this until update since we need to extract and inject
+        // header dependencies now (we don't want to be calling search() and
+        // match() in update), which means we need to cache them now as well.
+        // So the only alternative, it seems, is to cache the updates to the
+        // database until later which will sure complicate (and slow down)
+        // things.
+        //
+        if (dir != nullptr)
+        {
+          // We can do it properly by using execute_direct(). But this means
+          // we will be switching to the execute phase with all the associated
+          // overheads. At the same time, in case of update, creation of a
+          // directory is not going to change the external state in any way
+          // that would affect any parallel efforts in building the internal
+          // state. So we are just going to create the directory directly.
+          // Note, however, that we cannot modify the fsdir{} target since
+          // this can very well be happening in parallel. But that's not a
+          // problem since fsdir{}'s update is idempotent.
+          //
+          fsdir_rule::perform_update_direct (a, t);
+        }
+
+        // Note: the leading '@' is reserved for the module map prefix (see
+        // extract_modules()) and no other line must start with it.
+        //
+        depdb dd (tp + ".d");
+
+        // First should come the rule name/version.
+        //
+        if (dd.expect (rule_id) != nullptr)
+          l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+        // Then the compiler checksum. Note that here we assume it
+        // incorporates the (default) target so that if the compiler changes
+        // but only in what it targets, then the checksum will still change.
+        //
+        if (dd.expect (cast<string> (rs[x_checksum])) != nullptr)
+          l4 ([&]{trace << "compiler mismatch forcing update of " << t;});
+
+        // Then the options checksum.
+        //
+        // The idea is to keep them exactly as they are passed to the compiler
+        // since the order may be significant.
+        //
+        {
+          sha256 cs;
+
+          // These flags affect how we compile the source and/or the format of
+          // depdb so factor them in. NOTE(review): md.pp is only assigned from
+          // x_preprocessed further down in this function -- confirm the order.
+          cs.append (&md.pp, sizeof (md.pp));
+
+          if (ut == unit_type::module_iface)
+            cs.append (&md.symexport, sizeof (md.symexport));
+
+          if (import_hdr != nullptr)
+            hash_options (cs,  *import_hdr);
+
+          if (md.pp != preprocessed::all)
+          {
+            hash_options (cs, t, c_poptions);
+            hash_options (cs, t, x_poptions);
+
+            // Hash *.export.poptions from prerequisite libraries.
+            //
+            hash_lib_options (bs, cs, a, t, li);
+
+            // Extra system header dirs (last).
+            //
+            assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+            hash_option_values (
+              cs, "-I",
+              sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+              [] (const dir_path& d) {return d.string ();});
+          }
+
+          hash_options (cs, t, c_coptions);
+          hash_options (cs, t, x_coptions);
+          hash_options (cs, tstd);
+
+          if (ot == otype::s)
+          {
+            // On Darwin, Win32 -fPIC is the default.
+            //
+            if (tclass == "linux" || tclass == "bsd")
+              cs.append ("-fPIC");
+          }
+
+          if (dd.expect (cs.string ()) != nullptr)
+            l4 ([&]{trace << "options mismatch forcing update of " << t;});
+        }
+
+        // Finally the source file.
+        //
+        if (dd.expect (src.path ()) != nullptr)
+          l4 ([&]{trace << "source file mismatch forcing update of " << t;});
+
+        // If any of the above checks resulted in a mismatch (different
+        // compiler, options, or source file) or if the depdb is newer than
+        // the target (interrupted update), then do unconditional update.
+        //
+        // Note that load_mtime() can only be used in the execute phase so we
+        // have to check for a cached value manually.
+        //
+        bool u;       // True if the target must be updated.
+        timestamp mt; // Target mtime (timestamp_nonexistent if u is true).
+
+        if (dd.writing ())
+          u = true;
+        else
+        {
+          if ((mt = t.mtime ()) == timestamp_unknown)
+            t.mtime (mt = mtime (tp)); // Cache.
+
+          u = dd.mtime > mt;
+        }
+
+        if (u)
+          mt = timestamp_nonexistent; // Treat as if it doesn't exist.
+
+        // Update prerequisite targets (normally just the source file).
+        //
+        // This is an unusual place and time to do it. But we have to do it
+        // before extracting dependencies. The reasoning for source file is
+        // pretty clear. What other prerequisites could we have? While
+        // normally they will be some other sources (as in, static content
+        // from src_root), it's possible they are some auto-generated stuff.
+        // And it's possible they affect the preprocessor result. Say some ad
+        // hoc/out-of-band compiler input file that is passed via the command
+        // line. So, to be safe, we make sure everything is up to date.
+        //
+        for (const target* pt: pts)
+        {
+          if (pt == nullptr || pt == dir) // Unmatched library or fsdir{}.
+            continue;
+
+          u = update (trace, a, *pt, u ? timestamp_unknown : mt) || u;
+        }
+
+        // Check if the source is already preprocessed to a certain degree.
+        // This determines which of the following steps we perform and on
+        // what source (original or preprocessed).
+        //
+        // Note: must be set on the src target.
+        //
+        if (const string* v = cast_null<string> (src[x_preprocessed]))
+        try
+        {
+          md.pp = to_preprocessed (*v);
+        }
+        catch (const invalid_argument& e)
+        {
+          fail << "invalid " << x_preprocessed.name << " variable value "
+               << "for target " << src << ": " << e;
+        }
+
+        // If we have no #include directives (or header unit imports), then
+        // skip header dependency extraction.
+        //
+        pair<auto_rmfile, bool> psrc (auto_rmfile (), false);
+        if (md.pp < preprocessed::includes)
+        {
+          // Note: trace is used in a test.
+          //
+          l5 ([&]{trace << "extracting headers from " << src;});
+          psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt);
+        }
+
+        // Next we "obtain" the translation unit information. What exactly
+        // "obtain" entails is tricky: If things changed, then we re-parse the
+        // translation unit. Otherwise, we re-create this information from
+        // depdb. We, however, have to do it here and now in case the database
+        // is invalid and we still have to fallback to re-parse.
+        //
+        // Store the translation unit's checksum to detect ignorable changes
+        // (whitespaces, comments, etc).
+        //
+        {
+          optional<string> cs; // Checksum stored in depdb, if any.
+          if (string* l = dd.read ())
+            cs = move (*l);
+          else
+            u = true; // Database is invalid, force re-parse.
+
+          unit tu;
+          for (bool first (true);; first = false) // Second pass: re-parse.
+          {
+            if (u)
+            {
+              // Flush depdb since it can be used (as a module map) by
+              // parse_unit().
+              //
+              if (dd.writing ())
+                dd.flush ();
+
+              auto p (parse_unit (a, t, li, src, psrc.first, md, dd.path));
+
+              if (!cs || *cs != p.second)
+              {
+                assert (first); // Unchanged TU has a different checksum?
+                dd.write (p.second);
+              }
+              //
+              // Don't clear if it was forced or the checksum should not be
+              // relied upon.
+              //
+              else if (first && !p.second.empty ())
+              {
+                // Clear the update flag and set the touch flag. Unless there
+                // is no object file, of course. See also the md.mt logic
+                // below.
+                //
+                if (mt != timestamp_nonexistent)
+                {
+                  u = false;
+                  md.touch = true;
+                }
+              }
+
+              tu = move (p.first);
+            }
+
+            if (modules)
+            {
+              if (u || !first)
+              {
+                string s (to_string (tu.type, tu.module_info));
+
+                if (first)
+                  dd.expect (s);
+                else
+                  dd.write (s);
+              }
+              else
+              {
+                if (string* l = dd.read ())
+                {
+                  auto p (to_module_info (*l));
+                  tu.type = p.first;
+                  tu.module_info = move (p.second);
+                }
+                else
+                {
+                  u = true; // Database is invalid, force re-parse.
+                  continue;
+                }
+              }
+            }
+
+            break;
+          }
+
+          // Make sure the translation unit type matches the resulting target
+          // type.
+          //
+          switch (tu.type)
+          {
+          case unit_type::non_modular:
+          case unit_type::module_impl:
+            {
+              if (ut != unit_type::non_modular)
+                fail << "translation unit " << src << " is not a module interface" <<
+                  info << "consider using " << x_src.name << "{} instead";
+              break;
+            }
+          case unit_type::module_iface:
+            {
+              if (ut != unit_type::module_iface)
+                fail << "translation unit " << src << " is a module interface" <<
+                  info << "consider using " << x_mod->name << "{} instead";
+              break;
+            }
+          case unit_type::module_header:
+            {
+              assert (ut == unit_type::module_header);
+              break;
+            }
+          }
+
+          // Refine the non-modular/module-impl decision from match().
+          //
+          ut = md.type = tu.type;
+
+          // Note: trace is used in a test.
+          //
+          l5 ([&]{trace << "extracting modules from " << t;});
+
+          // Extract the module dependency information in addition to header
+          // dependencies.
+          //
+          // NOTE: assumes that no further targets will be added into
+          //       t.prerequisite_targets!
+          //
+          if (modules)
+          {
+            extract_modules (a, bs, t, li,
+                             tts, src,
+                             md, move (tu.module_info), dd, u);
+
+            // Currently in VC module interface units must be compiled from
+            // the original source (something to do with having to detect and
+            // store header boundaries in the .ifc files).
+            //
+            // @@ MODHDR MSVC: should we do the same for header units? I guess
+            //    we will figure it out when MSVC supports header units.
+            //
+            if (ctype == compiler_type::msvc)
+            {
+              if (ut == unit_type::module_iface)
+                psrc.second = false;
+            }
+          }
+        }
+
+        // If anything got updated, then we didn't rely on the cache. However,
+        // the cached data could actually have been valid and the compiler run
+        // in extract_headers() as well as the code above merely validated it.
+        //
+        // We do need to update the database timestamp, however. Failing that,
+        // we will keep re-validating the cached data over and over again.
+        //
+        // @@ DRYRUN: note that for dry-run we would keep re-touching the
+        // database on every run (because u is true). So for now we suppress
+        // it (the file will be re-validated on the real run anyway). It feels
+        // like support for reusing the (partially) preprocessed output (see
+        // note below) should help solve this properly (i.e., we don't want
+        // to keep re-validating the file on every subsequent dry-run as well
+        // as on the real run).
+        //
+        if (u && dd.reading () && !ctx.dry_run)
+          dd.touch = true;
+
+        dd.close ();
+        md.dd = move (dd.path);
+
+        // If the preprocessed output is suitable for compilation, then pass
+        // it along.
+        //
+        if (psrc.second)
+        {
+          md.psrc = move (psrc.first);
+
+          // Without modules keeping the (partially) preprocessed output
+          // around doesn't buy us much: if the source/headers haven't changed
+          // then neither will the object file. Modules make things more
+          // interesting: now we may have to recompile an otherwise unchanged
+          // translation unit because a BMI it depends on has changed. In this
+          // case re-processing the translation unit would be a waste and
+          // compiling the original source would break distributed
+          // compilation.
+          //
+          // Note also that the long term trend will (hopefully) be for
+          // modularized projects to get rid of #include's which means the
+          // need for producing this partially preprocessed output will
+          // (hopefully) gradually disappear.
+          //
+          if (modules)
+            md.psrc.active = false; // Keep.
+        }
+
+        // Above we may have ignored changes to the translation unit. The
+        // problem is, unless we also update the target's timestamp, we will
+        // keep re-checking this on subsequent runs and it is not cheap.
+        // Updating the target's timestamp is not without problems either: it
+        // will cause a re-link on a subsequent run. So, essentially, we
+        // somehow need to remember two timestamps: one for checking
+        // "preprocessor prerequisites" above and one for checking other
+        // prerequisites (like modules) below. So what we are going to do is
+        // store the first in the target file (so we do touch it) and the
+        // second in depdb (which is never newer than the target).
+        //
+        // Perhaps when we start keeping the partially preprocessed this will
+        // fall away? Yes, please.
+        //
+        md.mt = u ? timestamp_nonexistent : dd.mtime;
+      }
+
+      switch (a) // Select the recipe for the action.
+      {
+      case perform_update_id: return [this] (action a, const target& t)
+        {
+          return perform_update (a, t);
+        };
+      case perform_clean_id: return [this] (action a, const target& t)
+        {
+          return perform_clean (a, t);
+        };
+      default: return noop_recipe; // Configure update.
+      }
+    }
+
+    // Reverse-lookup target type(s) from extension: return those types from
+    // x_inc whose default extension for name n in scope s is equal to e.
+    small_vector<const target_type*, 2> compile_rule::
+    map_extension (const scope& s, const string& n, const string& e) const
+    {
+      // We will just have to try all of the possible ones, in the "most
+      // likely to match" order.
+      //
+      auto test = [&s, &n, &e] (const target_type& tt) -> bool
+      {
+        // Call the extension derivation function. Here we know that it will
+        // only use the target type and name from the target key so we can
+        // pass bogus values for the rest.
+        //
+        target_key tk {&tt, nullptr, nullptr, &n, nullopt};
+
+        // This is like prerequisite search.
+        //
+        optional<string> de (tt.default_extension (tk, s, nullptr, true));
+
+        return de && *de == e; // No default extension means no match.
+      };
+
+      small_vector<const target_type*, 2> r;
+
+      for (const target_type* const* p (x_inc); *p != nullptr; ++p) // NULL-term.
+        if (test (**p))
+          r.push_back (*p);
+
+      return r; // Empty if nothing matched.
+    }
+
+    void compile_rule::
+    append_prefixes (prefix_map& m, const target& t, const variable& var) const
+    {
+      tracer trace (x, "compile_rule::append_prefixes");
+
+      // Add to m the include prefix mappings implied by the -I options in
+      // var on target t. A target that does not belong to any project (e.g,
+      // an "imported as installed" library) can't generate headers for us.
+      //
+      const scope& bs (t.base_scope ());
+      const scope* rs (bs.root_scope ());
+      if (rs == nullptr)
+        return;
+
+      const dir_path& out_base (t.dir);
+      const dir_path& out_root (rs->out_path ());
+
+      if (auto l = t[var]) // Only if the lookup yields a value.
+      {
+        const auto& v (cast<strings> (l));
+
+        for (auto i (v.begin ()), e (v.end ()); i != e; ++i)
+        {
+          // -I can either be in the "-Ifoo" or "-I foo" form. For VC it can
+          // also be /I. Anything else is not an include option: skip it.
+          //
+          const string& o (*i);
+
+          if (o.size () < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+            continue;
+
+          dir_path d;
+
+          try
+          {
+            if (o.size () == 2) // "-I foo" form: directory is the next value.
+            {
+              if (++i == e)
+                break; // Let the compiler complain.
+
+              d = dir_path (*i);
+            }
+            else // "-Ifoo" form: directory follows the two-character option.
+              d = dir_path (*i, 2, string::npos);
+          }
+          catch (const invalid_path& e) // Note: shadows the end iterator.
+          {
+            fail << "invalid directory '" << e.path << "'"
+                 << " in option '" << o << "'"
+                 << " in variable " << var
+                 << " for target " << t;
+          }
+
+          l6 ([&]{trace << "-I " << d;});
+
+          if (d.relative ())
+            fail << "relative directory " << d
+                 << " in option '" << o << "'"
+                 << " in variable " << var
+                 << " for target " << t;
+
+          // If the directory is not normalized, we can complain or normalize
+          // it. Let's go with normalizing to minimize questions/complaints.
+          //
+          if (!d.normalized (false)) // Allow non-canonical dir separators.
+            d.normalize ();
+
+          // If we are not inside our project root, then ignore.
+          //
+          if (!d.sub (out_root))
+            continue;
+
+          // If the target directory is a sub-directory of the include
+          // directory, then the prefix is the difference between the
+          // two. Otherwise, leave it empty.
+          //
+          // The idea here is to make this "canonical" setup work auto-
+          // magically:
+          //
+          // 1. We include all files with a prefix, e.g., <foo/bar>.
+          // 2. The library target is in the foo/ sub-directory, e.g.,
+          //    /tmp/foo/.
+          // 3. The poptions variable contains -I/tmp.
+          //
+          dir_path p (out_base.sub (d) ? out_base.leaf (d) : dir_path ());
+
+          // We use the target's directory as out_base but that doesn't work
+          // well for targets that are stashed in subdirectories. So as a
+          // heuristics we are going to also enter the outer directories of
+          // the original prefix. It is, however, possible, that another -I
+          // option after this one will produce one of these outer prefixes as
+          // its original prefix in which case we should override it.
+          //
+          // So we are going to assign the original prefix priority value 0
+          // (highest) and then increment it for each outer prefix.
+          //
+          auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio)
+          {
+            auto j (m.find (p));
+
+            if (j != m.end ())
+            {
+              prefix_value& v (j->second);
+
+              // We used to reject duplicates but it seems this can be
+              // reasonably expected to work according to the order of the
+              // -I options: normally the more "specific" -I paths come first
+              // (so that we don't pick up installed headers, etc). So for the
+              // same directory keep the lower (better) priority value, and
+              // for a different one override only if the new priority value
+              // is strictly lower (otherwise the new mapping is ignored).
+              //
+              if (v.directory == d)
+              {
+                if (v.priority > prio)
+                  v.priority = prio;
+              }
+              else if (v.priority <= prio) // Existing one is at least as good.
+              {
+                if (verb >= 4)
+                  trace << "ignoring mapping for prefix '" << p << "'\n"
+                        << "  existing mapping to " << v.directory
+                        << " priority " << v.priority << '\n'
+                        << "  another mapping to  " << d
+                        << " priority " << prio;
+              }
+              else
+              {
+                if (verb >= 4)
+                  trace << "overriding mapping for prefix '" << p << "'\n"
+                        << "  existing mapping to " << v.directory
+                        << " priority " << v.priority << '\n'
+                        << "  new mapping to      " << d
+                        << " priority " << prio;
+
+                v.directory = move (d);
+                v.priority = prio;
+              }
+            }
+            else // First mapping for this prefix.
+            {
+              l6 ([&]{trace << "'" << p << "' -> " << d << " priority "
+                            << prio;});
+              m.emplace (move (p), prefix_value {move (d), prio});
+            }
+          };
+
+#if 1
+          // Enter all outer prefixes, including prefixless.
+          //
+          // The prefixless part is fuzzy but seems to be doing the right
+          // thing ignoring/overriding-wise, at least in cases where one of
+          // the competing -I paths is a subdirectory of another. But the
+          // proper solution will be to keep all the prefixless entries (by
+          // changing prefix_map to a multimap) since for them we have an
+          // extra check (target must be explicitly spelled out in a
+          // buildfile).
+          //
+          for (size_t prio (0);; ++prio)
+          {
+            bool e (p.empty ()); // Last (prefixless) iteration.
+            enter ((e ? move (p) : p), (e ? move (d) : d), prio); // Move last.
+            if (e)
+              break;
+            p = p.directory (); // Next outer prefix.
+          }
+#else
+          size_t prio (0);
+          for (bool e (false); !e; ++prio)
+          {
+            dir_path n (p.directory ());
+            e = n.empty ();
+            enter ((e ? move (p) : p), (e ? move (d) : d), prio);
+            p = move (n);
+          }
+#endif
+        }
+      }
+    }
+
+    auto compile_rule::
+    build_prefix_map (const scope& bs,
+                      action a,
+                      target& t,
+                      linfo li) const -> prefix_map
+    {
+      prefix_map pm;
+
+      // The target's own options go in first: c_poptions, then x_poptions.
+      //
+      append_prefixes (pm, t, c_poptions);
+      append_prefixes (pm, t, x_poptions);
+
+      // The include directories from prerequisite libraries come last.
+      //
+      append_lib_prefixes (bs, pm, a, t, li);
+
+      return pm;
+    }
+
+    // Extract the next prerequisite from a make dependency line, beginning
+    // at position p. On return p refers to the first character of the
+    // prerequisite that follows or equals l.size() if this was the last one.
+    //
+    static string
+    next_make (const string& l, size_t& p)
+    {
+      const size_t len (l.size ());
+
+      auto skip_spaces = [&l, &p, len] () // Advance p past a run of spaces.
+      {
+        while (p != len && l[p] == ' ')
+          ++p;
+      };
+
+      skip_spaces (); // Leading.
+
+      string name;
+      name.reserve (len); // Cannot be longer than the line itself.
+
+      // Accumulate characters up to the next unescaped space, decoding the
+      // escape sequences along the way.
+      //
+      while (p != len && l[p] != ' ')
+      {
+        char ch (l[p]);
+        const bool last (p + 1 == len);
+
+        if (!last && ch == '$')
+        {
+          // An escaped '$' is spelled as '$$': collapse the pair into one.
+          //
+          if (l[p + 1] == '$')
+            ++p;
+        }
+        else if (!last && ch == '\\')
+        {
+          // Only '\\' and '\ ' are escape sequences; a backslash followed by
+          // anything else is kept as a literal character.
+          //
+          const char next (l[p + 1]);
+          if (next == '\\' || next == ' ')
+          {
+            ch = next;
+            ++p;
+          }
+        }
+
+        name += ch;
+        ++p;
+      }
+
+      skip_spaces (); // Trailing.
+
+      // Step over the line continuation character ('\' at the very end).
+      //
+      if (p == len - 1 && l[p] == '\\')
+        ++p;
+
+      return name;
+    }
+
+    // VC /showIncludes output. The first line is the file being compiled
+    // (handled by our caller). Then we have the list of headers, one per
+    // line, in this form (text can presumably be translated):
+    //
+    // Note: including file: C:\Program Files (x86)\[...]\iostream
+    //
+    // Finally, if we hit a non-existent header, then we end with an error
+    // line in this form:
+    //
+    // x.cpp(3): fatal error C1083: Cannot open include file: 'd/h.hpp':
+    // No such file or directory
+    //
+    // Distinguishing between the include note and the include error is
+    // easy: we can just check for C1083. Distinguishing between the note and
+    // other errors/warnings is harder: an error could very well end with
+    // what looks like a path so we cannot look for the note but rather have
+    // to look for an error. Here we assume that a line containing ' CNNNN:'
+    // is an error. Should be robust enough in the face of language
+    // translation, etc.
+    //
+    // It turns out C1083 is also used when we are unable to open the main
+    // source file and the error line (which is printed after the first line
+    // containing the file name) looks like this:
+    //
+    // c1xx: fatal error C1083: Cannot open source file: 's.cpp': No such
+    // file or directory
+
+    size_t
+    msvc_sense_diag (const string&, char); // msvc.cxx
+
+    // Extract the include path from the VC /showIncludes output line. Return
+    // empty string if the line is not an include note or include error. Set
+    // the good_error flag if it is an include error (which means the process
+    // will terminate with the error status that needs to be ignored).
+    //
+    static string
+    next_show (const string& l, bool& good_error)
+    {
+      // The include error should be the last line that we handle.
+      //
+      assert (!good_error);
+
+      size_t p (msvc_sense_diag (l, 'C')); // Presumably position of NNNN.
+      if (p == string::npos)
+      {
+        // Include note.
+        //
+        // We assume the path is always at the end but need to handle both
+        // absolute Windows and POSIX ones.
+        //
+        // Note that VC appears to always write the absolute path to the
+        // included file even if it is ""-included and the source path is
+        // relative. Aren't we lucky today?
+        //
+        p = l.rfind (':');
+
+        if (p != string::npos)
+        {
+          // See if this one is part of the Windows drive letter.
+          //
+          if (p > 1 && p + 1 < l.size () && // 2 chars before, 1 after.
+              l[p - 2] == ' '            &&
+              alpha (l[p - 1])           &&
+              path::traits_type::is_separator (l[p + 1]))
+            p = l.rfind (':', p - 2); // The ':' before the drive letter.
+        }
+
+        if (p != string::npos)
+        {
+          // VC uses indentation to indicate the include nesting so there
+          // could be any number of spaces after ':'. Skip them.
+          //
+          p = l.find_first_not_of (' ', p + 1);
+        }
+
+        if (p == string::npos) // No ':' or nothing but spaces after it.
+          fail << "unable to parse /showIncludes include note line \""
+               << l << '"';
+
+        return string (l, p);
+      }
+      else if (l.compare (p, 4, "1083")  == 0 &&
+               l.compare (0, 5, "c1xx:") != 0 /* Not the main source file. */ )
+      {
+        // Include error.
+        //
+        // The path is conveniently quoted with ''. Or so we thought: turns
+        // out different translations (e.g., Chinese) can use different quote
+        // characters. But the overall structure seems to be stable:
+        //
+        // ...C1083: <translated>: 'd/h.hpp': <translated>
+        //
+        // Plus, it seems the quote character could be multi-byte.
+        //
+        size_t p1 (l.find (':', p + 5)); // The ':' before the path.
+        size_t p2 (l.rfind (':'));       // The ':' after the path.
+
+        if (p1 != string::npos &&
+            p2 != string::npos &&
+            (p2 - p1) > 4      && // At least ": 'x':".
+            l[p1 + 1] == ' '   &&
+            l[p2 + 1] == ' ')
+        {
+          p1 += 3; // First character of the path.
+          p2 -= 1; // One past last character of the path.
+
+          // Skip any non-printable ASCII characters before/after (the multi-
+          // byte quote case).
+          //
+          auto printable = [] (char c) { return c >= 0x20 && c <= 0x7e; };
+
+          for (; p1 != p2 && !printable (l[p1]);     ++p1) ;
+          for (; p2 != p1 && !printable (l[p2 - 1]); --p2) ;
+
+          if (p1 != p2) // Something is left of the path.
+          {
+            good_error = true;
+            return string (l, p1 , p2 - p1);
+          }
+        }
+
+        fail << "unable to parse /showIncludes include error line \""
+             << l << '"' << endf;
+      }
+      else
+      {
+        // Some other error.
+        //
+        return string ();
+      }
+    }
+
+    void
+    msvc_sanitize_cl (cstrings&); // msvc.cxx
+
+    // GCC module mapper handler.
+    //
+    // Note that the input stream is non-blocking while output is blocking
+    // and this function should be prepared to handle closed input stream.
+    // Any unhandled io_error is handled by the caller as a generic module
+    // mapper io error.
+    //
+    struct compile_rule::module_mapper_state
+    {
+      size_t headers; // Count of header units imported so far.
+      size_t skip;    // Count of depdb entries that are still to be skipped.
+      string data;    // Auxiliary data carried over to the next command.
+
+      explicit module_mapper_state (size_t skip_count)
+        : headers (0), skip (skip_count) {}
+    };
+
+    void compile_rule::
+    gcc_module_mapper (module_mapper_state& st,
+                       action a, const scope& bs, file& t, linfo li,
+                       ifdstream& is,
+                       ofdstream& os,
+                       depdb& dd, bool& update, bool& bad_error,
+                       optional<prefix_map>& pfx_map, srcout_map& so_map) const
+    {
+      tracer trace (x, "compile_rule::gcc_module_mapper");
+
+      // Handle one request: read in the request line and respond to it.
+      //
+      // Because the dynamic mapper is only used during preprocessing, we
+      // can assume there is no batching and expect to see one line at a
+      // time.
+      //
+      string rq;
+#if 1
+      if (!eof (getline (is, rq)))
+      {
+        if (rq.empty ())
+          rq = "<empty>"; // Not to confuse with EOF.
+      }
+#else
+      for (char buf[4096]; !is.eof (); )
+      {
+        streamsize n (is.readsome (buf, sizeof (buf) - 1));
+        buf[n] = '\0';
+
+        if (char* p = strchr (buf, '\n'))
+        {
+          *p = '\0';
+
+          if (++p != buf + n)
+            fail << "batched module mapper request: '" << p << "'";
+
+          rq += buf;
+          break;
+        }
+        else
+          rq += buf;
+      }
+#endif
+
+      if (rq.empty ()) // EOF
+        return;
+
+      // @@ MODHDR: Should we print the pid we are talking to? It gets hard to
+      //            follow once things get nested. But if all our diag will
+      //            include some kind of id (chain, thread?), then this will
+      //            not be strictly necessary.
+      //
+      if (verb >= 3)
+        text << "  > " << rq;
+
+      // Check for a command. If match, remove it and the following space from
+      // the request string saving it in cmd (for diagnostics) unless the
+      // second argument is false, and return true.
+      //
+      const char* cmd (nullptr);
+      auto command = [&rq, &cmd] (const char* c, bool r = true)
+      {
+        size_t n (strlen (c));
+        bool m (rq.compare (0, n, c) == 0 && rq[n] == ' '); // Whole word.
+
+        if (m && r)
+        {
+          cmd = c;
+          rq.erase (0, n + 1);
+        }
+
+        return m;
+      };
+
+      string rs; // Response (left empty if the command is not recognized).
+      for (;;) // Breakout loop.
+      {
+        // Each command is responsible for handling its auxiliary data while
+        // we just clear it.
+        //
+        string data (move (st.data));
+
+        if (command ("HELLO"))
+        {
+          // HELLO <ver> <kind> <ident>
+          //
+          //@@ MODHDR TODO: check protocol version.
+
+          // We don't use "repository path" (whatever it is) so we pass '.'.
+          //
+          rs = "HELLO 0 build2 .";
+        }
+        //
+        // Turns out it's easiest to handle IMPORT together with INCLUDE since
+        // it can also trigger a re-search, etc. In a sense, IMPORT is all of
+        // the INCLUDE logic (skipping translation) plus the BMI dependency
+        // synthesis.
+        //
+        else if (command ("INCLUDE") || command ("IMPORT"))
+        {
+          // INCLUDE [<"']<name>[>"'] <path>
+          // IMPORT [<"']<name>[>"'] <path>
+          // IMPORT '<path>'
+          //
+          // <path> is the resolved path or empty if the header is not found.
+          // It can be relative if it is derived from a relative path (either
+          // via -I or includer). If <name> is single-quoted, then it cannot
+          // be re-searched (e.g., implicitly included stdc-predef.h) and in
+          // this case <path> is never empty.
+          //
+          // In case of re-search or include translation we may have to split
+          // handling the same include or import across multiple commands.
+          // Here are the scenarios in question:
+          //
+          // INCLUDE --> SEARCH -?-> INCLUDE
+          // IMPORT  --> SEARCH -?-> IMPORT
+          // INCLUDE --> IMPORT -?-> IMPORT
+          //
+          // The problem is we may not necessarily get the "followup" command
+          // (the question marks above). We may not get the followup after
+          // SEARCH because, for example, the newly found header has already
+          // been included/imported using a different style/path. Similarly,
+          // the IMPORT response may not be followed up with the IMPORT
+          // command because this header has already been imported, for
+          // example, using an import declaration. Throw into this #pragma
+          // once, include guards, and how exactly the compiler deals with
+          // them and things become truly unpredictable and hard to reason
+          // about. As a result, for each command we have to keep the build
+          // state consistent, specifically, without any "dangling" matched
+          // targets (which would lead to skew dependency counts). Note: the
+          // include translation is no longer a problem since we respond with
+          // an immediate BMI.
+          //
+          // To keep things simple we are going to always add a target that we
+          // matched to our prerequisite_targets. This includes the header
+          // target when building the BMI: while not ideal, this should be
+          // harmless provided we don't take its state/mtime into account.
+          //
+          // One thing we do want to handle specially is the "maybe-followup"
+          // case discussed above. It is hard to distinguish from an unrelated
+          // INCLUDE/IMPORT (we could have saved <name> and maybe correlated
+          // based on that). But if we don't, then we will keep matching and
+          // adding each target twice. What we can do, however, is check
+          // whether this target is already in prerequisite_targets and skip
+          // it if that's the case, which is a valid thing to do whether it is
+          // a followup or an unrelated command. In fact, for a followup, we
+          // only need to check the last element in prerequisite_targets.
+          //
+          // This approach strikes a reasonable balance between keeping things
+          // simple and handling normal cases without too much overhead. Note
+          // that we may still end up matching and adding the same targets
+          // multiple times for pathological cases, like when the same header
+          // is included using a different style/path, etc. We could, however,
+          // take care of this by searching the entire prerequisite_targets,
+          // which is always an option (and which would probably be required
+          // if the compiler were to send the INCLUDE command before checking
+          // for #pragma once or include guards, which GCC does not do).
+          //
+          // One thing that we cannot do without distinguishing followup and
+          // unrelated commands is verify the remapped header found by the
+          // compiler resolves to the expected target. So we will also do the
+          // correlation via <name>.
+          //
+          bool imp (cmd[1] == 'M'); // IMPORT (as opposed to INCLUDE).
+
+          path f;          // <path> or <name> if doesn't exist
+          string n;        // [<"']<name>[>"']
+          bool exists;     // <path> is not empty
+          bool searchable; // <name> is not single-quoted
+          {
+            char q (rq[0]);                // Opening quote.
+            q = (q ==  '<' ?  '>' :
+                 q ==  '"' ?  '"' :
+                 q == '\'' ? '\'' : '\0'); // Closing quote.
+
+            size_t s (rq.size ()), qp; // Quote position.
+            if (q == '\0' || (qp = rq.find (q, 1)) == string::npos)
+              break; // Malformed command.
+
+            n.assign (rq, 0, qp + 1);
+
+            size_t p (qp + 1);
+            if (imp && q == '\'' && p == s) // IMPORT '<path>'
+            {
+              exists = true;
+              // Leave f empty and fall through.
+            }
+            else
+            {
+              if (p != s && rq[p++] != ' ') // Skip following space, if any.
+                break;
+
+              exists = (p != s);
+
+              if (exists)
+              {
+                rq.erase (0, p);
+                f = path (move (rq));
+                assert (!f.empty ());
+              }
+              //else // Leave f empty and fall through.
+            }
+
+            if (f.empty ()) // No <path>: use what is inside the quotes.
+            {
+              rq.erase (0, 1);   // Opening quote.
+              rq.erase (qp - 1); // Closing quote and trailing space, if any.
+              f = path (move (rq));
+            }
+
+            // Complete relative paths not to confuse with non-existent.
+            //
+            if (exists && !f.absolute ())
+              f.complete ();
+
+            searchable = (q != '\'');
+          }
+
+          // The skip_count logic: in a nutshell (and similar to the non-
+          // mapper case), we may have "processed" some portion of the headers
+          // based on the depdb cache and we need to avoid re-processing them
+          // here. See the skip_count discussion for details.
+          //
+          // Note also that we need to be careful not to decrement the count
+          // for re-searches and include translation.
+          //
+          bool skip (st.skip != 0);
+
+          // The first part is the same for both INCLUDE and IMPORT: resolve
+          // the header path to target, update it, and trigger re-search if
+          // necessary.
+          //
+          const file* ht (nullptr);
+          auto& pts (t.prerequisite_targets[a]);
+
+          // If this is a followup command (or indistinguishable from one),
+          // then as a sanity check verify the header found by the compiler
+          // resolves to the expected target.
+          //
+          if (data == n)
+          {
+            assert (!skip); // We shouldn't be re-searching while skipping.
+
+            if (exists)
+            {
+              pair<const file*, bool> r (
+                enter_header (a, bs, t, li,
+                              move (f), false /* cache */,
+                              pfx_map, so_map));
+
+              if (!r.second) // Shouldn't be remapped.
+                ht = r.first;
+            }
+
+            if (ht != pts.back ()) // Not found, remapped, or a mismatch.
+            {
+              ht = static_cast<const file*> (pts.back ().target);
+              rs = "ERROR expected header '" + ht->path ().string () +
+                "' to be found instead";
+              bad_error = true; // We expect an error from the compiler.
+              break;
+            }
+
+            // Fall through.
+          }
+          else
+          {
+            // Enter, update, and see if we need to re-search this header.
+            //
+            bool updated (false), remapped;
+            try
+            {
+              pair<const file*, bool> er (
+                enter_header (a, bs, t, li,
+                              move (f), false /* cache */,
+                              pfx_map, so_map));
+
+              ht = er.first;
+              remapped = er.second;
+
+              if (remapped && !searchable)
+              {
+                rs = "ERROR remapping non-re-searchable header " + n;
+                bad_error = true;
+                break;
+              }
+
+              // If we couldn't enter this header as a target (as opposed to
+              // not finding a rule to update it), then our diagnostics won't
+              // really add anything to the compiler's.
+              //
+              if (ht == nullptr)
+              {
+                assert (!exists); // Sanity check.
+                throw failed ();
+              }
+
+              // Note that we explicitly update even for IMPORT (instead of,
+              // say, letting the BMI rule do it implicitly) since we may need
+              // to cause a re-search (see below).
+              //
+              if (!skip)
+              {
+                if (pts.empty () || pts.back () != ht) // Not just added.
+                {
+                  optional<bool> ir (inject_header (a, t,
+                                                    *ht, false /* cache */,
+                                                    timestamp_unknown));
+                  assert (ir); // Not from cache.
+                  updated = *ir;
+                }
+                else
+                  assert (exists);
+              }
+              else
+                assert (exists && !remapped); // Maybe this should be an error.
+            }
+            catch (const failed&)
+            {
+              // If the header does not exist or could not be updated, do we
+              // want our diagnostics, the compiler's, or both? We definitely
+              // want the compiler's since it points to the exact location.
+              // Ours could also be helpful. So while it will look a bit
+              // messy, let's keep both (it would have been nicer to print
+              // ours after the compiler's but that isn't easy).
+              //
+              rs = !exists
+                ? string ("INCLUDE")
+                : ("ERROR unable to update header '" +
+                   (ht != nullptr ? ht->path () : f).string () + "'");
+
+              bad_error = true;
+              break;
+            }
+
+            if (!imp) // Indirect prerequisite (see above).
+              update = updated || update;
+
+            // A mere update is not enough to cause a re-search. It either had
+            // to also not exist or be remapped.
+            //
+            if ((updated && !exists) || remapped)
+            {
+              rs = "SEARCH";
+              st.data = move (n); // Followup correlation.
+              break;
+            }
+
+            // Fall through.
+          }
+
+          // Now handle INCLUDE and IMPORT differences.
+          //
+          const string& hp (ht->path ().string ());
+
+          // Reduce include translation to the import case.
+          //
+          if (!imp && import_hdr != nullptr)
+          {
+            const strings& ih (*import_hdr);
+
+            auto i (lower_bound (ih.begin (), // Note: requires sorted ih.
+                                 ih.end (),
+                                 hp,
+                                 [] (const string& x, const string& y)
+                                 {
+                                   return path::traits_type::compare (x, y) < 0;
+                                 }));
+
+            imp = (i != ih.end () && *i == hp);
+          }
+
+          if (imp)
+          {
+            try
+            {
+              // Synthesize the BMI dependency then update and add the BMI
+              // target as a prerequisite.
+              //
+              const file& bt (make_header_sidebuild (a, bs, li, *ht));
+
+              if (!skip)
+              {
+                optional<bool> ir (inject_header (a, t,
+                                                  bt, false /* cache */,
+                                                  timestamp_unknown));
+                assert (ir); // Not from cache.
+                update = *ir || update;
+              }
+
+              const string& bp (bt.path ().string ());
+
+              if (!skip)
+              {
+                // @@ MODHDR: we write normalized path while the compiler will
+                //            look for the original. In particular, this means
+                //            that paths with `..` won't work. Maybe write
+                //            original for mapping and normalized for our use?
+                //
+                st.headers++;
+                dd.expect ("@ '" + hp + "' " + bp);
+              }
+              else
+                st.skip--; // One less depdb entry to skip.
+
+              rs = "IMPORT " + bp;
+            }
+            catch (const failed&)
+            {
+              rs = "ERROR unable to update header unit '" + hp + "'";
+              bad_error = true;
+              break;
+            }
+          }
+          else
+          {
+            if (!skip)
+              dd.expect (hp);
+            else
+              st.skip--; // One less depdb entry to skip.
+
+            rs = "INCLUDE";
+          }
+        }
+
+        break;
+      }
+
+      if (rs.empty ()) // Unrecognized or malformed command.
+      {
+        rs = "ERROR unexpected command '";
+
+        if (cmd != nullptr)
+        {
+          rs += cmd; // Add the command back.
+          rs += ' ';
+        }
+
+        rs += rq;
+        rs += "'";
+
+        bad_error = true;
+      }
+
+      if (verb >= 3)
+        text << "  < " << rs;
+
+      os << rs << endl;
+    }
+
+    // Enter a header file as a target. The cache flag indicates whether the
+    // path came from the depdb cache (in which case normalization and the
+    // src-out remapping are skipped) or from the compiler run.
+    //
+    // Return the header target and an indication of whether it was remapped
+    // (via so_map), or NULL target if the header does not exist and cannot
+    // be generated. In the latter case the passed header path is guaranteed
+    // to be still valid but might have been adjusted (e.g., normalized, etc).
+    // Note that pfx_map is built lazily, only if the path is relative.
+    //
+    // See extract_headers() (where this used to be a lambda) for background.
+    //
+    pair<const file*, bool> compile_rule::
+    enter_header (action a, const scope& bs, file& t, linfo li,
+                  path&& f, bool cache,
+                  optional<prefix_map>& pfx_map, srcout_map& so_map) const
+    {
+      tracer trace (x, "compile_rule::enter_header");
+
+      // Find or maybe insert the target for file f in directory d. Return
+      // NULL if insert is false and no existing target object is found.
+      //
+      auto find = [&trace, &t, this] (dir_path&& d,
+                                      path&& f,
+                                      bool insert) -> const file*
+      {
+        // Split the file into its name part and extension. Here we can assume
+        // the name part is a valid filesystem name.
+        //
+        // Note that if the file has no extension, we record an empty
+        // extension rather than NULL (which would signify that the default
+        // extension should be added).
+        //
+        string e (f.extension ());
+        string n (move (f).string ());
+
+        if (!e.empty ())
+          n.resize (n.size () - e.size () - 1); // One for the dot.
+
+        // See if this directory is part of any project out_root hierarchy and
+        // if so determine the target type.
+        //
+        // Note that this will miss all the headers that come from src_root
+        // (so they will be treated as generic C headers below). Generally, we
+        // don't have the ability to determine that some file belongs to
+        // src_root of some project. But that's not a problem for our
+        // purposes: it is only important for us to accurately determine
+        // target types for headers that could be auto-generated.
+        //
+        // While at it also try to determine if this target is from the src or
+        // out tree of said project.
+        //
+        dir_path out;
+
+        // It's possible the extension-to-target type mapping is ambiguous
+        // (usually because both C and X-language headers use the same .h
+        // extension). In this case we will first try to find one that matches
+        // an explicit target (similar logic to when insert is false).
+        //
+        small_vector<const target_type*, 2> tts;
+
+        const scope& bs (t.ctx.scopes.find (d));
+        if (const scope* rs = bs.root_scope ())
+        {
+          tts = map_extension (bs, n, e);
+
+          if (bs.out_path () != bs.src_path () && d.sub (bs.src_path ()))
+            out = out_src (d, *rs);
+        }
+
+        // If it is outside any project, or the project doesn't have such an
+        // extension, assume it is a plain old C header.
+        //
+        if (tts.empty ())
+        {
+          // If the project doesn't "know" this extension then we can't
+          // possibly find an explicit target of this type.
+          //
+          if (!insert)
+            return nullptr;
+
+          tts.push_back (&h::static_type);
+        }
+
+        // Find or insert target.
+        //
+        // Note that in case of the target type ambiguity we first try to find
+        // an explicit target that resolves this ambiguity.
+        //
+        const target* r (nullptr);
+
+        if (!insert || tts.size () > 1)
+        {
+          // Note that we skip any target type-specific searches (like for an
+          // existing file) and go straight for the target object since we
+          // need to find the target explicitly spelled out.
+          //
+          // Also, it doesn't feel like we should be able to resolve an
+          // absolute path with a spelled-out extension to multiple targets.
+          //
+          for (const target_type* tt: tts)
+            if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr)
+              break;
+
+          // Note: we can't do this because of the in-source builds where
+          // there won't be explicit targets for non-generated headers.
+          //
+          // This should be harmless, however, since in our world generated
+          // headers are normally spelled-out as explicit targets. And if not,
+          // we will still get an error, just a bit less specific.
+          //
+#if 0
+          if (r == nullptr && insert)
+          {
+            f = d / n;
+            if (!e.empty ())
+            {
+              f += '.';
+              f += e;
+            }
+
+            diag_record dr (fail);
+            dr << "mapping of header " << f << " to target type is ambiguous";
+            for (const target_type* tt: tts)
+              dr << info << "could be " << tt->name << "{}";
+            dr << info << "spell-out its target to resolve this ambiguity";
+          }
+#endif
+        }
+
+        // @@ OPT: move d, out, n
+        //
+        if (r == nullptr && insert)
+          r = &search (t, *tts[0], d, out, n, &e, nullptr);
+
+        return static_cast<const file*> (r);
+      };
+
+      // If it's not absolute then it either does not (yet) exist or is a
+      // relative ""-include (see init_args() for details). Reduce the second
+      // case to absolute.
+      //
+      // Note: we now always use absolute path to the translation unit so this
+      // no longer applies. But let's keep it for posterity.
+      //
+#if 0
+      if (f.relative () && rels.relative ())
+      {
+        // If the relative source path has a directory component, make sure
+        // it matches since ""-include will always start with that (none of
+        // the compilers we support try to normalize this path). Failed that
+        // we may end up searching for a generated header in a random
+        // (working) directory.
+        //
+        const string& fs (f.string ());
+        const string& ss (rels.string ());
+
+        size_t p (path::traits::rfind_separator (ss));
+
+        if (p == string::npos || // No directory.
+            (fs.size () > p + 1 &&
+             path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0))
+        {
+          path t (work / f); // The rels path is relative to work.
+
+          if (exists (t))
+            f = move (t);
+        }
+      }
+#endif
+
+      const file* pt (nullptr); // Resulting target (NULL if none found).
+      bool remapped (false);    // Set if f was remapped using so_map.
+
+      // If still relative then it does not exist.
+      //
+      if (f.relative ())
+      {
+        // This is probably as often an error as an auto-generated file, so
+        // trace at level 4.
+        //
+        l4 ([&]{trace << "non-existent header '" << f << "'";});
+
+        f.normalize ();
+
+        // The relative path might still contain '..' (e.g., ../foo.hxx;
+        // presumably ""-include'ed). We don't attempt to support auto-
+        // generated headers with such inclusion styles.
+        //
+        if (f.normalized ())
+        {
+          if (!pfx_map)
+            pfx_map = build_prefix_map (bs, a, t, li);
+
+          // First try the whole file. Then just the directory.
+          //
+          // @@ Has to be a separate map since the prefix can be the same as
+          //    the file name.
+          //
+          // auto i (pfx_map->find (f));
+
+          // Find the most qualified prefix of which we are a sub-path.
+          //
+          if (!pfx_map->empty ())
+          {
+            dir_path d (f.directory ());
+            auto i (pfx_map->find_sup (d));
+
+            if (i != pfx_map->end ())
+            {
+              const dir_path& pd (i->second.directory);
+
+              l4 ([&]{trace << "prefix '" << d << "' mapped to " << pd;});
+
+              // If this is a prefixless mapping, then only use it if we can
+              // resolve it to an existing target (i.e., it is explicitly
+              // spelled out in a buildfile).
+              //
+              // Note that at some point we will probably have a list of
+              // directories.
+              //
+              pt = find (pd / d, f.leaf (), !i->first.empty ());
+              if (pt != nullptr)
+              {
+                f = pd / f;
+                l4 ([&]{trace << "mapped as auto-generated " << f;});
+              }
+              else
+                l4 ([&]{trace << "no explicit target in " << pd;});
+            }
+            else
+              l4 ([&]{trace << "no prefix map entry for '" << d << "'";});
+          }
+          else
+            l4 ([&]{trace << "prefix map is empty";});
+        }
+      }
+      else
+      {
+        // We used to just normalize the path but that could result in an
+        // invalid path (e.g., for some system/compiler headers on CentOS 7
+        // with Clang 3.4) because of the symlinks (if a directory component
+        // is a symlink, then any following `..` are resolved relative to the
+        // target; see path::normalize() for background).
+        //
+        // Initially, to fix this, we realized (i.e., realpath(3)) it instead.
+        // But that turned out also not to be quite right since now we have
+        // all the symlinks resolved: conceptually it feels correct to keep
+        // the original header names since that's how the user chose to
+        // arrange things and practically this is how the compilers see/report
+        // them (e.g., the GCC module mapper).
+        //
+        // So now we have a pretty elaborate scheme where we try to use the
+        // normalized path if possible and fallback to realized. Normalized
+        // paths will work for situations where `..` does not cross symlink
+        // boundaries, which is the sane case. And for the insane case we only
+        // really care about out-of-project files (i.e., system/compiler
+        // headers). In other words, if you have the insane case inside your
+        // project, then you are on your own.
+        //
+        // All of this is unless the path comes from the depdb, in which case
+        // we've already done that. This is also where we handle src-out remap
+        // (again, not needed if cached).
+        //
+        if (!cache)
+        {
+          // Interestingly, on most platforms and with most compilers (Clang
+          // on Linux being a notable exception) most system/compiler headers
+          // are already normalized.
+          //
+          path_abnormality a (f.abnormalities ());
+          if (a != path_abnormality::none)
+          {
+            // While we can reasonably expect this path to exist, things do go
+            // south from time to time (like compiling under wine with file
+            // wlantypes.h included as WlanTypes.h).
+            //
+            try
+            {
+              // If we have any parent components, then we have to verify the
+              // normalized path matches realized.
+              //
+              path r;
+              if ((a & path_abnormality::parent) == path_abnormality::parent)
+              {
+                r = f;
+                r.realize ();
+              }
+
+              try
+              {
+                f.normalize ();
+
+                // Note that we might still need to resolve symlinks in the
+                // normalized path.
+                //
+                if (!r.empty () && f != r && path (f).realize () != r)
+                  f = move (r);
+              }
+              catch (const invalid_path&)
+              {
+                assert (!r.empty ()); // Shouldn't have failed if no `..`.
+                f = move (r);         // Fallback to realize.
+              }
+            }
+            catch (const invalid_path&)
+            {
+              fail << "invalid header path '" << f.string () << "'";
+            }
+            catch (const system_error& e)
+            {
+              fail << "invalid header path '" << f.string () << "': " << e;
+            }
+          }
+
+          if (!so_map.empty ())
+          {
+            // Find the most qualified prefix of which we are a sub-path.
+            //
+            auto i (so_map.find_sup (f));
+            if (i != so_map.end ())
+            {
+              // Ok, there is an out tree for this header. Remap to a path
+              // from the out tree and see if there is a target for it.
+              //
+              dir_path d (i->second);
+              d /= f.leaf (i->first).directory ();
+              pt = find (move (d), f.leaf (), false); // d is not moved from.
+
+              if (pt != nullptr)
+              {
+                path p (d / f.leaf ());
+                l4 ([&]{trace << "remapping " << f << " to " << p;});
+                f = move (p);
+                remapped = true;
+              }
+            }
+          }
+        }
+
+        if (pt == nullptr)
+        {
+          l6 ([&]{trace << "entering " << f;});
+          pt = find (f.directory (), f.leaf (), true);
+        }
+      }
+
+      return make_pair (pt, remapped);
+    }
+
+    // Match the header (or header unit) target pt to a rule, update it, and
+    // append it to the list of prerequisite targets of t for this action.
+    // Depending on the cache flag, the target is assumed to either have come
+    // from the depdb cache or from the compiler run.
+    //
+    // Return the result of the update, that is, the indication of whether
+    // the header has changed or, if the passed timestamp is not
+    // timestamp_unknown, is older than the target. If the header came from
+    // the cache and it can no longer be matched (it no longer exists nor can
+    // be generated), then return nullopt, in which case the prerequisite
+    // target list is left untouched.
+    //
+    // Note: this used to be a lambda inside extract_headers() so refer to the
+    // body of that function for the overall picture.
+    //
+    optional<bool> compile_rule::
+    inject_header (action a, file& t,
+                   const file& pt, bool cache, timestamp mt) const
+    {
+      tracer trace (x, "compile_rule::inject_header");
+
+      // A header recorded in the cache may have been removed since (think of
+      // a header in /usr/local/include that has been uninstalled and now we
+      // need to use one from /usr/include). So for the cached entries we
+      // only try to match and report the failure to the caller, which
+      // translates it to a restart. A header that came from the compiler
+      // run, on the other hand, is matched unconditionally.
+      //
+      if (cache)
+      {
+        if (!build2::try_match (a, pt).first)
+          return nullopt;
+      }
+      else
+        build2::match (a, pt);
+
+      // Bring the header up to date and only then register it as one of our
+      // prerequisite targets.
+      //
+      bool changed (update (trace, a, pt, mt));
+      t.prerequisite_targets[a].push_back (&pt);
+
+      return changed;
+    }
+
+    // Extract and inject header dependencies. Return the preprocessed source
+    // file as well as an indication if it is usable for compilation (see
+    // below for details).
+    //
+    // This is also the place where we handle header units which are a lot
+    // more like auto-generated headers than modules. In particular, if a
+    // header unit BMI is out-of-date, then we have to re-preprocess this
+    // translation unit.
+    //
+    pair<auto_rmfile, bool> compile_rule::
+    extract_headers (action a,
+                     const scope& bs,
+                     file& t,
+                     linfo li,
+                     const file& src,
+                     match_data& md,
+                     depdb& dd,
+                     bool& update,
+                     timestamp mt) const
+    {
+      tracer trace (x, "compile_rule::extract_headers");
+
+      otype ot (li.type);
+
+      bool reprocess (cast_false<bool> (t[c_reprocess]));
+
+      auto_rmfile psrc;
+      bool puse (true);
+
+      // If things go wrong (and they often do in this area), give the user a
+      // bit extra context.
+      //
+      auto df = make_diag_frame (
+        [&src](const diag_record& dr)
+        {
+          if (verb != 0)
+            dr << info << "while extracting header dependencies from " << src;
+        });
+
+      const scope& rs (*bs.root_scope ());
+
+      // Preprocessor mode that preserves as much information as possible while
+      // still performing inclusions. Also serves as a flag indicating whether
+      // this compiler uses the separate preprocess and compile setup.
+      //
+      const char* pp (nullptr);
+
+      switch (ctype)
+      {
+      case compiler_type::gcc:
+        {
+          // -fdirectives-only is available since GCC 4.3.0.
+          //
+          if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+            pp = "-fdirectives-only";
+
+          break;
+        }
+      case compiler_type::clang:
+        {
+          // -frewrite-includes is available since vanilla Clang 3.2.0.
+          //
+          // Apple Clang 5.0 is based on LLVM 3.3svn so it should have this
+          // option (4.2 is based on 3.2svn so it may or may not have it and,
+          // no, we are not going to try to find out).
+          //
+          if (cvariant == "apple"
+              ? (cmaj >= 5)
+              : (cmaj > 3 || (cmaj == 3 && cmin >= 2)))
+            pp = "-frewrite-includes";
+
+          break;
+        }
+      case compiler_type::msvc:
+        {
+          // Asking MSVC to preserve comments doesn't really buy us anything
+          // but does cause some extra buggy behavior.
+          //
+          //pp = "/C";
+          break;
+        }
+      case compiler_type::icc:
+        break;
+      }
+
+      // Initialize lazily, only if required.
+      //
+      environment env;
+      cstrings args;
+      string out; // Storage.
+
+      // Some compilers in certain modes (e.g., when also producing the
+      // preprocessed output) are incapable of writing the dependency
+      // information to stdout. In this case we use a temporary file.
+      //
+      auto_rmfile drm;
+
+      // Here is the problem: neither GCC nor Clang allow -MG (treat missing
+      // header as generated) when we produce any kind of other output (-MD).
+      // And that's probably for the best since otherwise the semantics gets
+      // pretty hairy (e.g., what is the exit code and state of the output)?
+      //
+      // One thing to note about generated headers: if we detect one, then,
+      // after generating it, we re-run the compiler since we need to get
+      // this header's dependencies.
+      //
+      // So this is how we are going to work around this problem: we first run
+      // with -E but without -MG. If there are any errors (maybe because of
+      // generated headers maybe not), we restart with -MG and without -E. If
+      // this fixes the error (so it was a generated header after all), then
+      // we have to restart at which point we go back to -E and no -MG. And we
+      // keep yo-yoing like this. Missing generated headers will probably be
+      // a fairly rare occurrence so this shouldn't be too expensive.
+      //
+      // Actually, there is another error case we would like to handle: an
+      // outdated generated header that is now causing an error (e.g., because
+      // of a check that is now triggering #error or some such). So there are
+      // actually three error cases: outdated generated header, missing
+      // generated header, and some other error. To handle the outdated case
+      // we need the compiler to produce the dependency information even in
+      // case of an error. Clang does it, for VC we parse diagnostics
+      // ourselves, but GCC does not (but a patch has been submitted).
+      //
+      // So the final plan is then as follows:
+      //
+      // 1. Start without -MG and with suppressed diagnostics.
+      // 2. If error but we've updated a header, then repeat step 1.
+      // 3. Otherwise, restart with -MG and diagnostics.
+      //
+      // Note that below we don't even check if the compiler supports the
+      // dependency info on error. We just try to use it and if it's not
+      // there we ignore the io error since the compiler has failed.
+      //
+      bool args_gen;     // Current state of args.
+      size_t args_i (0); // Start of the -M/-MD "tail".
+
+      // Ok, all good then? Not so fast, the rabbit hole is deeper than it
+      // seems: When we run with -E we have to discard diagnostics. This is
+      // not a problem for errors since they will be shown on the re-run but
+      // it is for (preprocessor) warnings.
+      //
+      // Clang's -frewrite-includes is nice in that it preserves the warnings
+      // so they will be shown during the compilation of the preprocessed
+      // source. They are also shown during -E but that we discard. And unlike
+      // GCC, in Clang -M does not imply -w (disable warnings) so it would
+      // have been shown in -M -MG re-runs but we suppress that with explicit
+      // -w. All is good in the Clang land then (even -Werror works nicely).
+      //
+      // GCC's -fdirectives-only, on the other hand, processes all the
+      // directives so they are gone from the preprocessed source. Here is
+      // what we are going to do to work around this: we will detect if any
+      // diagnostics has been written to stderr on the -E run. If that's the
+      // case (but the compiler indicated success) then we assume they are
+      // warnings and disable the use of the preprocessed output for
+      // compilation. This in turn will result in compilation from source
+      // which will display the warnings. Note that we may still use the
+      // preprocessed output for other things (e.g., C++ module dependency
+      // discovery). BTW, another option would be to collect all the
+      // diagnostics and then dump it if the run is successful, similar to
+      // the VC semantics (and drawbacks) described below.
+      //
+      // Finally, for VC, things are completely different: there is no -MG
+      // equivalent and we handle generated headers by analyzing the
+      // diagnostics. This means that unlike in the above two cases, the
+      // preprocessor warnings are shown during dependency extraction, not
+      // compilation. Not ideal but that's the best we can do. Or is it -- we
+      // could implement ad hoc diagnostics sensing... It appears warnings are
+      // in the C4000-C4999 code range though there can also be note lines
+      // which don't have any C-code.
+      //
+      // BTW, triggering a warning in the VC preprocessor is not easy; there
+      // is no #warning and pragmas are passed through to the compiler. One
+      // way to do it is to redefine a macro, for example:
+      //
+      // hello.cxx(4): warning C4005: 'FOO': macro redefinition
+      // hello.cxx(3): note: see previous definition of 'FOO'
+      //
+      // So seeing that it is hard to trigger a legitimate VC preprocessor
+      // warning, for now, we will just treat them as errors by adding /WX.
+      //
+      // Finally, if we are using the module mapper, then all this mess falls
+      // away: we only run the compiler once, we let the diagnostics through,
+      // we get a compiler error (with location information) if a header is
+      // not found, and there is no problem with outdated generated headers
+      // since we update/remap them before the compiler has a chance to read
+      // them. Overall, this "dependency mapper" approach is how it should
+      // have been done from the beginning.
+
+      // Note: diagnostics sensing is currently only supported if dependency
+      // info is written to a file (see above).
+      //
+      bool sense_diag (false);
+
+      // And here is another problem: if we have an already generated header
+      // in src and the one in out does not yet exist, then the compiler will
+      // pick the one in src and we won't even notice. Note that this is not
+      // only an issue with mixing in- and out-of-tree builds (which does feel
+      // wrong but is oh so convenient): this is also a problem with
+      // pre-generated headers, a technique we use to make installing the
+      // generator by end-users optional by shipping pre-generated headers.
+      //
+      // This is a nasty problem that doesn't seem to have a perfect solution
+      // (except, perhaps, C++ modules). So what we are going to do is try to
+      // rectify the situation by detecting and automatically remapping such
+      // mis-inclusions. It works as follows.
+      //
+      // First we will build a map of src/out pairs that were specified with
+      // -I. Here, for performance and simplicity, we will assume that they
+      // always come in pairs with out first and src second. We build this
+      // map lazily only if we are running the preprocessor and reuse it
+      // between restarts.
+      //
+      // With the map in hand we can then check each included header for
+      // potentially having a doppelganger in the out tree. If this is the
+      // case, then we calculate a corresponding header in the out tree and,
+      // (this is the most important part), check if there is a target for
+      // this header in the out tree. This should be fairly accurate and not
+      // require anything explicit from the user except perhaps for a case
+      // where the header is generated out of nothing (so there is no need to
+      // explicitly mention its target in the buildfile). But this probably
+      // won't be very common.
+      //
+      // One tricky area in this setup are target groups: if the generated
+      // sources are mentioned in the buildfile as a group, then there might
+      // be no header target (yet). The way we solve this is by requiring code
+      // generator rules to cooperate and create at least the header target as
+      // part of the group creation. While not all members of the group may be
+      // generated depending on the options (e.g., inline files might be
+      // suppressed), headers are usually non-optional.
+      //
+      // Note that we use path_map instead of dir_path_map to allow searching
+      // using path (file path).
+      //
+      srcout_map so_map; // path_map<dir_path>
+
+      // Dynamic module mapper.
+      //
+      bool mod_mapper (false);
+
+      // The gen argument to init_args() is in/out. The caller signals whether
+      // to force the generated header support and on return it signals
+      // whether this support is enabled. The first call to init_args is
+      // expected to have gen false.
+      //
+      // Return NULL if the dependency information goes to stdout and a
+      // pointer to the temporary file path otherwise.
+      //
+      auto init_args = [a, &t, ot, li, reprocess,
+                        &src, &md, &psrc, &sense_diag, &mod_mapper,
+                        &rs, &bs,
+                        pp, &env, &args, &args_gen, &args_i, &out, &drm,
+                        &so_map, this]
+        (bool& gen) -> const path*
+      {
+        const path* r (nullptr);
+
+        if (args.empty ()) // First call.
+        {
+          assert (!gen);
+
+          // We use absolute/relative paths in the dependency output to
+          // distinguish existing headers from (missing) generated. Which
+          // means we have to (a) use absolute paths in -I and (b) pass
+          // absolute source path (for ""-includes). That (b) is a problem:
+          // if we use an absolute path, then all the #line directives will be
+          // absolute and all the diagnostics will have long, noisy paths
+          // (actually, we will still have long paths for diagnostics in
+          // headers).
+          //
+          // To work around this we used to pass a relative path to the source
+          // file and then check every relative path in the dependency output
+          // for existence in the source file's directory. This is not without
+          // issues: it is theoretically possible for a generated header that
+          // is <>-included and found via -I to exist in the source file's
+          // directory. Note, however, that this is a lot more likely to
+          // happen with prefix-less inclusion (e.g., <foo>) and in this case
+          // we assume the file is in the project anyway. And if there is a
+          // conflict with a prefixed include (e.g., <bar/foo>), then, well,
+          // we will just have to get rid of quoted includes (which are
+          // generally a bad idea, anyway).
+          //
+          // But then this approach (relative path) fell apart further when we
+          // tried to implement precise changed detection: the preprocessed
+          // output would change depending from where it was compiled because
+          // of #line (which we could work around) and __FILE__/assert()
+          // (which we can't really do anything about). So it looks like using
+          // the absolute path is the lesser of all the evils (and there are
+          // many).
+          //
+          // Note that we detect and diagnose relative -I directories lazily
+          // when building the include prefix map.
+          //
+          args.push_back (cpath.recall_string ());
+
+          // If we are re-processing the translation unit, then allow the
+          // translation unit to detect header/module dependency extraction.
+          // This can be used to work around separate preprocessing bugs in
+          // the compiler.
+          //
+          if (reprocess)
+            args.push_back ("-D__build2_preprocess");
+
+          append_options (args, t, c_poptions);
+          append_options (args, t, x_poptions);
+
+          // Add *.export.poptions from prerequisite libraries.
+          //
+          append_lib_options (bs, args, a, t, li);
+
+          // Populate the src-out with the -I$out_base -I$src_base pairs.
+          //
+          {
+            // Try to be fast and efficient by reusing buffers as much as
+            // possible.
+            //
+            string ds;
+
+            // Previous -I innermost scope if out_base plus the difference
+            // between the scope path and the -I path (normally empty).
+            //
+            const scope* s (nullptr);
+            dir_path p;
+
+            for (auto i (args.begin ()), e (args.end ()); i != e; ++i)
+            {
+              // -I can either be in the "-Ifoo" or "-I foo" form. For VC it
+              // can also be /I.
+              //
+              const char* o (*i);
+              size_t n (strlen (o));
+
+              if (n < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I')
+              {
+                s = nullptr;
+                continue;
+              }
+
+              if (n == 2)
+              {
+                if (++i == e)
+                  break; // Let the compiler complain.
+
+                ds = *i;
+              }
+              else
+                ds.assign (o + 2, n - 2);
+
+              if (!ds.empty ())
+              {
+                // Note that we don't normalize the paths since it would be
+                // quite expensive and normally the pairs we are interested in
+                // are already normalized (since they are usually specified as
+                // -I$src/out_*). We just need to add a trailing directory
+                // separator if it's not already there.
+                //
+                if (!dir_path::traits_type::is_separator (ds.back ()))
+                  ds += dir_path::traits_type::directory_separator;
+
+                dir_path d (move (ds), dir_path::exact); // Move the buffer in.
+
+                // Ignore invalid paths (buffer is not moved).
+                //
+                if (!d.empty ())
+                {
+                  // Ignore any paths containing '.', '..' components. Allow
+                  // any directory separators though (think -I$src_root/foo
+                  // on Windows).
+                  //
+                  if (d.absolute () && d.normalized (false))
+                  {
+                    // If we have a candidate out_base, see if this is its
+                    // src_base.
+                    //
+                    if (s != nullptr)
+                    {
+                      const dir_path& bp (s->src_path ());
+
+                      if (d.sub (bp))
+                      {
+                        if (p.empty () || d.leaf (bp) == p)
+                        {
+                          // We've got a pair.
+                          //
+                          so_map.emplace (move (d), s->out_path () / p);
+                          s = nullptr; // Taken.
+                          continue;
+                        }
+                      }
+
+                      // Not a pair. Fall through to consider as out_base.
+                      //
+                      s = nullptr;
+                    }
+
+                    // See if this path is inside a project with an out-of-
+                    // tree build and is in the out directory tree.
+                    //
+                    const scope& bs (t.ctx.scopes.find (d));
+                    if (bs.root_scope () != nullptr)
+                    {
+                      const dir_path& bp (bs.out_path ());
+                      if (bp != bs.src_path ())
+                      {
+                        bool e;
+                        if ((e = (d == bp)) || d.sub (bp))
+                        {
+                          s = &bs;
+                          if (e)
+                            p.clear ();
+                          else
+                            p = d.leaf (bp);
+                        }
+                      }
+                    }
+                  }
+                  else
+                    s = nullptr;
+
+                  ds = move (d).string (); // Move the buffer out.
+                }
+                else
+                  s = nullptr;
+              }
+              else
+                s = nullptr;
+            }
+          }
+
+          // Extra system header dirs (last).
+          //
+          assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+          append_option_values (
+            args, "-I",
+            sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+            [] (const dir_path& d) {return d.string ().c_str ();});
+
+          if (md.symexport)
+            append_symexport_options (args, t);
+
+          // Some compile options (e.g., -std, -m) affect the preprocessor.
+          //
+          // Currently Clang supports importing "header modules" even when in
+          // the TS mode. And "header modules" support macros which means
+          // imports have to be resolved during preprocessing. Which poses a
+          // bit of a chicken and egg problem for us. For now, the workaround
+          // is to remove the -fmodules-ts option when preprocessing. Hopefully
+          // there will be a "pure modules" mode at some point.
+          //
+          // @@ MODHDR Clang: should be solved with the dynamic module mapper
+          //    if/when Clang supports it?
+          //
+
+          // Don't treat warnings as errors.
+          //
+          const char* werror (nullptr);
+          switch (cclass)
+          {
+          case compiler_class::gcc:  werror = "-Werror"; break;
+          case compiler_class::msvc: werror = "/WX";     break;
+          }
+
+          bool clang (ctype == compiler_type::clang);
+
+          append_options (args, t, c_coptions, werror);
+          append_options (args, t, x_coptions, werror);
+          append_options (args, tstd,
+                          tstd.size () - (modules && clang ? 1 : 0));
+
+          switch (cclass)
+          {
+          case compiler_class::msvc:
+            {
+              args.push_back ("/nologo");
+
+              // See perform_update() for details on overriding the default
+              // exceptions and runtime.
+              //
+              if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+                args.push_back ("/EHsc");
+
+              if (!find_option_prefixes ({"/MD", "/MT"}, args))
+                args.push_back ("/MD");
+
+              args.push_back ("/P");            // Preprocess to file.
+              args.push_back ("/showIncludes"); // Goes to stdout (with diag).
+              if (pp != nullptr)
+                args.push_back (pp);            // /C (preserve comments).
+              args.push_back ("/WX");           // Warning as error (see above).
+
+              msvc_sanitize_cl (args);
+
+              psrc = auto_rmfile (t.path () + x_pext);
+
+              if (cast<uint64_t> (rs[x_version_major]) >= 18)
+              {
+                args.push_back ("/Fi:");
+                args.push_back (psrc.path.string ().c_str ());
+              }
+              else
+              {
+                out = "/Fi" + psrc.path.string ();
+                args.push_back (out.c_str ());
+              }
+
+              append_lang_options (args, md); // Compile as.
+              gen = args_gen = true;
+              break;
+            }
+          case compiler_class::gcc:
+            {
+              if (ot == otype::s)
+              {
+                // On Darwin, Win32 -fPIC is the default.
+                //
+                if (tclass == "linux" || tclass == "bsd")
+                  args.push_back ("-fPIC");
+              }
+
+              // Setup the dynamic module mapper if needed.
+              //
+              // Note that it's plausible in the future we will use it even if
+              // modules are disabled, for example, to implement better -MG.
+              // In which case it will probably be better called a
+              // "dependency mapper".
+              //
+              if (modules)
+              {
+                if (ctype == compiler_type::gcc)
+                {
+                  args.push_back ("-fmodule-mapper=<>");
+                  mod_mapper = true;
+                }
+              }
+
+              // Depending on the compiler, decide whether (and how) we can
+              // produce preprocessed output as a side effect of dependency
+              // extraction.
+              //
+              // Note: -MM -MG skips missing <>-included.
+
+              // Clang's -M does not imply -w (disable warnings). We also
+              // don't need them in the -MD case (see above) so disable for
+              // both.
+              //
+              if (clang)
+                args.push_back ("-w");
+
+              append_lang_options (args, md);
+
+              if (pp != nullptr)
+              {
+                // With the GCC module mapper the dependency information is
+                // written directly to depdb by the mapper.
+                //
+                if (ctype == compiler_type::gcc && mod_mapper)
+                {
+                  // Note that in this mode we don't have -MG re-runs. In a
+                  // sense we are in the -MG mode (or, more precisely, the "no
+                  // -MG required" mode) right away.
+                  //
+                  args.push_back ("-E");
+                  args.push_back (pp);
+                  gen = args_gen = true;
+                  r = &drm.path; // Bogus/hack to force desired process start.
+                }
+                else
+                {
+                  // Previously we used '*' as a target name but it gets
+                  // expanded to the current directory file names by GCC (4.9)
+                  // that comes with MSYS2 (2.4). Yes, this is the (bizarre)
+                  // behavior of GCC being executed in the shell with -MQ '*'
+                  // option and not just -MQ *.
+                  //
+                  args.push_back ("-MQ"); // Quoted target name.
+                  args.push_back ("^");   // Old versions can't do empty.
+
+                  // Note that the options are carefully laid out to be easy
+                  // to override (see below).
+                  //
+                  args_i = args.size ();
+
+                  args.push_back ("-MD");
+                  args.push_back ("-E");
+                  args.push_back (pp);
+
+                  // Dependency output.
+                  //
+                  // GCC until version 8 was not capable of writing the
+                  // dependency information to stdout. We also either need to
+                  // sense the diagnostics on the -E runs (which we currently
+                  // can only do if we don't need to read stdout) or we could
+                  // be communicating with the module mapper via stdin/stdout.
+                  //
+                  if (ctype == compiler_type::gcc)
+                  {
+                    // Use the .t extension (for "temporary"; .d is taken).
+                    //
+                    r = &(drm = auto_rmfile (t.path () + ".t")).path;
+                  }
+
+                  args.push_back ("-MF");
+                  args.push_back (r != nullptr ? r->string ().c_str () : "-");
+
+                  sense_diag = (ctype == compiler_type::gcc);
+                  gen = args_gen = false;
+                }
+
+                // Preprocessor output.
+                //
+                psrc = auto_rmfile (t.path () + x_pext);
+                args.push_back ("-o");
+                args.push_back (psrc.path.string ().c_str ());
+              }
+              else
+              {
+                args.push_back ("-MQ");
+                args.push_back ("^");
+                args.push_back ("-M");
+                args.push_back ("-MG"); // Treat missing headers as generated.
+                gen = args_gen = true;
+              }
+
+              break;
+            }
+          }
+
+          args.push_back (src.path ().string ().c_str ());
+          args.push_back (nullptr);
+
+          // Note: only doing it here.
+          //
+          if (!env.empty ())
+            env.push_back (nullptr);
+        }
+        else
+        {
+          assert (gen != args_gen && args_i != 0);
+
+          size_t i (args_i);
+
+          if (gen)
+          {
+            // Overwrite.
+            //
+            args[i++] = "-M";
+            args[i++] = "-MG";
+            args[i++] = src.path ().string ().c_str ();
+            args[i]   = nullptr;
+
+            if (ctype == compiler_type::gcc)
+            {
+              sense_diag = false;
+            }
+          }
+          else
+          {
+            // Restore.
+            //
+            args[i++] = "-MD";
+            args[i++] = "-E";
+            args[i++] = pp;
+            args[i]   = "-MF";
+
+            if (ctype == compiler_type::gcc)
+            {
+              r = &drm.path;
+              sense_diag = true;
+            }
+          }
+
+          args_gen = gen;
+        }
+
+        return r;
+      };
+
+      // Build the prefix map lazily only if we have non-existent files.
+      // Also reuse it over restarts since it doesn't change.
+      //
+      optional<prefix_map> pfx_map;
+
+      // If any prerequisites that we have extracted changed, then we have to
+      // redo the whole thing. The reason for this is auto-generated headers:
+      // the updated header may now include a yet-non-existent header. Unless
+      // we discover this and generate it (which, BTW, will trigger another
+      // restart since that header, in turn, can also include auto-generated
+      // headers), we will end up with an error during compilation proper.
+      //
+      // One complication with this restart logic is that we will see a
+      // "prefix" of prerequisites that we have already processed (i.e., they
+      // are already in our prerequisite_targets list) and we don't want to
+      // keep redoing this over and over again. One thing to note, however, is
+      // that the prefix that we have seen on the previous run must appear
+      // exactly the same in the subsequent run. The reason for this is that
+      // none of the files that it can possibly be based on have changed and
+      // thus it should be exactly the same. To put it another way, the
+      // presence or absence of a file in the dependency output can only
+      // depend on the previous files (assuming the compiler outputs them as
+      // it encounters them and it is hard to think of a reason why would
+      // someone do otherwise). And we have already made sure that all those
+      // files are up to date. And here is the way we are going to exploit
+      // this: we are going to keep track of how many prerequisites we have
+      // processed so far and on restart skip right to the next one.
+      //
+      // And one more thing: most of the time this list of headers would stay
+      // unchanged and extracting them by running the compiler every time is a
+      // bit wasteful. So we are going to cache them in the depdb. If the db
+      // hasn't been invalidated yet (e.g., because the compiler options have
+      // changed), then we start by reading from it. If anything is out of
+      // date then we use the same restart and skip logic to switch to the
+      // compiler run.
+      //
+      size_t skip_count (0);
+
+      // Enter as a target, update, and add to the list of prerequisite
+      // targets a header file. Depending on the cache flag, the file is
+      // assumed to either have come from the depdb cache or from the compiler
+      // run. Return true if the extraction process should be restarted.
+      //
+      auto add = [a, &bs, &t, li,
+                  &pfx_map, &so_map,
+                  &dd, &skip_count,
+                  this] (path hp, bool cache, timestamp mt) -> bool
+      {
+        const file* ht (enter_header (a, bs, t, li,
+                                      move (hp), cache,
+                                      pfx_map, so_map).first);
+        if (ht == nullptr)
+        {
+          diag_record dr;
+          dr << fail << "header '" << hp
+             << "' not found and cannot be generated";
+
+          if (verb < 4)
+            dr << info << "re-run with --verbose=4 for more information";
+        }
+
+        // Without an injection result, drop this depdb line and restart.
+        optional<bool> u (inject_header (a, t, *ht, cache, mt));
+        if (!u)
+        {
+          dd.write (); // Invalidate this line.
+          return true;
+        }
+
+        if (!cache)
+          dd.expect (ht->path ()); // Verify/add it to depdb.
+
+        skip_count++;
+        return *u;
+      };
+
+      // As above but for a header unit. Note that currently it is only used
+      // for the cached case (the other case is handled by the mapper).
+      //
+      auto add_unit = [a, &bs, &t, li,
+                       &pfx_map, &so_map,
+                       &dd, &skip_count, &md,
+                       this] (path hp, path bp, timestamp mt) -> bool
+      {
+        const file* ht (enter_header (a, bs, t, li,
+                                      move (hp), true /* cache */,
+                                      pfx_map, so_map).first);
+        if (ht == nullptr)
+          fail << "header '" << hp << "' not found and cannot be generated";
+
+        // Update the header itself first, and explicitly: if it cannot be
+        // updated we want to restart rather than fail.
+        //
+        if (!inject_header (a, t, *ht, true /* cache */, mt))
+        {
+          dd.write (); // Invalidate this line.
+          return true;
+        }
+
+        const file& bt (make_header_sidebuild (a, bs, li, *ht));
+        // No cache semantics for the BMI: given the header we should be able
+        // to build it, so a restart is not going to change anything.
+        //
+        optional<bool> u (inject_header (a, t, bt, false /* cache */, mt));
+        assert (u); // Not from cache.
+
+        if (bt.path () != bp)
+        {
+          dd.write (); // Invalidate this line.
+          return true;
+        }
+
+        md.headers++;
+        skip_count++;
+        return *u;
+      };
+
+      // See init_args() above for details on generated header support.
+      //
+      bool gen (false);
+      optional<bool>   force_gen;
+      optional<size_t> force_gen_skip; // Skip count at last force_gen run.
+
+      const path* drmp (nullptr); // Points to drm.path if active.
+
+      // If nothing so far has invalidated the dependency database, then try
+      // the cached data before running the compiler.
+      //
+      bool cache (!update);
+
+      for (bool restart (true); restart; cache = false)
+      {
+        restart = false;
+
+        if (cache)
+        {
+          // If any, this is always the first run.
+          //
+          assert (skip_count == 0);
+
+          // We should always end with a blank line.
+          //
+          for (;;)
+          {
+            string* l (dd.read ());
+
+            // If the line is invalid, run the compiler.
+            //
+            if (l == nullptr)
+            {
+              restart = true;
+              break;
+            }
+
+            if (l->empty ()) // Done, nothing changed.
+            {
+              // If modules are enabled, then we keep the preprocessed output
+              // around (see apply() for details).
+              //
+              return modules
+                ? make_pair (auto_rmfile (t.path () + x_pext, false), true)
+                : make_pair (auto_rmfile (), false);
+            }
+
+            // This can be a header or a header unit (mapping). The latter
+            // is single-quoted.
+            //
+            // If this header (unit) came from the depdb, make sure it is no
+            // older than the target (if it has changed since the target was
+            // updated, then the cached data is stale).
+            //
+            if ((*l)[0] == '@')
+            {
+              size_t p (l->find ('\'', 3));
+
+              if (p != string::npos)
+              {
+                path h (*l, 3, p - 3);
+                path b (move (l->erase (0, p + 2)));
+
+                restart = add_unit (move (h), move (b), mt);
+              }
+              else
+                restart = true; // Corrupt database?
+            }
+            else
+              restart = add (path (move (*l)), true, mt);
+
+            if (restart)
+            {
+              update = true;
+              l6 ([&]{trace << "restarting (cache)";});
+              break;
+            }
+          }
+        }
+        else
+        {
+          try
+          {
+            if (force_gen)
+              gen = *force_gen;
+
+            if (args.empty () || gen != args_gen)
+              drmp = init_args (gen);
+
+            if (verb >= 3)
+              print_process (args.data ()); // Disable pipe mode.
+
+            process pr;
+
+            try
+            {
+              // Assume the preprocessed output (if produced) is usable
+              // until proven otherwise.
+              //
+              puse = true;
+
+              // Save the timestamp just before we start preprocessing. If
+              // we depend on any header that has been updated since, then
+              // we should assume we've "seen" the old copy and re-process.
+              //
+              timestamp pmt (system_clock::now ());
+
+              // In some cases we may need to ignore the error return status.
+              // The good_error flag keeps track of that. Similarly, sometimes
+              // we expect the error return status based on the output that we
+              // see. The bad_error flag is for that.
+              //
+              bool good_error (false), bad_error (false);
+
+              // If we have no generated header support, then suppress all
+              // diagnostics (if things go badly we will restart with this
+              // support).
+              //
+              if (drmp == nullptr) // Dependency info goes to stdout.
+              {
+                assert (!sense_diag); // Note: could support with fdselect().
+
+                // For VC with /P the dependency info and diagnostics all go
+                // to stderr so redirect it to stdout.
+                //
+                pr = process (
+                  cpath,
+                  args.data (),
+                  0,
+                  -1,
+                  cclass == compiler_class::msvc ? 1 : gen ? 2 : -2,
+                  nullptr, // CWD
+                  env.empty () ? nullptr : env.data ());
+              }
+              else // Dependency info goes to a temporary file.
+              {
+                pr = process (cpath,
+                              args.data (),
+                              mod_mapper ? -1 : 0,
+                              mod_mapper ? -1 : 2, // Send stdout to stderr.
+                              gen ? 2 : sense_diag ? -1 : -2,
+                              nullptr, // CWD
+                              env.empty () ? nullptr : env.data ());
+
+                // Monitor for module mapper requests and/or diagnostics. If
+                // diagnostics is detected, mark the preprocessed output as
+                // unusable for compilation.
+                //
+                if (mod_mapper || sense_diag)
+                {
+                  module_mapper_state mm_state (skip_count);
+
+                  const char* w (nullptr);
+                  try
+                  {
+                    // For now we don't need to do both so let's use a simpler
+                    // blocking implementation. Note that the module mapper
+                    // also needs to be adjusted when switching to the
+                    // non-blocking version.
+                    //
+#if 1
+                    assert (mod_mapper != sense_diag);
+
+                    if (mod_mapper)
+                    {
+                      w = "module mapper request";
+
+                      // Note: the order is important (see the non-blocking
+                      // version for details).
+                      //
+                      ifdstream is (move (pr.in_ofd),
+                                    fdstream_mode::skip,
+                                    ifdstream::badbit);
+                      ofdstream os (move (pr.out_fd));
+
+                      do
+                      {
+                        gcc_module_mapper (mm_state,
+                                           a, bs, t, li,
+                                           is, os,
+                                           dd, update, bad_error,
+                                           pfx_map, so_map);
+                      } while (!is.eof ());
+
+                      os.close ();
+                      is.close ();
+                    }
+
+                    if (sense_diag)
+                    {
+                      w = "diagnostics";
+                      ifdstream is (move (pr.in_efd), fdstream_mode::skip);
+                      puse = puse && (is.peek () == ifdstream::traits_type::eof ());
+                      is.close ();
+                    }
+#else
+                    fdselect_set fds;
+                    auto add = [&fds] (const auto_fd& afd) -> fdselect_state*
+                    {
+                      int fd (afd.get ());
+                      fdmode (fd, fdstream_mode::non_blocking);
+                      fds.push_back (fd);
+                      return &fds.back ();
+                    };
+
+                    // Note that while we read both streams until eof in
+                    // normal circumstances, we cannot use fdstream_mode::skip
+                    // for the exception case on both of them: we may end up
+                    // being blocked trying to read one stream while the
+                    // process may be blocked writing to the other. So in case
+                    // of an exception we only skip the diagnostics and close
+                    // the mapper stream hard. The latter should happen first
+                    // so the order of the following variable is important.
+                    //
+                    ifdstream es;
+                    ofdstream os;
+                    ifdstream is;
+
+                    fdselect_state* ds (nullptr);
+                    if (sense_diag)
+                    {
+                      w = "diagnostics";
+                      ds = add (pr.in_efd);
+                      es.open (move (pr.in_efd), fdstream_mode::skip);
+                    }
+
+                    fdselect_state* ms (nullptr);
+                    if (mod_mapper)
+                    {
+                      w = "module mapper request";
+                      ms = add (pr.in_ofd);
+                      is.open (move (pr.in_ofd));
+                      os.open (move (pr.out_fd)); // Note: blocking.
+                    }
+
+                    // Set each state pointer to NULL when the respective
+                    // stream reaches eof.
+                    //
+                    while (ds != nullptr || ms != nullptr)
+                    {
+                      w = "output";
+                      ifdselect (fds);
+
+                      // First read out the diagnostics in case the mapper
+                      // interaction produces more. To make sure we don't get
+                      // blocked by full stderr, the mapper should only handle
+                      // one request at a time.
+                      //
+                      if (ds != nullptr && ds->ready)
+                      {
+                        w = "diagnostics";
+
+                        for (char buf[4096];;)
+                        {
+                          streamsize c (sizeof (buf));
+                          streamsize n (es.readsome (buf, c));
+
+                          if (puse && n > 0)
+                            puse = false;
+
+                          if (n < c)
+                            break;
+                        }
+
+                        if (es.eof ())
+                        {
+                          es.close ();
+                          ds->fd = nullfd;
+                          ds = nullptr;
+                        }
+                      }
+
+                      if (ms != nullptr && ms->ready)
+                      {
+                        w = "module mapper request";
+
+                        gcc_module_mapper (mm_state,
+                                           a, bs, t, li,
+                                           is, os,
+                                           dd, update, bad_error,
+                                           pfx_map, so_map);
+                        if (is.eof ())
+                        {
+                          os.close ();
+                          is.close ();
+                          ms->fd = nullfd;
+                          ms = nullptr;
+                        }
+                      }
+                    }
+#endif
+                  }
+                  catch (const io_error& e)
+                  {
+                    if (pr.wait ())
+                      fail << "io error handling " << x_lang << " compiler "
+                           << w << ": " << e;
+
+                    // Fall through.
+                  }
+
+                  if (mod_mapper)
+                    md.headers += mm_state.headers;
+                }
+
+                // The idea is to reduce this to the stdout case.
+                //
+                pr.wait ();
+
+                // With -MG we want to read dependency info even if there is
+                // an error (in case an outdated header file caused it). But
+                // with the GCC module mapper an error is non-negotiable, so
+                // to speak, and so we want to skip all of that. In fact, we
+                // now write directly to depdb without generating and then
+                // parsing an intermediate dependency makefile.
+                //
+                pr.in_ofd = (ctype == compiler_type::gcc && mod_mapper)
+                  ? auto_fd (nullfd)
+                  : fdopen (*drmp, fdopen_mode::in);
+              }
+
+              if (pr.in_ofd != nullfd)
+              {
+                // We may not read all the output (e.g., due to a restart).
+                // Before we used to just close the file descriptor to signal
+                // to the other end that we are not interested in the rest.
+                // This works fine with GCC but Clang (3.7.0) finds this
+                // impolite and complains, loudly (broken pipe). So now we are
+                // going to skip until the end.
+                //
+                ifdstream is (move (pr.in_ofd),
+                              fdstream_mode::text | fdstream_mode::skip,
+                              ifdstream::badbit);
+
+                size_t skip (skip_count);
+                string l; // Reuse.
+                for (bool first (true), second (false); !restart; )
+                {
+                  if (eof (getline (is, l)))
+                    break;
+
+                  l6 ([&]{trace << "header dependency line '" << l << "'";});
+
+                  // Parse different dependency output formats.
+                  //
+                  switch (cclass)
+                  {
+                  case compiler_class::msvc:
+                    {
+                      if (first)
+                      {
+                        // The first line should be the file we are compiling.
+                        // If it is not, then something went wrong even before
+                        // we could compile anything (e.g., file does not
+                        // exist). In this case the first line (and everything
+                        // after it) is presumably diagnostics.
+                        //
+                        // It can, however, be a command line warning, for
+                        // example:
+                        //
+                        // cl : Command line warning D9025 : overriding '/W3' with '/W4'
+                        //
+                        // So we try to detect and skip them assuming they
+                        // will also show up during the compilation proper.
+                        //
+                        if (l != src.path ().leaf ().string ())
+                        {
+                          // D8XXX are errors while D9XXX are warnings.
+                          //
+                          size_t p (msvc_sense_diag (l, 'D'));
+                          if (p != string::npos && l[p] == '9')
+                            continue;
+
+                          text << l;
+                          bad_error = true;
+                          break;
+                        }
+
+                        first = false;
+                        continue;
+                      }
+
+                      string f (next_show (l, good_error));
+
+                      if (f.empty ()) // Some other diagnostics.
+                      {
+                        text << l;
+                        bad_error = true;
+                        break;
+                      }
+
+                      // Skip until where we left off.
+                      //
+                      if (skip != 0)
+                      {
+                        // We can't be skipping over a non-existent header.
+                        //
+                        assert (!good_error);
+                        skip--;
+                      }
+                      else
+                      {
+                        restart = add (path (move (f)), false, pmt);
+
+                        // If the header does not exist (good_error), then
+                        // restart must be true. Except that it is possible
+                        // that someone running in parallel has already
+                        // updated it. In this case we must force a restart
+                        // since we haven't yet seen what's after this
+                        // at-that-time-non-existent header.
+                        //
+                        // We also need to force the target update (normally
+                        // done by add()).
+                        //
+                        if (good_error)
+                          restart = true;
+                        //
+                        // And if we have updated the header (restart is
+                        // true), then we may end up in this situation: an old
+                        // header got included which caused the preprocessor
+                        // to fail down the line. So if we are restarting, set
+                        // the good error flag in case the process fails
+                        // because of something like this (and if it is for a
+                        // valid reason, then we will pick it up on the next
+                        // round).
+                        //
+                        else if (restart)
+                          good_error = true;
+
+                        if (restart)
+                        {
+                          update = true;
+                          l6 ([&]{trace << "restarting";});
+                        }
+                      }
+
+                      break;
+                    }
+                  case compiler_class::gcc:
+                    {
+                      // Make dependency declaration.
+                      //
+                      size_t pos (0);
+
+                      if (first)
+                      {
+                        // Empty/invalid output should mean the wait() call
+                        // below will return false.
+                        //
+                        if (l.empty ()  ||
+                            l[0] != '^' || l[1] != ':' || l[2] != ' ')
+                        {
+                          // @@ Hm, we don't seem to redirect stderr to stdout
+                          //    for this class of compilers so I wonder why
+                          //    we are doing this?
+                          //
+                          if (!l.empty ())
+                            text << l;
+
+                          bad_error = true;
+                          break;
+                        }
+
+                        first = false;
+                        second = true;
+
+                        // While normally we would have the source file on the
+                        // first line, if too long, it will be moved to the
+                        // next line and all we will have on this line is:
+                        // "^: \".
+                        //
+                        if (l.size () == 4 && l[3] == '\\')
+                          continue;
+                        else
+                          pos = 3; // Skip "^: ".
+
+                        // Fall through to the 'second' block.
+                      }
+
+                      if (second)
+                      {
+                        second = false;
+                        next_make (l, pos); // Skip the source file.
+                      }
+
+                      while (pos != l.size ())
+                      {
+                        string f (next_make (l, pos));
+
+                        // Skip until where we left off.
+                        //
+                        if (skip != 0)
+                        {
+                          skip--;
+                          continue;
+                        }
+
+                        restart = add (path (move (f)), false, pmt);
+
+                        if (restart)
+                        {
+                          // The same "preprocessor may fail down the line"
+                          // logic as above.
+                          //
+                          good_error = true;
+
+                          update = true;
+                          l6 ([&]{trace << "restarting";});
+                          break;
+                        }
+                      }
+
+                      break;
+                    }
+                  }
+
+                  if (bad_error)
+                    break;
+                }
+
+                // In case of VC, we are parsing stderr and if things go
+                // south, we need to copy the diagnostics for the user to see.
+                //
+                if (bad_error && cclass == compiler_class::msvc)
+                {
+                  // We used to just dump the whole rdbuf but it turns out VC
+                  // may continue writing include notes interleaved with the
+                  // diagnostics. So we have to filter them out.
+                  //
+                  for (; !eof (getline (is, l)); )
+                  {
+                    size_t p (msvc_sense_diag (l, 'C'));
+                    if (p != string::npos && l.compare (p, 4, "1083") != 0)
+                      diag_stream_lock () << l << endl;
+                  }
+                }
+
+                is.close ();
+
+                // This is tricky: it is possible that in parallel someone has
+                // generated all our missing headers and we wouldn't restart
+                // normally.
+                //
+                // In this case we also need to force the target update (which
+                // is normally done by add()).
+                //
+                if (force_gen && *force_gen)
+                {
+                  restart = update = true;
+                  force_gen = false;
+                }
+              }
+
+              if (pr.wait ())
+              {
+                if (!bad_error) // Ignore expected successes (we are done).
+                  continue;
+
+                fail << "expected error exit status from " << x_lang
+                     << " compiler";
+              }
+              else if (pr.exit->normal ())
+              {
+                if (good_error) // Ignore expected errors (restart).
+                  continue;
+              }
+
+              // Fall through.
+            }
+            catch (const io_error& e)
+            {
+              if (pr.wait ())
+                fail << "unable to read " << x_lang << " compiler header "
+                     << "dependency output: " << e;
+
+              // Fall through.
+            }
+
+            assert (pr.exit && !*pr.exit);
+            const process_exit& e (*pr.exit);
+
+            // For normal exit we assume the child process issued some
+            // diagnostics.
+            //
+            if (e.normal ())
+            {
+              // If this run was with the generated header support then we
+              // have issued diagnostics and it's time to give up.
+              //
+              if (gen)
+                throw failed ();
+
+              // Just to recap, being here means something is wrong with the
+              // source: it can be a missing generated header, it can be an
+              // outdated generated header (e.g., some check triggered #error
+              // which will go away if only we updated the generated header),
+              // or it can be a real error that is not going away.
+              //
+              // So this is what we are going to do here: if anything got
+              // updated on this run (i.e., the compiler has produced valid
+              // dependency information even though there were errors and we
+              // managed to find and update a header based on this
+              // information), then we restart in the same mode hoping that
+              // this fixes things. Otherwise, we force the generated header
+              // support which will either uncover a missing generated header
+              // or will issue diagnostics.
+              //
+              if (restart)
+                l6 ([&]{trace << "trying again without generated headers";});
+              else
+              {
+                // In some pathological situations we may end up switching
+                // back and forth indefinitely without making any headway. So
+                // we use skip_count to track our progress.
+                //
+                // Examples that have been encountered so far:
+                //
+                // - Running out of disk space.
+                //
+                // - Using __COUNTER__ in #if which is incompatible with the
+                //   GCC's -fdirectives-only mode.
+                //
+                // - A Clang bug: https://bugs.llvm.org/show_bug.cgi?id=35580
+                //
+                // So let's show the yo-yo'ing command lines and ask the user
+                // to investigate.
+                //
+                // Note: we could restart one more time but this time without
+                // suppressing diagnostics. This could be useful since, say,
+                // running out of disk space may not reproduce on its own (for
+                // example, because we have removed all the partially
+                // preprocessed source files).
+                //
+                if (force_gen_skip && *force_gen_skip == skip_count)
+                {
+                  diag_record dr (fail);
+
+                  dr << "inconsistent " << x_lang << " compiler behavior" <<
+                    info << "run the following two commands to investigate";
+
+                  dr << info;
+                  print_process (dr, args.data ()); // No pipes.
+
+                  init_args ((gen = true));
+                  dr << info << "";
+                  print_process (dr, args.data ()); // No pipes.
+                }
+
+                restart = true;
+                force_gen = true;
+                force_gen_skip = skip_count;
+                l6 ([&]{trace << "restarting with forced generated headers";});
+              }
+              continue;
+            }
+            else
+              run_finish (args, pr); // Throws.
+          }
+          catch (const process_error& e)
+          {
+            error << "unable to execute " << args[0] << ": " << e;
+
+            // In a multi-threaded program that fork()'ed but did not exec(),
+            // it is unwise to try to do any kind of cleanup (like unwinding
+            // the stack and running destructors).
+            //
+            if (e.child)
+            {
+              drm.cancel ();
+              exit (1);
+            }
+
+            throw failed ();
+          }
+        }
+      }
+
+      // Add the terminating blank line (we are updating depdb).
+      //
+      dd.expect ("");
+
+      puse = puse && !reprocess && !psrc.path.empty ();
+      return make_pair (move (psrc), puse);
+    }
+
+    // Preprocess and parse the translation unit. Return its information
+    // (first) and checksum (second). An empty checksum should not be used.
+    //
+    pair<unit, string> compile_rule::
+    parse_unit (action a,
+                file& t,
+                linfo li,
+                const file& src,      // Original source file.
+                auto_rmfile& psrc,    // Preprocessed source, if any.
+                const match_data& md,
+                const path& dd) const // Passed on to append_headers().
+    {
+      tracer trace (x, "compile_rule::parse_unit");
+
+      otype ot (li.type);
+
+      // If things go wrong give the user a bit extra context.
+      //
+      auto df = make_diag_frame (
+        [&src](const diag_record& dr)
+        {
+          if (verb != 0)
+            dr << info << "while parsing " << src;
+        });
+
+      // For some compilers (GCC, Clang) the preprocessed output is only
+      // partially preprocessed. For others (VC), it is already fully
+      // preprocessed (well, almost: it still has comments but we can handle
+      // that). Plus, the source file might already be (sufficiently)
+      // preprocessed.
+      //
+      // So we run the compiler writing the fully preprocessed output to stdout
+      // and reduce the already preprocessed case to it (see empty args below).
+      //
+      environment env;
+      cstrings args; // Left empty if there is no need to run the compiler.
+      small_vector<string, 2> header_args; // Header unit options storage.
+
+      const path* sp; // Source path.
+
+      // @@ MODHDR: If we are reprocessing, then will need module mapper for
+      //            include translation. Hairy... Can't we add support for
+      //            include translation in file mapper?
+      //
+      bool reprocess (cast_false<bool> (t[c_reprocess]));
+
+      bool ps; // True if extracting from psrc.
+      if (md.pp < preprocessed::modules)
+      {
+        // If we were instructed to reprocess the source during compilation,
+        // then also reprocess it here. While the preprocessed output may be
+        // usable for our needs, to be safe we assume it is not (and later we
+        // may extend cc.reprocess to allow specifying where reprocessing is
+        // needed).
+        //
+        ps = !psrc.path.empty () && !reprocess;
+        sp = &(ps ? psrc.path : src.path ());
+
+        // VC's preprocessed output, if present, is fully preprocessed.
+        //
+        if (cclass != compiler_class::msvc || !ps)
+        {
+          // This should match with how we setup preprocessing and is pretty
+          // similar to init_args() from extract_headers().
+          //
+          args.push_back (cpath.recall_string ());
+
+          if (reprocess)
+            args.push_back ("-D__build2_preprocess");
+
+          append_options (args, t, c_poptions);
+          append_options (args, t, x_poptions);
+
+          append_lib_options (t.base_scope (), args, a, t, li);
+
+          assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+          append_option_values (
+            args, "-I",
+            sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+            [] (const dir_path& d) {return d.string ().c_str ();});
+
+          if (md.symexport)
+            append_symexport_options (args, t);
+
+          // Make sure we don't fail because of warnings.
+          //
+          // @@ Can be both -WX and /WX.
+          //
+          const char* werror (nullptr);
+          switch (cclass)
+          {
+          case compiler_class::gcc:  werror = "-Werror"; break;
+          case compiler_class::msvc: werror = "/WX";     break;
+          }
+
+          bool clang (ctype == compiler_type::clang);
+
+          append_options (args, t, c_coptions, werror);
+          append_options (args, t, x_coptions, werror);
+          append_options (args, tstd,
+                          tstd.size () - (modules && clang ? 1 : 0));
+
+          append_headers (env, args, header_args, a, t, md, dd);
+
+          switch (cclass)
+          {
+          case compiler_class::msvc:
+            {
+              args.push_back ("/nologo");
+
+              if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+                args.push_back ("/EHsc");
+
+              if (!find_option_prefixes ({"/MD", "/MT"}, args))
+                args.push_back ("/MD");
+
+              args.push_back ("/E");
+              // args.push_back ("/C"); // See above.
+
+              msvc_sanitize_cl (args);
+
+              append_lang_options (args, md); // Compile as.
+
+              break;
+            }
+          case compiler_class::gcc:
+            {
+              if (ot == otype::s)
+              {
+                if (tclass == "linux" || tclass == "bsd")
+                  args.push_back ("-fPIC");
+              }
+
+              args.push_back ("-E");
+              append_lang_options (args, md);
+
+              // Options that trigger preprocessing of partially preprocessed
+              // output are a bit of a compiler-specific voodoo.
+              //
+              if (ps)
+              {
+                if (ctype == compiler_type::gcc)
+                {
+                  // Note that only these two *plus* -x do the trick.
+                  //
+                  args.push_back ("-fpreprocessed");
+                  args.push_back ("-fdirectives-only");
+                }
+              }
+
+              break;
+            }
+          }
+
+          args.push_back (sp->string ().c_str ());
+          args.push_back (nullptr);
+        }
+
+        if (!env.empty ())
+          env.push_back (nullptr);
+      }
+      else
+      {
+        // Extracting directly from source.
+        //
+        ps = false;
+        sp = &src.path ();
+      }
+
+      // Preprocess and parse.
+      //
+      for (;;) // Breakout loop.
+      try
+      {
+        // Disarm the removal of the preprocessed file in case of an error.
+        // We re-arm it below.
+        //
+        if (ps)
+          psrc.active = false;
+
+        process pr;
+
+        try
+        {
+          if (args.empty ()) // Nothing to run: read the file as is.
+          {
+            pr = process (process_exit (0)); // Successfully exited.
+            pr.in_ofd = fdopen (*sp, fdopen_mode::in);
+          }
+          else
+          {
+            if (verb >= 3)
+              print_process (args);
+
+            // We don't want to see warnings multiple times so ignore all
+            // diagnostics.
+            //
+            pr = process (cpath,
+                          args.data (),
+                          0, -1, -2,
+                          nullptr, // CWD
+                          env.empty () ? nullptr : env.data ());
+          }
+
+          // Use binary mode to obtain consistent positions.
+          //
+          ifdstream is (move (pr.in_ofd),
+                        fdstream_mode::binary | fdstream_mode::skip);
+
+          parser p;
+          unit tu (p.parse (is, *sp));
+
+          is.close ();
+
+          if (pr.wait ())
+          {
+            if (ps)
+              psrc.active = true; // Re-arm.
+
+            unit_type& ut (tu.type);
+            module_info& mi (tu.module_info);
+
+            if (!modules)
+            {
+              if (ut != unit_type::non_modular || !mi.imports.empty ())
+                fail << "modules support required by " << src;
+            }
+            else
+            {
+              // Sanity checks.
+              //
+              // If we are compiling a module interface, make sure the
+              // translation unit has the necessary declarations.
+              //
+              if (ut != unit_type::module_iface && src.is_a (*x_mod))
+                fail << src << " is not a module interface unit";
+
+              // A header unit should look like a non-modular translation unit.
+              //
+              if (md.type == unit_type::module_header)
+              {
+                if (ut != unit_type::non_modular)
+                  fail << "module declaration in header unit " << src;
+
+                ut = md.type;
+                mi.name = src.path ().string ();
+              }
+
+              // Prior to 15.5 (19.12) VC was not using the 'export module M;'
+              // syntax so we use the prerequisite type to distinguish
+              // between interface and implementation units.
+              //
+              if (ctype == compiler_type::msvc && cmaj == 19 && cmin <= 11)
+              {
+                if (ut == unit_type::module_impl && src.is_a (*x_mod))
+                  ut = unit_type::module_iface;
+              }
+            }
+
+            // If we were forced to reprocess, assume the checksum is not
+            // accurate (parts of the translation unit could have been
+            // #ifdef'ed out; see __build2_preprocess).
+            //
+            return pair<unit, string> (
+              move (tu),
+              reprocess ? string () : move (p.checksum));
+          }
+
+          // Fall through.
+        }
+        catch (const io_error& e)
+        {
+          if (pr.wait ())
+            fail << "unable to read " << x_lang << " preprocessor output: "
+                 << e;
+
+          // Fall through.
+        }
+
+        assert (pr.exit && !*pr.exit);
+        const process_exit& e (*pr.exit);
+
+        // What should we do with a normal error exit? Remember we suppressed
+        // the compiler's diagnostics. We used to warn and continue, assuming
+        // that the compilation step will fail with diagnostics. The problem
+        // is that we may fail before that because the information we return
+        // (e.g., module name) is bogus. So failing looks like the only option.
+        //
+        if (e.normal ())
+        {
+          fail << "unable to preprocess " << src <<
+            info << "re-run with -s -V to display failing command" <<
+            info << "then run failing command to display compiler diagnostics";
+        }
+        else
+          run_finish (args, pr); // Throws.
+      }
+      catch (const process_error& e)
+      {
+        error << "unable to execute " << args[0] << ": " << e;
+
+        if (e.child)
+          exit (1);
+
+        break; // Fail below (falling off the handler would re-run the loop).
+      }
+
+      throw failed ();
+    }
+
+    // Resolve and inject the module dependencies of a translation unit.
+    //
+    void compile_rule::
+    extract_modules (action a,
+                     const scope& bs,
+                     file& t,
+                     linfo li,
+                     const compile_target_types& tts,
+                     const file& src,
+                     match_data& md,
+                     module_info&& mi,
+                     depdb& dd,
+                     bool& update) const
+    {
+      tracer trace (x, "compile_rule::extract_modules");
+
+      // Add a bit of extra context to any diagnostics issued from here on.
+      //
+      auto frame = make_diag_frame (
+        [&src](const diag_record& r)
+        {
+          if (verb != 0)
+            r << info << "while extracting module dependencies from " << src;
+        });
+
+      const unit_type utype (md.type);
+      module_imports& imports (mi.imports);
+
+      // If this is a module implementation unit, then treat its own module as
+      // if it was imported. This entry goes first since for some compilers we
+      // have to tell it apart from the real imports. Note that this moves the
+      // module name out of mi.
+      //
+      if (utype == unit_type::module_impl)
+      {
+        module_import self {unit_type::module_iface, move (mi.name), false, 0};
+        imports.insert (imports.begin (), move (self));
+      }
+
+      // A change to the set of imports requires a change to the source code
+      // (or options) while changes to the bmi{}s themselves are detected by
+      // the normal prerequisite machinery. The same set of imports could,
+      // however, be resolved to a different set of bmi{}s (which is, in a
+      // sense, similar to changing the source file). To detect this we
+      // calculate and store a hash of the paths of all (not just direct)
+      // bmi{}s.
+      //
+      sha256 sum;
+
+      if (!imports.empty ())
+        md.modules = search_modules (a, bs, t, li, tts.bmi, src, imports, sum);
+
+      if (dd.expect (sum.string ()) != nullptr)
+        update = true;
+
+      // Save the module map for compilers that use it (currently only GCC).
+      // There is no need to redo this if the above hash hasn't changed and the
+      // database is still valid.
+      //
+      if (ctype == compiler_type::gcc && (dd.writing () || !dd.skip ()))
+      {
+        // Write a mapping entry in the "@ <name> <file>" form, placing the
+        // name in single quotes if requested.
+        //
+        auto map_entry = [&dd] (const string& n, const path& f, bool quote)
+        {
+          dd.write ("@ ", false);
+          if (quote) dd.write ('\'', false);
+          dd.write (n, false);
+          if (quote) dd.write ('\'', false);
+          dd.write (' ', false);
+          dd.write (f);
+        };
+
+        // The output mapping is provided in the same way as the input ones.
+        //
+        const bool hdr_unit (utype == unit_type::module_header);
+
+        if (utype == unit_type::module_iface || hdr_unit)
+          map_entry (mi.name, t.path (), hdr_unit);
+
+        // Map both direct and indirect imports in order to override any module
+        // paths that might be stored in the BMIs (or resolved relative to the
+        // "repository path", whatever that is).
+        //
+        const size_t first (md.modules.start);
+        if (first != 0)
+        {
+          const auto& pts (t.prerequisite_targets[a]);
+
+          for (size_t i (first); i != pts.size (); ++i)
+          {
+            const target* bmi = pts[i];
+
+            if (bmi == nullptr)
+              continue;
+
+            // Get the module name from the import list (see search_modules())
+            // which saves us a variable lookup. Note that these are all real
+            // modules (not header units).
+            //
+            map_entry (imports[i - first].name, bmi->as<file> ().path (), false);
+          }
+        }
+      }
+
+      // For an interface unit set the cc.module_name rule-specific variable.
+      // While setting it on the bmi{} group instead may seem like a good way
+      // to avoid duplication, we cannot do that MT-safely since we don't match
+      // the group.
+      //
+      // @@ MODHDR TODO: do we need this for header units? Currently we don't
+      //    see header units here.
+      //
+      if (utype == unit_type::module_iface)
+      {
+        value& v (t.state[a].assign (c_module_name));
+
+        if (v) // Has a value: it must be the same name.
+          assert (cast<string> (v) == mi.name);
+        else
+          v = move (mi.name); // Note: move.
+      }
+    }
+
+    // Return true if the module name is std or starts with std. (that is, it
+    // is one of the std.* modules, for example, std.core).
+    //
+    inline bool
+    std_module (const string& m)
+    {
+      return m.compare (0, 3, "std") == 0 && (m.size () == 3 || m[3] == '.');
+    }
+
+    // Resolve imported modules to bmi*{} targets.
+    //
+    module_positions compile_rule::
+    search_modules (action a,
+                    const scope& bs,
+                    file& t,
+                    linfo li,
+                    const target_type& btt,
+                    const file& src,
+                    module_imports& imports,
+                    sha256& cs) const
+    {
+      tracer trace (x, "compile_rule::search_modules");
+
+      // NOTE: currently we don't see header unit imports (they are
+      //       handled by extract_headers() and are not in imports).
+
+      // So we have a list of imports and a list of "potential" module
+      // prerequisites. They are potential in the sense that they may or may
+      // not be required by this translation unit. In other words, they are
+      // the pool where we can resolve actual imports.
+      //
+      // Because we may not need all of these prerequisites, we cannot just go
+      // ahead and match all of them (and they can even have cycles; see rule
+      // synthesis). This poses a bit of a problem: the only way to discover
+      // the module's actual name (see cc.module_name) is by matching it.
+      //
+      // One way to solve this would be to make the user specify the module
+      // name for each mxx{} explicitly. This will be a major pain, however.
+      // Another would be to require encoding of the module name in the
+      // interface unit file name. For example, hello.core -> hello-core.mxx.
+      // This is better but still too restrictive: some will want to call it
+      // hello_core.mxx or HelloCore.mxx (because that's their file naming
+      // convention) or place it in a subdirectory, say, hello/core.mxx.
+      //
+      // In the above examples one common theme about all the file names is
+      // that they contain, in one form or another, the "tail" of the module
+      // name ('core'). So what we are going to do is require that the
+      // interface file names contain enough of the module name tail to
+      // unambiguously resolve all the module imports. On our side we are
+      // going to implement a "fuzzy" module name to file name match. This
+      // should be reliable enough since we will always verify our guesses
+      // once we match the target and extract the actual module name. Plus,
+      // the user will always have the option of resolving any impasses by
+      // specifying the module name explicitly.
+      //
+      // So, the fuzzy match: the idea is that each match gets a score, the
+      // number of characters in the module name that got matched. A match
+      // with the highest score is used. And we use the (length + 1) for a
+      // match against an actual module name.
+      //
+      // Actually, the scoring system is a bit more elaborate than that.
+      // Consider module name core.window and two files, window.mxx and
+      // abstract-window.mxx: which one is likely to define this module?
+      // Clearly the first, but in the above-described scheme they will get
+      // the same score. More generally, consider these "obvious" (to the
+      // human) situations:
+      //
+      //   window.mxx          vs  abstract-window.mxx
+      //   details/window.mxx  vs  abstract-window.mxx
+      //   gtk-window.mxx      vs  gtk-abstract-window.mxx
+      //
+      // To handle such cases we are going to combine the above primary score
+      // with the following secondary scores (in that order):
+      //
+      // a) Strength of separation between matched and unmatched parts:
+      //
+      //    '\0' > directory separator > other separator > unseparated
+      //
+      //    Here '\0' signifies nothing to separate (unmatched part is empty).
+      //
+      // b) Shortness of the unmatched part.
+      //
+      // For std.* modules we only accept non-fuzzy matches (think std.core vs
+      // some core.mxx). And if such a module is unresolved, then we assume it
+      // is pre-built and will be found by some other means (e.g., VC's
+      // IFCPATH).
+      //
+      auto match_max = [] (const string& m) -> size_t
+      {
+        // The primary and sub-scores are packed in the following decimal
+        // representation:
+        //
+        // PPPPABBBB
+        //
+        // We use decimal instead of binary packing to make it easier to
+        // separate fields in the trace messages, during debugging, etc.
+        //
+        return m.size () * 100000 + 99999; // Maximum match score.
+      };
+
+      auto match = [] (const string& f, const string& m) -> size_t
+      {
+        auto file_sep = [] (char c) -> char
+        {
+          // Return the character (translating directory separator to '/') if
+          // it is a separator and '\0' otherwise (so can be used as bool).
+          //
+          return (c == '_' || c == '-' || c == '.'    ? c   :
+                  path::traits_type::is_separator (c) ? '/' : '\0');
+        };
+
+        auto case_sep = [] (char c1, char c2)
+        {
+          return (alpha (c1) &&
+                  alpha (c2) &&
+                  (ucase (c1) == c1) != (ucase (c2) == c2));
+        };
+
+        size_t fn (f.size ()), fi (fn);
+        size_t mn (m.size ()), mi (mn);
+
+        // True if the previous character was counted as a real (that is,
+        // non-case changing) separator.
+        //
+        bool fsep (false);
+        bool msep (false);
+
+        // Scan backwards for as long as we match. Keep track of the previous
+        // character for case change detection.
+        //
+        for (char fc, mc, fp ('\0'), mp ('\0');
+             fi != 0 && mi != 0;
+             fp = fc, mp = mc, --fi, --mi)
+        {
+          fc = f[fi - 1];
+          mc = m[mi - 1];
+
+          if (casecmp (fc, mc) == 0)
+          {
+            fsep = msep = false;
+            continue;
+          }
+
+          // We consider all separators equal and a character case change to
+          // also be a separator. Some examples of the latter:
+          //
+          // foo.bar
+          //  fooBAR
+          //  FOObar
+          //
+          bool fs (file_sep (fc));
+          bool ms (mc == '_' || mc == '.');
+
+          if (fs && ms)
+          {
+            fsep = msep = true;
+            continue;
+          }
+
+          // Only if one is a real separator do we consider case change.
+          //
+          if (fs || ms)
+          {
+            bool fa (false), ma (false);
+            if ((fs || (fa = case_sep (fp, fc))) &&
+                (ms || (ma = case_sep (mp, mc))))
+            {
+              // Stay on this character if imaginary punctuation (note: cannot
+              // be both true).
+              //
+              if (fa) {++fi; msep = true;}
+              if (ma) {++mi; fsep = true;}
+
+              continue;
+            }
+          }
+
+          break; // No match.
+        }
+
+        // "Uncount" real separators.
+        //
+        if (fsep) fi++;
+        if (msep) mi++;
+
+        // Use the number of characters matched in the module name and not
+        // in the file (this may not be the same because of the imaginary
+        // separators).
+        //
+        size_t ps (mn - mi);
+
+        // The strength of separation sub-score.
+        //
+        // Check for case change between the last character that matched and
+        // the first character that did not.
+        //
+        size_t as (0);
+        if      (fi == 0)                                 as = 9;
+        else if (char c = file_sep (f[fi - 1]))           as = c == '/' ? 8 : 7;
+        else if (fi != fn && case_sep (f[fi], f[fi - 1])) as = 7;
+
+        // The length of the unmatched part sub-score.
+        //
+        size_t bs (9999 - fi);
+
+        return ps * 100000 + as * 10000 + bs;
+      };
+
+      auto& pts (t.prerequisite_targets[a]);
+      size_t start (pts.size ()); // Index of the first to be added.
+
+      // We have two parallel vectors: module names/scores in imports and
+      // targets in prerequisite_targets (offset with start). Pre-allocate
+      // NULL entries in the latter.
+      //
+      size_t n (imports.size ());
+      pts.resize (start + n, nullptr);
+
+      // Oh, yes, there is one "minor" complication. It's the last one, I
+      // promise. It has to do with module re-exporting (export import M;).
+      // In this case (currently) all implementations simply treat it as a
+      // shallow (from the BMI's point of view) reference to the module (or an
+      // implicit import, if you will). Do you see where it's going? Nowhere
+      // good, that's right. This shallow reference means that the compiler
+      // should be able to find BMIs for all the re-exported modules,
+      // recursive. The good news is we are actually in a pretty good shape to
+      // handle this: after match all our prerequisite BMIs will have their
+      // prerequisite BMIs known, recursively. The only bit that is missing is
+      // the re-export flag of some sorts. As well as deciding where to handle
+      // it: here or in append_modules(). After some meditation it became
+      // clear handling it here will be simpler: we need to weed out
+      // duplicates for which we can re-use the imports vector. And we may
+      // also need to save this "flattened" list of modules in depdb.
+      //
+      // Ok, so, here is the plan:
+      //
+      // 1. There is no good place in prerequisite_targets to store the
+      //    exported flag (no, using the marking facility across match/execute
+      //    is a bad idea). So what we are going to do is put re-exported
+      //    bmi{}s at the back and store (in the target's data pad) the start
+      //    position. One bad aspect about this part is that we assume those
+      //    bmi{}s have been matched by the same rule. But let's not kid
+      //    ourselves, there will be no other rule that matches bmi{}s.
+      //
+      // 2. Once we have matched all the bmi{}s we are importing directly
+      //    (with all the re-exported by us at the back), we will go over them
+      //    and copy all of their re-exported bmi{}s (using the position we
+      //    saved on step #1). The end result will be a recursively-explored
+      //    list of imported bmi{}s that append_modules() can simply convert
+      //    to the list of options.
+      //
+      //    One issue with this approach is that these copied targets will be
+      //    executed which means we need to adjust their dependent counts
+      //    (which is normally done by match). While this seems conceptually
+      //    correct (especially if you view re-exports as implicit imports),
+      //    it's just extra overhead (we know they will be updated). So what
+      //    we are going to do is save another position, that of the start of
+      //    these copied-over targets, and will only execute up to this point.
+      //
+      // And after implementing this came the reality check: all the current
+      // implementations require access to all the imported BMIs, not only
+      // re-exported. Some (like Clang) store references to imported BMI files
+      // so we actually don't need to pass any extra options (unless things
+      // get moved) but they still need access to the BMIs (and things will
+      // most likely have to be done differently for distributed compilation).
+      //
+      // So the revised plan: on the off chance that some implementation will
+      // do it differently we will keep maintaining the imported/re-exported
+      // split and how much to copy-over can be made compiler specific.
+      //
+      // As a first sub-step of step #1, move all the re-exported imports to
+      // the end of the vector. This will make sure they end up at the end
+      // of prerequisite_targets. Note: the special first import, if any,
+      // should be unaffected.
+      //
+      sort (imports.begin (), imports.end (),
+            [] (const module_import& x, const module_import& y)
+            {
+              return !x.exported && y.exported;
+            });
+
+      // Go over the prerequisites once.
+      //
+      // For (direct) library prerequisites, check their prerequisite bmi{}s
+      // (which should be searched and matched with module names discovered;
+      // see the library meta-information protocol for details).
+      //
+      // For our own bmi{} prerequisites, checking if each (better) matches
+      // any of the imports.
+
+      // For fuzzy check if a file name (better) resolves any of our imports
+      // and if so make it the new selection. For exact the name is the actual
+      // module name and it can only resolve one import (there are no
+      // duplicates).
+      //
+      // Set done to true if all the imports have now been resolved to actual
+      // module names (which means we can stop searching). This will happen
+      // if all the modules come from libraries. Which will be fairly common
+      // (think of all the tests) so it's worth optimizing for.
+      //
+      bool done (false);
+
+      auto check_fuzzy = [&trace, &imports, &pts, &match, &match_max, start, n]
+        (const target* pt, const string& name)
+      {
+        for (size_t i (0); i != n; ++i)
+        {
+          module_import& m (imports[i]);
+
+          if (std_module (m.name)) // No fuzzy std.* matches.
+            continue;
+
+          if (m.score > match_max (m.name)) // Resolved to module name.
+            continue;
+
+          size_t s (match (name, m.name));
+
+          l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+          if (s > m.score)
+          {
+            pts[start + i] = pt;
+            m.score = s;
+          }
+        }
+      };
+
+      // If resolved, return the "slot" in pts (we don't want to create a
+      // side build until we know we match; see below for details).
+      //
+      auto check_exact = [&trace, &imports, &pts, &match_max, start, n, &done]
+        (const string& name) -> const target**
+      {
+        const target** r (nullptr);
+        done = true;
+
+        for (size_t i (0); i != n; ++i)
+        {
+          module_import& m (imports[i]);
+
+          size_t ms (match_max (m.name));
+
+          if (m.score > ms) // Resolved to module name (no effect on done).
+            continue;
+
+          if (r == nullptr)
+          {
+            size_t s (name == m.name ? ms + 1 : 0);
+
+            l5 ([&]{trace << name << " ~ " << m.name << ": " << s;});
+
+            if (s > m.score)
+            {
+              r = &pts[start + i].target;
+              m.score = s;
+              continue; // Scan the rest to detect if all done.
+            }
+          }
+
+          done = false;
+        }
+
+        return r;
+      };
+
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+          continue;
+
+        const target* pt (p.load ()); // Should be cached for libraries.
+
+        if (pt != nullptr)
+        {
+          const target* lt (nullptr);
+
+          if (const libx* l = pt->is_a<libx> ())
+            lt = link_member (*l, a, li);
+          else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ())
+            lt = pt;
+
+          // If this is a library, check its bmi{}s and mxx{}s.
+          //
+          if (lt != nullptr)
+          {
+            for (const target* bt: lt->prerequisite_targets[a])
+            {
+              if (bt == nullptr)
+                continue;
+
+              // Note that here we (try) to use whatever flavor of bmi*{} is
+              // available.
+              //
+              // @@ MOD: BMI compatibility check.
+              // @@ UTL: we need to (recursively) see through libu*{} (and
+              //    also in pkgconfig_save()).
+              //
+              if (bt->is_a<bmix> ())
+              {
+                const string& n (
+                  cast<string> (bt->state[a].vars[c_module_name]));
+
+                if (const target** p = check_exact (n))
+                  *p = bt;
+              }
+              else if (bt->is_a (*x_mod))
+              {
+                // This is an installed library with a list of module sources
+                // (the source are specified as prerequisites but the fallback
+                // file rule puts them into prerequisite_targets for us).
+                //
+                // The module names should be specified but if not assume
+                // something else is going on and ignore.
+                //
+                const string* n (cast_null<string> (bt->vars[c_module_name]));
+
+                if (n == nullptr)
+                  continue;
+
+                if (const target** p = check_exact (*n))
+                  *p = &make_module_sidebuild (a, bs, *lt, *bt, *n);
+              }
+              else
+                continue;
+
+              if (done)
+                break;
+            }
+
+            if (done)
+              break;
+
+            continue;
+          }
+
+          // Fall through.
+        }
+
+        // While it would have been even better not to search for a target, we
+        // need to get hold of the corresponding mxx{} (unlikely but possible
+        // for bmi{} to have a different name).
+        //
+        // While we want to use group_prerequisite_members() below, we cannot
+        // call resolve_group() since we will be doing it "speculatively" for
+        // modules that we may use but also for modules that may use us. This
+        // quickly leads to deadlocks. So instead we are going to perform an
+        // ad hoc group resolution.
+        //
+        const target* pg;
+        if (p.is_a<bmi> ())
+        {
+          pg = pt != nullptr ? pt : &p.search (t);
+          pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}.
+        }
+        else if (p.is_a (btt))
+        {
+          pg = &search (t, bmi::static_type, p.key ());
+          if (pt == nullptr) pt = &p.search (t);
+        }
+        else
+          continue;
+
+        // Find the mxx{} prerequisite and extract its "file name" for the
+        // fuzzy match unless the user specified the module name explicitly.
+        //
+        for (prerequisite_member p:
+               prerequisite_members (a, t, group_prerequisites (*pt, pg)))
+        {
+          if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+            continue;
+
+          if (p.is_a (*x_mod))
+          {
+            // Check for an explicit module name. Only look for an existing
+            // target (which means the name can only be specified on the
+            // target itself, not target type/pattern-spec).
+            //
+            const target* t (p.search_existing ());
+            const string* n (t != nullptr
+                             ? cast_null<string> (t->vars[c_module_name])
+                             : nullptr);
+            if (n != nullptr)
+            {
+              if (const target** p = check_exact (*n))
+                *p = pt;
+            }
+            else
+            {
+              // Fuzzy match.
+              //
+              string f;
+
+              // Add the directory part if it is relative. The idea is to
+              // include it into the module match, say hello.core vs
+              // hello/mxx{core}.
+              //
+              // @@ MOD: Why not for absolute? Good question. What if it
+              // contains special components, say, ../mxx{core}?
+              //
+              const dir_path& d (p.dir ());
+
+              if (!d.empty () && d.relative ())
+                f = d.representation (); // Includes trailing slash.
+
+              f += p.name ();
+              check_fuzzy (pt, f);
+            }
+            break;
+          }
+        }
+
+        if (done)
+          break;
+      }
+
+      // Diagnose unresolved modules.
+      //
+      if (!done)
+      {
+        for (size_t i (0); i != n; ++i)
+        {
+          if (pts[start + i] == nullptr && !std_module (imports[i].name))
+          {
+            // It would have been nice to print the location of the import
+            // declaration. And we could save it during parsing at the expense
+            // of a few paths (that can be pooled). The question is what to do
+            // when we re-create this information from depdb? We could have
+            // saved the location information there but the relative paths
+            // (e.g., from the #line directives) could end up being wrong if
+            // we re-run from a different working directory.
+            //
+            // It seems the only workable approach is to extract full location
+            // info during parse, not save it in depdb, when re-creating,
+            // fallback to just src path without any line/column information.
+            // This will probably cover the majority of case (most of the time
+            // it will be a misspelled module name, not a removal of module
+            // from buildfile).
+            //
+            // But at this stage this doesn't seem worth the trouble.
+            //
+            fail (relative (src)) << "unable to resolve module "
+                                  << imports[i].name;
+          }
+        }
+      }
+
+      // Match in parallel and wait for completion.
+      //
+      match_members (a, t, pts, start);
+
+      // Post-process the list of our (direct) imports. While at it, calculate
+      // the checksum of all (direct and indirect) bmi{} paths.
+      //
+      size_t exported (n);
+      size_t copied (pts.size ());
+
+      for (size_t i (0); i != n; ++i)
+      {
+        const module_import& m (imports[i]);
+
+        // Determine the position of the first re-exported bmi{}.
+        //
+        if (m.exported && exported == n)
+          exported = i;
+
+        const target* bt (pts[start + i]);
+
+        if (bt == nullptr)
+          continue; // Unresolved (std.*).
+
+        // Verify our guesses against extracted module names but don't waste
+        // time if it was a match against the actual module name.
+        //
+        const string& in (m.name);
+
+        if (m.score <= match_max (in))
+        {
+          const string& mn (cast<string> (bt->state[a].vars[c_module_name]));
+
+          if (in != mn)
+          {
+            // Note: matched, so the group should be resolved.
+            //
+            for (prerequisite_member p: group_prerequisite_members (a, *bt))
+            {
+              if (include (a, t, p) != include_type::normal) // Excluded/ad hoc.
+                continue;
+
+              if (p.is_a (*x_mod)) // Got to be there.
+              {
+                fail (relative (src))
+                  << "failed to correctly guess module name from " << p <<
+                  info << "guessed: " << in <<
+                  info << "actual:  " << mn <<
+                  info << "consider adjusting module interface file names or" <<
+                  info << "consider specifying module name with " << x
+                  << ".module_name";
+              }
+            }
+          }
+        }
+
+        // Hash (we know it's a file).
+        //
+        cs.append (static_cast<const file&> (*bt).path ().string ());
+
+        // Copy over bmi{}s from our prerequisites weeding out duplicates.
+        //
+        if (size_t j = bt->data<match_data> ().modules.start)
+        {
+          // Hard to say whether we should reserve or not. We will probably
+          // get quite a bit of duplications.
+          //
+          auto& bpts (bt->prerequisite_targets[a]);
+          for (size_t m (bpts.size ()); j != m; ++j)
+          {
+            const target* et (bpts[j]);
+
+            if (et == nullptr)
+              continue; // Unresolved (std.*).
+
+            const string& mn (cast<string> (et->state[a].vars[c_module_name]));
+
+            if (find_if (imports.begin (), imports.end (),
+                         [&mn] (const module_import& i)
+                         {
+                           return i.name == mn;
+                         }) == imports.end ())
+            {
+              pts.push_back (et);
+              cs.append (static_cast<const file&> (*et).path ().string ());
+
+              // Add to the list of imports for further duplicate suppression.
+              // We could have stored reference to the name (e.g., in score)
+              // but it's probably not worth it if we have a small string
+              // optimization.
+              //
+              imports.push_back (
+                module_import {unit_type::module_iface, mn, true, 0});
+            }
+          }
+        }
+      }
+
+      if (copied == pts.size ()) // No copied tail.
+        copied = 0;
+
+      if (exported == n) // No (own) re-exported imports.
+        exported = copied;
+      else
+        exported += start; // Rebase.
+
+      return module_positions {start, exported, copied};
+    }
+
+    // Find or, if necessary, create and load the modules sidebuild
+    // subproject, returning its root (out) directory.
+    //
+    dir_path compile_rule::
+    find_modules_sidebuild (const scope& rs) const
+    {
+      // First figure out where we are going to build. We want to avoid
+      // multiple sidebuilds so the outermost scope that has loaded the
+      // cc.config module and that is within our amalgamation seems like a
+      // good place.
+      //
+      const scope* as (&rs); // Where to build (rs by default).
+      {
+        const scope* ws (as->weak_scope ());
+        if (as != ws)
+        {
+          const scope* s (as);
+          do
+          {
+            s = s->parent_scope ()->root_scope ();
+
+            // Use cc.core.vars as a proxy for {c,cxx}.config (a bit smelly).
+            //
+            // This is also the module that registers the scope operation
+            // callback that cleans up the subproject.
+            //
+            if (cast_false<bool> ((*s)["cc.core.vars.loaded"]))
+              as = s; // Outermost so far.
+
+          } while (s != ws);
+        }
+      }
+
+      // We build modules in a subproject (since there might be no full
+      // language support loaded in the amalgamation, only *.config). So the
+      // first step is to check if the project has already been created and/or
+      // loaded and if not, then to go ahead and do so.
+      //
+      dir_path pd (as->out_path () /
+                   as->root_extra->build_dir /
+                   modules_sidebuild_dir /=
+                   x);
+
+      const scope* ps (&rs.ctx.scopes.find (pd));
+
+      if (ps->out_path () != pd) // Not (yet) loaded.
+      {
+        // Switch the phase to load then create and load the subproject.
+        //
+        phase_switch phs (rs.ctx, run_phase::load);
+
+        // Re-test now that we are in the exclusive phase (another thread
+        // could have already created and loaded the subproject).
+        //
+        ps = &rs.ctx.scopes.find (pd);
+
+        if (ps->out_path () != pd)
+        {
+          // The project might already be created in which case we just need
+          // to load it.
+          //
+          optional<bool> altn (false); // Standard naming scheme.
+          if (!is_src_root (pd, altn))
+          {
+            // Propagate our x.std value (if set) and force modules on.
+            //
+            string extra; // Goes into root_pre below.
+
+            if (const string* std = cast_null<string> (rs[x_std]))
+              extra += string (x) + ".std = " + *std + '\n';
+
+            extra += string (x) + ".features.modules = true";
+
+            config::create_project (
+              pd,
+              as->out_path ().relative (pd),  /* amalgamation */
+              {},                             /* boot_modules */
+              extra,                          /* root_pre */
+              {string (x) + '.'},             /* root_modules */
+              "",                             /* root_post */
+              false,                          /* config */
+              false,                          /* buildfile */
+              "the cc module",
+              2);                             /* verbosity */
+          }
+
+          ps = &load_project (as->rw () /* lock */,
+                              pd,
+                              pd,
+                              false /* forwarded */);
+        }
+      }
+
+      // Some sanity checks (debug builds only).
+      //
+#ifndef NDEBUG
+      assert (ps->root ());
+      const module* m (ps->lookup_module<module> (x));
+      assert (m != nullptr && m->modules);
+#endif
+
+      return pd;
+    }
+
+    // Synthesize a dependency for building a module binary interface (BMI)
+    // on the side, returning the (existing or newly inserted) BMI target.
+    //
+    const file& compile_rule::
+    make_module_sidebuild (action a,
+                           const scope& bs,
+                           const target& lt, // Library.
+                           const target& mt, // Module source (mxx{}).
+                           const string& mn) const // Module name.
+    {
+      tracer trace (x, "compile_rule::make_module_sidebuild");
+
+      // Note: see also make_header_sidebuild() below.
+
+      dir_path pd (find_modules_sidebuild (*bs.root_scope ()));
+
+      // We need to come up with a file/target name that will be unique enough
+      // not to conflict with other modules. If we assume that within an
+      // amalgamation there is only one "version" of each module, then the
+      // module name itself seems like a good fit. We just replace '.' with
+      // '-'.
+      //
+      string mf;
+      transform (mn.begin (), mn.end (),
+                 back_inserter (mf),
+                 [] (char c) {return c == '.' ? '-' : c;});
+
+      // It seems natural to build a BMI type that corresponds to the library
+      // type. After all, this is where the object file part of the BMI is
+      // going to come from (though things will probably be different for
+      // module-only libraries).
+      //
+      const target_type& tt (compile_types (link_type (lt).type).bmi);
+
+      // Store the BMI target in the subproject root. If the target already
+      // exists then we assume all this is already done (otherwise why would
+      // someone have created such a target).
+      //
+      if (const file* bt = bs.ctx.targets.find<file> (
+            tt,
+            pd,
+            dir_path (), // Always in the out tree.
+            mf,
+            nullopt,     // Use default extension.
+            trace))
+        return *bt; // Already exists.
+
+      prerequisites ps;
+      ps.push_back (prerequisite (mt)); // The mxx{} itself.
+
+      // We've added the mxx{} but it may import other modules from this
+      // library. Or from (direct) dependencies of this library. We add them
+      // all as prerequisites so that the standard module search logic can
+      // sort things out. This is pretty similar to what we do in link when
+      // synthesizing dependencies for bmi{}'s.
+      //
+      // Note: lt is matched and so the group is resolved.
+      //
+      ps.push_back (prerequisite (lt)); // The library itself.
+      for (prerequisite_member p: group_prerequisite_members (a, lt))
+      {
+        if (include (a, lt, p) != include_type::normal) // Excluded/ad hoc.
+          continue;
+
+        // @@ TODO: will probably need revision if using sidebuild for
+        //    non-installed libraries (e.g., direct BMI dependencies
+        //    will probably have to be translated to mxx{} or some such).
+        //
+        if (p.is_a<libx> () ||
+            p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ())
+        {
+          ps.push_back (p.as_prerequisite ());
+        }
+      }
+
+      auto p (bs.ctx.targets.insert_locked (
+                tt,
+                move (pd),
+                dir_path (), // Always in the out tree.
+                move (mf),
+                nullopt,     // Use default extension.
+                true,        // Implied.
+                trace));
+      file& bt (static_cast<file&> (p.first));
+
+      // Racy: someone might have created this target while we were preparing
+      // the prerequisite list, in which case we leave it as is.
+      //
+      if (p.second.owns_lock ())
+        bt.prerequisites (move (ps));
+
+      return bt;
+    }
+
+    // Synthesize a dependency for building a header unit binary interface
+    // on the side, returning the (existing or newly inserted) BMI target.
+    //
+    const file& compile_rule::
+    make_header_sidebuild (action,
+                           const scope& bs,
+                           linfo li,
+                           const file& ht) const // Header.
+    {
+      tracer trace (x, "compile_rule::make_header_sidebuild");
+
+      // Note: similar to make_module_sidebuild() above.
+
+      dir_path pd (find_modules_sidebuild (*bs.root_scope ()));
+
+      // What should we use as a file/target name? On one hand we want it
+      // unique enough so that <stdio.h> and <custom/stdio.h> don't end up
+      // with the same BMI. On the other, we need the same headers resolving
+      // to the same target, regardless of how they were imported. So it feels
+      // like the name should be the absolute and normalized (actualized on
+      // case-insensitive filesystems) header path. We could try to come up
+      // with something by sanitizing certain characters, etc. But then the
+      // names will be very long and ugly, they will run into path length
+      // limits, etc. So instead we will use the file name plus an abbreviated
+      // hash of the whole path, something like stdio-211321fe6de7.
+      //
+      string mf;
+      {
+        // @@ MODHDR: Can we assume the path is actualized since the header
+        //            target came from enter_header()? No, not anymore: it
+        //            is now normally just normalized.
+        //
+        const path& hp (ht.path ());
+        mf = hp.leaf ().make_base ().string (); // E.g., stdio for stdio.h.
+        mf += '-';
+        mf += sha256 (hp.string ()).abbreviated_string (12);
+      }
+
+      const target_type& tt (compile_types (li.type).hbmi);
+
+      if (const file* bt = bs.ctx.targets.find<file> (
+            tt,
+            pd,
+            dir_path (), // Always in the out tree.
+            mf,
+            nullopt,     // Use default extension.
+            trace))
+        return *bt; // Already exists.
+
+      prerequisites ps;
+      ps.push_back (prerequisite (ht)); // The header itself.
+
+      auto p (bs.ctx.targets.insert_locked (
+                tt,
+                move (pd),
+                dir_path (), // Always in the out tree.
+                move (mf),
+                nullopt,     // Use default extension.
+                true,        // Implied.
+                trace));
+      file& bt (static_cast<file&> (p.first));
+
+      // Racy: someone might have created this target while we were preparing
+      // the prerequisite list, in which case we leave it as is.
+      //
+      if (p.second.owns_lock ())
+        bt.prerequisites (move (ps));
+
+      return bt;
+    }
+
+    // Filter cl.exe noise (implemented in msvc.cxx).
+    //
+    void
+    msvc_filter_cl (ifdstream&, const path& src);
+
+    // Append header unit-related options (to both stor and args).
+    //
+    // Note that this function is called for both full preprocessing and
+    // compilation proper and in the latter case it is followed by a call
+    // to append_modules().
+    //
+    void compile_rule::
+    append_headers (environment&,
+                    cstrings& args,
+                    small_vector<string, 2>& stor,
+                    action,
+                    const file&,
+                    const match_data& md,
+                    const path& dd) const
+    {
+      switch (ctype)
+      {
+      case compiler_type::gcc:
+        {
+          if (md.headers != 0)
+          {
+            string s (relative (dd).string ());
+            s.insert (0, "-fmodule-mapper="); // -fmodule-mapper=<dd>?@
+            s += "?@"; // Cookie (aka line prefix).
+            stor.push_back (move (s));
+          }
+
+          break;
+        }
+      case compiler_type::clang:
+      case compiler_type::msvc:
+      case compiler_type::icc:
+        break; // Nothing to add for these.
+      }
+
+      // Shallow-copy storage to args. We don't do it as we go along pushing
+      // into storage since a reallocation could invalidate earlier pointers.
+      //
+      for (const string& a: stor)
+        args.push_back (a.c_str ());
+    }
+
+    // Append module-related options (and, for VC, environment adjustments).
+    //
+    // Note that this function is only called for the compilation proper and
+    // after a call to append_headers() (so watch out for duplicate options).
+    //
+    void compile_rule::
+    append_modules (environment& env,
+                    cstrings& args,
+                    small_vector<string, 2>& stor,
+                    action a,
+                    const file& t,
+                    const match_data& md,
+                    const path& dd) const
+    {
+      unit_type ut (md.type);
+      const module_positions& ms (md.modules);
+
+      dir_path stdifc; // Directory of std.* BMIs (see the VC case below).
+
+      switch (ctype)
+      {
+      case compiler_type::gcc:
+        {
+          // Use the module map stored in depdb.
+          //
+          // Note that it is also used to specify the output BMI file.
+          //
+          if (md.headers == 0                && // Done in append_headers()?
+              (ms.start != 0                 ||
+               ut == unit_type::module_iface ||
+               ut == unit_type::module_header))
+          {
+            string s (relative (dd).string ());
+            s.insert (0, "-fmodule-mapper=");
+            s += "?@"; // Cookie (aka line prefix).
+            stor.push_back (move (s));
+          }
+
+          break;
+        }
+      case compiler_type::clang:
+        {
+          if (ms.start == 0)
+            return;
+
+          // Clang embeds module file references so we only need to specify
+          // our direct imports (the loop below stops at ms.copied, if set).
+          //
+          // If/when we get the ability to specify the mapping in a file, we
+          // will pass the whole list.
+          //
+#if 0
+          // In Clang the module implementation's unit .pcm is special and
+          // must be "loaded".
+          //
+          if (ut == unit_type::module_impl)
+          {
+            const file& f (pts[ms.start]->as<file> ());
+            string s (relative (f.path ()).string ());
+            s.insert (0, "-fmodule-file=");
+            stor.push_back (move (s));
+          }
+
+          // Use the module map stored in depdb for others.
+          //
+          string s (relative (dd).string ());
+          s.insert (0, "-fmodule-file-map=@=");
+          stor.push_back (move (s));
+#else
+          auto& pts (t.prerequisite_targets[a]);
+          for (size_t i (ms.start),
+                 n (ms.copied != 0 ? ms.copied : pts.size ());
+               i != n;
+               ++i)
+          {
+            const target* pt (pts[i]);
+
+            if (pt == nullptr)
+              continue;
+
+            // Here we use whatever bmi type has been added. And we know all
+            // of these are bmi's.
+            //
+            const file& f (pt->as<file> ());
+            string s (relative (f.path ()).string ());
+
+            // In Clang the module implementation's unit .pcm is special and
+            // must be "loaded" (that is, passed without the <name>= prefix).
+            //
+            if (ut == unit_type::module_impl && i == ms.start)
+              s.insert (0, "-fmodule-file=");
+            else
+            {
+              s.insert (0, 1, '=');
+              s.insert (0, cast<string> (f.state[a].vars[c_module_name]));
+              s.insert (0, "-fmodule-file=");
+            }
+
+            stor.push_back (move (s));
+          }
+#endif
+          break;
+        }
+      case compiler_type::msvc:
+        {
+          if (ms.start == 0)
+            return;
+
+          auto& pts (t.prerequisite_targets[a]);
+          for (size_t i (ms.start), n (pts.size ()); // Unlike Clang, to the end.
+               i != n;
+               ++i)
+          {
+            const target* pt (pts[i]);
+
+            if (pt == nullptr)
+              continue;
+
+            // Here we use whatever bmi type has been added. And we know all
+            // of these are bmi's.
+            //
+            const file& f (pt->as<file> ());
+
+            // In VC std.* modules can only come from a single directory
+            // specified with the IFCPATH environment variable or the
+            // /module:stdIfcDir option.
+            //
+            if (std_module (cast<string> (f.state[a].vars[c_module_name])))
+            {
+              dir_path d (f.path ().directory ());
+
+              if (stdifc.empty ())
+              {
+                // Go one directory up since /module:stdIfcDir will look in
+                // either Release or Debug subdirectories. Keeping the result
+                // absolute feels right.
+                //
+                stor.push_back ("/module:stdIfcDir");
+                stor.push_back (d.directory ().string ());
+                stdifc = move (d);
+              }
+              else if (d != stdifc) // Absolute and normalized.
+                fail << "multiple std.* modules in different directories";
+            }
+            else
+            {
+              stor.push_back ("/module:reference");
+              stor.push_back (relative (f.path ()).string ());
+            }
+          }
+          break;
+        }
+      case compiler_type::icc:
+        break;
+      }
+
+      // Shallow-copy storage to args. Why not do it as we go along pushing
+      // into storage? Because potential reallocations would move the strings.
+      //
+      for (const string& a: stor)
+        args.push_back (a.c_str ());
+
+      // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it
+      // if we are using our own std modules.
+      //
+      if (!stdifc.empty ())
+        env.push_back ("IFCPATH");
+    }
+
+    target_state compile_rule::
+    perform_update (action a, const target& xt) const
+    {
+      const file& t (xt.as<file> ());
+      const path& tp (t.path ());
+
+      match_data md (move (t.data<match_data> ()));
+      unit_type ut (md.type);
+
+      context& ctx (t.ctx);
+
+      // While all our prerequisites are already up-to-date, we still have to
+      // execute them to keep the dependency counts straight. Actually, no, we
+      // may also have to update the modules.
+      //
+      // Note that this also takes care of forcing update on any ad hoc
+      // prerequisite change.
+      //
+      auto pr (
+        execute_prerequisites<file> (
+          md.src.type (),
+          a, t,
+          md.mt,
+          [s = md.modules.start] (const target&, size_t i)
+          {
+            return s != 0 && i >= s; // Only compare timestamps for modules.
+          },
+          md.modules.copied)); // See search_modules() for details.
+
+      const file& s (pr.second);
+      const path* sp (&s.path ());
+
+      if (pr.first)
+      {
+        if (md.touch)
+        {
+          touch (ctx, tp, false, 2);
+          t.mtime (system_clock::now ());
+          ctx.skip_count.fetch_add (1, memory_order_relaxed);
+        }
+        // Note: else mtime should be cached.
+
+        return *pr.first;
+      }
+
+      // Make sure depdb is no older than any of our prerequisites (see md.mt
+      // logic description above for details). Also save the sequence start
+      // time if doing mtime checks (see the depdb::check_mtime() call below).
+      //
+      timestamp start (depdb::mtime_check ()
+                       ? system_clock::now ()
+                       : timestamp_unknown);
+
+      touch (ctx, md.dd, false, verb_never);
+
+      const scope& bs (t.base_scope ());
+      const scope& rs (*bs.root_scope ());
+
+      otype ot (compile_type (t, ut));
+      linfo li (link_info (bs, ot));
+      compile_target_types tts (compile_types (ot));
+
+      environment env;
+      cstrings args {cpath.recall_string ()};
+
+      // If we are building a module interface, then the target is bmi*{} and
+      // its ad hoc member is obj*{}. For header units there is no obj*{}.
+      //
+      path relm;
+      path relo (ut == unit_type::module_header
+                 ? path ()
+                 : relative (ut == unit_type::module_iface
+                             ? find_adhoc_member<file> (t, tts.obj)->path ()
+                             : tp));
+
+      // Build the command line.
+      //
+      if (md.pp != preprocessed::all)
+      {
+        append_options (args, t, c_poptions);
+        append_options (args, t, x_poptions);
+
+        // Add *.export.poptions from prerequisite libraries.
+        //
+        append_lib_options (bs, args, a, t, li);
+
+        // Extra system header dirs (last).
+        //
+        assert (sys_inc_dirs_extra <= sys_inc_dirs.size ());
+        append_option_values (
+          args, "-I",
+          sys_inc_dirs.begin () + sys_inc_dirs_extra, sys_inc_dirs.end (),
+          [] (const dir_path& d) {return d.string ().c_str ();});
+
+        if (md.symexport)
+          append_symexport_options (args, t);
+      }
+
+      append_options (args, t, c_coptions);
+      append_options (args, t, x_coptions);
+      append_options (args, tstd);
+
+      string out, out1;                    // Output options storage.
+      small_vector<string, 2> header_args; // Header unit options storage.
+      small_vector<string, 2> module_args; // Module options storage.
+
+      size_t out_i (0);  // Index of the -o option.
+      size_t lang_n (0); // Number of lang options.
+
+      if (cclass == compiler_class::msvc)
+      {
+        // The /F*: option variants with separate names only became available
+        // in VS2013/12.0. Why do we bother? Because the command line suddenly
+        // becomes readable.
+        //
+        uint64_t ver (cast<uint64_t> (rs[x_version_major]));
+
+        args.push_back ("/nologo");
+
+        // While we want to keep the low-level build as "pure" as possible,
+        // the two misguided defaults, exceptions and runtime, just have to be
+        // fixed. Otherwise the default build is pretty much unusable. But we
+        // also make sure that the user can easily disable our defaults: if we
+        // see any relevant options explicitly specified, we take our hands
+        // off.
+        //
+        // For C looks like no /EH* (exceptions supported but no C++ objects
+        // destroyed) is a reasonable default.
+        //
+        if (x_lang == lang::cxx && !find_option_prefix ("/EH", args))
+          args.push_back ("/EHsc");
+
+        // The runtime is a bit more interesting. At first it may seem like a
+        // good idea to be a bit clever and use the static runtime if we are
+        // building obja{}. And for obje{} we could decide which runtime to
+        // use based on the library link order: if it is static-only, then we
+        // could assume the static runtime. But it is indeed too clever: when
+        // building liba{} we have no idea who is going to use it. It could be
+        // an exe{} that links both static and shared libraries (and is
+        // therefore built with the shared runtime). And to safely use the
+        // static runtime, everything must be built with /MT and there should
+        // be no DLLs in the picture. So we are going to play it safe and
+        // always default to the shared runtime.
+        //
+        // In a similar vein, it would seem reasonable to use the debug runtime
+        // if we are compiling with debug. But, again, there will be fireworks
+        // if we have some projects built with debug and some without and then
+        // we try to link them together (which is not an unreasonable thing to
+        // do). So by default we will always use the release runtime.
+        //
+        if (!find_option_prefixes ({"/MD", "/MT"}, args))
+          args.push_back ("/MD");
+
+        msvc_sanitize_cl (args);
+
+        append_headers (env, args, header_args, a, t, md, md.dd);
+        append_modules (env, args, module_args, a, t, md, md.dd);
+
+        // The presence of /Zi or /ZI causes the compiler to write debug info
+        // to the .pdb file. By default it is a shared file called vcNN.pdb
+        // (where NN is the VC version) created (wait for it) in the current
+        // working directory (and not the directory of the .obj file). Also,
+        // because it is shared, there is a special Windows service that
+        // serializes access. We, of course, want none of that so we will
+        // create a .pdb per object file.
+        //
+        // Note that this also changes the name of the .idb file (used for
+        // minimal rebuild and incremental compilation): cl.exe takes the /Fd
+        // value and replaces the .pdb extension with .idb.
+        //
+        // Note also that what we are doing here appears to be incompatible
+        // with PCH (/Y* options) and /Gm (minimal rebuild).
+        //
+        if (find_options ({"/Zi", "/ZI"}, args))
+        {
+          if (ver >= 18)
+            args.push_back ("/Fd:");
+          else
+            out1 = "/Fd";
+
+          out1 += relo.string ();
+          out1 += ".pdb";
+
+          args.push_back (out1.c_str ());
+        }
+
+        if (ver >= 18)
+        {
+          args.push_back ("/Fo:");
+          args.push_back (relo.string ().c_str ());
+        }
+        else
+        {
+          out = "/Fo" + relo.string ();
+          args.push_back (out.c_str ());
+        }
+
+        // @@ MODHDR MSVC
+        //
+        if (ut == unit_type::module_iface)
+        {
+          relm = relative (tp);
+
+          args.push_back ("/module:interface");
+          args.push_back ("/module:output");
+          args.push_back (relm.string ().c_str ());
+        }
+
+        // Note: no way to indicate that the source is already preprocessed.
+
+        args.push_back ("/c");                   // Compile only.
+        append_lang_options (args, md);          // Compile as.
+        args.push_back (sp->string ().c_str ()); // Note: relied on being last.
+      }
+      else
+      {
+        if (ot == otype::s)
+        {
+          // On Darwin and Win32 -fPIC is the default.
+          //
+          if (tclass == "linux" || tclass == "bsd")
+            args.push_back ("-fPIC");
+        }
+
+        append_headers (env, args, header_args, a, t, md, md.dd);
+        append_modules (env, args, module_args, a, t, md, md.dd);
+
+        // Note: the order of the following options is relied upon below.
+        //
+        out_i = args.size (); // Index of the -o option.
+
+        if (ut == unit_type::module_iface || ut == unit_type::module_header)
+        {
+          switch (ctype)
+          {
+          case compiler_type::gcc:
+            {
+              // Output module file is specified in the mapping file, the
+              // same as input.
+              //
+              if (ut != unit_type::module_header) // No object file.
+              {
+                args.push_back ("-o");
+                args.push_back (relo.string ().c_str ());
+                args.push_back ("-c");
+              }
+              break;
+            }
+          case compiler_type::clang:
+            {
+              relm = relative (tp);
+
+              args.push_back ("-o");
+              args.push_back (relm.string ().c_str ());
+              args.push_back ("--precompile");
+
+              // Without this option Clang's .pcm will reference source files.
+              // In our case this file may be transient (.ii). Plus, it won't
+              // play nice with distributed compilation.
+              //
+              args.push_back ("-Xclang");
+              args.push_back ("-fmodules-embed-all-files");
+
+              break;
+            }
+          case compiler_type::msvc:
+          case compiler_type::icc:
+            assert (false);
+          }
+        }
+        else
+        {
+          args.push_back ("-o");
+          args.push_back (relo.string ().c_str ());
+          args.push_back ("-c");
+        }
+
+        lang_n = append_lang_options (args, md);
+
+        if (md.pp == preprocessed::all)
+        {
+          // Note that the mode we select must still handle comments and line
+          // continuations. So some more compiler-specific voodoo.
+          //
+          switch (ctype)
+          {
+          case compiler_type::gcc:
+            {
+              // -fdirectives-only is available since GCC 4.3.0.
+              //
+              if (cmaj > 4 || (cmaj == 4 && cmin >= 3))
+              {
+                args.push_back ("-fpreprocessed");
+                args.push_back ("-fdirectives-only");
+              }
+              break;
+            }
+          case compiler_type::clang:
+            {
+              // Clang handles comments and line continuations in the
+              // preprocessed source (it does not have -fpreprocessed).
+              //
+              break;
+            }
+          case compiler_type::icc:
+            break; // Compile as normal source for now.
+          case compiler_type::msvc:
+            assert (false);
+          }
+        }
+
+        args.push_back (sp->string ().c_str ());
+      }
+
+      args.push_back (nullptr);
+
+      if (!env.empty ())
+        env.push_back (nullptr);
+
+      // With verbosity level 2 print the command line as if we are compiling
+      // the source file, not its preprocessed version (so that it's easy to
+      // copy and re-run, etc). Only at level 3 and above print the real deal.
+      //
+      if (verb == 1)
+        text << x_name << ' ' << s;
+      else if (verb == 2)
+        print_process (args);
+
+      // If we have the (partially) preprocessed output, switch to that.
+      //
+      bool psrc (!md.psrc.path.empty ());
+      bool pact (md.psrc.active);
+      if (psrc)
+      {
+        args.pop_back (); // nullptr
+        args.pop_back (); // sp
+
+        sp = &md.psrc.path;
+
+        // This should match with how we setup preprocessing.
+        //
+        switch (ctype)
+        {
+        case compiler_type::gcc:
+          {
+            // The -fpreprocessed is implied by .i/.ii. But not when compiling
+            // a header unit (there is no .hi/.hii).
+            //
+            if (ut == unit_type::module_header)
+              args.push_back ("-fpreprocessed");
+            else
+              // Pop -x since it takes precedence over the extension.
+              //
+              // @@ I wonder why bother and not just add -fpreprocessed? Are
+              //    we trying to save an option or does something break?
+              //
+              for (; lang_n != 0; --lang_n)
+                args.pop_back ();
+
+            args.push_back ("-fdirectives-only");
+            break;
+          }
+        case compiler_type::clang:
+          {
+            // Note that without -x Clang will treat .i/.ii as fully
+            // preprocessed.
+            //
+            break;
+          }
+        case compiler_type::msvc:
+          {
+            // Nothing to do (/TP or /TC already there).
+            //
+            break;
+          }
+        case compiler_type::icc:
+          assert (false);
+        }
+
+        args.push_back (sp->string ().c_str ());
+        args.push_back (nullptr);
+
+        // Let's keep the preprocessed file in case of an error but only at
+        // verbosity level 3 and up (when one actually sees it mentioned on
+        // the command line). We also have to re-arm on success (see below).
+        //
+        if (pact && verb >= 3)
+          md.psrc.active = false;
+      }
+
+      if (verb >= 3)
+        print_process (args);
+
+      // @@ DRYRUN: Currently we discard the (partially) preprocessed file on
+      // dry-run which is a waste. Even if we keep the file around (like we do
+      // for the error case; see above), we currently have no support for
+      // re-using the previously preprocessed output. However, everything
+      // points towards us needing this in the near future since with modules
+      // we may be out of date but not needing to re-preprocess the
+      // translation unit (i.e., one of the imported modules has its BMI
+      // changed).
+      //
+      if (!ctx.dry_run)
+      {
+        try
+        {
+          // VC cl.exe sends diagnostics to stdout. It also prints the file
+          // name being compiled as the first line. So for cl.exe we redirect
+          // stdout to a pipe, filter that noise out, and send the rest to
+          // stderr.
+          //
+          // For other compilers redirect stdout to stderr, in case any of
+          // them tries to pull off something similar. For sane compilers this
+          // should be harmless.
+          //
+          bool filter (ctype == compiler_type::msvc);
+
+          process pr (cpath,
+                      args.data (),
+                      0, (filter ? -1 : 2), 2,
+                      nullptr, // CWD
+                      env.empty () ? nullptr : env.data ());
+
+          if (filter)
+          {
+            try
+            {
+              ifdstream is (
+                move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+
+              msvc_filter_cl (is, *sp);
+
+              // If anything remains in the stream, send it all to stderr.
+              // Note that the eof check is important: if the stream is at
+              // eof, this and all subsequent writes to the diagnostics stream
+              // will fail (and you won't see a thing).
+              //
+              if (is.peek () != ifdstream::traits_type::eof ())
+                diag_stream_lock () << is.rdbuf ();
+
+              is.close ();
+            }
+            catch (const io_error&) {} // Assume exits with error.
+          }
+
+          run_finish (args, pr);
+        }
+        catch (const process_error& e)
+        {
+          error << "unable to execute " << args[0] << ": " << e;
+
+          if (e.child)
+            exit (1);
+
+          throw failed ();
+        }
+      }
+
+      // Re-arm the removal of the preprocessed file (see above).
+      //
+      if (pact && verb >= 3)
+        md.psrc.active = true;
+
+      // Clang's module compilation requires two separate compiler
+      // invocations.
+      //
+      if (ctype == compiler_type::clang && ut == unit_type::module_iface)
+      {
+        // Adjust the command line. First discard everything after -o then
+        // build the new "tail".
+        //
+        args.resize (out_i + 1);
+        args.push_back (relo.string ().c_str ()); // Produce .o.
+        args.push_back ("-c");                    // By compiling .pcm.
+        args.push_back ("-Wno-unused-command-line-argument");
+        args.push_back (relm.string ().c_str ());
+        args.push_back (nullptr);
+
+        if (verb >= 2)
+          print_process (args);
+
+        if (!ctx.dry_run)
+        {
+          // Remove the target file if this fails. If we don't do that, we
+          // will end up with a broken build that is up-to-date.
+          //
+          auto_rmfile rm (relm);
+
+          try
+          {
+            process pr (cpath,
+                        args.data (),
+                        0, 2, 2,
+                        nullptr, // CWD
+                        env.empty () ? nullptr : env.data ());
+
+            run_finish (args, pr);
+          }
+          catch (const process_error& e)
+          {
+            error << "unable to execute " << args[0] << ": " << e;
+
+            if (e.child)
+              exit (1);
+
+            throw failed ();
+          }
+
+          rm.cancel ();
+        }
+      }
+
+      timestamp now (system_clock::now ());
+
+      if (!ctx.dry_run)
+        depdb::check_mtime (start, md.dd, tp, now);
+
+      // Should we go to the filesystem and get the new mtime? We know the
+      // file has been modified, so instead just use the current clock time.
+      // It has the advantage of having the subseconds precision. Plus, in
+      // case of dry-run, the file won't be modified.
+      //
+      t.mtime (now);
+      return target_state::changed;
+    }
+
+    target_state compile_rule::
+    perform_clean (action a, const target& xt) const
+    {
+      auto extras = [this] () -> clean_extras // Compiler-specific extras.
+      {
+        switch (ctype)
+        {
+        case compiler_type::gcc:   return {".d", x_pext, ".t"};
+        case compiler_type::clang: return {".d", x_pext};
+        case compiler_type::msvc:  return {".d", x_pext, ".idb", ".pdb"};
+        case compiler_type::icc:   return {".d"};
+        }
+        return {}; // Not reached for a valid ctype; nothing extra to clean.
+      };
+
+      return perform_clean_extra (a, xt.as<file> (), extras ());
+    }
+  }
+}
diff --git a/libbuild2/cc/compile-rule.hxx b/libbuild2/cc/compile-rule.hxx
new file mode 100644
index 0000000..93972a2
--- /dev/null
+++ b/libbuild2/cc/compile-rule.hxx
@@ -0,0 +1,189 @@
+// file      : libbuild2/cc/compile-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_COMPILE_RULE_HXX
+#define LIBBUILD2_CC_COMPILE_RULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/rule.hxx>
+#include <libbuild2/filesystem.hxx> // auto_rmfile
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  class depdb;
+
+  namespace cc
+  {
+    // The order is arranged so that their integral values indicate whether
+    // one is "stronger" than another.
+    //
+    enum class preprocessed: uint8_t {none, includes, modules, all};
+
+    // Positions (indexes in prerequisite_targets) of the imported bmi*{}s.
+    // See search_modules() for details.
+    //
+    struct module_positions
+    {
+      size_t start;    // First imported    bmi*{}, 0 if none.
+      size_t exported; // First re-exported bmi*{}, 0 if none.
+      size_t copied;   // First copied-over bmi*{}, 0 if none.
+    };
+
+    class LIBBUILD2_CC_SYMEXPORT compile_rule: public rule, virtual common
+    {
+    public:
+      compile_rule (data&&);
+
+      virtual bool
+      match (action, target&, const string&) const override;
+
+      virtual recipe
+      apply (action, target&) const override;
+
+      target_state
+      perform_update (action, const target&) const;
+
+      target_state
+      perform_clean (action, const target&) const;
+
+    private:
+      struct match_data;
+      using environment = small_vector<const char*, 2>;
+
+      void
+      append_lib_options (const scope&,
+                          cstrings&,
+                          action,
+                          const target&,
+                          linfo) const;
+
+      void
+      hash_lib_options (const scope&,
+                        sha256&,
+                        action,
+                        const target&,
+                        linfo) const;
+
+      // Mapping of include prefixes (e.g., foo in <foo/bar>) for auto-
+      // generated headers to directories where they will be generated.
+      //
+      // We are using a prefix map of directories (dir_path_map) instead of
+      // just a map in order to also cover sub-paths (e.g., <foo/more/bar> if
+      // we continue with the example). Specifically, we need to make sure we
+      // don't treat foobar as a sub-directory of foo.
+      //
+      // The priority is used to decide who should override whom. Lesser
+      // values are considered higher priority. See append_prefixes() for
+      // details.
+      //
+      // @@ The keys should be normalized.
+      //
+      struct prefix_value
+      {
+        dir_path directory;
+        size_t priority;
+      };
+      using prefix_map = dir_path_map<prefix_value>;
+
+      void
+      append_prefixes (prefix_map&, const target&, const variable&) const;
+
+      void
+      append_lib_prefixes (const scope&,
+                           prefix_map&,
+                           action,
+                           target&,
+                           linfo) const;
+
+      prefix_map
+      build_prefix_map (const scope&, action, target&, linfo) const;
+
+      small_vector<const target_type*, 2>
+      map_extension (const scope&, const string&, const string&) const;
+
+      // Src-to-out re-mapping. See extract_headers() for details.
+      //
+      using srcout_map = path_map<dir_path>;
+
+      struct module_mapper_state;
+
+      void
+      gcc_module_mapper (module_mapper_state&,
+                         action, const scope&, file&, linfo,
+                         ifdstream&, ofdstream&,
+                         depdb&, bool&, bool&,
+                         optional<prefix_map>&, srcout_map&) const;
+
+      pair<const file*, bool>
+      enter_header (action, const scope&, file&, linfo,
+                    path&&, bool,
+                    optional<prefix_map>&, srcout_map&) const;
+
+      optional<bool>
+      inject_header (action, file&, const file&, bool, timestamp) const;
+
+      pair<auto_rmfile, bool>
+      extract_headers (action, const scope&, file&, linfo,
+                       const file&, match_data&,
+                       depdb&, bool&, timestamp) const;
+
+      pair<unit, string>
+      parse_unit (action, file&, linfo,
+                  const file&, auto_rmfile&,
+                  const match_data&, const path&) const;
+
+      void
+      extract_modules (action, const scope&, file&, linfo,
+                       const compile_target_types&,
+                       const file&, match_data&,
+                       module_info&&, depdb&, bool&) const;
+
+      module_positions
+      search_modules (action, const scope&, file&, linfo,
+                      const target_type&,
+                      const file&, module_imports&, sha256&) const;
+
+      dir_path
+      find_modules_sidebuild (const scope&) const;
+
+      const file&
+      make_module_sidebuild (action, const scope&, const target&,
+                             const target&, const string&) const;
+
+      const file&
+      make_header_sidebuild (action, const scope&, linfo, const file&) const;
+
+      void
+      append_headers (environment&, cstrings&, small_vector<string, 2>&,
+                      action, const file&,
+                      const match_data&, const path&) const;
+
+      void
+      append_modules (environment&, cstrings&, small_vector<string, 2>&,
+                      action, const file&,
+                      const match_data&, const path&) const;
+
+      // Append the compiler-specific language selection option(s). Return
+      // the number of options (arguments, really) appended.
+      //
+      size_t
+      append_lang_options (cstrings&, const match_data&) const;
+
+      void
+      append_symexport_options (cstrings&, const target&) const;
+
+    private:
+      const string rule_id;
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_COMPILE_RULE_HXX
diff --git a/libbuild2/cc/export.hxx b/libbuild2/cc/export.hxx
new file mode 100644
index 0000000..16118d6
--- /dev/null
+++ b/libbuild2/cc/export.hxx
@@ -0,0 +1,38 @@
+// file      : libbuild2/cc/export.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+// Normally we don't export class templates (but do complete specializations),
+// inline functions, and classes with only inline member functions. Exporting
+// classes that inherit from non-exported/imported bases (e.g., std::string)
+// will end badly. The only known workarounds are to not inherit or to not
+// export. Also, MinGW GCC doesn't like seeing non-exported functions being
+// used before their inline definition. The workaround is to reorder code. In
+// the end it's all trial and error.
+
+#if defined(LIBBUILD2_CC_STATIC)         // Using static.
+#  define LIBBUILD2_CC_SYMEXPORT
+#elif defined(LIBBUILD2_CC_STATIC_BUILD) // Building static.
+#  define LIBBUILD2_CC_SYMEXPORT
+#elif defined(LIBBUILD2_CC_SHARED)       // Using shared.
+#  ifdef _WIN32
+#    define LIBBUILD2_CC_SYMEXPORT __declspec(dllimport)
+#  else
+#    define LIBBUILD2_CC_SYMEXPORT
+#  endif
+#elif defined(LIBBUILD2_CC_SHARED_BUILD) // Building shared.
+#  ifdef _WIN32
+#    define LIBBUILD2_CC_SYMEXPORT __declspec(dllexport)
+#  else
+#    define LIBBUILD2_CC_SYMEXPORT
+#  endif
+#else
+// If none of the above macros are defined, then we assume we are being used
+// by some third-party build system that cannot/doesn't signal the library
+// type. Note that this fallback works for both static and shared but in
+// case of shared on Windows will be sub-optimal compared to dllimport.
+//
+#  define LIBBUILD2_CC_SYMEXPORT         // Using static or shared.
+#endif
diff --git a/libbuild2/cc/gcc.cxx b/libbuild2/cc/gcc.cxx
new file mode 100644
index 0000000..632805c
--- /dev/null
+++ b/libbuild2/cc/gcc.cxx
@@ -0,0 +1,263 @@
+// file      : libbuild2/cc/gcc.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+#include <libbuild2/cc/module.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    // Extract system header search paths from GCC (gcc/g++) or compatible
+    // (Clang, Intel) using the -v -E </dev/null method. Return the unique,
+    // normalized, existing absolute directories in the order printed. Fail
+    // if the compiler fails or no directories could be extracted.
+    //
+    dir_paths config_module::
+    gcc_header_search_paths (const process_path& xc, scope& rs) const
+    {
+      dir_paths r;
+      cstrings args;
+
+      args.push_back (xc.recall_string ());
+      append_options (args, rs, c_coptions);
+      append_options (args, rs, x_coptions);
+      append_options (args, tstd);
+
+      // Compile as.
+      //
+      auto langopt = [this] () -> const char*
+      {
+        switch (x_lang)
+        {
+        case lang::c:   return "c";
+        case lang::cxx: return "c++";
+        }
+
+        assert (false); // Can't get here.
+        return nullptr;
+      };
+
+      args.push_back ("-x");
+      args.push_back (langopt ());
+      args.push_back ("-v");
+      args.push_back ("-E");
+      args.push_back ("-");
+      args.push_back (nullptr);
+
+      if (verb >= 3)
+        print_process (args);
+
+      try
+      {
+        // Open pipe to stderr, redirect stdin and stdout to /dev/null.
+        //
+        process pr (xc, args.data (), -2, -2, -1);
+
+        try
+        {
+          ifdstream is (
+            move (pr.in_efd), fdstream_mode::skip, ifdstream::badbit);
+
+          // Normally the system header paths appear between the following
+          // lines:
+          //
+          // #include <...> search starts here:
+          // End of search list.
+          //
+          // The exact text depends on the current locale. What we can rely on
+          // is the presence of the "#include <...>" substring in the
+          // "opening" line and the fact that the paths are indented with a
+          // single space character, unlike the "closing" line.
+          //
+          // Note that on Mac OS we will also see some framework paths among
+          // system header paths, followed with a comment. For example:
+          //
+          //  /Library/Frameworks (framework directory)
+          //
+          // For now we ignore framework paths and to filter them out we will
+          // only consider valid paths to existing directories, skipping those
+          // which we fail to normalize or stat.
+          //
+          string s;
+          for (bool found (false); getline (is, s); )
+          {
+            if (!found)
+              found = s.find ("#include <...>") != string::npos;
+            else
+            {
+              if (s[0] != ' ')
+                break;
+
+              try
+              {
+                dir_path d (s, 1, s.size () - 1);
+
+                if (d.absolute () && exists (d, true) &&
+                    find (r.begin (), r.end (), d.normalize ()) == r.end ())
+                  r.emplace_back (move (d));
+              }
+              catch (const invalid_path&) {}
+            }
+          }
+
+          is.close (); // Don't block.
+
+          if (!pr.wait ())
+          {
+            // We have read stderr so better print some diagnostics.
+            //
+            diag_record dr (fail);
+
+            dr << "failed to extract " << x_lang << " header search paths" <<
+              info << "command line: ";
+
+            print_process (dr, args);
+          }
+        }
+        catch (const io_error&)
+        {
+          pr.wait ();
+          fail << "error reading " << x_lang << " compiler -v -E output";
+        }
+      }
+      catch (const process_error& e)
+      {
+        error << "unable to execute " << args[0] << ": " << e;
+
+        if (e.child)
+          exit (1);
+
+        throw failed ();
+      }
+
+      // It's highly unlikely not to have any system directories. More likely
+      // we misinterpreted the compiler output.
+      //
+      if (r.empty ())
+        fail << "unable to extract " << x_lang << " compiler system header "
+             << "search paths";
+
+      return r;
+    }
+
+    // Extract system library search paths from GCC (gcc/g++) or compatible
+    // (Clang, Intel) using the -print-search-dirs option. Return the unique,
+    // normalized directories in the order listed, skipping empty entries.
+    //
+    dir_paths config_module::
+    gcc_library_search_paths (const process_path& xc, scope& rs) const
+    {
+      dir_paths r;
+      cstrings args;
+
+      args.push_back (xc.recall_string ());
+      append_options (args, rs, c_coptions);
+      append_options (args, rs, x_coptions);
+      append_options (args, tstd);
+      append_options (args, rs, c_loptions);
+      append_options (args, rs, x_loptions);
+      args.push_back ("-print-search-dirs");
+      args.push_back (nullptr);
+
+      if (verb >= 3)
+        print_process (args);
+
+      // Open pipe to stdout.
+      //
+      process pr (
+        run_start (xc, args.data (), 0 /* stdin */, -1 /* stdout */));
+
+      string l;
+      try
+      {
+        ifdstream is (
+          move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+
+        // The output of -print-search-dirs are a bunch of lines that start
+        // with "<name>: =" where name can be "install", "programs", or
+        // "libraries". If you have English locale, that is. If you set your
+        // LC_ALL="tr_TR", then it becomes "kurulum", "programlar", and
+        // "kitapl?klar". Also, Clang omits "install" while GCC and Intel icc
+        // print all three. The "libraries" seem to be always last, however.
+        //
+        string s;
+        for (bool found (false); !found && getline (is, s); )
+        {
+          found = (s.compare (0, 12, "libraries: =") == 0);
+
+          size_t p (found ? 9 : s.find (": ="));
+
+          if (p != string::npos)
+            l.assign (s, p + 3, string::npos);
+        }
+
+        is.close (); // Don't block.
+      }
+      catch (const io_error&)
+      {
+        pr.wait ();
+        fail << "error reading " << x_lang << " compiler -print-search-dirs "
+             << "output";
+      }
+
+      run_finish (args, pr);
+
+      if (l.empty ())
+        fail << "unable to extract " << x_lang << " compiler system library "
+             << "search paths";
+
+      // Now the fun part: figuring out which delimiter is used. Normally it
+      // is ':' but on Windows it is ';' (or can be; who knows for sure). Also
+      // note that these paths are absolute (or should be). So here is what we
+      // are going to do: first look for ';'. If found, then that's the
+      // delimiter. If not found, then there are two cases: it is either a
+      // single Windows path or the delimiter is ':'. To distinguish these two
+      // cases we check if the path starts with a Windows drive.
+      //
+      char dc (';');
+      string::size_type e (l.find (dc));
+
+      if (e == string::npos &&
+          (l.size () < 2 || l[0] == '/' || l[1] != ':'))
+      {
+        dc = ':';
+        e = l.find (dc);
+      }
+
+      // Now chop it up. We already have the position of the first delimiter
+      // (if any).
+      //
+      for (string::size_type b (0);; e = l.find (dc, (b = e + 1)))
+      {
+        dir_path d (l, b, (e != string::npos ? e - b : e));
+
+        // Skip empty entries (e.g., produced by a trailing delimiter).
+        //
+        if (!d.empty () &&
+            find (r.begin (), r.end (), d.normalize ()) == r.end ())
+          r.emplace_back (move (d));
+
+        if (e == string::npos)
+          break;
+      }
+
+      return r;
+    }
+  }
+}
diff --git a/libbuild2/cc/guess.cxx b/libbuild2/cc/guess.cxx
new file mode 100644
index 0000000..02a2f5a
--- /dev/null
+++ b/libbuild2/cc/guess.cxx
@@ -0,0 +1,1892 @@
+// file      : libbuild2/cc/guess.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/guess.hxx>
+
+#include <map>
+#include <cstring>  // strlen(), strchr()
+
+#include <libbuild2/diagnostics.hxx>
+
+using namespace std;
+
+namespace build2
+{
+  namespace cc
+  {
+    string
+    to_string (compiler_type t)
+    {
+      // Map the type to its name (empty if the value is not recognized).
+      //
+      const char* n ("");
+      switch (t)
+      {
+      case compiler_type::gcc:   n = "gcc";   break;
+      case compiler_type::clang: n = "clang"; break;
+      case compiler_type::icc:   n = "icc";   break;
+      case compiler_type::msvc:  n = "msvc";  break;
+      }
+      return n;
+    }
+
+    compiler_id::
+    compiler_id (const std::string& id)
+    {
+      using std::string;
+
+      // The id has the <type>[-<variant>] form so split at the first dash.
+      //
+      const size_t dash (id.find ('-'));
+      const string tn (id, 0, dash);
+
+      if      (tn == "gcc")   type = compiler_type::gcc;
+      else if (tn == "clang") type = compiler_type::clang;
+      else if (tn == "msvc")  type = compiler_type::msvc;
+      else if (tn == "icc")   type = compiler_type::icc;
+      else
+        throw invalid_argument ("invalid compiler type '" + tn + "'");
+
+      // The variant, if the dash is present, must not be empty.
+      //
+      if (dash != string::npos &&
+          variant.assign (id, dash + 1, string::npos).empty ())
+        throw invalid_argument ("empty compiler variant");
+    }
+
+    string compiler_id::
+    string () const
+    {
+      // Compose the <type>[-<variant>] representation (the reverse of the
+      // parsing constructor).
+      //
+      std::string r (to_string (type));
+
+      if (!variant.empty ())
+        r.append (1, '-').append (variant);
+
+      return r;
+    }
+
+    string
+    to_string (compiler_class c)
+    {
+      // Map the class to its name (empty if the value is not recognized).
+      //
+      const char* n ("");
+      switch (c)
+      {
+      case compiler_class::msvc: n = "msvc"; break;
+      case compiler_class::gcc:  n = "gcc";  break;
+      }
+      return n;
+    }
+
+    // Detect the standard library for GCC-class compilers by preprocessing
+    // src (-E) in the xl mode with the specified options (NULL if none). The
+    // src should test the library macros and output stdlib:="XXX". Return XXX
+    // or "none" if the compiler exits with an error. Fail if not detected.
+    //
+    static string
+    stdlib (lang xl,
+            const process_path& xp,
+            const strings* c_po, const strings* x_po,
+            const strings* c_co, const strings* x_co,
+            const char* src)
+    {
+      cstrings args {xp.recall_string ()};
+      if (c_po != nullptr) append_options (args, *c_po);
+      if (x_po != nullptr) append_options (args, *x_po);
+      if (c_co != nullptr) append_options (args, *c_co);
+      if (x_co != nullptr) append_options (args, *x_co);
+      args.push_back ("-x");
+      switch (xl)
+      {
+      case lang::c:   args.push_back ("c");   break;
+      case lang::cxx: args.push_back ("c++"); break;
+      }
+      args.push_back ("-E");
+      args.push_back ("-");  // Read stdin.
+      args.push_back (nullptr);
+
+      // The source we are going to preprocess may contain #include's which
+      // may fail to resolve if, for example, there is no standard library
+      // (-nostdinc/-nostdinc++). So we are going to suppress diagnostics and
+      // assume the error exit code means no standard library (of course it
+      // could also be because there is something wrong with the compiler or
+      // options but that we simply leave to blow up later).
+      //
+      process pr (run_start (3     /* verbosity */,
+                             xp,
+                             args.data (),
+                             -1    /* stdin */,
+                             -1    /* stdout */,
+                             false /* error  */));
+      string l, r;
+      try
+      {
+        // Here we have to simultaneously write to stdin and read from stdout
+        // with both operations having the potential to block. For now we
+        // assume that src fits into the pipe's buffer.
+        //
+        ofdstream os (move (pr.out_fd));
+        ifdstream is (move (pr.in_ofd),
+                      fdstream_mode::skip,
+                      ifdstream::badbit);
+
+        os << src << endl;
+        os.close ();
+
+        while (!eof (getline (is, l)))
+        {
+          size_t p (l.find_first_not_of (' '));
+
+          if (p != string::npos && l.compare (p, 9, "stdlib:=\"") == 0)
+          {
+            p += 9;
+            r = string (l, p, l.size () - p - 1); // One for closing \".
+            break;
+          }
+        }
+
+        is.close ();
+      }
+      catch (const io_error&)
+      {
+        // Presumably the child process failed. Let run_finish() deal with
+        // that.
+      }
+
+      if (!run_finish (args.data (), pr, false /* error */, l))
+        r = "none";
+
+      if (r.empty ())
+        fail << "unable to determine " << xl << " standard library";
+
+      return r;
+    }
+
+    // C standard library detection source on POSIX (i.e., non-Windows)
+    // systems, in the form expected by stdlib() for its src argument. Notes:
+    //
+    // - We place platform macro-based checks (__FreeBSD__, __APPLE__, etc)
+    //   after library macro-based ones in case a non-default libc is used.
+    //
+    static const char* c_stdlib_src =
+"#if !defined(__STDC_HOSTED__) || __STDC_HOSTED__ == 1                      \n"
+"#  include <stddef.h>    /* Forces defining __KLIBC__ for klibc.        */ \n"
+"#  include <limits.h>    /* Includes features.h for glibc.              */ \n"
+"#  include <sys/types.h> /* Includes sys/cdefs.h for bionic.            */ \n"
+"                         /* Includes sys/features.h for newlib.         */ \n"
+"                         /* Includes features.h for uclibc.             */ \n"
+"#    if defined(__KLIBC__)                                                 \n"
+"     stdlib:=\"klibc\"                                                     \n"
+"#  elif defined(__BIONIC__)                                                \n"
+"     stdlib:=\"bionic\"                                                    \n"
+"#  elif defined(__NEWLIB__)                                                \n"
+"     stdlib:=\"newlib\"                                                    \n"
+"#  elif defined(__UCLIBC__)                                                \n"
+"     stdlib:=\"uclibc\"                                                    \n"
+"#  elif defined(__dietlibc__) /* Also has to be defined manually by     */ \n"
+"     stdlib:=\"dietlibc\"     /* or some wrapper.                       */ \n"
+"#  elif defined(__MUSL__)     /* This libc refuses to define __MUSL__   */ \n"
+"     stdlib:=\"musl\"         /* so it has to be defined by user.       */ \n"
+"#  elif defined(__GLIBC__)    /* Check for glibc last since some libc's */ \n"
+"     stdlib:=\"glibc\"        /* pretend to be it.                      */ \n"
+"#  elif defined(__FreeBSD__)                                               \n"
+"     stdlib:=\"freebsd\"                                                   \n"
+"#  elif defined(__APPLE__)                                                 \n"
+"     stdlib:=\"apple\"                                                     \n"
+"#  else                                                                    \n"
+"     stdlib:=\"other\"                                                     \n"
+"#  endif                                                                   \n"
+"#else                                                                      \n"
+"  stdlib:=\"none\"                                                         \n"
+"#endif                                                                     \n";
+
+    // Pre-guess the compiler type based on the compiler executable name and
+    // also return the start of that name in the path (used to derive the
+    // toolchain pattern). If can't make a guess (for example, because the
+    // name is a generic 'c++'), return npos with xi's type, if specified, or
+    // invalid_compiler_type. Note: only guesses the type, not the variant.
+    //
+    static pair<compiler_type, size_t>
+    pre_guess (lang xl, const path& xc, const optional<compiler_id>& xi)
+    {
+      tracer trace ("cc::pre_guess");
+
+      // Analyze the last path component only.
+      //
+      const string& s (xc.string ());
+      size_t s_p (path::traits_type::find_leaf (s));
+      size_t s_n (s.size ());
+
+      // Name separator characters (e.g., '-' in 'g++-4.8').
+      //
+      auto sep = [] (char c) -> bool
+      {
+        return c == '-' || c == '_' || c == '.';
+      };
+
+      auto stem = [&sep, &s, s_p, s_n] (const char* x) -> size_t
+      {
+        size_t m (strlen (x));
+        size_t p (s.find (x, s_p, m));
+
+        return (p != string::npos &&
+                (      p == s_p || sep (s[p - 1])) && // Separated beginning.
+                ((p + m) == s_n || sep (s[p + m])))   // Separated end.
+        ? p
+        : string::npos;
+      };
+
+      using type = compiler_type;
+      using pair = std::pair<type, size_t>;
+
+      // If the user specified the compiler id, then only check the stem for
+      // that compiler.
+      //
+      auto check = [&xi, &stem] (type t, const char* s) -> optional<pair>
+      {
+        if (!xi || xi->type == t)
+        {
+          size_t p (stem (s));
+
+          if (p != string::npos)
+            return pair (t, p);
+        }
+
+        return nullopt;
+      };
+
+      // Warn if the user specified a C compiler instead of C++ or vice versa.
+      //
+      lang o;                   // Other language.
+      const char* as (nullptr); // Actual stem.
+      const char* es (nullptr); // Expected stem.
+
+      switch (xl)
+      {
+      case lang::c:
+        {
+          // Keep msvc last since 'cl' is very generic.
+          //
+          if (auto r = check (type::gcc,   "gcc")  ) return *r;
+          if (auto r = check (type::clang, "clang")) return *r;
+          if (auto r = check (type::icc,   "icc")  ) return *r;
+          if (auto r = check (type::msvc,  "cl")   ) return *r;
+
+          if      (check (type::gcc,   as = "g++")    ) es = "gcc";
+          else if (check (type::clang, as = "clang++")) es = "clang";
+          else if (check (type::icc,   as = "icpc")   ) es = "icc";
+          else if (check (type::msvc,  as = "c++")    ) es = "cc";
+
+          o = lang::cxx;
+          break;
+        }
+      case lang::cxx:
+        {
+          // Keep msvc last since 'cl' is very generic.
+          //
+          if (auto r = check (type::gcc,   "g++")    ) return *r;
+          if (auto r = check (type::clang, "clang++")) return *r;
+          if (auto r = check (type::icc,   "icpc")   ) return *r;
+          if (auto r = check (type::msvc,  "cl")     ) return *r;
+
+          if      (check (type::gcc,   as = "gcc")  ) es = "g++";
+          else if (check (type::clang, as = "clang")) es = "clang++";
+          else if (check (type::icc,   as = "icc")  ) es = "icpc";
+          else if (check (type::msvc,  as = "cc")   ) es = "c++";
+
+          o = lang::c;
+          break;
+        }
+      }
+
+      if (es != nullptr)
+        warn << xc << " looks like a " << o << " compiler" <<
+          info << "should it be '" << es << "' instead of '" << as << "'?";
+
+      // If the user specified the id, then continue as if we pre-guessed.
+      //
+      if (xi)
+        return pair (xi->type, string::npos);
+
+      l4 ([&]{trace << "unable to guess compiler type of " << xc;});
+
+      return pair (invalid_compiler_type, string::npos);
+    }
+
+    // Guess the compiler type and variant by running it. If the pre argument
+    // is not invalid_compiler_type, then only "confirm" the pre-guess. Return
+    // empty result if unable to guess.
+    //
+    struct guess_result
+    {
+      compiler_id id;    // Guessed type/variant (empty if unable to guess).
+      string signature;  // Output line that identified the compiler.
+      string checksum;   // Checksum of the -v output, if that is what worked.
+      process_path path; // Compiler path as resolved by guess().
+
+      guess_result () = default;
+      guess_result (compiler_id i, string&& s)
+          : id (move (i)), signature (move (s)) {}
+
+      bool
+      empty () const {return id.empty ();}
+    };
+
+    // Allowed to change pre if succeeds (gcc to clang for Apple Clang).
+    //
+    static guess_result
+    guess (const char* xm,
+           lang,
+           const path& xc,
+           const optional<compiler_id>& xi,
+           compiler_type& pre)
+    {
+      tracer trace ("cc::guess");
+
+      assert (!xi || xi->type == pre);
+
+      guess_result r;
+
+      process_path xp;
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << " to override";
+          });
+
+        // Only search in PATH (specifically, omitting the current
+        // executable's directory on Windows).
+        //
+        xp = run_search (xc,
+                         false       /* init */,    // Note: result is cached.
+                         dir_path () /* fallback */,
+                         true        /* path_only */);
+      }
+
+      using type = compiler_type;
+      const type invalid = invalid_compiler_type;
+
+      // Start with -v. This will cover gcc and clang.
+      //
+      // While icc also writes what may seem like something we can use to
+      // detect it:
+      //
+      // icpc version 16.0.2 (gcc version 4.9.0 compatibility)
+      //
+      // That first word is actually the executable name. So if we rename
+      // icpc to foocpc, we will get:
+      //
+      // foocpc version 16.0.2 (gcc version 4.9.0 compatibility)
+      //
+      // In fact, if someone renames icpc to g++, there will be no way for
+      // us to detect this. Oh, well, their problem.
+      //
+      if (r.empty () && (pre == invalid   ||
+                         pre == type::gcc ||
+                         pre == type::clang))
+      {
+        auto f = [&xi] (string& l, bool last) -> guess_result
+        {
+          if (xi)
+          {
+            // The signature line is first in Clang and last in GCC.
+            //
+            if (xi->type != type::gcc || last)
+              return guess_result (*xi, move (l));
+          }
+
+          // The gcc/g++ -v output will have a last line in the form:
+          //
+          // "gcc version X.Y.Z ..."
+          //
+          // The "version" word can probably be translated. For example:
+          //
+          // gcc version 3.4.4
+          // gcc version 4.2.1
+          // gcc version 4.8.2 (GCC)
+          // gcc version 4.8.5 (Ubuntu 4.8.5-2ubuntu1~14.04.1)
+          // gcc version 4.9.2 (Ubuntu 4.9.2-0ubuntu1~14.04)
+          // gcc version 5.1.0 (Ubuntu 5.1.0-0ubuntu11~14.04.1)
+          // gcc version 6.0.0 20160131 (experimental) (GCC)
+          //
+          if (last && l.compare (0, 4, "gcc ") == 0)
+            return guess_result (compiler_id {type::gcc, ""}, move (l));
+
+          // The Apple clang/clang++ -v output will have a line (currently
+          // first) in the form:
+          //
+          // "Apple (LLVM|clang) version X.Y.Z ..."
+          //
+          // Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)
+          // Apple clang version 4.0 (tags/Apple/clang-421.0.60) (based on LLVM 3.1svn)
+          // Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)
+          // Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)
+          // Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn)
+          // Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn)
+          // Apple LLVM version 6.0 (clang-600.0.57) (based on LLVM 3.5svn)
+          // Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn)
+          // Apple LLVM version 7.0.0 (clang-700.0.53)
+          // Apple LLVM version 7.0.0 (clang-700.1.76)
+          // Apple LLVM version 7.0.2 (clang-700.1.81)
+          // Apple LLVM version 7.3.0 (clang-703.0.16.1)
+          //
+          // Note that the gcc/g++ "aliases" for clang/clang++ also include
+          // this line but it is (currently) preceded by "Configured with:
+          // ...".
+          //
+          // Check for Apple clang before the vanilla one since the above line
+          // also includes "clang".
+          //
+          if (l.compare (0, 6, "Apple ") == 0 &&
+              (l.compare (6, 5, "LLVM ") == 0 ||
+               l.compare (6, 6, "clang ") == 0))
+            return guess_result (compiler_id {type::clang, "apple"}, move (l));
+
+          // The vanilla clang/clang++ -v output will have a first line in the
+          // form:
+          //
+          // "[... ]clang version X.Y.Z[-...] ..."
+          //
+          // The "version" word can probably be translated. For example:
+          //
+          // FreeBSD clang version 3.4.1 (tags/RELEASE_34/dot1-final 208032) 20140512
+          // Ubuntu clang version 3.5.0-4ubuntu2~trusty2 (tags/RELEASE_350/final) (based on LLVM 3.5.0)
+          // Ubuntu clang version 3.6.0-2ubuntu1~trusty1 (tags/RELEASE_360/final) (based on LLVM 3.6.0)
+          // clang version 3.7.0 (tags/RELEASE_370/final)
+          //
+          if (l.find ("clang ") != string::npos)
+            return guess_result (compiler_id {type::clang, ""}, move (l));
+
+          return guess_result ();
+        };
+
+        // The -v output contains other information (such as the compiler
+        // build configuration for gcc or the selected gcc installation for
+        // clang) which makes sense to include into the compiler checksum. So
+        // ask run() to calculate it for every line of the -v output.
+        //
+        // One notable consequence of this is that if the locale changes
+        // (e.g., via LC_ALL), then the compiler signature will most likely
+        // change as well because of the translated text.
+        //
+        sha256 cs;
+
+        // Suppress all the compiler errors because we may be trying an
+        // unsupported option (but still consider the exit code).
+        //
+        r = run<guess_result> (3, xp, "-v", f, false, false, &cs);
+
+        if (r.empty ())
+        {
+          if (xi)
+          {
+            // Fallback to --version below in case this GCC/Clang-like
+            // compiler doesn't support -v.
+            //
+            //fail << "unable to obtain " << xc << " signature with -v";
+          }
+        }
+        else
+        {
+          // If this is clang-apple and pre-guess was gcc then change it so
+          // that we don't issue any warnings.
+          //
+          if (r.id.type == type::clang &&
+              r.id.variant == "apple"  &&
+              pre == type::gcc)
+            pre = type::clang;
+
+          r.checksum = cs.string ();
+        }
+      }
+
+      // Next try --version to detect icc. As well as obtain signature for
+      // GCC/Clang-like compilers in case -v above didn't work.
+      //
+      if (r.empty () && (pre == invalid   ||
+                         pre == type::icc ||
+                         pre == type::gcc ||
+                         pre == type::clang))
+      {
+        auto f = [&xi] (string& l, bool) -> guess_result
+        {
+          // Assume the first line is the signature.
+          //
+          if (xi)
+            return guess_result (*xi, move (l));
+
+          // The first line has the " (ICC) " in it, for example:
+          //
+          // icpc (ICC) 9.0 20060120
+          // icpc (ICC) 11.1 20100414
+          // icpc (ICC) 12.1.0 20110811
+          // icpc (ICC) 14.0.0 20130728
+          // icpc (ICC) 15.0.2 20150121
+          // icpc (ICC) 16.0.2 20160204
+          // icc (ICC) 16.0.2 20160204
+          //
+          if (l.find (" (ICC) ") != string::npos)
+            return guess_result (compiler_id {type::icc, ""}, move (l));
+
+          return guess_result ();
+        };
+
+        r = run<guess_result> (3, xp, "--version", f, false);
+
+        if (r.empty ())
+        {
+          if (xi)
+            fail << "unable to obtain " << xc << " signature with --version";
+        }
+      }
+
+      // Finally try to run it without any options to detect msvc.
+      //
+      if (r.empty () && (pre == invalid || pre == type::msvc))
+      {
+        auto f = [&xi] (string& l, bool) -> guess_result
+        {
+          // Assume the first line is the signature.
+          //
+          if (xi)
+            return guess_result (*xi, move (l));
+
+          // Check for "Microsoft (R)" and "C/C++" in the first line as a
+          // signature since all other words/positions can be translated. For
+          // example:
+          //
+          // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.6030 for 80x86
+          // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 14.00.50727.762 for 80x86
+          // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86
+          // Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64
+          // Microsoft (R) C/C++ Optimizing Compiler Version 17.00.50727.1 for x86
+          // Microsoft (R) C/C++ Optimizing Compiler Version 18.00.21005.1 for x86
+          // Microsoft (R) C/C++ Optimizing Compiler Version 19.00.23026 for x86
+          // Microsoft (R) C/C++ Optimizing Compiler Version 19.10.24629 for x86
+          //
+          // In the recent versions the architecture is either "x86", "x64",
+          // or "ARM".
+          //
+          if (l.find ("Microsoft (R)") != string::npos &&
+              l.find ("C/C++") != string::npos)
+            return guess_result (compiler_id {type::msvc, ""}, move (l));
+
+          return guess_result ();
+        };
+
+        // One can pass extra options/arguments to cl.exe with the CL and _CL_
+        // environment variables. However, if such extra options are passed
+        // without anything to compile, then cl.exe no longer prints usage and
+        // exits successfully but instead issues an error and fails. So we are
+        // going to unset these variables for our test (interestingly, only CL
+        // seem to cause the problem but let's unset both, for good measure).
+        //
+        const char* env[] = {"CL=", "_CL_=", nullptr};
+
+        r = run<guess_result> (3, process_env (xp, env), f, false);
+
+        if (r.empty ())
+        {
+          if (xi)
+            fail << "unable to obtain " << xc << " signature";
+        }
+      }
+
+      if (!r.empty ())
+      {
+        if (pre != invalid && r.id.type != pre)
+        {
+          l4 ([&]{trace << "compiler type guess mismatch"
+                        << ", pre-guessed " << pre
+                        << ", determined " << r.id.type;});
+
+          r = guess_result ();
+        }
+        else
+        {
+          l5 ([&]{trace << xc << " is " << r.id << ": '"
+                        << r.signature << "'";});
+
+          r.path = move (xp);
+        }
+      }
+      else
+        l4 ([&]{trace << "unable to determine compiler type of " << xc;});
+
+      return r;
+    }
+
+    // Try to derive the toolchain pattern (empty if unable to) by replacing
+    // the stem in the leaf of the compiler path with '*'.
+    //
+    // The s argument is the stem to look for and the ls/rs arguments are the
+    // left/right separator characters. If either is NULL, then the stem
+    // should be the prefix/suffix of the leaf, respectively. Note that a path
+    // that is equal to stem is not considered a pattern.
+    //
+    // Note that the default right separator includes digits to handle cases
+    // like clang++37 (FreeBSD).
+    //
+    static string
+    pattern (const path& xc,
+             const char* s,
+             const char* ls = "-_.",
+             const char* rs = "-_.0123456789")
+    {
+      const size_t sn (strlen (s));
+
+      if (xc.size () <= sn)
+        return string ();
+
+      string l (xc.leaf ().string ());
+      const size_t ln (l.size ());
+
+      // Find the stem in the leaf.
+      //
+      const size_t b (ln >= sn ? l.find (s) : string::npos);
+
+      if (b == string::npos)
+        return string ();
+
+      // Check the left and then the right separators.
+      //
+      if (b != 0 && (ls == nullptr || strchr (ls, l[b - 1]) == nullptr))
+        return string ();
+
+      const size_t e (b + sn);
+      if (e != ln && (rs == nullptr || strchr (rs, l[e]) == nullptr))
+        return string ();
+
+      // Replace the stem with '*' and re-attach the directory.
+      //
+      l.replace (b, sn, "*", 1);
+      path p (xc.directory ());
+      p /= l;
+      return move (p).string ();
+    }
+
+
+    // Determine the GCC version, target, toolchain pattern, runtime, and
+    // standard libraries. Non-NULL xv/xt override the signature/target.
+    //
+    static compiler_info
+    guess_gcc (const char* xm,
+               lang xl,
+               const path& xc,
+               const string* xv,
+               const string* xt,
+               const strings* c_po, const strings* x_po,
+               const strings* c_co, const strings* x_co,
+               const strings*, const strings*,
+               guess_result&& gr)
+    {
+      tracer trace ("cc::guess_gcc");
+
+      const process_path& xp (gr.path);
+
+      // Extract the version. The signature line has the following format
+      // though language words can be translated and even rearranged (see
+      // examples above).
+      //
+      // "gcc version A.B.C[ ...]"
+      //
+      compiler_version v;
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".version to override";
+          });
+
+        // Treat the custom version as just a tail of the signature.
+        //
+        const string& s (xv == nullptr ? gr.signature : *xv);
+
+        // Scan the string as words and look for one that looks like a
+        // version.
+        //
+        size_t b (0), e (0);
+        while (next_word (s, b, e))
+        {
+          // The third argument to find_first_not_of() is the length of the
+          // first argument, not of the interval to check. So to limit the
+          // check to [b, e) we test that the result is >= e (end of word).
+          //
+          if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+            break;
+        }
+
+        if (b == e)
+          fail << "unable to extract gcc version from '" << s << "'";
+
+        v.string.assign (s, b, string::npos);
+
+        // Split the version into components.
+        //
+        size_t vb (b), ve (b);
+        auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t
+        {
+          try
+          {
+            if (next_word (s, e, vb, ve, '.'))
+              return stoull (string (s, vb, ve - vb));
+          }
+          catch (const invalid_argument&) {}
+          catch (const out_of_range&) {}
+
+          fail << "unable to extract gcc " << m << " version from '"
+               << string (s, b, e - b) << "'" << endf;
+        };
+
+        v.major = next ("major");
+        v.minor = next ("minor");
+        v.patch = next ("patch");
+
+        if (e != s.size ())
+          v.build.assign (s, e + 1, string::npos);
+      }
+
+      // Figure out the target architecture. This is actually a lot trickier
+      // than one would have hoped.
+      //
+      // There is the -dumpmachine option but gcc doesn't adjust it per the
+      // compile options (e.g., -m32). However, starting with 4.6 it has the
+      // -print-multiarch option which gives (almost) the right answer. The
+      // "almost" part has to do with it not honoring the -arch option (which
+      // is really what this compiler is building for). To get to that, we
+      // would have to resort to a hack like this:
+      //
+      // gcc -v -E - 2>&1 | grep cc1
+      // .../cc1 ... -mtune=generic -march=x86-64
+      //
+      // Also, -print-multiarch will print an empty line if the compiler
+      // actually wasn't built with multi-arch support.
+      //
+      // So this is what we are going to do for the time being: first
+      // try -print-multiarch. If that works out (recent gcc configured with
+      // multi-arch support), then use the result. Otherwise, fall back to
+      // -dumpmachine (older gcc or not multi-arch).
+      //
+      string t, ot;
+
+      if (xt == nullptr)
+      {
+        cstrings args {xp.recall_string (), "-print-multiarch"};
+        if (c_co != nullptr) append_options (args, *c_co);
+        if (x_co != nullptr) append_options (args, *x_co);
+        args.push_back (nullptr);
+
+        // The output of both -print-multiarch and -dumpmachine is a single
+        // line containing just the target triplet.
+        //
+        auto f = [] (string& l, bool) {return move (l);};
+
+        t = run<string> (3, xp, args.data (), f, false);
+
+        if (t.empty ())
+        {
+          l5 ([&]{trace << xc << " doesn't support -print-multiarch, "
+                        << "falling back to -dumpmachine";});
+
+          args[1] = "-dumpmachine";
+          t = run<string> (3, xp, args.data (), f, false);
+        }
+
+        if (t.empty ())
+          fail << "unable to extract target architecture from " << xc
+               << " using -print-multiarch or -dumpmachine output" <<
+            info << "use config." << xm << ".target to override";
+
+        ot = t;
+      }
+      else
+        ot = t = *xt;
+
+      // Parse the target into triplet (for further tests) ignoring failures.
+      //
+      target_triplet tt;
+      try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+      // Derive the toolchain pattern. Try cc/c++ as a fallback.
+      //
+      string pat (pattern (xc, xl == lang::c ? "gcc" : "g++"));
+
+      if (pat.empty ())
+        pat = pattern (xc, xl == lang::c ? "cc" : "c++");
+
+      // Runtime and standard library.
+      //
+      // GCC always uses libgcc (even on MinGW). Even with -nostdlib GCC's
+      // documentation says that you should usually specify -lgcc.
+      //
+      string rt  ("libgcc");
+      string csl (tt.system == "mingw32"
+                  ? "msvc"
+                  : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+      string xsl;
+      switch (xl)
+      {
+      case lang::c:   xsl = csl;     break;
+      case lang::cxx:
+        {
+          // While GCC only supports its own C++ standard library (libstdc++)
+          // we still run the test to detect the "none" case (-nostdinc++).
+          //
+          const char* src =
+            "#include <bits/c++config.h> \n"
+            "stdlib:=\"libstdc++\"       \n";
+
+          xsl = stdlib (xl, xp, c_po, x_po, c_co, x_co, src);
+          break;
+        }
+      }
+
+      return compiler_info {
+        move (gr.path),
+        move (gr.id),
+        compiler_class::gcc,
+        move (v),
+        move (gr.signature),
+        move (gr.checksum), // Calculated on whole -v output.
+        move (t),
+        move (ot),
+        move (pat),
+        "",
+        move (rt),
+        move (csl),
+        move (xsl)};
+    }
+
+    // Determine the Clang version, target, toolchain pattern, runtime, and
+    // standard libraries. Non-NULL xv/xt override the signature/target.
+    //
+    static compiler_info
+    guess_clang (const char* xm,
+                 lang xl,
+                 const path& xc,
+                 const string* xv,
+                 const string* xt,
+                 const strings* c_po, const strings* x_po,
+                 const strings* c_co, const strings* x_co,
+                 const strings* c_lo, const strings* x_lo,
+                 guess_result&& gr)
+    {
+      const process_path& xp (gr.path);
+
+      // Extract the version. Here we will try to handle both vanilla and
+      // Apple clang since the signature lines are fairly similar. They have
+      // the following format though language words can probably be translated
+      // and even rearranged (see examples above).
+      //
+      // "[... ]clang version A.B.C[( |-)...]"
+      // "Apple (clang|LLVM) version A.B[.C] ..."
+      //
+      compiler_version v;
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".version to override";
+          });
+
+        // Treat the custom version as just a tail of the signature.
+        //
+        const string& s (xv == nullptr ? gr.signature : *xv);
+
+        // Some overrides for testing.
+        //
+        //s = "clang version 3.7.0 (tags/RELEASE_370/final)";
+        //
+        //gr.id.variant = "apple";
+        //s = "Apple LLVM version 7.3.0 (clang-703.0.16.1)";
+        //s = "Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)";
+
+        // Scan the string as words and look for one that looks like a
+        // version.  Use '-' as a second delimiter to handle versions like
+        // "3.6.0-2ubuntu1~trusty1".
+        //
+        size_t b (0), e (0);
+        while (next_word (s, b, e, ' ', '-'))
+        {
+          // The third argument to find_first_not_of() is the length of the
+          // first argument, not of the interval to check. So to limit the
+          // check to [b, e) we test that the result is >= e (end of word).
+          //
+          if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+            break;
+        }
+
+        if (b == e)
+          fail << "unable to extract clang version from '" << s << "'";
+
+        v.string.assign (s, b, string::npos);
+
+        // Split the version into components.
+        //
+        size_t vb (b), ve (b);
+        auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t
+        {
+          try
+          {
+            if (next_word (s, e, vb, ve, '.'))
+              return stoull (string (s, vb, ve - vb));
+
+            if (opt)
+              return 0;
+          }
+          catch (const invalid_argument&) {}
+          catch (const out_of_range&) {}
+
+          fail << "unable to extract clang " << m << " version from '"
+               << string (s, b, e - b) << "'" << endf;
+        };
+
+        v.major = next ("major", false);
+        v.minor = next ("minor", false);
+        v.patch = next ("patch", gr.id.variant == "apple");
+
+        if (e != s.size ())
+          v.build.assign (s, e + 1, string::npos);
+      }
+
+      // Figure out the target architecture.
+      //
+      // Unlike gcc, clang doesn't have -print-multiarch. Its -dumpmachine,
+      // however, respects the compile options (e.g., -m32).
+      //
+      string t, ot;
+
+      if (xt == nullptr)
+      {
+        cstrings args {xp.recall_string (), "-dumpmachine"};
+        if (c_co != nullptr) append_options (args, *c_co);
+        if (x_co != nullptr) append_options (args, *x_co);
+        args.push_back (nullptr);
+
+        // The output of -dumpmachine is a single line containing just the
+        // target triplet.
+        //
+        auto f = [] (string& l, bool) {return move (l);};
+        t = run<string> (3, xp, args.data (), f, false);
+
+        if (t.empty ())
+          fail << "unable to extract target architecture from " << xc
+               << " using -dumpmachine output" <<
+            info << "use config." << xm << ".target to override";
+
+        ot = t;
+      }
+      else
+        ot = t = *xt;
+
+      // Parse the target into triplet (for further tests) ignoring failures.
+      //
+      target_triplet tt;
+      try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+      // For Clang on Windows targeting MSVC we remap the target to match
+      // MSVC's.
+      //
+      if (tt.system == "windows-msvc")
+      {
+        // Keep the CPU and replace the rest.
+        //
+        // @@ Note that currently there is no straightforward way to determine
+        // the VC version Clang is using. See:
+        //
+        // http://lists.llvm.org/pipermail/cfe-dev/2017-December/056240.html
+        //
+        tt.vendor = "microsoft";
+        tt.system = "win32-msvc";
+        tt.version = "14.1";
+        t = tt.string ();
+      }
+
+      // Derive the toolchain pattern. Try clang/clang++, the gcc/g++ alias,
+      // as well as cc/c++.
+      //
+      string pat (pattern (xc, xl == lang::c ? "clang" : "clang++"));
+
+      if (pat.empty ())
+        pat = pattern (xc, xl == lang::c ? "gcc" : "g++");
+
+      if (pat.empty ())
+        pat = pattern (xc, xl == lang::c ? "cc" : "c++");
+
+      // Runtime and standard library.
+      //
+      // Clang can use libgcc, its own compiler-rt, or, on Windows targeting
+      // MSVC, the VC's runtime. As usual, there is no straightforward way
+      // to query this and silence on the mailing list. See:
+      //
+      // http://lists.llvm.org/pipermail/cfe-dev/2018-January/056494.html
+      //
+      // So for now we will just look for --rtlib (note: linker option) and if
+      // none specified, assume some platform-specific defaults.
+      //
+      string rt;
+      {
+        auto find_rtlib = [] (const strings* ops) -> const string*
+        {
+          return ops != nullptr
+          ? find_option_prefix ("--rtlib=", *ops, false)
+          : nullptr;
+        };
+
+        const string* o;
+        if ((o = find_rtlib (x_lo)) != nullptr ||
+            (o = find_rtlib (c_lo)) != nullptr)
+        {
+          rt = string (*o, 8);
+        }
+        else if (tt.system == "win32-msvc")  rt = "msvc";
+        else if (tt.system == "linux-gnu" ||
+                 tt.system == "freebsd")     rt = "libgcc";
+        else /* Mac OS, etc. */              rt = "compiler-rt";
+      }
+
+      string csl (tt.system == "win32-msvc" || tt.system == "mingw32"
+                  ? "msvc"
+                  : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+
+      string xsl;
+      switch (xl)
+      {
+      case lang::c:   xsl = csl; break;
+      case lang::cxx:
+        {
+          // All Clang versions that we care to support have __has_include()
+          // so we use it to determine which standard library is available.
+          //
+          // Note that we still include the corresponding headers to verify
+          // things are usable. For the "other" case we include some
+          // standard header to detect the "none" case (e.g., -nostdinc++).
+          //
+          const char* src =
+            "#if __has_include(<__config>)           \n"
+            "  #include <__config>                   \n"
+            "  stdlib:=\"libc++\"                    \n"
+            "#elif __has_include(<bits/c++config.h>) \n"
+            "  #include <bits/c++config.h>           \n"
+            "  stdlib:=\"libstdc++\"                 \n"
+            "#else                                   \n"
+            "  #include <cstddef>                    \n"
+            "  stdlib:=\"other\"                     \n"
+            "#endif                                  \n";
+
+          xsl = tt.system == "win32-msvc"
+            ? "msvcp"
+            : stdlib (xl, xp, c_po, x_po, c_co, x_co, src);
+          break;
+        }
+      }
+
+      return compiler_info {
+        move (gr.path),
+        move (gr.id),
+        compiler_class::gcc,
+        move (v),
+        move (gr.signature),
+        move (gr.checksum), // Calculated on whole -v output.
+        move (t),
+        move (ot),
+        move (pat),
+        "",
+        move (rt),
+        move (csl),
+        move (xsl)};
+    }
+
+    // Determine the icc version, target, toolchain pattern, runtime, and
+    // standard libraries. Non-NULL xv/xt override the signature/target.
+    //
+    static compiler_info
+    guess_icc (const char* xm,
+               lang xl,
+               const path& xc,
+               const string* xv,
+               const string* xt,
+               const strings* c_po, const strings* x_po,
+               const strings* c_co, const strings* x_co,
+               const strings*, const strings*,
+               guess_result&& gr)
+    {
+      const process_path& xp (gr.path);
+
+      // Extract the version. If the version has the fourth component, then
+      // the signature line (extracted with --version) won't include it. So we
+      // will have to get a more elaborate line with -V. We will also have to
+      // do it to get the compiler target that respects the -m option: icc
+      // doesn't support -print-multiarch like gcc and its -dumpmachine
+      // doesn't respect -m like clang. In fact, its -dumpmachine is
+      // completely broken as it appears to print the compiler's host and not
+      // the target (e.g., .../bin/ia32/icpc prints x86_64-linux-gnu).
+      //
+      // Some examples of the signature lines from -V output:
+      //
+      // Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047
+      // Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116
+      // Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010
+      // Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074
+      // Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064
+      // Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427
+      // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.2.181 Build 20160204
+      // Intel(R) C++ Intel(R) 64 Compiler for applications running on IA-32, Version 16.0.2.181 Build 20160204
+      // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204
+      // Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204
+      //
+      // We should probably also assume the language words can be translated
+      // and even rearranged.
+      //
+      auto f = [] (string& l, bool)
+      {
+        return l.compare (0, 5, "Intel") == 0 && (l[5] == '(' || l[5] == ' ')
+        ? move (l)
+        : string ();
+      };
+
+      if (xv == nullptr)
+      {
+        string& s (gr.signature);
+        s.clear ();
+
+        // The -V output is sent to STDERR.
+        //
+        s = run<string> (3, xp, "-V", f, false);
+
+        if (s.empty ())
+          fail << "unable to extract signature from " << xc << " -V output";
+
+        if (s.find (xl == lang::c ? " C " : " C++ ") == string::npos)
+          fail << xc << " does not appear to be the Intel " << xl
+               << " compiler" <<
+            info << "extracted signature: '" << s << "'";
+      }
+
+      // Scan the string as words and look for the version. It consists of
+      // only digits and periods and contains at least one period.
+      //
+      compiler_version v;
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".version to override";
+          });
+
+        // Treat the custom version as just a tail of the signature.
+        //
+        const string& s (xv == nullptr ? gr.signature : *xv);
+
+        // Some overrides for testing.
+        //
+        //s = "Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047";
+        //s = "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116";
+        //s = "Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010";
+        //s = "Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074";
+        //s = "Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064";
+        //s = "Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427";
+
+        size_t b (0), e (0);
+        while (next_word (s, b, e, ' ', ',') != 0)
+        {
+          // The third argument to find_first_not_of() is the length of the
+          // first argument, not of the interval to check. So to limit the
+          // check to [b, e) we test that the result is >= e (end of word).
+          // Similar logic for find_first_of() except that we add space to
+          // the list of characters to make sure we don't go too far.
+          //
+          if (s.find_first_not_of ("1234567890.", b, 11) >= e &&
+              s.find_first_of (". ", b, 2) < e)
+            break;
+        }
+
+        if (b == e)
+          fail << "unable to extract icc version from '" << s << "'";
+
+        v.string.assign (s, b, string::npos);
+
+        // Split the version into components.
+        //
+        size_t vb (b), ve (b);
+        auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t
+        {
+          try
+          {
+            if (next_word (s, e, vb, ve, '.'))
+              return stoull (string (s, vb, ve - vb));
+
+            if (opt)
+              return 0;
+          }
+          catch (const invalid_argument&) {}
+          catch (const out_of_range&) {}
+
+          fail << "unable to extract icc " << m << " version from '"
+               << string (s, b, e - b) << "'" << endf;
+        };
+
+        v.major = next ("major", false);
+        v.minor = next ("minor", false);
+        v.patch = next ("patch", true);
+
+        if (vb != ve && next_word (s, e, vb, ve, '.'))
+          v.build.assign (s, vb, ve - vb);
+
+        if (e != s.size ())
+        {
+          if (!v.build.empty ())
+            v.build += ' ';
+
+          v.build.append (s, e + 1, string::npos);
+        }
+      }
+
+      // Figure out the target CPU by re-running the compiler with -V and
+      // compile options (which may include, e.g., -m32). The output will
+      // contain two CPU keywords: the first is the host and the second is the
+      // target (hopefully this won't get rearranged by the translation).
+      //
+      // The CPU keywords (based on the above samples) appear to be:
+      //
+      // "32-bit"
+      // "IA-32"
+      // "Intel"    "64"
+      // "Intel(R)" "64"
+      // "Intel(R)" "MIC"      (-dumpmachine says: x86_64-k1om-linux)
+      //
+      string t, ot;
+
+      if (xt == nullptr)
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".target to override";
+          });
+
+        cstrings args {xp.recall_string (), "-V"};
+        if (c_co != nullptr) append_options (args, *c_co);
+        if (x_co != nullptr) append_options (args, *x_co);
+        args.push_back (nullptr);
+
+        // The -V output is sent to STDERR.
+        //
+        t = run<string> (3, xp, args.data (), f, false);
+
+        if (t.empty ())
+          fail << "unable to extract target architecture from " << xc
+               << " -V output";
+
+        string arch;
+        for (size_t b (0), e (0), n;
+             (n = next_word (t, b, e, ' ', ',')) != 0; )
+        {
+          if (t.compare (b, n, "Intel(R)", 8) == 0 ||
+              t.compare (b, n, "Intel", 5) == 0)
+          {
+            if ((n = next_word (t, b, e, ' ', ',')) != 0)
+            {
+              if (t.compare (b, n, "64", 2) == 0)
+              {
+                arch = "x86_64";
+              }
+              else if (t.compare (b, n, "MIC", 3) == 0)
+              {
+                arch = "x86_64"; // Plus "-k1om-linux" from -dumpmachine below.
+              }
+            }
+            else
+              break;
+          }
+          else if (t.compare (b, n, "IA-32", 5) == 0 ||
+                 t.compare (b, n, "32-bit", 6) == 0)
+          {
+            arch = "i386";
+          }
+        }
+
+        if (arch.empty ())
+          fail << "unable to extract icc target architecture from '"
+               << t << "'";
+
+        // So we have the CPU but we still need the rest of the triplet. While
+        // icc currently doesn't support cross-compilation (at least on Linux)
+        // and we could have just used the build triplet (i.e., the
+        // architecture on which we are running), who knows what will happen
+        // in the future. So instead we are going to use -dumpmachine and
+        // substitute the CPU.
+        //
+        {
+          auto f = [] (string& l, bool) {return move (l);};
+          t = run<string> (3, xp, "-dumpmachine", f);
+        }
+
+        if (t.empty ())
+          fail << "unable to extract target architecture from " << xc
+               << " using -dumpmachine output";
+
+        // The first component in the triplet is always CPU.
+        //
+        size_t p (t.find ('-'));
+
+        if (p == string::npos)
+          fail << "unable to parse icc target architecture '" << t << "'";
+
+        t.swap (arch);
+        t.append (arch, p, string::npos);
+
+        ot = t;
+      }
+      else
+        ot = t = *xt;
+
+      // Parse the target into triplet (for further tests) ignoring failures.
+      //
+      target_triplet tt;
+      try {tt = target_triplet (t);} catch (const invalid_argument&) {}
+
+      // Derive the toolchain pattern.
+      //
+      string pat (pattern (xc, xl == lang::c ? "icc" : "icpc"));
+
+      // Runtime and standard library.
+      //
+      // For now we assume that unless it is Windows, we are targeting
+      // Linux/GCC.
+      //
+      string rt  (tt.system == "win32-msvc" ? "msvc" : "libgcc");
+      string csl (tt.system == "win32-msvc"
+                  ? "msvc"
+                  : stdlib (xl, xp, c_po, x_po, c_co, x_co, c_stdlib_src));
+      string xsl;
+      switch (xl)
+      {
+      case lang::c:   xsl = csl;     break;
+      case lang::cxx:
+        {
+          xsl = tt.system == "win32-msvc" ? "msvcp" : "libstdc++";
+          break;
+        }
+      }
+
+      return compiler_info {
+        move (gr.path),
+        move (gr.id),
+        compiler_class::gcc, //@@ TODO: msvc on Windows?
+        move (v),
+        move (gr.signature),
+        "",
+        move (t),
+        move (ot),
+        move (pat),
+        "",
+        move (rt),
+        move (csl),
+        move (xsl)};
+    }
+
+    static compiler_info
+    guess_msvc (const char* xm,
+                lang xl,
+                const path& xc,
+                const string* xv,
+                const string* xt,
+                const strings*, const strings*,
+                const strings*, const strings*,
+                const strings*, const strings*,
+                guess_result&& gr)
+    {
+      // Extract the version. The signature line has the following format
+      // though language words can be translated and even rearranged (see
+      // examples above).
+      //
+      // "Microsoft (R) C/C++ Optimizing Compiler Version A.B.C[.D] for CPU"
+      //
+      // The CPU keywords (based on the above samples) appear to be:
+      //
+      // "80x86"
+      // "x86"
+      // "x64"
+      // "ARM"
+      //
+      compiler_version v;
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".version to override";
+          });
+
+        // Treat the custom version as just a tail of the signature.
+        //
+        const string& s (xv == nullptr ? gr.signature : *xv);
+
+        // Some overrides for testing.
+        //
+        //string s;
+        //s = "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86";
+        //s = "Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64";
+        //s = "Compilateur d'optimisation Microsoft (R) C/C++ version 19.16.27026.1 pour x64";
+
+        // Scan the string as words and look for the version.
+        //
+        size_t b (0), e (0);
+        while (next_word (s, b, e, ' ', ','))
+        {
+          // The third argument to find_first_not_of() is the length of the
+          // first argument, not the length of the interval to check. So to
+          // limit it to [b, e) we are also going to compare the result to the
+          // end of the word position (first space). In fact, we can just
+          // check if it is >= e.
+          //
+          if (s.find_first_not_of ("1234567890.", b, 11) >= e)
+            break;
+        }
+
+        if (b == e)
+          fail << "unable to extract msvc version from '" << s << "'";
+
+        v.string.assign (s, b, e - b);
+
+        // Split the version into components.
+        //
+        size_t vb (b), ve (b);
+        auto next = [&s, b, e, &vb, &ve] (const char* m) -> uint64_t
+        {
+          try
+          {
+            if (next_word (s, e, vb, ve, '.'))
+              return stoull (string (s, vb, ve - vb));
+          }
+          catch (const invalid_argument&) {}
+          catch (const out_of_range&) {}
+
+          fail << "unable to extract msvc " << m << " version from '"
+               << string (s, b, e - b) << "'" << endf;
+        };
+
+        v.major = next ("major");
+        v.minor = next ("minor");
+        v.patch = next ("patch");
+
+        if (next_word (s, e, vb, ve, '.'))
+          v.build.assign (s, vb, ve - vb);
+      }
+
+
+      // Figure out the target architecture.
+      //
+      string t, ot;
+
+      if (xt == nullptr)
+      {
+        auto df = make_diag_frame (
+          [&xm](const diag_record& dr)
+          {
+            dr << info << "use config." << xm << ".target to override";
+          });
+
+        const string& s (gr.signature);
+
+        // Scan the string as words and look for the CPU.
+        //
+        string arch;
+
+        for (size_t b (0), e (0), n;
+             (n = next_word (s, b, e, ' ', ',')) != 0; )
+        {
+          if (s.compare (b, n, "x64", 3) == 0 ||
+              s.compare (b, n, "x86", 3) == 0 ||
+              s.compare (b, n, "ARM", 3) == 0 ||
+              s.compare (b, n, "80x86", 5) == 0)
+          {
+            arch.assign (s, b, n);
+            break;
+          }
+        }
+
+        if (arch.empty ())
+          fail << "unable to extract msvc target architecture from "
+               << "'" << s << "'";
+
+        // Now we need to map x86, x64, and ARM to the target triplets. The
+        // problem is, there aren't any established ones so we got to invent
+        // them ourselves. Based on the discussion in
+        // <libbutl/target-triplet.mxx>, we need something in the
+        // CPU-VENDOR-OS-ABI form.
+        //
+        // The CPU part is fairly straightforward with x86 mapped to 'i386'
+        // (or maybe 'i686'), x64 to 'x86_64', and ARM to 'arm' (it could also
+        // include the version, e.g., 'armv8').
+        //
+        // The (toolchain) VENDOR is also straightforward: 'microsoft'. Why
+        // not omit it? Two reasons: firstly, there are other compilers with
+        // the otherwise same target, for example Intel C/C++, and it could be
+        // useful to distinguish between them. Secondly, by having all four
+        // components we remove any parsing ambiguity.
+        //
+        // OS-ABI is where things are not as clear cut. The OS part shouldn't
+        // probably be just 'windows' since we have Win32 and WinCE. And
+        // WinRT.  And Universal Windows Platform (UWP). So perhaps the
+        // following values for OS: 'win32', 'wince', 'winrt', 'winup'.
+        //
+        // For 'win32' the ABI part could signal the Microsoft C/C++ runtime
+        // by calling it 'msvc'. And seeing that the runtimes are incompatible
+        // from version to version, we should probably add the 'X.Y' version
+        // at the end (so we essentially mimic the DLL name, for example,
+        // msvcr120.dll). Some suggested we also encode the runtime type
+        // (those pesky /M* options) though I am not sure: the only
+        // "redistributable" runtime is multi-threaded release DLL.
+        //
+        // The ABI part for the other OS values needs thinking. For 'winrt'
+        // and 'winup' it probably makes sense to encode the WINAPI_FAMILY
+        // macro value (perhaps also with the version). Some of its values:
+        //
+        // WINAPI_FAMILY_APP        Windows 10
+        // WINAPI_FAMILY_PC_APP     Windows 8.1
+        // WINAPI_FAMILY_PHONE_APP  Windows Phone 8.1
+        //
+        // For 'wince' we may also want to add the OS version, for example,
+        // 'wince4.2'.
+        //
+        // Putting it all together, Visual Studio 2015 will then have the
+        // following target triplets:
+        //
+        // x86  i386-microsoft-win32-msvc14.0
+        // x64  x86_64-microsoft-win32-msvc14.0
+        // ARM  arm-microsoft-winup-???
+        //
+        if (arch == "ARM")
+          fail << "cl.exe ARM/WinRT/UWP target is not yet supported";
+        else
+        {
+          if (arch == "x64")
+            t = "x86_64-microsoft-win32-msvc";
+          else if (arch == "x86" || arch == "80x86")
+            t = "i386-microsoft-win32-msvc";
+          else
+            assert (false);
+
+          // Mapping of compiler versions to runtime versions:
+          //
+          // Note that VC 15 has runtime version 14.1 but the DLLs are still
+          // called *140.dll (they are said to be backwards-compatible).
+          //
+          // And VC 16 seems to have the runtime version 14.1 (and not 14.2,
+          // as one might expect; DLLs are still *140.dll but there are now _1
+          // and _2 variants for, say, msvcp140.dll). We will, however, call
+          // it 14.2 (which is the version of the "toolset") in our target
+          // triplet.
+          //
+          // year   ver   cl     crt/dll   toolset
+          //
+          // 2019   16.1  19.21  14.2/140  14.21
+          // 2019   16.0  19.20  14.2/140
+          // 2017   15.9  19.16  14.1/140
+          // 2017   15.8  19.15  14.1/140
+          // 2017   15.7  19.14  14.1/140
+          // 2017   15.6  19.13  14.1/140
+          // 2017   15.5  19.12  14.1/140
+          // 2017   15.3  19.11  14.1/140
+          // 2017   15    19.10  14.1/140
+          // 2015   14    19.00  14.0/140
+          // 2013   12    18.00  12.0/120
+          // 2012   11    17.00  11.0/110
+          // 2010   10    16.00  10.0/100
+          // 2008    9    15.00   9.0/90
+          // 2005    8    14.00   8.0/80
+          // 2003  7.1    13.10   7.1/71
+          //
+          // _MSC_VER is the numeric cl version, e.g., 1921 for 19.21.
+          //
+          /**/ if (v.major == 19 && v.minor >= 20) t += "14.2";
+          else if (v.major == 19 && v.minor >= 10) t += "14.1";
+          else if (v.major == 19 && v.minor ==  0) t += "14.0";
+          else if (v.major == 18 && v.minor ==  0) t += "12.0";
+          else if (v.major == 17 && v.minor ==  0) t += "11.0";
+          else if (v.major == 16 && v.minor ==  0) t += "10.0";
+          else if (v.major == 15 && v.minor ==  0) t += "9.0";
+          else if (v.major == 14 && v.minor ==  0) t += "8.0";
+          else if (v.major == 13 && v.minor == 10) t += "7.1";
+          else fail << "unable to map msvc compiler version '" << v.string
+                    << "' to runtime version";
+        }
+
+        ot = t;
+      }
+      else
+        ot = t = *xt;
+
+      // Derive the toolchain pattern.
+      //
+      // If the compiler name is/starts with 'cl' (e.g., cl.exe, cl-14),
+      // then replace it with '*' and use it as a pattern for lib, link,
+      // etc.
+      //
+      string cpat (pattern (xc, "cl", nullptr, ".-"));
+      string bpat (cpat); // Binutils pattern is the same as toolchain.
+
+      // Runtime and standard library.
+      //
+      string rt ("msvc");
+      string csl ("msvc");
+      string xsl;
+      switch (xl)
+      {
+      case lang::c:   xsl = csl;     break;
+      case lang::cxx: xsl = "msvcp"; break;
+      }
+
+      return compiler_info {
+        move (gr.path),
+        move (gr.id),
+        compiler_class::msvc,
+        move (v),
+        move (gr.signature),
+        "",
+        move (t),
+        move (ot),
+        move (cpat),
+        move (bpat),
+        move (rt),
+        move (csl),
+        move (xsl)};
+    }
+
+    // Compiler checks can be expensive (we often need to run the compiler
+    // several times) so we cache the result.
+    //
+    static map<string, compiler_info> cache; // Key: guess() input checksum.
+
+    const compiler_info&            // Reference to the cache entry.
+    guess (const char* xm,          // Module (for diagnostics).
+           lang xl,                 // Language.
+           const path& xc,          // Compiler path.
+           const string* xis,       // Compiler id (optional, can be NULL).
+           const string* xv,        // Compiler version (optional).
+           const string* xt,        // Compiler target (optional).
+           const strings* c_po, const strings* x_po,
+           const strings* c_co, const strings* x_co,
+           const strings* c_lo, const strings* x_lo)
+    {
+      // First check the cache.
+      //
+      string key;
+      {
+        sha256 cs;
+        cs.append (static_cast<size_t> (xl));
+        cs.append (xc.string ());
+        // The id, version, and target overrides all affect the result so
+        // all three must be part of the key.
+        //
+        for (const string* s: {xis, xv, xt})
+          if (s != nullptr) cs.append (*s);
+        for (const strings* o: {c_po, x_po, c_co, x_co, c_lo, x_lo})
+          if (o != nullptr) hash_options (cs, *o);
+        key = cs.string ();
+
+        auto i (cache.find (key));
+        if (i != cache.end ())
+          return i->second;
+      }
+
+      // Parse the user-specified compiler id (config.x.id).
+      //
+      optional<compiler_id> xi;
+      if (xis != nullptr)
+      {
+        try
+        {
+          xi = compiler_id (*xis);
+        }
+        catch (const invalid_argument& e)
+        {
+          fail << "invalid compiler id '" << *xis << "' "
+               << "specified in variable config." << xm << ".id: " << e;
+        }
+      }
+
+      pair<compiler_type, size_t> pre (pre_guess (xl, xc, xi));
+      compiler_type& type (pre.first); // pre.second is the name position.
+
+      // If we could pre-guess the type based on the executable name, then
+      // try the test just for that compiler.
+      //
+      guess_result gr;
+
+      if (type != invalid_compiler_type)
+      {
+        gr = guess (xm, xl, xc, xi, type);
+
+        if (gr.empty ())
+        {
+          warn << xc << " looks like " << type << " but it is not" <<
+            info << "use config." << xm << " to override";
+
+          type = invalid_compiler_type; // Clear pre-guess.
+        }
+      }
+
+      if (gr.empty ()) // No (or wrong) pre-guess.
+        gr = guess (xm, xl, xc, xi, type);
+
+      if (gr.empty ())
+        fail << "unable to guess " << xl << " compiler type of " << xc <<
+          info << "use config." << xm << ".id to specify explicitly";
+
+      compiler_info r;
+      const compiler_id& id (gr.id); // Only used before gr is moved.
+
+      switch (id.type)
+      {
+      case compiler_type::gcc:
+        {
+          r = guess_gcc (xm, xl, xc, xv, xt,
+                         c_po, x_po, c_co, x_co, c_lo, x_lo,
+                         move (gr));
+          break;
+        }
+      case compiler_type::clang:
+        {
+          r = guess_clang (xm, xl, xc, xv, xt,
+                           c_po, x_po, c_co, x_co, c_lo, x_lo,
+                           move (gr));
+          break;
+        }
+      case compiler_type::msvc:
+        {
+          r = guess_msvc (xm, xl, xc, xv, xt,
+                          c_po, x_po, c_co, x_co, c_lo, x_lo,
+                          move (gr));
+          break;
+        }
+      case compiler_type::icc:
+        {
+          r = guess_icc (xm, xl, xc, xv, xt,
+                         c_po, x_po, c_co, x_co, c_lo, x_lo,
+                         move (gr));
+          break;
+        }
+      }
+
+      // By default use the signature line to generate the checksum.
+      //
+      if (r.checksum.empty ())
+        r.checksum = sha256 (r.signature).string ();
+
+      // Derive binutils pattern unless this has already been done by the
+      // compiler-specific code.
+      //
+
+      // When cross-compiling the whole toolchain is normally prefixed with
+      // the target triplet, e.g., x86_64-w64-mingw32-{gcc,g++,ar,ld}. But
+      // oftentimes it is not quite canonical (and sometimes -- outright
+      // bogus). So instead we are going to first try to derive the prefix
+      // using the pre-guessed position of the compiler name. Note that we
+      // still want to try the target in case we could not pre-guess (think
+      // x86_64-w64-mingw32-c++).
+      //
+      // BTW, for GCC we also get gcc-{ar,ranlib} (but not -ld) which add
+      // support for the LTO plugin though it seems more recent GNU binutils
+      // (2.25) are able to load the plugin when needed automatically. So it
+      // doesn't seem we should bother trying to support this on our end (one
+      // way we could do it is by passing config.bin.{ar,ranlib} as hints).
+      //
+      // It's also normal for native (i.e., non-cross-compiler) builds of GCC
+      // and Clang to not have binutils installed in the same directory and
+      // instead relying on the system ones. In this case, if the compiler is
+      // specified with the absolute path, the pattern will be the fallback
+      // search directory (though it feels like it should be checked first
+      // rather than last).
+      //
+      if (r.bin_pattern.empty ())
+      {
+        if (pre.second != 0 &&
+            pre.second != string::npos &&
+            !path::traits_type::is_separator (xc.string ()[pre.second - 1]))
+        {
+          r.bin_pattern.assign (xc.string (), 0, pre.second);
+          r.bin_pattern += '*'; // '-' or similar is already there.
+        }
+      }
+
+      if (r.bin_pattern.empty ())
+      {
+        const string& t (r.target);
+        size_t n (t.size ());
+
+        if (xc.size () > n + 1)
+        {
+          const string& l (xc.leaf ().string ());
+
+          if (l.size () > n + 1 && l.compare (0, n, t) == 0 && l[n] == '-')
+          {
+            path p (xc.directory ());
+            p /= t;
+            p += "-*";
+            r.bin_pattern = move (p).string ();
+          }
+        }
+      }
+
+      // If we could not derive the pattern, then see if we can come up with a
+      // fallback search directory.
+      //
+      if (r.bin_pattern.empty ())
+      {
+        const path& p (r.path.recall.empty () ? xc : r.path.recall);
+
+        if (!p.simple ())
+          r.bin_pattern = p.directory ().representation (); // Trailing slash.
+      }
+
+      return (cache[key] = move (r));
+    }
+
+    path
+    guess_default (lang xl, const string& cid, const string& pat)
+    {
+      using type = compiler_type;
+
+      // Parse the id first (the constructor throws if it is invalid).
+      //
+      const compiler_id ci (cid);
+
+      // Select between the C and C++ driver names based on the language.
+      // Yields NULL for any language other than these two.
+      //
+      auto pick = [xl] (const char* c, const char* cxx) -> const char*
+      {
+        switch (xl)
+        {
+        case lang::c:   return c;
+        case lang::cxx: return cxx;
+        }
+
+        return nullptr;
+      };
+
+      // Compiler type to driver name. The name stays NULL if the type is
+      // not one of the known ones.
+      //
+      const char* n (nullptr);
+
+      switch (ci.type)
+      {
+      case type::gcc:   n = pick ("gcc",   "g++");     break;
+      case type::clang: n = pick ("clang", "clang++"); break;
+      case type::icc:   n = pick ("icc",   "icpc");    break;
+      case type::msvc:  n = pick ("cl",    "cl");      break;
+      }
+
+      return path (apply_pattern (n, &pat));
+    }
+  }
+}
diff --git a/libbuild2/cc/guess.hxx b/libbuild2/cc/guess.hxx
new file mode 100644
index 0000000..3677cc7
--- /dev/null
+++ b/libbuild2/cc/guess.hxx
@@ -0,0 +1,246 @@
+// file      : libbuild2/cc/guess.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_GUESS_HXX
+#define LIBBUILD2_CC_GUESS_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Compiler id consisting of a type and optional variant. If the variant
+    // is not empty, then the id is spelled out as 'type-variant', similar to
+    // target triplets (this also means that the type cannot contain '-').
+    //
+    // Currently recognized compilers and their ids:
+    //
+    // gcc          GCC gcc/g++
+    // clang        Vanilla Clang clang/clang++
+    // clang-apple  Apple Clang clang/clang++ and the gcc/g++ "alias"
+    // msvc         Microsoft cl.exe
+    // icc          Intel icc/icpc
+    //
+    // Note that the user can provide a custom id with one of the predefined
+    // types and a custom variant (say 'gcc-tasking').
+    //
+    enum class compiler_type
+    {
+      gcc = 1, // 0 value represents invalid type.
+      clang,   // Both vanilla and Apple Clang (distinguished by variant).
+      msvc,    // Microsoft cl.exe.
+      icc      // Intel icc/icpc.
+      // Update compiler_id(string) and to_string() if adding a new type.
+    };
+
+    const compiler_type invalid_compiler_type = static_cast<compiler_type> (0);
+
+    string
+    to_string (compiler_type); // Must be kept in sync with the enum.
+
+    inline ostream&
+    operator<< (ostream& o, const compiler_type& t)
+    {
+      return o << to_string (t); // Print the to_string() representation.
+    }
+
+    struct compiler_id
+    {
+      compiler_type type = invalid_compiler_type;
+      std::string   variant; // Empty if there is no variant.
+
+      bool
+      empty () const {return type == invalid_compiler_type;}
+
+      std::string
+      string () const; // Id spelling, presumably 'type[-variant]'.
+
+      compiler_id ()
+          : type (invalid_compiler_type) {}
+
+      compiler_id (compiler_type t, std::string v)
+          : type (t), variant (move (v)) {}
+
+      explicit
+      compiler_id (const std::string&); // Parse; guess() catches invalid_argument.
+    };
+
+    inline ostream&
+    operator<< (ostream& o, const compiler_id& id)
+    {
+      return o << id.string (); // Print the string() representation.
+    }
+
+    // Compiler class describes a set of compilers that follow more or less
+    // the same command line interface. Compilers that don't belong to any of
+    // the existing classes are in classes of their own (say, Sun CC would be
+    // on its own if we were to support it).
+    //
+    // Currently defined compiler classes:
+    //
+    // gcc          gcc, clang, clang-apple, icc (on non-Windows)
+    // msvc         msvc, clang-cl, icc (Windows)
+    //
+    enum class compiler_class
+    {
+      gcc,  // gcc, clang, clang-apple, icc (on non-Windows).
+      msvc  // msvc, clang-cl, icc (on Windows).
+    };
+
+    string
+    to_string (compiler_class); // String representation of the class.
+
+    inline ostream&
+    operator<< (ostream& o, compiler_class c)
+    {
+      return o << to_string (c); // Print the to_string() representation.
+    }
+
+    // Compiler version. Here we map the various compiler version formats to
+    // something that resembles the MAJOR.MINOR.PATCH-BUILD form of the
+    // Semantic Versioning. While the MAJOR.MINOR part is relatively
+    // straightforward, PATCH may be empty and BUILD can contain pretty much
+    // anything (including spaces).
+    //
+    // gcc           A.B.C[ ...]         {A, B, C, ...}
+    // clang         A.B.C[( |-)...]     {A, B, C, ...}
+    // clang-apple   A.B[.C] ...         {A, B, C, ...}
+    // icc           A.B[.C.D] ...       {A, B, C, D ...}
+    // msvc          A.B.C[.D]           {A, B, C, D}
+    //
+    // Note that the clang-apple version is a custom Apple version and does
+    // not correspond to the vanilla clang version.
+    //
+    struct compiler_version
+    {
+      std::string string; // String representation (used in diagnostics).
+
+      // Currently all the compilers that we support have numeric MAJOR,
+      // MINOR, and PATCH components and it makes sense to represent them as
+      // integers for easy comparison. If we meet a compiler for which this
+      // doesn't hold, then we will probably just set these to 0 and let the
+      // user deal with the string representation.
+      //
+      uint64_t major; // E.g., 19 in msvc cl 19.21.
+      uint64_t minor; // E.g., 21 in msvc cl 19.21.
+      uint64_t patch;
+      std::string build; // Can contain pretty much anything (see above).
+    };
+
+    // Compiler information.
+    //
+    // The signature is normally the -v/--version line that was used to guess
+    // the compiler id and its version.
+    //
+    // The checksum is used to detect compiler changes. It is calculated in a
+    // compiler-specific manner (usually the output of -v/--version) and is
+    // not bulletproof (e.g., it most likely won't detect that the underlying
+    // assembler or linker has changed). However, it should detect most
+    // common cases, such as an upgrade to a new version or a configuration
+    // change.
+    //
+    // Note that we assume the checksum incorporates the (default) target so
+    // that if the compiler changes but only in what it targets, then the
+    // checksum will still change. This is currently the case for all the
+    // compilers that we support.
+    //
+    // The target is the compiler's target architecture triplet. Note that
+    // unlike all the preceding fields, this one takes into account the
+    // compile options (e.g., -m32).
+    //
+    // The pattern is the toolchain program pattern that could sometimes be
+    // derived for some toolchains. For example, i686-w64-mingw32-*-4.9.
+    //
+    // The bin_pattern is the binutils program pattern that could sometimes be
+    // derived for some toolchains. For example, i686-w64-mingw32-*. If the
+    // pattern could not be derived, then it could contain a fallback search
+    // directory, in which case it will end with a directory separator but
+    // will not contain '*'.
+    //
+    struct compiler_info
+    {
+      process_path path; // Compiler process path (see guess()).
+      compiler_id id;
+      compiler_class class_;
+      compiler_version version;
+      string signature;
+      string checksum; // guess() defaults it to the signature's SHA256.
+      string target;
+      string original_target; // As reported by the compiler.
+      string pattern;
+      string bin_pattern; // guess() tries to derive it if left empty.
+
+      // Compiler runtime, C standard library, and language (e.g., C++)
+      // standard library.
+      //
+      // The runtime is the low-level compiler runtime library and its name is
+      // the library/project name. Current values are (but can also be some
+      // custom name specified with Clang's --rtlib):
+      //
+      // libgcc
+      // compiler-rt  (clang)
+      // msvc
+      //
+      // The C standard library is normally the library/project name (e.g,
+      // glibc, klibc, newlib, etc) but if there is none, then we fallback to
+      // the vendor name (e.g., freebsd, apple). Current values are:
+      //
+      // glibc
+      // msvc         (msvcrt.lib/msvcrNNN.dll)
+      // freebsd
+      // apple
+      // newlib       (also used by Cygwin)
+      // klibc
+      // bionic
+      // uclibc
+      // musl
+      // dietlibc
+      // other
+      // none
+      //
+      // The C++ standard library is normally the library/project name.
+      // Current values are:
+      //
+      // libstdc++
+      // libc++
+      // msvcp        (msvcprt.lib/msvcpNNN.dll)
+      // other
+      // none
+      //
+      string runtime;
+      string c_stdlib;
+      string x_stdlib; // Same as c_stdlib if the language is C (see guess.cxx).
+    };
+
+    // In a sense this is analogous to the language standard which we handle
+    // via a virtual function in common. However, duplicating this hairy ball
+    // of fur in multiple places doesn't seem wise, especially considering
+    // that most of it will be the same, at least for C and C++.
+    //
+    const compiler_info& // Reference to the cached result (see guess.cxx).
+    guess (const char* xm,    // Module (for variable names in diagnostics).
+           lang xl,           // Language.
+           const path& xc,    // Compiler path.
+           const string* xi,  // Compiler id (optional, that is, can be NULL).
+           const string* xv,  // Compiler version (optional).
+           const string* xt,  // Compiler target (optional).
+           const strings* c_poptions, const strings* x_poptions,
+           const strings* c_coptions, const strings* x_coptions,
+           const strings* c_loptions, const strings* x_loptions);
+
+    // Given a language, compiler id (as a string, for example, 'gcc'), and
+    // optionally an (empty) pattern, return an appropriate default compiler
+    // path. For example, for (lang::cxx, gcc, *-4.9) we will get g++-4.9
+    // (that is, the default name, here g++, is substituted for '*').
+    //
+    path
+    guess_default (lang, const string& cid, const string& pattern);
+  }
+}
+
+#endif // LIBBUILD2_CC_GUESS_HXX
diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx
new file mode 100644
index 0000000..f45a1bf
--- /dev/null
+++ b/libbuild2/cc/init.cxx
@@ -0,0 +1,493 @@
+// file      : libbuild2/cc/init.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/init.hxx>
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/config/utility.hxx>
+
+#include <libbuild2/cc/target.hxx>
+#include <libbuild2/cc/utility.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    // Scope operation callback that cleans up module sidebuilds.
+    //
+    static target_state
+    clean_module_sidebuilds (action, const scope& rs, const dir&)
+    {
+      context& ctx (rs.ctx);
+
+      // All the directories involved are inside the build/ subdirectory of
+      // out_root.
+      //
+      dir_path bd (rs.out_path () / rs.root_extra->build_dir);
+      dir_path sd (bd / modules_sidebuild_dir);
+
+      // Nothing changes if there are no sidebuilds or if nothing has been
+      // removed.
+      //
+      if (!exists (sd) || !rmdir_r (ctx, sd))
+        return target_state::unchanged;
+
+      // Clean up cc/ if it became empty.
+      //
+      dir_path md (bd / module_dir);
+
+      if (empty (md))
+      {
+        rmdir (ctx, md);
+
+        // And build/ if it also became empty (e.g., in case of a build
+        // with a transient configuration).
+        //
+        if (empty (bd))
+          rmdir (ctx, bd);
+      }
+
+      return target_state::changed;
+    }
+
+    bool // Enter cc.* variables, register clean callback; always true.
+    core_vars_init (scope& rs, // NOTE(review): presumably the root scope.
+                    scope&,
+                    const location& loc, // Passed on to load_module().
+                    unique_ptr<module_base>&,
+                    bool first, // Asserted to be true below.
+                    bool,
+                    const variable_map&) // Hints; not used here.
+    {
+      tracer trace ("cc::core_vars_init");
+      l5 ([&]{trace << "for " << rs;});
+
+      assert (first); // Only the first load is supported.
+
+      // Load bin.vars (we need its config.bin.target/pattern for hints).
+      //
+      if (!cast_false<bool> (rs["bin.vars.loaded"]))
+        load_module (rs, rs, "bin.vars", loc);
+
+      // Enter variables. Note: some overridable, some not.
+      //
+      auto& v (rs.ctx.var_pool.rw (rs));
+
+      auto v_t (variable_visibility::target); // Shorthand.
+
+      v.insert<strings> ("config.cc.poptions", true); // true: overridable.
+      v.insert<strings> ("config.cc.coptions", true);
+      v.insert<strings> ("config.cc.loptions", true);
+      v.insert<strings> ("config.cc.aoptions", true);
+      v.insert<strings> ("config.cc.libs",     true);
+
+      v.insert<strings> ("cc.poptions");
+      v.insert<strings> ("cc.coptions");
+      v.insert<strings> ("cc.loptions");
+      v.insert<strings> ("cc.aoptions");
+      v.insert<strings> ("cc.libs");
+
+      v.insert<strings>      ("cc.export.poptions");
+      v.insert<strings>      ("cc.export.coptions");
+      v.insert<strings>      ("cc.export.loptions");
+      v.insert<vector<name>> ("cc.export.libs");
+
+      // Hint variables (not overridable).
+      //
+      v.insert<string>         ("config.cc.id");
+      v.insert<string>         ("config.cc.hinter"); // Hinting module.
+      v.insert<string>         ("config.cc.pattern");
+      v.insert<target_triplet> ("config.cc.target");
+
+      // Compiler runtime and C standard library.
+      //
+      v.insert<string> ("cc.runtime");
+      v.insert<string> ("cc.stdlib");
+
+      // Target type, for example, "C library" or "C++ library". Should be set
+      // on the target as a rule-specific variable by the matching rule to the
+      // name of the module (e.g., "c", "cxx"). Currently only set for
+      // libraries and is used to decide which *.libs to use during static
+      // linking.
+      //
+      // It can also be the special "cc" value which means a C-common library
+      // but specific language is not known. Used in the import installed
+      // logic.
+      //
+      v.insert<string> ("cc.type", v_t);
+
+      // If set and is true, then this (imported) library has been found in a
+      // system library search directory.
+      //
+      v.insert<bool> ("cc.system", v_t);
+
+      // C++ module name. Set on the bmi*{} target as a rule-specific variable
+      // by the matching rule. Can also be set by the user (normally via the
+      // x.module_name alias) on the x_mod{} source.
+      //
+      v.insert<string> ("cc.module_name", v_t);
+
+      // Ability to disable using preprocessed output for compilation.
+      //
+      v.insert<bool> ("config.cc.reprocess", true);
+      v.insert<bool> ("cc.reprocess");
+
+      // Register scope operation callback.
+      //
+      // It feels natural to do clean up sidebuilds as a post operation but
+      // that prevents the (otherwise-empty) out root directory to be cleaned
+      // up (via the standard fsdir{} chain).
+      //
+      rs.operation_callbacks.emplace (
+        perform_clean_id,
+        scope::operation_callback {&clean_module_sidebuilds, nullptr /*post*/});
+
+      return true;
+    }
+
+    bool // Set cc.{id,hinter,target,pattern,...} from hints; always true.
+    core_guess_init (scope& rs, // NOTE(review): presumably the root scope.
+                     scope&,
+                     const location& loc, // Passed on to load_module().
+                     unique_ptr<module_base>&,
+                     bool first, // Asserted to be true below.
+                     bool,
+                     const variable_map& h) // Hints (source of all values).
+    {
+      tracer trace ("cc::core_guess_init");
+      l5 ([&]{trace << "for " << rs;});
+
+      assert (first); // Only the first load is supported.
+
+      // Load cc.core.vars.
+      //
+      if (!cast_false<bool> (rs["cc.core.vars.loaded"]))
+        load_module (rs, rs, "cc.core.vars", loc);
+
+      // config.cc.{id,hinter}
+      //
+      {
+        // These values must be hinted.
+        //
+        rs.assign<string> ("cc.id") = cast<string> (h["config.cc.id"]);
+        rs.assign<string> ("cc.hinter") = cast<string> (h["config.cc.hinter"]);
+      }
+
+      // config.cc.target
+      //
+      {
+        // This value must be hinted.
+        //
+        const auto& t (cast<target_triplet> (h["config.cc.target"]));
+
+        // Also enter as cc.target.{cpu,vendor,system,version,class} for
+        // convenience of access.
+        //
+        rs.assign<string> ("cc.target.cpu")     = t.cpu;
+        rs.assign<string> ("cc.target.vendor")  = t.vendor;
+        rs.assign<string> ("cc.target.system")  = t.system;
+        rs.assign<string> ("cc.target.version") = t.version;
+        rs.assign<string> ("cc.target.class")   = t.class_;
+
+        rs.assign<target_triplet> ("cc.target") = t; // The whole triplet.
+      }
+
+      // config.cc.pattern
+      //
+      {
+        // This value could be hinted.
+        //
+        rs.assign<string> ("cc.pattern") =
+          cast_empty<string> (h["config.cc.pattern"]); // Presumably empty if not.
+      }
+
+      // cc.runtime
+      // cc.stdlib   (as with id/target, taken from the hints)
+      //
+      rs.assign ("cc.runtime") = cast<string> (h["cc.runtime"]);
+      rs.assign ("cc.stdlib") = cast<string> (h["cc.stdlib"]);
+
+      return true;
+    }
+
+    bool // Set cc.*options/libs, load bin.*config modules; always true.
+    core_config_init (scope& rs, // NOTE(review): presumably the root scope.
+                      scope&,
+                      const location& loc, // For load_module() and fail.
+                      unique_ptr<module_base>&,
+                      bool first, // Asserted to be true below.
+                      bool,
+                      const variable_map& hints) // Only bin pattern is used.
+    {
+      tracer trace ("cc::core_config_init");
+      l5 ([&]{trace << "for " << rs;});
+
+      assert (first); // Only the first load is supported.
+
+      // Load cc.core.guess.
+      //
+      if (!cast_false<bool> (rs["cc.core.guess.loaded"]))
+        load_module (rs, rs, "cc.core.guess", loc);
+
+      // Configure.
+      //
+
+      // Adjust module priority (compiler).
+      //
+      config::save_module (rs, "cc", 250);
+
+      // Note that we are not having a config report since it will just
+      // duplicate what has already been printed by the hinting module.
+
+      // config.cc.{p,c,l}options
+      // config.cc.libs
+      //
+      // @@ Same nonsense as in module.
+      //
+      //
+      rs.assign ("cc.poptions") += cast_null<strings> (
+        config::optional (rs, "config.cc.poptions"));
+
+      rs.assign ("cc.coptions") += cast_null<strings> (
+        config::optional (rs, "config.cc.coptions"));
+
+      rs.assign ("cc.loptions") += cast_null<strings> (
+        config::optional (rs, "config.cc.loptions"));
+
+      rs.assign ("cc.aoptions") += cast_null<strings> (
+        config::optional (rs, "config.cc.aoptions"));
+
+      rs.assign ("cc.libs") += cast_null<strings> (
+        config::optional (rs, "config.cc.libs"));
+
+      if (lookup l = config::omitted (rs, "config.cc.reprocess").first)
+        rs.assign ("cc.reprocess") = *l; // Only assigned if specified.
+
+      // Load the bin.config module.
+      //
+      if (!cast_false<bool> (rs["bin.config.loaded"]))
+      {
+        // Prepare configuration hints. They are only used on the first load
+        // of bin.config so we only populate them on our first load.
+        //
+        variable_map h (rs.ctx);
+
+        if (first) // Always true given the assert above.
+        {
+          // Note that all these variables have already been registered.
+          //
+          h.assign ("config.bin.target") =
+            cast<target_triplet> (rs["cc.target"]).string ();
+
+          if (auto l = hints["config.bin.pattern"])
+            h.assign ("config.bin.pattern") = cast<string> (l);
+        }
+
+        load_module (rs, rs, "bin.config", loc, false, h);
+      }
+
+      // Verify bin's target matches ours (we do it even if we loaded it
+      // ourselves since the target can come from the configuration and not
+      // our hint).
+      //
+      if (first) // Always true given the assert above.
+      {
+        const auto& ct (cast<target_triplet> (rs["cc.target"]));
+        const auto& bt (cast<target_triplet> (rs["bin.target"]));
+
+        if (bt != ct)
+        {
+          const auto& h (cast<string> (rs["cc.hinter"]));
+
+          fail (loc) << h << " and bin module target mismatch" <<
+            info << h << " target is " << ct <<
+            info << "bin target is " << bt;
+        }
+      }
+
+      // Load bin.*.config for bin.* modules we may need (see core_init()
+      // below).
+      //
+      const string& tsys (cast<string> (rs["cc.target.system"]));
+
+      if (!cast_false<bool> (rs["bin.ar.config.loaded"]))
+        load_module (rs, rs, "bin.ar.config", loc);
+
+      if (tsys == "win32-msvc") // Links with link.exe (see core_init()).
+      {
+        if (!cast_false<bool> (rs["bin.ld.config.loaded"]))
+          load_module (rs, rs, "bin.ld.config", loc);
+      }
+
+      if (tsys == "mingw32") // Needs windres (see core_init()).
+      {
+        if (!cast_false<bool> (rs["bin.rc.config.loaded"]))
+          load_module (rs, rs, "bin.rc.config", loc);
+      }
+
+      return true;
+    }
+
+    bool
+    core_init (scope& rs,
+               scope&,
+               const location& loc,
+               unique_ptr<module_base>&,
+               bool first,
+               bool,
+               const variable_map& hints)
+    {
+      tracer trace ("cc::core_init");
+      l5 ([&]{trace << "for " << rs;});
+
+      assert (first);
+
+      // The target system (set by cc.core.guess) determines which of the
+      // bin.* modules we need in addition to bin.ar.
+      //
+      const string& tsys (cast<string> (rs["cc.target.system"]));
+
+      // Load the module unless its *.loaded variable is already true.
+      //
+      auto load = [&rs, &loc] (const char* name, const char* loaded)
+      {
+        if (!cast_false<bool> (rs[loaded]))
+          load_module (rs, rs, name, loc);
+      };
+
+      // Load cc.core.config (the only one that is passed the hints).
+      //
+      if (!cast_false<bool> (rs["cc.core.config.loaded"]))
+        load_module (rs, rs, "cc.core.config", loc, false, hints);
+
+      // Load the bin and bin.ar modules.
+      //
+      load ("bin",    "bin.loaded");
+      load ("bin.ar", "bin.ar.loaded");
+
+      // For this target we link things directly with link.exe so load the
+      // bin.ld module.
+      //
+      if (tsys == "win32-msvc")
+        load ("bin.ld", "bin.ld.loaded");
+
+      // If our target is MinGW, then we will need the resource compiler
+      // (windres) in order to embed manifests into executables.
+      //
+      if (tsys == "mingw32")
+        load ("bin.rc", "bin.rc.loaded");
+
+      return true;
+    }
+
+    // The cc module acts as an "alias" for the c and cxx modules. Its purpose
+    // is to get the C/C++ configuration captured in an amalgamation rather
+    // than in its subprojects.
+    //
+    static inline bool
+    init_alias (tracer& trace,
+                scope& rs,
+                scope& bs,
+                const char* m,
+                const char* c,
+                const char* c_loaded,
+                const char* cxx,
+                const char* cxx_loaded,
+                const location& loc,
+                const variable_map& hints)
+    {
+      l5 ([&]{trace << "for " << bs;});
+
+      // Only loading in the project root is supported (which means there can
+      // only be one).
+      //
+      if (&rs != &bs)
+        fail (loc) << m << " module must be loaded in project root";
+
+      // Figure out which of the two modules still need loading.
+      //
+      const bool need_c   (!cast_false<bool> (rs[c_loaded]));
+      const bool need_cxx (!cast_false<bool> (rs[cxx_loaded]));
+
+      // The loading order should match what the user specified on the command
+      // line (config.c or config.cxx) so that the first loaded module (with
+      // user-specified config.*) hints the compiler to the second. Thus c
+      // only goes first if neither is loaded yet and config.c is specified.
+      //
+      const bool c_first (need_c && need_cxx && rs["config.c"]);
+
+      if (c_first)
+        load_module (rs, rs, c, loc, false, hints);
+
+      if (need_cxx)
+        load_module (rs, rs, cxx, loc, false, hints);
+
+      if (need_c && !c_first)
+        load_module (rs, rs, c, loc, false, hints);
+
+      return true;
+    }
+
+    bool
+    config_init (scope& rs,
+                 scope& bs,
+                 const location& loc,
+                 unique_ptr<module_base>&, // Unused.
+                 bool,                     // Unused.
+                 bool,                     // Unused.
+                 const variable_map& hints)
+    {
+      tracer trace ("cc::config_init");
+      return init_alias (trace, rs, bs,
+                         "cc.config",  // Our name (used in diagnostics).
+                         "c.config",   "c.config.loaded",
+                         "cxx.config", "cxx.config.loaded",
+                         loc, hints);
+    }
+
+    bool
+    init (scope& rs,
+          scope& bs,
+          const location& loc,
+          unique_ptr<module_base>&, // Unused.
+          bool,                     // Unused.
+          bool,                     // Unused.
+          const variable_map& hints)
+    {
+      tracer trace ("cc::init");
+      return init_alias (trace, rs, bs,
+                         "cc",  // Our name (used in diagnostics).
+                         "c",   "c.loaded",
+                         "cxx", "cxx.loaded",
+                         loc, hints);
+    }
+
+    static const module_functions mod_functions[] =
+    {
+      // NOTE: don't forget to also update the documentation in init.hxx if
+      //       changing anything here (it describes each of these submodules).
+
+      {"cc.core.vars",   nullptr, core_vars_init},
+      {"cc.core.guess",  nullptr, core_guess_init},
+      {"cc.core.config", nullptr, core_config_init},
+      {"cc.core",        nullptr, core_init},
+      {"cc.config",      nullptr, config_init}, // Alias for {c,cxx}.config.
+      {"cc",             nullptr, init},        // Alias for c and cxx.
+      {nullptr,          nullptr, nullptr}      // All-null entry ends the list.
+    };
+
+    const module_functions*
+    build2_cc_load ()
+    {
+      return mod_functions; // The cc.* entry point table defined above.
+    }
+  }
+}
diff --git a/libbuild2/cc/init.hxx b/libbuild2/cc/init.hxx
new file mode 100644
index 0000000..b98e816
--- /dev/null
+++ b/libbuild2/cc/init.hxx
@@ -0,0 +1,36 @@
+// file      : libbuild2/cc/init.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_INIT_HXX
+#define LIBBUILD2_CC_INIT_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/module.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Module `cc` does not require bootstrapping.
+    //
+    // Submodules:
+    //
+    // `cc.core.vars`   -- registers some variables.
+    // `cc.core.guess`  -- loads cc.core.vars and sets some variables.
+    // `cc.core.config` -- loads cc.core.guess and sets more variables.
+    // `cc.core`        -- loads cc.core.config and registers target types and
+    //                     rules.
+    // `cc.config`      -- loads {c,cxx}.config.
+    // `cc`             -- loads c and cxx.
+    //
+    extern "C" LIBBUILD2_CC_SYMEXPORT const module_functions*
+    build2_cc_load ();
+  }
+}
+
+#endif // LIBBUILD2_CC_INIT_HXX
diff --git a/libbuild2/cc/install-rule.cxx b/libbuild2/cc/install-rule.cxx
new file mode 100644
index 0000000..670757e
--- /dev/null
+++ b/libbuild2/cc/install-rule.cxx
@@ -0,0 +1,355 @@
+// file      : libbuild2/cc/install-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/install-rule.hxx>
+
+#include <libbuild2/algorithm.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/utility.hxx>
+#include <libbuild2/cc/link-rule.hxx> // match()
+
+using namespace std;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    // install_rule (un/installation of exe{} and lib*{})
+    //
+    install_rule::
+    install_rule (data&& d, const link_rule& l)
+        : common (move (d)), link_ (l) {}
+
+    const target* install_rule::
+    filter (action a, const target& t, prerequisite_iterator& i) const
+    {
+      // NOTE: see libux_install_rule::filter() if changing anything here.
+
+      const prerequisite& p (i->prerequisite);
+
+      // If this is a shared library prerequisite, install it as long as it
+      // is in the same amalgamation as we are.
+      //
+      // Less obvious: we also want to install a static library prerequisite
+      // of a library (since it could be referenced from its .pc file, etc).
+      //
+      // Note: for now we assume these prerequisites never come from see-
+      // through groups.
+      //
+      // Note: we install ad hoc prerequisites by default.
+      //
+      otype ot (link_type (t).type);
+
+      bool st (t.is_a<exe>  () || t.is_a<libs> ()); // Target needs shared.
+      bool at (t.is_a<liba> () || t.is_a<libs> ()); // Target needs static.
+
+      if ((st && (p.is_a<libx> () || p.is_a<libs> ())) ||
+          (at && (p.is_a<libx> () || p.is_a<liba> ())))
+      {
+        const target* pt (&search (t, p));
+
+        // If this is the lib{}/libu*{} group, pick a member which we would
+        // link. For libu*{} we want the "see through" logic.
+        //
+        if (const libx* l = pt->is_a<libx> ())
+          pt = link_member (*l, a, link_info (t.base_scope (), ot));
+
+        // Note: not redundant since we are returning a member.
+        //
+        if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ()))
+          return pt->in (t.weak_scope ()) ? pt : nullptr;
+
+        // See through to libu*{} members. Note that we are always in the same
+        // project (and thus amalgamation).
+        //
+        if (pt->is_a<libux> ())
+          return pt;
+      }
+
+      // The rest of the tests only succeed if the base filter() succeeds.
+      //
+      const target* pt (file_rule::filter (a, t, p));
+      if (pt == nullptr)
+        return pt;
+
+      // Don't install executable's prerequisite headers and module
+      // interfaces.
+      //
+      // Note that if they come from a group, then we assume the entire
+      // group is not to be installed.
+      //
+      if (t.is_a<exe> ())
+      {
+        if (x_header (p))
+          pt = nullptr;
+        else if (p.type.see_through)
+        {
+          for (i.enter_group (); i.group (); )
+          {
+            if (x_header (*++i))
+              pt = nullptr;
+          }
+        }
+
+        if (pt == nullptr)
+          return pt;
+      }
+
+      // Here is a problem: if the user spells the obj*/bmi*{} targets
+      // explicitly, then the source files, including headers/modules may be
+      // specified as prerequisites of those targets and not of this target.
+      // While this can be worked around for headers by also listing them as
+      // prerequisites of this target, this won't work for modules (since they
+      // are compiled). So what we are going to do here is detect bmi*{} and
+      // translate them to their mxx{} (this doesn't quite work for headers
+      // since there would normally be many of them).
+      //
+      // Note: for now we assume bmi*{} never come from see-through groups.
+      //
+      bool g (false);
+      if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi)))
+      {
+        if (g)
+          resolve_group (a, *pt);
+
+        for (prerequisite_member pm:
+               group_prerequisite_members (a, *pt, members_mode::maybe))
+        {
+          // This is tricky: we need to "look" inside groups for mxx{} but if
+          // found, remap to the group, not member.
+          //
+          if (pm.is_a (*x_mod))
+          {
+            pt = t.is_a<exe> ()
+              ? nullptr
+              : file_rule::filter (a, *pt, pm.prerequisite);
+            break;
+          }
+        }
+
+        if (pt == nullptr)
+          return pt;
+      }
+
+      return pt;
+    }
+
+    bool install_rule::
+    match (action a, target& t, const string& hint) const
+    {
+      // @@ How do we split the hint between the two?
+      //
+
+      // Only handle installation if link's match() says we build the target.
+      //
+      const bool ours (link_.match (a, t, hint));
+      return ours ? file_rule::match (a, t, "") : false;
+    }
+
+    recipe install_rule::
+    apply (action a, target& t) const
+    {
+      recipe r (file_rule::apply (a, t));
+
+      if (a.operation () != update_id) // install or uninstall
+      {
+        // When un/installing, derive the shared library paths and cache them
+        // in the target's aux storage (used in the *_extra() functions
+        // below).
+        //
+        static_assert (sizeof (link_rule::libs_paths) <= target::data_size,
+                       "insufficient space");
+
+        file* f (t.is_a<libs> ());
+
+        if (f != nullptr && !f->path ().empty ()) // Not binless.
+        {
+          const string* pfx (cast_null<string> (t["bin.lib.prefix"]));
+          const string* sfx (cast_null<string> (t["bin.lib.suffix"]));
+
+          // Pass NULL for an absent prefix/suffix.
+          //
+          const char* p (pfx != nullptr ? pfx->c_str () : nullptr);
+          const char* s (sfx != nullptr ? sfx->c_str () : nullptr);
+
+          t.data (link_.derive_libs_paths (*f, p, s));
+        }
+
+        return r;
+      }
+
+      // Let the link rule know that this is update for install. If, however,
+      // the update has already been executed, make sure that it was done for
+      // install.
+      //
+      auto& md (t.data<link_rule::match_data> ());
+
+      if (!md.for_install)
+        md.for_install = true;
+      else if (!*md.for_install)
+        fail << "target " << t << " already updated but not for install";
+
+      return r;
+    }
+
+    bool install_rule::
+    install_extra (const file& t, const install_dir& id) const
+    {
+      // Only shared libraries have extras (a bunch of symlinks) to install.
+      //
+      if (!t.is_a<libs> ())
+        return false;
+
+      const scope& rs (t.root_scope ());
+      auto& lp (t.data<link_rule::libs_paths> ());
+
+      const path& lk (lp.link);
+      const path& ld (lp.load);
+      const path& so (lp.soname);
+      const path& in (lp.interm);
+
+      // Each non-empty link points to the previous non-empty entry.
+      //
+      const path* chain[] = {&in, &so, &ld, &lk};
+      bool r (false);
+      const path* f (lp.real);
+
+      for (const path* l: chain)
+      {
+        if (l->empty ())
+          continue;
+        install_l (rs, id, f->leaf (), l->leaf (), 2 /* verbosity */);
+        r = true;
+        f = l;
+      }
+
+      return r;
+    }
+
+    bool install_rule::
+    uninstall_extra (const file& t, const install_dir& id) const
+    {
+      // Only shared libraries have extras (a bunch of symlinks) to uninstall.
+      //
+      if (!t.is_a<libs> ())
+        return false;
+
+      const scope& rs (t.root_scope ());
+      auto& lp (t.data<link_rule::libs_paths> ());
+
+      const path& lk (lp.link);
+      const path& ld (lp.load);
+      const path& so (lp.soname);
+      const path& in (lp.interm);
+
+      // Remove the non-empty links in the order opposite to installation.
+      //
+      const path* links[] = {&lk, &ld, &so, &in};
+
+      bool r (false);
+      for (const path* l: links)
+      {
+        if (!l->empty () &&
+            uninstall_f (rs, id, nullptr, l->leaf (), 2 /* verbosity */))
+          r = true;
+      }
+
+      return r;
+    }
+
+    // libux_install_rule (see-through installation of libu*{})
+    //
+    libux_install_rule::
+    libux_install_rule (data&& d, const link_rule& l)
+        : common (move (d)), link_ (l) {}
+
+    const target* libux_install_rule::
+    filter (action a, const target& t, prerequisite_iterator& i) const
+    {
+      const prerequisite& p (i->prerequisite);
+
+      // NOTE: the "see through" semantics here should be kept parallel to
+      // install_rule::filter() above. In particular, here we use
+      // libue/libua/libus{} as proxies for exe/liba/libs{} there.
+      //
+      otype ot (link_type (t).type);
+
+      bool st (t.is_a<libue> () || t.is_a<libus> ()); // Target needs shared.
+      bool at (t.is_a<libua> () || t.is_a<libus> ()); // Target needs static.
+
+      if ((st && (p.is_a<libx> () || p.is_a<libs> ())) ||
+          (at && (p.is_a<libx> () || p.is_a<liba> ())))
+      {
+        const target* pt (&search (t, p));
+
+        if (const libx* l = pt->is_a<libx> ())
+          pt = link_member (*l, a, link_info (t.base_scope (), ot));
+
+        if ((st && pt->is_a<libs> ()) || (at && pt->is_a<liba> ()))
+          return pt->in (t.weak_scope ()) ? pt : nullptr;
+
+        if (pt->is_a<libux> ())
+          return pt; // See through to libu*{} members.
+      }
+
+      const target* pt (install::file_rule::instance.filter (a, t, p));
+      if (pt == nullptr)
+        return pt;
+
+      if (t.is_a<libue> ())
+      {
+        if (x_header (p))
+          pt = nullptr; // Don't install headers of a libue{}.
+        else if (p.type.see_through)
+        {
+          for (i.enter_group (); i.group (); )
+          {
+            if (x_header (*++i))
+              pt = nullptr; // Header in a group: skip the entire group.
+          }
+        }
+
+        if (pt == nullptr)
+          return pt;
+      }
+
+      bool g (false);
+      if (p.is_a<bmi> () || (g = p.is_a (compile_types (ot).bmi)))
+      {
+        if (g)
+          resolve_group (a, *pt);
+
+        for (prerequisite_member pm:
+               group_prerequisite_members (a, *pt, members_mode::maybe))
+        {
+          if (pm.is_a (*x_mod)) // Translate bmi*{} to its mxx{}.
+          {
+            pt = t.is_a<libue> ()
+              ? nullptr
+              : install::file_rule::instance.filter (a, *pt, pm.prerequisite);
+            break;
+          }
+        }
+
+        if (pt == nullptr)
+          return pt;
+      }
+
+      return pt;
+    }
+
+    bool libux_install_rule::
+    match (action a, target& t, const string& hint) const
+    {
+      // Only handle installation if link's match() says we build the target.
+      //
+      const bool ours (link_.match (a, t, hint));
+      return ours ? alias_rule::match (a, t, "") : false;
+    }
+  }
+}
diff --git a/libbuild2/cc/install-rule.hxx b/libbuild2/cc/install-rule.hxx
new file mode 100644
index 0000000..6d7ceb8
--- /dev/null
+++ b/libbuild2/cc/install-rule.hxx
@@ -0,0 +1,82 @@
+// file      : libbuild2/cc/install-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_INSTALL_RULE_HXX
+#define LIBBUILD2_CC_INSTALL_RULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/install/rule.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    class link_rule;
+
+    // Installation rule for exe{} and lib*{}. Here we do:
+    //
+    // 1. Signal to the link rule that this is update for install.
+    //
+    // 2. Custom filtering of prerequisites (e.g., headers of an exe{}).
+    //
+    // 3. Extra un/installation (e.g., libs{} symlinks).
+    //
+    class LIBBUILD2_CC_SYMEXPORT install_rule: public install::file_rule,
+                                               virtual common
+    {
+    public:
+      install_rule (data&&, const link_rule&);
+
+      virtual const target*
+      filter (action, const target&, prerequisite_iterator&) const override;
+
+      virtual bool
+      match (action, target&, const string&) const override;
+
+      virtual recipe
+      apply (action, target&) const override;
+
+      virtual bool
+      install_extra (const file&, const install_dir&) const override;
+
+      virtual bool
+      uninstall_extra (const file&, const install_dir&) const override;
+
+    private:
+      const link_rule& link_; // Its match() is consulted first by ours.
+    };
+
+    // Installation rule for libu*{}.
+    //
+    // While libu*{} members themselves are not installable, we need to see
+    // through them in case they depend on stuff that we need to install
+    // (e.g., headers). Note that we use the alias_rule as a base.
+    //
+    class LIBBUILD2_CC_SYMEXPORT libux_install_rule:
+      public install::alias_rule,
+      virtual common
+    {
+    public:
+      libux_install_rule (data&&, const link_rule&);
+
+      virtual const target*
+      filter (action, const target&, prerequisite_iterator&) const override;
+
+      virtual bool
+      match (action, target&, const string&) const override;
+
+    private:
+      const link_rule& link_; // Its match() is consulted first by ours.
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_INSTALL_RULE_HXX
diff --git a/libbuild2/cc/lexer+char-literal.test.testscript b/libbuild2/cc/lexer+char-literal.test.testscript
new file mode 100644
index 0000000..afd16dd
--- /dev/null
+++ b/libbuild2/cc/lexer+char-literal.test.testscript
@@ -0,0 +1,67 @@
+# file      : libbuild2/cc/lexer+char-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test character literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+'a'
+'aa'
+'"'
+EOI
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L'a'
+U'a'
+u'a'
+u8'a'
+u8R'a'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+'u8R'
+<char literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+'a'x
+'a'_X123
+EOI
+<char literal>
+<char literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+'\''
+'\\'
+'\\\''
+'\n'
+U'\U0001f34c'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: unterminated
+:
+$* <"'a" 2>>EOE != 0
+stdin:1:1: error: unterminated character literal
+EOE
diff --git a/libbuild2/cc/lexer+comment.test.testscript b/libbuild2/cc/lexer+comment.test.testscript
new file mode 100644
index 0000000..bfcc440
--- /dev/null
+++ b/libbuild2/cc/lexer+comment.test.testscript
@@ -0,0 +1,88 @@
+# file      : libbuild2/cc/lexer+comment.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test C and C++ comments.
+#
+
+: c-comment
+:
+$* <<EOI
+/* 'one' */
+/* "two" // three
+*/
+/**
+four
+// five */
+/**
+six /*
+*/
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+// 'one'
+// "two" // three
+// four /* five */
+EOI
+
+: commented-out
+:
+$* <<EOI >"';'"
+// /*
+;
+// */
+EOI
+
+: c-unterminated
+:
+$* <<EOI 2>>EOE != 0
+/*
+comment
+EOI
+stdin:1:2: error: unterminated comment
+EOE
+
+: cxx-unterminated
+:
+$* <<:EOI
+// comment
+EOI
+
+: in-char-literal
+:
+$* <<EOI >>EOO
+'//'
+'/*'*/
+EOI
+<char literal>
+<char literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-string-literal
+:
+$* <<EOI >>EOO
+"//foo"
+"/*"*/
+EOI
+<string literal>
+<string literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-raw-string-literal
+:
+$* <<EOI >>EOO
+R"X(
+// foo
+/* bar
+)X"*/
+EOI
+<string literal>
+<punctuation>
+<punctuation>
+EOO
diff --git a/libbuild2/cc/lexer+line.test.testscript b/libbuild2/cc/lexer+line.test.testscript
new file mode 100644
index 0000000..560c092
--- /dev/null
+++ b/libbuild2/cc/lexer+line.test.testscript
@@ -0,0 +1,67 @@
+# file      : libbuild2/cc/lexer+line.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test line continuations.
+#
+
+: identifier
+:
+$* <<EOI >"'foo123'"
+fo\
+o\
+1\
+2\
+3
+EOI
+
+: punctuation
+:
+$* <<EOI >'<punctuation>'
+.\
+.\
+.
+EOI
+
+: c-comment
+:
+$* <<EOI
+/\
+*
+comment
+*\
+/\
+
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+/\
+/ comment\
+more\
+more
+EOI
+
+: other
+:
+$* <<EOI >>EOO
+\abc
+EOI
+<punctuation>
+'abc'
+EOO
+
+: multiple
+:
+$* <<EOI >>EOO
+\\
+EOI
+<punctuation>
+EOO
+
+: unterminated
+:
+$* <<:EOI >'<punctuation>'
+\
+EOI
diff --git a/libbuild2/cc/lexer+number.test.testscript b/libbuild2/cc/lexer+number.test.testscript
new file mode 100644
index 0000000..f361245
--- /dev/null
+++ b/libbuild2/cc/lexer+number.test.testscript
@@ -0,0 +1,48 @@
+# file      : libbuild2/cc/lexer+number.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test numbers.
+#
+
+$* <'1'  >'<number literal>'
+$* <'.1' >'<number literal>'
+$* <'1.' >'<number literal>'
+
+$* <'0b101'  >'<number literal>'
+$* <'0123'   >'<number literal>'
+$* <'0X12AB' >'<number literal>'
+
+$* <'1e10'     >'<number literal>'
+$* <'1E+10'    >'<number literal>'
+$* <'0x1.p10'  >'<number literal>'
+$* <'0x1.P-10' >'<number literal>'
+
+$* <"123'456"     >'<number literal>'
+$* <"0xff00'00ff" >'<number literal>'
+
+$* <'123f'  >'<number literal>'
+$* <'123UL' >'<number literal>'
+$* <'123_X' >'<number literal>'
+
+: separate-punctuation
+:
+$* <'123;' >>EOO
+<number literal>
+';'
+EOO
+
+: separate-plus-minus
+:
+$* <'1.0_a+2.0' >>EOO
+<number literal>
+<punctuation>
+<number literal>
+EOO
+
+: separate-whitespace
+:
+$* <'123 abc' >>EOO
+<number literal>
+'abc'
+EOO
diff --git a/libbuild2/cc/lexer+preprocessor.test.testscript b/libbuild2/cc/lexer+preprocessor.test.testscript
new file mode 100644
index 0000000..e33eb90
--- /dev/null
+++ b/libbuild2/cc/lexer+preprocessor.test.testscript
@@ -0,0 +1,73 @@
+# file      : libbuild2/cc/lexer+preprocessor.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test preprocessor lines.
+#
+
+: normal
+:
+$* <<EOI
+#pragma message("abc")
+EOI
+
+: multiline
+:
+$* <<EOI
+#pragma message \
+( \
+"abc" \
+)
+EOI
+
+: comment
+:
+$* <<EOI
+#pragma foo /*
+bar
+baz
+*/
+#pragma foo // bar baz
+EOI
+
+: line
+:
+$* -l <<EOI >>EOO
+;
+# 1 "test.cxx" 2
+;
+  ;
+# 4
+;
+#line 8 "z:\\tmp\\test.hxx"
+;
+#line 10
+;
+# 5 "test.cxx"
+;
+EOI
+';' stdin:1:1
+';' test.cxx:1:1
+';' test.cxx:2:3
+';' test.cxx:4:1
+';' z:\tmp\test.hxx:8:1
+';' z:\tmp\test.hxx:10:1
+';' test.cxx:5:1
+EOO
+
+: include
+:
+$* <<EOI 2>>EOE != 0
+#include <foo/bar>
+EOI
+stdin:1:1: error: unexpected #include directive
+EOE
+
+: nested
+:
+$* <<EOI >>EOO
+#define FOO(x) #y
+;
+EOI
+';'
+EOO
diff --git a/libbuild2/cc/lexer+raw-string-literal.test.testscript b/libbuild2/cc/lexer+raw-string-literal.test.testscript
new file mode 100644
index 0000000..93cddc1
--- /dev/null
+++ b/libbuild2/cc/lexer+raw-string-literal.test.testscript
@@ -0,0 +1,90 @@
+# file      : libbuild2/cc/lexer+raw-string-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test raw string literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+R"()"
+R"(ab)"
+R"(a"b)"
+R"(a)b)"
+R"%(a%)b)%"
+R"X(a
+    b)X"
+R"X(a\
+    b)X"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+LR"(ab)"
+UR"(ab)"
+uR"(ab)"
+u8R"(ab)"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+R"(ab)"x
+R"(ab)"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+R"(\)"
+EOI
+<string literal>
+EOO
+
+: invalid-no-paren
+:
+$* <'R"a"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-paren
+:
+$* <'R")()("' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-paren
+:
+$* <'R"(abc"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-delimiter
+:
+$* <'R"X(abc)"' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
+
+: invalid-unterminated-quote
+:
+$* <'R"X(abc)X' 2>>EOE != 0
+stdin:1:2: error: invalid raw string literal
+EOE
diff --git a/libbuild2/cc/lexer+string-literal.test.testscript b/libbuild2/cc/lexer+string-literal.test.testscript
new file mode 100644
index 0000000..a2509c9
--- /dev/null
+++ b/libbuild2/cc/lexer+string-literal.test.testscript
@@ -0,0 +1,65 @@
+# file      : libbuild2/cc/lexer+string-literal.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test string literals (except raw).
+#
+
+: normal
+:
+$* <<EOI >>EOO
+"aa"
+"'"
+"a""b"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L"ab"
+U"ab"
+u"ab"
+u8"ab"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+"ab"x
+"ab"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+"\"\""
+"\\\\"
+"\\\"\\"
+"\n\t"
+U"a\U0001f34c"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: unterminated
+:
+$* <'"ab' 2>>EOE != 0
+stdin:1:1: error: unterminated string literal
+EOE
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx
new file mode 100644
index 0000000..6eba57e
--- /dev/null
+++ b/libbuild2/cc/lexer.cxx
@@ -0,0 +1,1129 @@
+// file      : libbuild2/cc/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+// bit 0 - identifier character (_0-9A-Za-z).
+//
+static const uint8_t char_flags[256] =
+//0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
+{
+  0,   0,   0,   0,   0,   0,   0,   0,     0,   0,   0,   0,   0,   0,   0,   0, // 0
+  0,   0,   0,   0,   0,   0,   0,   0,     0,   0,   0,   0,   0,   0,   0,   0, // 1
+  0,   0,   0,   0,   0,   0,   0,   0,     0,   0,   0,   0,   0,   0,   0,   0, // 2
+  1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   0,   0,   0,   0,   0,   0, // 3
+  0,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
+  1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   0,   0,   0,   0,   1, // 5
+  0,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
+  1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   0,   0,   0,   0,   0, // 7
+
+  // 128-255
+  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
+};
+
+// Diagnostics plumbing: map a scanned character to a diagnostics location.
+//
+namespace butl // In the xchar's namespace so that it is found via ADL.
+{
+  inline build2::location
+  get_location (const butl::char_scanner::xchar& c, const void* data)
+  {
+    using namespace build2;
+
+    assert (data != nullptr); // E.g., must be &lexer::name_.
+    return location (static_cast<const path*> (data), c.line, c.column);
+  }
+}
+
+namespace build2
+{
+  namespace cc
+  {
+    auto lexer::
+    peek (bool e) -> xchar // If e is true, handle newline escaping.
+    {
+      if (unget_) // An ungotten character goes first.
+        return ungetc_;
+
+      if (unpeek_) // Then the character saved in the unpeek buffer below.
+        return unpeekc_;
+
+      xchar c (base::peek ());
+
+      if (e && c == '\\') // Potential line continuation.
+      {
+        get (c); // Consume the backslash to see what follows it.
+        xchar p (base::peek ());
+
+        // Handle Windows CRLF sequence. Similar to char_scanner, we treat a
+        // single CR as if it was followed by LF and also collapse multiple
+        // CRs.
+        //
+        while (p == '\r')
+        {
+          get (p);
+          p = base::peek ();
+
+          if (p == '\n')
+            break;
+
+          // Pretend '\n' was there and recurse.
+          //
+          if (p != '\r')
+            return peek (e);
+        }
+
+        if (p == '\n')
+        {
+          get (p);
+          return peek (e); // Recurse.
+        }
+
+        // Save in the unpeek buffer so that it is returned on the subsequent
+        // calls to peek() (until get()).
+        //
+        unpeek_ = true;
+        unpeekc_ = c;
+      }
+
+      return c;
+    }
+
+    inline auto lexer::
+    get (bool e) -> xchar // If e is true, handle newline escaping.
+    {
+      if (unget_) // Return the ungotten character first.
+      {
+        unget_ = false;
+        return ungetc_;
+      }
+      else
+      {
+        xchar c (peek (e));
+        get (c); // Consume what we have just peeked.
+        return c;
+      }
+    }
+
+    inline void lexer::
+    get (const xchar& c) // Consume the previously peeked character c.
+    {
+      // Increment the logical line similar to how base will increment the
+      // physical (the column counts are the same).
+      //
+      if (log_line_ && c == '\n' && !unget_)
+        ++*log_line_;
+
+      base::get (c);
+    }
+
+    inline auto lexer::
+    geth (bool e) -> xchar // Same as get(bool) but also hash the character.
+    {
+      xchar c (get (e));
+      cs_.append (c); // Add to the checksum.
+      return c;
+    }
+
+    inline void lexer::
+    geth (const xchar& c) // Same as get(const xchar&) but also hash c.
+    {
+      get (c);
+      cs_.append (c); // Add to the checksum.
+    }
+
+    using type = token_type;
+
+    // Extract the next token into t given its first character c. If
+    // ignore_pp is true, consume preprocessor lines (recognizing #line).
+    void lexer::
+    next (token& t, xchar c, bool ignore_pp)
+    {
+      for (;; c = skip_spaces ())
+      {
+        t.file = log_file_;
+        t.line = log_line_ ? *log_line_ : c.line;
+        t.column = c.column;
+
+        if (eos (c))
+        {
+          t.type = type::eos;
+          return;
+        }
+
+        const location l (&name_, c.line, c.column);
+
+        // Hash the token's line. The reason is debug info. In fact, doing
+        // this will make quite a few "noop" changes (like adding a newline
+        // anywhere in the source) cause the checksum change. But there
+        // doesn't seem to be any way around it: the case where we benefit
+        // from the precise change detection the most (development) is also
+        // where we will most likely have debug info enabled.
+        //
+        // Note that in order not to make this completely useless we don't
+        // hash the column. Even if it is part of the debug info, having it a
+        // bit off shouldn't cause any significant mis-positioning. We also
+        // don't hash the file path for each token instead only hashing it
+        // when changed with the #line directive (as well as in the
+        // constructor for the initial path).
+        //
+        cs_.append (t.line);
+        cs_.append (c);
+
+        switch (c)
+        {
+          // Preprocessor lines.
+          //
+        case '#':
+          {
+            // It is tempting to simply scan until the newline ignoring
+            // anything in between. However, these lines can start a
+            // multi-line C-style comment. So we have to tokenize them (and
+            // hash the data for each token).
+            //
+            // Note that this may not work for things like #error that can
+            // contain pretty much anything. Also note that lines that start
+            // with '#' can contain '#' further down. In this case we need to
+            // be careful not to recurse (and consume multiple newlines). Thus
+            // the ignore_pp flag.
+            //
+            // Finally, to support diagnostics properly we need to recognize
+            // #line directives.
+            //
+            if (ignore_pp)
+            {
+              for (bool first (true);;)
+              {
+                // Note that we keep using the passed token for buffers.
+                //
+                c = skip_spaces (false); // Stop at newline.
+
+                if (eos (c) || c == '\n')
+                  break;
+
+                if (first)
+                {
+                  first = false;
+
+                  // Recognize #line and its shorthand version:
+                  //
+                  // #line <integer> [<string literal>] ...
+                  // #     <integer> [<string literal>] ...
+                  //
+                  // Also diagnose #include while at it.
+                  //
+                  if (!(c >= '0' && c <= '9'))
+                  {
+                    next (t, c, false);
+
+                    if (t.type == type::identifier)
+                    {
+                      if (t.value == "include")
+                        fail (l) << "unexpected #include directive";
+                      else if (t.value != "line")
+                        continue;
+                    }
+                    else
+                      continue;
+
+                    c = skip_spaces (false);
+
+                    if (!(c >= '0' && c <= '9'))
+                      fail (c) << "line number expected after #line directive";
+                  }
+
+                  // Ok, this is #line and next comes the line number.
+                  //
+                  line_directive (t, c);
+                  continue; // Parse the tail, if any.
+                }
+
+                next (t, c, false);
+              }
+              break;
+            }
+            else
+            {
+              t.type = type::punctuation;
+              return;
+            }
+          }
+          // Single-letter punctuation.
+          //
+        case ';': t.type = type::semi;    return;
+        case '{': t.type = type::lcbrace; return;
+        case '}': t.type = type::rcbrace; return;
+          // Other single-letter punctuation.
+          //
+        case '(':
+        case ')':
+        case '[':
+        case ']':
+        case ',':
+        case '?':
+        case '~':
+        case '\\': t.type = type::punctuation; return;
+          // Potentially multi-letter punctuation.
+          //
+        case '.': // . .* .<N> ...
+          {
+            xchar p (peek ());
+
+            if (p == '*')
+            {
+              geth (p);
+              t.type = type::punctuation;
+              return;
+            }
+            else if (p >= '0' && p <= '9')
+            {
+              number_literal (t, c);
+              return;
+            }
+            else if (p == '.')
+            {
+              get (p);
+
+              xchar q (peek ());
+              if (q == '.')
+              {
+                cs_.append (p);
+
+                geth (q);
+                t.type = type::punctuation;
+                return;
+              }
+              unget (p);
+              // Fall through.
+            }
+
+            t.type = type::dot;
+            return;
+          }
+        case '=': // = ==
+        case '!': // ! !=
+        case '*': // * *=
+        case '/': // / /=   (/* and // handled by skip_spaces() above)
+        case '%': // % %=
+        case '^': // ^ ^=
+          {
+            xchar p (peek ());
+
+            if (p == '=')
+              geth (p);
+
+            t.type = type::punctuation;
+            return;
+          }
+        case '<': // < <= << <<=
+        case '>': // > >= >> >>=
+          {
+            xchar p (peek ());
+
+            if (p == c)
+            {
+              geth (p);
+              if ((p = peek ()) == '=')
+                geth (p);
+              t.type = type::punctuation;
+            }
+            else if (p == '=')
+            {
+              geth (p);
+              t.type = type::punctuation;
+            }
+            else
+              t.type = (c == '<' ? type::less : type::greater);
+
+            return;
+          }
+        case '+': // + ++ +=
+        case '-': // - -- -= -> ->*
+          {
+            xchar p (peek ());
+
+            if (p == c || p == '=')
+              geth (p);
+            else if (c == '-' && p == '>')
+            {
+              geth (p);
+              if ((p = peek ()) == '*')
+                geth (p);
+            }
+
+            t.type = type::punctuation;
+            return;
+          }
+        case '&': // & && &=
+        case '|': // | || |=
+          {
+            xchar p (peek ());
+
+            if (p == c || p == '=')
+              geth (p);
+
+            t.type = type::punctuation;
+            return;
+          }
+        case ':': // : ::
+          {
+            xchar p (peek ());
+
+            if (p == ':')
+              geth (p);
+
+            t.type = type::punctuation;
+            return;
+          }
+          // Number (and also .<N> above).
+          //
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          {
+            number_literal (t, c);
+            return;
+          }
+          // Char/string literal, identifier, or other (\, $, @, `).
+          //
+        default:
+          {
+            bool raw (false); // Raw string literal.
+
+            // Note: known not to be a digit (see above).
+            //
+            if (char_flags[static_cast<uint8_t> (c)] & 0x01)
+            {
+              // This smells a little: we know skip_spaces() did not peek at
+              // the next character because this is not '/'. Which means the
+              // position in the stream must be of this character + 1.
+              //
+              t.position = buf_->tellg () - 1;
+
+              string& id (t.value);
+              id = c;
+
+              while (char_flags[static_cast<uint8_t> (c = peek ())] & 0x01)
+              {
+                geth (c);
+                id += c;
+
+                // Direct buffer scan. Note that we always follow up with the
+                // normal peek() call which may load the next chunk, handle
+                // line continuations, etc. In other words, the end of the
+                // "raw" scan doesn't necessarily mean the end.
+                //
+                const char* b (gptr_);
+                const char* p (b);
+
+                for (const char* e (egptr_);
+                     p != e && char_flags[static_cast<uint8_t> (*p)] & 0x01;
+                     ++p) ;
+
+                // Unrolling this loop doesn't make a difference.
+                //
+                // for (const char* e (egptr_ - 4); p < e; p += 4)
+                // {
+                //   uint8_t c;
+                //
+                //  c = static_cast<uint8_t> (p[0]);
+                //  if (!(char_flags[c] & 0x01)) break;
+                //
+                //  c = static_cast<uint8_t> (p[1]);
+                //  if (!(char_flags[c] & 0x01)) {p += 1; break;}
+                //
+                //  c = static_cast<uint8_t> (p[2]);
+                //  if (!(char_flags[c] & 0x01)) {p += 2; break;}
+                //
+                //  c = static_cast<uint8_t> (p[3]);
+                //  if (!(char_flags[c] & 0x01)) {p += 3; break;}
+                // }
+
+                size_t n (p - b);
+                id.append (b, n); cs_.append (b, n);
+                gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+              }
+
+              // If the following character is a quote, see if the identifier
+              // is one of the literal prefixes.
+              //
+              if (c == '\'' || c == '\"')
+              {
+                size_t n (id.size ()), i (0);
+                switch (id[0])
+                {
+                case 'u':
+                  {
+                    if (n > 1 && id[1] == '8')
+                      ++i;
+                  }
+                  // Fall through.
+                case 'L':
+                case 'U':
+                  {
+                    ++i;
+
+                    if (c == '\"' && n > i && id[i] == 'R')
+                    {
+                      ++i;
+                      raw = true;
+                    }
+                    break;
+                  }
+                case 'R':
+                  {
+                    if (c == '\"')
+                    {
+                      ++i;
+                      raw = true;
+                    }
+                    break;
+                  }
+                }
+
+                if (i == n) // All characters "consumed".
+                {
+                  geth (c);
+                  id.clear ();
+                }
+              }
+
+              if (!id.empty ())
+              {
+                t.type = type::identifier;
+                return;
+              }
+            }
+
+            switch (c)
+            {
+            case '\'':
+              {
+                char_literal (t, c);
+                return;
+              }
+            case '\"':
+              {
+                if (raw)
+                  raw_string_literal (t, c);
+                else
+                  string_literal (t, c);
+                return;
+              }
+            default:
+              {
+                t.type = type::other;
+                return;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    void lexer::
+    number_literal (token& t, xchar c)
+    {
+      // note: c is hashed
+
+      // A number (integer or floating point literal) can:
+      //
+      // 1. Start with a dot (which must be followed by a digit, e.g., .123).
+      //
+      // 2. Can have a radix prefix (0b101, 0123, 0X12AB).
+      //
+      // 3. Can have an exponent (1e10, 0x1.p-10, 1.).
+      //
+      // 4. Digits can be separated with ' (123'456, 0xff00'00ff).
+      //
+      // 5. End with a built-in or user defined literal (123f, 123UL, 123_X).
+      //
+      // Quoting from GCC's preprocessor documentation:
+      //
+      // "Formally preprocessing numbers begin with an optional period, a
+      // required decimal digit, and then continue with any sequence of
+      // letters, digits, underscores, periods, and exponents. Exponents are
+      // the two-character sequences 'e+', 'e-', 'E+', 'E-', 'p+', 'p-', 'P+',
+      // and 'P-'."
+      //
+      // So it looks like a "C++ number" is then any unseparated (with
+      // whitespace or punctuation) sequence of those plus '. The only mildly
+      // tricky part is then to recognize +/- as being part of the exponent.
+      //
+      while (!eos ((c = peek ())))
+      {
+        switch (c)
+        {
+          // All the whitespace, punctuation, and other characters that end
+          // the number.
+          //
+        case ' ':
+        case '\n':
+        case '\t':
+        case '\r':
+        case '\f':
+        case '\v':
+
+        case '#':
+        case ';':
+        case '{':
+        case '}':
+        case '(':
+        case ')':
+        case '[':
+        case ']':
+        case ',':
+        case '?':
+        case '~':
+        case '=':
+        case '!':
+        case '*':
+        case '/':
+        case '%':
+        case '^':
+        case '>':
+        case '<':
+        case '&':
+        case '|':
+        case ':':
+        case '+': // The exponent case is handled below.
+        case '-': // The exponent case is handled below.
+        case '"':
+        case '\\':
+
+        case '@':
+        case '$':
+        case '`':
+          break;
+
+          // Recognize +/- after the exponent.
+          //
+        case 'e':
+        case 'E':
+        case 'p':
+        case 'P':
+          {
+            geth (c);
+            c = peek ();
+            if (c == '+' || c == '-')
+              geth (c);
+            continue;
+          }
+
+        case '_':
+        case '.':
+        case '\'':
+        default: // Digits and letters.
+          {
+            geth (c);
+            continue;
+          }
+        }
+
+        break; // One of the terminating characters above (left unconsumed).
+      }
+
+      t.type = type::number;
+    }
+
+    void lexer::
+    char_literal (token& t, xchar c)
+    {
+      // note: c is hashed
+
+      const location l (&name_, c.line, c.column); // Of the opening quote.
+
+      for (char p (c);;) // Previous character (see below).
+      {
+        c = geth ();
+
+        if (eos (c) || c == '\n')
+          fail (l) << "unterminated character literal";
+
+        if (c == '\'' && p != '\\') // Unescaped closing quote.
+          break;
+
+        // Keep track of \\-escapings so we don't confuse them with \', as in
+        // '\\'.
+        //
+        p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
+      }
+
+      // See if we have a user-defined suffix (which is an identifier).
+      //
+      if ((c = peek ()) == '_' || alpha (c))
+        literal_suffix (c);
+
+      t.type = type::character;
+    }
+
+    void lexer::
+    string_literal (token& t, xchar c)
+    {
+      // note: c is hashed
+
+      const location l (&name_, c.line, c.column); // Of the opening quote.
+
+      for (char p (c);;) // Previous character (see below).
+      {
+        c = geth ();
+
+        if (eos (c) || c == '\n')
+          fail (l) << "unterminated string literal";
+
+        if (c == '\"' && p != '\\') // Unescaped closing quote.
+          break;
+
+        // Keep track of \\-escapings so we don't confuse them with \", as in
+        // "\\".
+        //
+        p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
+
+        // Direct buffer scan (up to the next quote, backslash, or newline).
+        //
+        if (p != '\\')
+        {
+          const char* b (gptr_);
+          const char* e (egptr_);
+          const char* p (b);
+
+          for (char c;
+               p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
+               ++p) ;
+
+          size_t n (p - b);
+          cs_.append (b, n);
+          gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+        }
+      }
+
+      // See if we have a user-defined suffix (which is an identifier).
+      //
+      if ((c = peek ()) == '_' || alpha (c))
+        literal_suffix (c);
+
+      t.type = type::string;
+    }
+
+    void lexer::
+    raw_string_literal (token& t, xchar c)
+    {
+      // note: c is hashed
+
+      // The overall form is:
+      //
+      // R"<delimiter>(<raw_characters>)<delimiter>"
+      //
+      // Where <delimiter> is a potentially-empty character sequence made of
+      // any source character but parentheses, backslash and spaces. It can be
+      // at most 16 characters long.
+      //
+      // Note that the <raw_characters> are not processed in any way, not even
+      // for line continuations.
+      //
+      const location l (&name_, c.line, c.column);
+
+      // As a first step, parse the delimiter (including the opening paren).
+      //
+      string d (1, ')'); // Closing sequence: paren, delimiter, then quote.
+
+      for (;;)
+      {
+        c = geth ();
+
+        if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
+          fail (l) << "invalid raw string literal";
+
+        if (c == '(')
+          break;
+
+        d += c;
+      }
+
+      d += '"';
+
+      // Now parse the raw characters while trying to match the closing
+      // delimiter (which contains ')' only at the start; see above).
+      //
+      for (size_t i (0);;) // Position to match in d.
+      {
+        c = geth (false); // No newline escaping.
+
+        if (eos (c)) // Note: newline is ok.
+          fail (l) << "invalid raw string literal";
+
+        if (c != d[i] && i != 0) // Restart from the beginning.
+          i = 0;
+
+        if (c == d[i])
+        {
+          if (++i == d.size ())
+            break;
+        }
+      }
+
+      // See if we have a user-defined suffix (which is an identifier).
+      //
+      if ((c = peek ()) == '_' || alpha (c))
+        literal_suffix (c);
+
+      t.type = type::string;
+    }
+
+    void lexer::
+    literal_suffix (xchar c)
+    {
+      // note: c is unhashed
+
+      // Parse (and hash) a user-defined literal suffix identifier.
+      //
+      for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
+    }
+
+    void lexer::
+    line_directive (token& t, xchar c)
+    {
+      // enter: first digit of the line number
+      // leave: last character of the line number or file string
+      // note:  c is unhashed
+
+      // If our number and string tokens contained the literal values, then we
+      // could have used that. However, we ignore the value (along with escape
+      // processing, etc), for performance. Let's keep it that way and instead
+      // handle it ourselves.
+      //
+      // Note also that we are not hashing these at the character level
+      // instead hashing the switch to a new file path below and leaving the
+      // line number to the token line hashing.
+      //
+      {
+        string& s (t.value); // Reuse the token's value as a buffer.
+
+        for (s = c; (c = peek ()) >= '0' && c <= '9'; get (c))
+          s += c;
+
+        // The newline that ends the directive will increment the logical line
+        // so subtract one to compensate. Note: can't be 0 and shouldn't throw
+        // for valid lines.
+        //
+        log_line_ = stoull (s.c_str ()) - 1;
+      }
+
+      // See if we have the file.
+      //
+      c = skip_spaces (false);
+
+      if (c == '\"')
+      {
+        const location l (&name_, c.line, c.column);
+
+        // It is common to have a large number of #line directives that don't
+        // change the file (they seem to be used to track macro locations or
+        // some such). So we are going to optimize for this by comparing the
+        // current path to what's in #line.
+        //
+        string& s (tmp_file_);
+        s.clear ();
+
+        for (char p ('\0'); p != '\"'; ) // Previous character.
+        {
+          c = get ();
+
+          if (eos (c) || c == '\n')
+            fail (l) << "unterminated string literal";
+
+          // Handle escapes.
+          //
+          if (p == '\\')
+          {
+            p = '\0'; // Clear so we don't confuse \" and \\".
+
+            // We only handle what can reasonably be expected in a file name.
+            //
+            switch (c)
+            {
+            case '\\':
+            case '\'':
+            case '\"': break; // Add as is.
+            default:
+              fail (c) << "unsupported escape sequence in #line directive";
+            }
+          }
+          else
+          {
+            p = c;
+
+            switch (c)
+            {
+            case '\\':
+            case '\"': continue;
+            }
+          }
+
+          s += c;
+
+          // Direct buffer scan.
+          //
+          if (p != '\\')
+          {
+            const char* b (gptr_);
+            const char* e (egptr_);
+            const char* p (b);
+
+            for (char c;
+                 p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
+                 ++p) ;
+
+            size_t n (p - b);
+            s.append (b, n);
+            gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+          }
+        }
+
+        if (log_file_.string () == s)
+          return;
+
+        // Swap the two string buffers.
+        //
+        {
+          string r (move (log_file_).string ()); // Move string rep out.
+          r.swap (s);
+          log_file_ = path (move (r)); // Move back in.
+        }
+
+        // If the path is relative, then prefix it with the current working
+        // directory. Failing that, we will end up with different checksums
+        // for invocations from different directories.
+        //
+        // While this should work fine for normal cross-compilation, it's an
+        // entirely different story for the emulated case (e.g., msvc-linux
+        // where the preprocessed output contains absolute Windows paths). So
+        // we try to sense if things look fishy and leave the path alone.
+        //
+        // Also detect special names like <built-in> and <command-line>. Plus
+        // GCC sometimes adds what looks like working directory (has trailing
+        // slash). So ignore that as well.
+        //
+        // We now switched to using absolute translation unit paths (because
+        // of __FILE__/assert(); see compile.cxx for details). But we might
+        // still need this logic when we try to calculate location-independent
+        // hash for distributed compilation/caching. The idea is to only hash
+        // the part starting from the project root which is immutable. Plus
+        // we will need -ffile-prefix-map to deal with __FILE__.
+        //
+        if (!log_file_.to_directory ())
+          cs_.append (log_file_.string ());
+#if 0
+        {
+          using tr = path::traits;
+          const string& f (log_file_.string ());
+
+          if (f.find (':') != string::npos            ||
+              (f.front () == '<' && f.back () == '>') ||
+              log_file_.absolute ())
+            cs_.append (f);
+          else
+          {
+            // This gets complicated and slow: the path may contain '..' and
+            // '.'  so strictly speaking we would need to normalize it.
+            // Instead, we are going to handle leading '..'s ourselves (the
+            // sane case) and ignore everything else (so if you have '..'  or
+            // '.' somewhere in the middle, then things might not work
+            // optimally for you).
+            //
+            const string& d (work.string ());
+
+            // Iterate over leading '..' in f "popping" the corresponding
+            // number of trailing components from d.
+            //
+            size_t fp (0);
+            size_t dp (d.size () - 1);
+
+            for (size_t p;; )
+            {
+              // Note that in file we recognize any directory separator, not
+              // just of this platform (see note about emulation above).
+              //
+              if (f.compare (fp, 2, "..") != 0  ||
+                  (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
+                  (p = tr::rfind_separator (d, dp)) == string::npos)
+                break;
+
+              fp += 3;
+              dp = p - 1;
+            }
+
+            cs_.append (d.c_str (), dp + 1);
+            cs_.append (tr::directory_separator); // Canonical in work.
+            cs_.append (f.c_str () + fp);
+          }
+        }
+#endif
+      }
+      else
+        unget (c);
+    }
+
+    auto lexer::
+    skip_spaces (bool nl) -> xchar // If nl is false, then stop at newline.
+    {
+      xchar c (get ());
+
+      for (; !eos (c); c = get ())
+      {
+        switch (c)
+        {
+        case '\n':
+          if (!nl) break; // Return the newline to the caller.
+          // Fall through.
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\f':
+        case '\v':
+          {
+            // Direct buffer scan (spaces and tabs only).
+            //
+            const char* b (gptr_);
+            const char* e (egptr_);
+            const char* p (b);
+
+            for (char c;
+                 p != e && ((c = *p) == ' ' || c == '\t');
+                 ++p) ;
+
+            size_t n (p - b);
+            gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+
+            continue;
+          }
+        case '/':
+          {
+            xchar p (peek ());
+
+            // C++ comment.
+            //
+            if (p == '/')
+            {
+              get (p);
+
+              for (;;)
+              {
+                c = get ();
+                if (c == '\n' || eos (c))
+                  break;
+
+                // Direct buffer scan (up to the next newline or backslash).
+                //
+                const char* b (gptr_);
+                const char* e (egptr_);
+                const char* p (b);
+
+                for (char c;
+                     p != e && (c = *p) != '\n' && c != '\\';
+                     ++p) ;
+
+                size_t n (p - b);
+                gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
+              }
+
+              if (!nl) // Return the newline (or eos) that ended the comment.
+                break;
+
+              continue;
+            }
+
+            // C comment.
+            //
+            if (p == '*')
+            {
+              get (p);
+
+              for (;;)
+              {
+                c = get ();
+
+                if (eos (c))
+                  fail (p) << "unterminated comment";
+
+                if (c == '*' && (c = peek ()) == '/')
+                {
+                  get (c);
+                  break;
+                }
+
+                // Direct buffer scan (up to the next '*' or backslash).
+                //
+                const char* b (gptr_);
+                const char* e (egptr_);
+                const char* p (b);
+
+                for (char c;
+                     p != e && (c = *p) != '*' && c != '\\';
+                     ++p)
+                {
+                  if (c == '\n') // Keep the line/column counts in sync.
+                  {
+                    if (log_line_) ++*log_line_;
+                    ++line;
+                    column = 1;
+                  }
+                  else
+                    ++column;
+                }
+
+                gptr_ = p; buf_->gbump (static_cast<int> (p - b));
+              }
+              continue;
+            }
+            break; // Not a comment so return the '/' itself.
+          }
+        }
+        break; // Anything that got here ends the skipping.
+      }
+
+      return c;
+    }
+
+    ostream&
+    operator<< (ostream& o, const token& t)
+    {
+      switch (t.type) // Note: keep in sync with token_type (lexer.hxx).
+      {
+      case type::dot:         o << "'.'";                   break;
+      case type::semi:        o << "';'";                   break;
+      case type::less:        o << "'<'";                   break;
+      case type::greater:     o << "'>'";                   break;
+      case type::lcbrace:     o << "'{'";                   break;
+      case type::rcbrace:     o << "'}'";                   break;
+      case type::punctuation: o << "<punctuation>";         break;
+
+      case type::identifier:  o << '\'' << t.value << '\''; break;
+
+      case type::number:      o << "<number literal>";      break;
+      case type::character:   o << "<char literal>";        break;
+      case type::string:      o << "<string literal>";      break;
+
+      case type::other:       o << "<other>";               break;
+      case type::eos:         o << "<end of file>";         break;
+      }
+
+      return o;
+    }
+  }
+}
diff --git a/libbuild2/cc/lexer.hxx b/libbuild2/cc/lexer.hxx
new file mode 100644
index 0000000..cb2b3a5
--- /dev/null
+++ b/libbuild2/cc/lexer.hxx
@@ -0,0 +1,190 @@
+// file      : libbuild2/cc/lexer.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_LEXER_HXX
+#define LIBBUILD2_CC_LEXER_HXX
+
+#include <libbutl/sha256.mxx>
+#include <libbutl/char-scanner.mxx>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/diagnostics.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Preprocessor-level tokenization of C/C++ source. In other words, the
+    // sequence of tokens returned is similar to what a real C/C++ compiler
+    // would see from its preprocessor.
+    //
+    // The input is a (partially-)preprocessed translation unit that may still
+    // contain comments, line continuations, and preprocessor directives such
+    // as #line, #pragma, but not #include (which is diagnosed). Currently,
+    // all preprocessor directives except #line are ignored and no values are
+    // saved from literals. The #line directive (and its shorthand notation)
+    // is recognized to provide the logical token location.
+    //
+    // While at it we also calculate the checksum of the input ignoring
+    // comments, whitespace, etc. This is used to detect changes that do not
+    // alter the resulting token stream.
+    //
+    enum class token_type
+    {
+      // NOTE: remember to update operator<<() if changing anything here!
+      //
+      eos,         // End of input (printed as <end of file>).
+
+      dot,         // .
+      semi,        // ;
+      less,        // <
+      greater,     // >
+      lcbrace,     // {
+      rcbrace,     // }
+
+      punctuation, // Other punctuation.
+
+      identifier,  // Printed as the quoted token::value.
+
+      number,      // Number literal.
+      character,   // Char   literal.
+      string,      // String literal.
+
+      other        // Other token.
+    };
+
+    struct token
+    {
+      token_type type = token_type::eos;
+      string     value; // Printed for identifiers; not saved for literals.
+
+      // Logical position (file and line as set by #line directives, if any).
+      //
+      path     file;
+      uint64_t line   = 0;
+      uint64_t column = 0;
+
+      // Physical position in the stream, currently only for identifiers.
+      //
+      uint64_t position = 0;
+    };
+
+    // Output the token value in a format suitable for diagnostics.
+    //
+    ostream&
+    operator<< (ostream&, const token&);
+
+    class lexer: protected butl::char_scanner
+    {
+    public:
+      lexer (ifdstream& is, const path& name)
+          : char_scanner (is, false),
+            name_ (name),
+            fail ("error", &name_),
+            log_file_ (name) {} // Logical file starts as the physical one.
+
+      const path&
+      name () const {return name_;}
+
+      string
+      checksum () const {return cs_.string ();}
+
+      // Return the next token. Note that it is ok to call next() again after
+      // getting eos.
+      token
+      next ()
+      {
+        token t;
+        next (t, skip_spaces (), true);
+        return t;
+      }
+
+      // As above but reuse the token to avoid a (potential) memory
+      // allocation and return its type. Typical usage:
+      //
+      // for (token t; l.next (t) != token_type::eos; )
+      //   ...
+      //
+      token_type
+      next (token& t)
+      {
+        next (t, skip_spaces (), true);
+        return t.type;
+      }
+
+    private:
+      void
+      next (token&, xchar, bool);
+
+      void
+      number_literal (token&, xchar);
+
+      void
+      char_literal (token&, xchar);
+
+      void
+      string_literal (token&, xchar);
+
+      void
+      raw_string_literal (token&, xchar);
+
+      void
+      literal_suffix (xchar);
+
+      void
+      line_directive (token&, xchar);
+
+      xchar
+      skip_spaces (bool newline = true);
+
+      // The char_scanner adaptation for newline escape sequence processing.
+      // Enabled by default and is only disabled in the raw string literals.
+      //
+    private:
+      using base = char_scanner;
+
+      xchar
+      peek (bool escape = true);
+
+      xchar
+      get (bool escape = true);
+
+      void
+      get (const xchar& peeked);
+
+      // Hashing versions (presumably also feeding cs_ -- confirm in lexer.cxx).
+      //
+      xchar
+      geth (bool escape = true);
+
+      void
+      geth (const xchar& peeked);
+
+    private:
+      const path name_;     // As passed to the constructor (see name()).
+      const fail_mark fail; // Diagnostics mark bound to name_.
+
+      // Logical file and line as set by the #line directives. Note that the
+      // lexer diagnostics still use the physical file/line.
+      //
+      path               log_file_;
+      optional<uint64_t> log_line_;
+
+      string tmp_file_; // NOTE(review): presumably #line file scratch; confirm.
+      sha256 cs_;       // Checksum state (see checksum()).
+    };
+
+    // Diagnostics plumbing: map the token to its logical file/line/column.
+    //
+    inline location
+    get_location (const token& t, const void* = nullptr)
+    {
+      return location (&t.file, t.line, t.column);
+    }
+  }
+}
+
+#endif // LIBBUILD2_CC_LEXER_HXX
diff --git a/libbuild2/cc/lexer.test.cxx b/libbuild2/cc/lexer.test.cxx
new file mode 100644
index 0000000..0aeadba
--- /dev/null
+++ b/libbuild2/cc/lexer.test.cxx
@@ -0,0 +1,80 @@
+// file      : libbuild2/cc/lexer.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    // Usage: argv[0] [-l] [<file>]
+    //
+    int
+    main (int argc, char* argv[])
+    {
+      bool loc (false);           // Print the token location (-l).
+      const char* file (nullptr); // Input file or NULL for stdin.
+
+      // Parse the options stopping at the first other argument, which is
+      // taken to be the input file.
+      //
+      for (int i (1); file == nullptr && i != argc; ++i)
+      {
+        const string arg (argv[i]);
+
+        if (arg != "-l")
+          file = argv[i];
+        else
+          loc = true;
+      }
+
+      try
+      {
+        ifdstream ifs;
+        if (file == nullptr)
+        {
+          file = "stdin";
+          ifs.open (fddup (stdin_fd ()));
+        }
+        else
+          ifs.open (file);
+
+        lexer lex (ifs, path (file));
+
+        // No use printing eos since we will either get it or loop forever.
+        //
+        for (token t; lex.next (t) != token_type::eos; )
+        {
+          cout << t;
+
+          if (loc)
+            cout << ' ' << t.file << ':' << t.line << ':' << t.column;
+
+          cout << endl;
+        }
+      }
+      catch (const failed&)
+      {
+        return 1;
+      }
+
+      return 0;
+    }
+  }
+}
+
+int
+main (int argc, char* argv[])
+{
+  return build2::cc::main (argc, argv); // Delegate to the driver above.
+}
diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx
new file mode 100644
index 0000000..110a992
--- /dev/null
+++ b/libbuild2/cc/link-rule.cxx
@@ -0,0 +1,3043 @@
+// file      : libbuild2/cc/link-rule.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/link-rule.hxx>
+
+#include <map>
+#include <cstdlib>  // exit()
+#include <cstring>  // strlen()
+
+#include <libbutl/filesystem.mxx> // file_exists()
+
+#include <libbuild2/depdb.hxx>
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/target.hxx>  // c, pc*
+#include <libbuild2/cc/utility.hxx>
+
+using std::map;
+using std::exit;
+
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    link_rule::
+    link_rule (data&& d)
+        : common (move (d)),
+          rule_id (string (x) + ".link 1")
+    {
+      static_assert (target::data_size >= sizeof (match_data),
+                     "insufficient space"); // match_data must fit in target.
+    }
+
+    link_rule::match_result link_rule::
+    match (action a,
+           const target& t,
+           const target* g,
+           otype ot,
+           bool library) const
+    {
+      // NOTE: the target may be a group (see utility library logic below).
+
+      match_result r;
+
+      // Scan prerequisites and see if we can work with what we've got. Note
+      // that X could be C (as in language). We handle this by always checking
+      // for X first.
+      //
+      // Note also that we treat bmi{} as obj{}. @@ MODHDR hbmi{}?
+      //
+      for (prerequisite_member p:
+             prerequisite_members (a, t, group_prerequisites (t, g)))
+      {
+        // If excluded or ad hoc, then don't factor it into our tests.
+        //
+        if (include (a, t, p) != include_type::normal)
+          continue;
+
+        if (p.is_a (x_src)                        ||
+            (x_mod != nullptr && p.is_a (*x_mod)) ||
+            // Header-only X library (or library with C source and X header).
+            (library          && x_header (p, false /* c_hdr */)))
+        {
+          r.seen_x = r.seen_x || true;
+        }
+        else if (p.is_a<c> ()            ||
+                 // Header-only C library.
+                 (library && p.is_a<h> ()))
+        {
+          r.seen_c = r.seen_c || true;
+        }
+        else if (p.is_a<obj> () || p.is_a<bmi> ())
+        {
+          r.seen_obj = r.seen_obj || true;
+        }
+        else if (p.is_a<obje> () || p.is_a<bmie> ())
+        {
+          // We can make these "no-match" if/when there is a valid use case.
+          //
+          if (ot != otype::e)
+            fail << p.type ().name << "{} as prerequisite of " << t;
+
+          r.seen_obj = r.seen_obj || true;
+        }
+        else if (p.is_a<obja> () || p.is_a<bmia> ())
+        {
+          if (ot != otype::a)
+            fail << p.type ().name << "{} as prerequisite of " << t;
+
+          r.seen_obj = r.seen_obj || true;
+        }
+        else if (p.is_a<objs> () || p.is_a<bmis> ())
+        {
+          if (ot != otype::s)
+            fail << p.type ().name << "{} as prerequisite of " << t;
+
+          r.seen_obj = r.seen_obj || true;
+        }
+        else if (p.is_a<libul> () || p.is_a<libux> ())
+        {
+          // For a utility library we look at its prerequisites, recursively.
+          // Since these checks are not exactly light-weight, only do them if
+          // we haven't already seen any X prerequisites.
+          //
+          if (!r.seen_x)
+          {
+            // This is a bit iffy: in our model a rule can only search a
+            // target's prerequisites if it matches. But we don't yet know
+            // whether we match. However, it seems correct to assume that any
+            // rule-specific search will always resolve to an existing target
+            // if there is one. So perhaps it's time to relax this restriction
+            // a little? Note that this fits particularly well with what we
+            // are doing here since if there is no existing target, there can
+            // be no prerequisites.
+            //
+            // Note, however, that we cannot linkup a prerequisite target
+            // member to its group since we are not matching this target. As
+            // a result we have to do all the steps except for setting t.group
+            // and pass both member and group (we also cannot query t.group
+            // since it's racy).
+            //
+            const target* pg (nullptr);
+            const target* pt (p.search_existing ());
+
+            if (p.is_a<libul> ())
+            {
+              if (pt != nullptr)
+              {
+                // If this is a group then try to pick (again, if exists) a
+                // suitable member. If it doesn't exist, then we will only be
+                // considering the group's prerequisites.
+                //
+                if (const target* pm =
+                    link_member (pt->as<libul> (),
+                                 a,
+                                 linfo {ot, lorder::a /* unused */},
+                                 true /* existing */))
+                {
+                  pg = pt;
+                  pt = pm;
+                }
+              }
+              else
+              {
+                // It's possible we have no group but have a member so try
+                // that.
+                //
+                const target_type& tt (ot == otype::a ? libua::static_type :
+                                       ot == otype::s ? libus::static_type :
+                                       libue::static_type);
+
+                // We know this prerequisite member is a prerequisite since
+                // otherwise the above search would have returned the member
+                // target.
+                //
+                pt = search_existing (t.ctx, p.prerequisite.key (tt));
+              }
+            }
+            else if (!p.is_a<libue> ())
+            {
+              // See if we also/instead have a group.
+              //
+              pg = search_existing (t.ctx,
+                                    p.prerequisite.key (libul::static_type));
+
+              if (pt == nullptr)
+                swap (pt, pg);
+            }
+
+            if (pt != nullptr)
+            {
+              // If we are matching a target, use the original output type
+              // since that would be the member that we pick.
+              //
+              otype pot (pt->is_a<libul> () ? ot : link_type (*pt).type);
+              match_result pr (match (a, *pt, pg, pot, true /* lib */));
+
+              // Do we need to propagate any other seen_* values? Hm, that
+              // would in fact match with the "see-through" semantics of
+              // utility libraries we have in other places.
+              //
+              r.seen_x = pr.seen_x;
+            }
+            else
+              r.seen_lib = r.seen_lib || true; // Consider as just a library.
+          }
+        }
+        else if (p.is_a<lib> ()  ||
+                 p.is_a<liba> () ||
+                 p.is_a<libs> ())
+        {
+          r.seen_lib = r.seen_lib || true;
+        }
+        // Some other c-common header/source (say C++ in a C rule) other than
+        // a C header (we assume everyone can handle that).
+        //
+        else if (p.is_a<cc> () && !(x_header (p, true /* c_hdr */)))
+        {
+          r.seen_cc = true;
+          break;
+        }
+      }
+
+      return r;
+    }
+
+    bool link_rule::
+    match (action a, target& t, const string& hint) const
+    {
+      // NOTE: may be called multiple times and for both inner and outer
+      //       operations (see the install rules).
+
+      tracer trace (x, "link_rule::match");
+
+      ltype lt (link_type (t));
+
+      // Link a group member library up to its group. This is part of the
+      // target group protocol and so can be done whether we match or not.
+      //
+      // For the outer operation (see install rules) delegate to the inner
+      // one via resolve_group().
+      //
+      if (lt.member_library ())
+      {
+        if (a.outer ())
+          resolve_group (a, t);
+        else if (t.group == nullptr)
+        {
+          const target_type& gtt (
+            lt.utility ? libul::static_type : lib::static_type);
+          t.group = &search (t, gtt, t.dir, t.out, t.name);
+        }
+      }
+
+      match_result mr (match (a, t, t.group, lt.type, lt.library ()));
+
+      // Some other c-common header/source (say C++ in a C rule) is not for
+      // us to handle (it may need to be compiled, etc).
+      //
+      if (mr.seen_cc)
+      {
+        l4 ([&]{trace << "non-" << x_lang << " prerequisite "
+                      << "for target " << t;});
+        return false;
+      }
+
+      if (!mr.seen_x && !mr.seen_c && !mr.seen_obj && !mr.seen_lib)
+      {
+        l4 ([&]{trace << "no " << x_lang << ", C, or obj/lib prerequisite "
+                      << "for target " << t;});
+        return false;
+      }
+
+      // Only chain a C source if there is also an X source or we were
+      // explicitly told to (via the hint).
+      //
+      if (mr.seen_c && !mr.seen_x && hint < x)
+      {
+        l4 ([&]{trace << "C prerequisite without " << x_lang << " or hint "
+                      << "for target " << t;});
+        return false;
+      }
+
+      return true;
+    }
+
+    // Derive the paths for the shared library target t: the real path (set
+    // on t via derive_path()), the link and load names (only if they differ
+    // from the name that follows), the import library path on Windows, and
+    // the clean pattern. The prefix/suffix (pfx/sfx) can be NULL or empty.
+    //
+    auto link_rule::
+    derive_libs_paths (file& t,
+                       const char* pfx,
+                       const char* sfx) const -> libs_paths
+    {
+      bool win (tclass == "windows");
+
+      // Get default prefix and extension.
+      //
+      const char* ext (nullptr);
+      if (win)
+      {
+        if (tsys == "mingw32" && pfx == nullptr)
+          pfx = "lib";
+
+        ext = "dll";
+      }
+      else
+      {
+        if (pfx == nullptr)
+          pfx = "lib";
+
+        ext = tclass == "macos" ? "dylib" : "so";
+      }
+
+      // First sort out which extension we are using.
+      //
+      const string& e (t.derive_extension (ext));
+
+      auto append_ext = [&e] (path& p)
+      {
+        if (!e.empty ())
+        {
+          p += '.';
+          p += e;
+        }
+      };
+
+      // See if we have the load suffix.
+      //
+      const string& ls (cast_empty<string> (t["bin.lib.load_suffix"]));
+
+      // Figure out the version.
+      //
+      string ver;
+      using version_map = map<string, string>;
+      if (const version_map* m = cast_null<version_map> (t["bin.lib.version"]))
+      {
+        // First look for the target system.
+        //
+        auto i (m->find (tsys));
+
+        // Then look for the target class.
+        //
+        if (i == m->end ())
+          i = m->find (tclass);
+
+        // Then look for the wildcard. Since it is highly unlikely one can have
+        // a version that will work across platforms, this is only useful to
+        // say "all others -- no version".
+        //
+        if (i == m->end ())
+          i = m->find ("*");
+
+        // At this stage the only platform-specific version we support is the
+        // "no version" override.
+        //
+        if (i != m->end () && !i->second.empty ())
+          fail << i->first << "-specific bin.lib.version not yet supported";
+
+        // Finally look for the platform-independent version.
+        //
+        if (i == m->end ())
+          i = m->find ("");
+
+        // If we didn't find anything, fail. If the bin.lib.version was
+        // specified, then it should explicitly handle all the targets.
+        //
+        if (i == m->end ())
+          fail << "no version for " << ctgt << " in bin.lib.version" <<
+            info << "consider adding " << tsys << "@<ver> or " << tclass
+               << "@<ver>";
+
+        ver = i->second;
+      }
+
+      // Now determine the paths. Note that so and in are not derived here
+      // and are returned empty.
+      //
+      path lk, ld, so, in;
+
+      // We start with the basic path.
+      //
+      path b (t.dir);
+
+      if (pfx != nullptr && pfx[0] != '\0')
+      {
+        b /= pfx;
+        b += t.name;
+      }
+      else
+        b /= t.name;
+
+      if (sfx != nullptr && sfx[0] != '\0')
+        b += sfx;
+
+      // Clean pattern.
+      //
+      path cp (b);
+      cp += "?*"; // Don't match empty (like the libfoo.so symlink).
+      append_ext (cp);
+
+      // On Windows the real path is to libs{} and the link path is empty.
+      // Note that we still need to derive the import library path.
+      //
+      if (win)
+      {
+        // Usually on Windows with MSVC the import library is called the same
+        // as the DLL but with the .lib extension. Which means it clashes with
+        // the static library. Instead of decorating the static library name
+        // with ugly suffixes (as is customary), let's use the MinGW approach
+        // (one must admit it's quite elegant) and call it .dll.lib.
+        //
+        libi& i (*find_adhoc_member<libi> (t)); // NOTE: assumed to exist.
+
+        if (i.path ().empty ())
+        {
+          path ip (b);
+          append_ext (ip);
+          i.derive_path (move (ip), tsys == "mingw32" ? "a" : "lib");
+        }
+      }
+      // We will only need the link name if the following name differs.
+      //
+      else if (!ver.empty () || !ls.empty ())
+      {
+        lk = b;
+        append_ext (lk);
+      }
+
+      // See if we have the load suffix.
+      //
+      if (!ls.empty ())
+      {
+        b += ls;
+
+        // We will only need the load name if the following name differs.
+        //
+        if (!ver.empty ())
+        {
+          ld = b;
+          append_ext (ld);
+        }
+      }
+
+      if (!ver.empty ())
+        b += ver;
+
+      const path& re (t.derive_path (move (b)));
+
+      return libs_paths {
+        move (lk), move (ld), move (so), move (in), &re, move (cp)};
+    }
+
+    // Look for a binary-full (non-empty path) utility library among t's
+    // prerequisite targets, recursively, until a non-utility "barrier".
+    //
+    static bool
+    find_binfull (action a, const target& t, linfo li)
+    {
+      for (const target* pt: t.prerequisite_targets[a])
+      {
+        if (pt == nullptr || unmark (pt) != 0) // Called after pass 1 below.
+          continue;
+
+        const file* pf;
+
+        // If this is the libu*{} group, then pick the appropriate member.
+        //
+        if (const libul* ul = pt->is_a<libul> ())
+        {
+          pf = &link_member (*ul, a, li)->as<file> ();
+        }
+        else if ((pf = pt->is_a<libue> ()) ||
+                 (pf = pt->is_a<libus> ()) ||
+                 (pf = pt->is_a<libua> ()))
+          ;
+        else
+          continue;
+
+        if (!pf->path ().empty () || find_binfull (a, *pf, li))
+          return true;
+      }
+
+      return false;
+    }
+
+    recipe link_rule::
+    apply (action a, target& xt) const
+    {
+      tracer trace (x, "link_rule::apply");
+
+      file& t (xt.as<file> ());
+      context& ctx (t.ctx);
+
+      // Note that for_install is signalled by install_rule and therefore
+      // can only be relied upon during execute.
+      //
+      match_data& md (t.data (match_data ()));
+
+      const scope& bs (t.base_scope ());
+      const scope& rs (*bs.root_scope ());
+
+      ltype lt (link_type (t));
+      otype ot (lt.type);
+      linfo li (link_info (bs, ot));
+
+      // Set the library type (C, C++, etc) as rule-specific variable.
+      //
+      if (lt.library ())
+        t.state[a].assign (c_type) = string (x);
+
+      bool binless (lt.library ()); // Binary-less until proven otherwise.
+
+      // Inject dependency on the output directory. Note that we do it even
+      // for binless libraries since there could be other output (e.g., .pc
+      // files).
+      //
+      inject_fsdir (a, t);
+
+      // Process prerequisites, pass 1: search and match prerequisite
+      // libraries, search obj/bmi{} targets, and search targets we do rule
+      // chaining for.
+      //
+      // Also clear the binless flag if we see any source or object files.
+      // Note that if we don't see any this still doesn't mean the library is
+      // binless since it can depend on a binfull utility library. This we
+      // check below, after matching the libraries.
+      //
+      // We do libraries first in order to indicate that we will execute these
+      // targets before matching any of the obj/bmi{}. This makes it safe for
+      // compile::apply() to unmatch them and therefore not to hinder
+      // parallelism.
+      //
+      // We also create obj/bmi{} chain targets because we need to add
+      // (similar to lib{}) all the bmi{} as prerequisites to all the other
+      // obj/bmi{} that we are creating. Note that this doesn't mean that the
+      // compile rule will actually treat them all as prerequisite targets.
+      // Rather, they are used to resolve actual module imports. We don't
+      // really have to search obj{} targets here but it's the same code so we
+      // do it here to avoid duplication.
+      //
+      // Also, when cleaning, we ignore prerequisites that are not in the same
+      // or a subdirectory of our project root. Except for libraries: if we
+      // ignore them, then they won't be added to synthesized dependencies and
+      // this will break things if we do, say, update after clean in the same
+      // invocation. So for libraries we ignore them later, on pass 3.
+      //
+      optional<dir_paths> usr_lib_dirs; // Extract lazily.
+      compile_target_types tts (compile_types (ot));
+
+      auto skip = [&a, &rs] (const target* pt) -> bool
+      {
+        return a.operation () == clean_id && !pt->dir.sub (rs.out_path ());
+      };
+
+      auto& pts (t.prerequisite_targets[a]);
+      size_t start (pts.size ());
+
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        include_type pi (include (a, t, p));
+
+        // We pre-allocate a NULL slot for each (potential; see clean)
+        // prerequisite target.
+        //
+        pts.push_back (prerequisite_target (nullptr, pi));
+        const target*& pt (pts.back ());
+
+        if (pi != include_type::normal) // Skip excluded and ad hoc.
+          continue;
+
+        // Mark:
+        //   0 - lib
+        //   1 - src
+        //   2 - mod
+        //   3 - obj/bmi and also lib not to be cleaned
+        //
+        uint8_t m (0);
+
+        bool mod (x_mod != nullptr && p.is_a (*x_mod));
+
+        if (mod || p.is_a (x_src) || p.is_a<c> ())
+        {
+          binless = binless && false;
+
+          // Rule chaining, part 1.
+          //
+
+          // Which scope shall we use to resolve the root? Unlikely, but
+          // possible, the prerequisite is from a different project
+          // altogether. So we are going to use the target's project.
+          //
+
+          // If the source came from the lib{} group, then create the obj{}
+          // group and add the source as a prerequisite of the obj{} group,
+          // not the obj*{} member. This way we only need one prerequisite
+          // for, say, both liba{} and libs{}. The same goes for bmi{}.
+          //
+          bool group (!p.prerequisite.belongs (t)); // Group's prerequisite.
+
+          const target_type& rtt (mod
+                                  ? (group ? bmi::static_type : tts.bmi)
+                                  : (group ? obj::static_type : tts.obj));
+
+          const prerequisite_key& cp (p.key ()); // Source key.
+
+          // Come up with the obj*/bmi*{} target. The source prerequisite
+          // directory can be relative (to the scope) or absolute. If it is
+          // relative, then use it as is. If absolute, then translate it to
+          // the corresponding directory under out_root. While the source
+          // directory is most likely under src_root, it is also possible it
+          // is under out_root (e.g., generated source).
+          //
+          dir_path d;
+          {
+            const dir_path& cpd (*cp.tk.dir);
+
+            if (cpd.relative () || cpd.sub (rs.out_path ()))
+              d = cpd;
+            else
+            {
+              if (!cpd.sub (rs.src_path ()))
+                fail << "out of project prerequisite " << cp <<
+                  info << "specify corresponding " << rtt.name << "{} "
+                     << "target explicitly";
+
+              d = rs.out_path () / cpd.leaf (rs.src_path ());
+            }
+          }
+
+          // obj/bmi{} is always in the out tree. Note that currently it could
+          // be the group -- we will pick a member in part 2 below.
+          //
+          pt = &search (t, rtt, d, dir_path (), *cp.tk.name, nullptr, cp.scope);
+
+          // If we shouldn't clean obj{}, then it is fair to assume we
+          // shouldn't clean the source either (generated source will be in
+          // the same directory as obj{} and if not, well, go find yourself
+          // another build system ;-)).
+          //
+          if (skip (pt))
+          {
+            pt = nullptr;
+            continue;
+          }
+
+          m = mod ? 2 : 1;
+        }
+        else if (p.is_a<libx> () ||
+                 p.is_a<liba> () ||
+                 p.is_a<libs> () ||
+                 p.is_a<libux> ())
+        {
+          // Handle imported libraries.
+          //
+          // Note that since the search is rule-specific, we don't cache the
+          // target in the prerequisite.
+          //
+          if (p.proj ())
+            pt = search_library (
+              a, sys_lib_dirs, usr_lib_dirs, p.prerequisite);
+
+          // The rest is the same basic logic as in search_and_match().
+          //
+          if (pt == nullptr)
+            pt = &p.search (t);
+
+          if (skip (pt))
+            m = 3; // Mark so it is not matched.
+
+          // If this is the lib{}/libu{} group, then pick the appropriate
+          // member.
+          //
+          if (const libx* l = pt->is_a<libx> ())
+            pt = link_member (*l, a, li);
+        }
+        else
+        {
+          // If this is the obj{} or bmi{} target group, then pick the
+          // appropriate member.
+          //
+          if      (p.is_a<obj> ()) pt = &search (t, tts.obj, p.key ());
+          else if (p.is_a<bmi> ()) pt = &search (t, tts.bmi, p.key ());
+          //
+          // Windows module definition (.def). For other platforms (and for
+          // static libraries) treat it as an ordinary prerequisite.
+          //
+          else if (p.is_a<def> () && tclass == "windows" && ot != otype::a)
+          {
+            pt = &p.search (t);
+          }
+          //
+          // Something else. This could be something unrelated that the user
+          // tacked on (e.g., a doc{}). Or it could be some ad hoc input to
+          // the linker (say a linker script or some such).
+          //
+          else
+          {
+            if (!p.is_a<objx> () && !p.is_a<bmix> ())
+            {
+              // @@ Temporary hack until we get the default outer operation
+              // for update. This allows operations like test and install to
+              // skip such tacked on stuff.
+              //
+              // Note that ad hoc inputs have to be explicitly marked with the
+              // include=adhoc prerequisite-specific variable.
+              //
+              if (ctx.current_outer_oif != nullptr)
+                continue;
+            }
+
+            pt = &p.search (t);
+          }
+
+          if (skip (pt))
+          {
+            pt = nullptr;
+            continue;
+          }
+
+          // @@ MODHDR: hbmix{} has no objx{}
+          //
+          binless = binless && !(pt->is_a<objx> () || pt->is_a<bmix> ());
+
+          m = 3;
+        }
+
+        mark (pt, m);
+      }
+
+      // Match lib{} (the only unmarked) in parallel and wait for completion.
+      //
+      match_members (a, t, pts, start);
+
+      // Check if we have any binfull utility libraries.
+      //
+      binless = binless && !find_binfull (a, t, li);
+
+      // Now that we know for sure whether we are binless, derive file name(s)
+      // and add ad hoc group members. Note that for binless we still need the
+      // .pc member (whose name depends on the library prefix) so we take care
+      // to not derive the path for the library target itself inside.
+      //
+      {
+        const char* e (nullptr); // Extension.
+        const char* p (nullptr); // Prefix.
+        const char* s (nullptr); // Suffix.
+
+        if (lt.utility)
+        {
+          // These are all static libraries with names indicating the kind of
+          // object files they contain (similar to how we name object files
+          // themselves). We add the 'u' extension to avoid clashes with
+          // real libraries/import stubs.
+          //
+          // libue  libhello.u.a     hello.exe.u.lib
+          // libua  libhello.a.u.a   hello.lib.u.lib
+          // libus  libhello.so.u.a  hello.dll.u.lib  hello.dylib.u.lib
+          //
+          // Note that we currently don't add bin.lib.{prefix,suffix} since
+          // these are not installed.
+          //
+          if (tsys == "win32-msvc")
+          {
+            switch (ot)
+            {
+            case otype::e: e = "exe.u.lib"; break;
+            case otype::a: e = "lib.u.lib"; break;
+            case otype::s: e = "dll.u.lib"; break;
+            }
+          }
+          else
+          {
+            p = "lib";
+
+            if (tsys == "mingw32")
+            {
+              switch (ot)
+              {
+              case otype::e: e = "exe.u.a"; break;
+              case otype::a: e = "a.u.a";   break;
+              case otype::s: e = "dll.u.a"; break;
+              }
+
+            }
+            else if (tsys == "darwin")
+            {
+              switch (ot)
+              {
+              case otype::e: e = "u.a";       break;
+              case otype::a: e = "a.u.a";     break;
+              case otype::s: e = "dylib.u.a"; break;
+              }
+            }
+            else
+            {
+              switch (ot)
+              {
+              case otype::e: e = "u.a";    break;
+              case otype::a: e = "a.u.a";  break;
+              case otype::s: e = "so.u.a"; break;
+              }
+            }
+          }
+
+          if (binless)
+            t.path (empty_path);
+          else
+            t.derive_path (e, p, s);
+        }
+        else
+        {
+          if (auto l = t[ot == otype::e ? "bin.exe.prefix" : "bin.lib.prefix"])
+            p = cast<string> (l).c_str ();
+          if (auto l = t[ot == otype::e ? "bin.exe.suffix" : "bin.lib.suffix"])
+            s = cast<string> (l).c_str ();
+
+          switch (ot)
+          {
+          case otype::e:
+            {
+              if (tclass == "windows")
+                e = "exe";
+              else
+                e = "";
+
+              t.derive_path (e, p, s);
+              break;
+            }
+          case otype::a:
+            {
+              if (tsys == "win32-msvc")
+                e = "lib";
+              else
+              {
+                if (p == nullptr) p = "lib";
+                e = "a";
+              }
+
+              if (binless)
+                t.path (empty_path);
+              else
+                t.derive_path (e, p, s);
+
+              break;
+            }
+          case otype::s:
+            {
+              if (binless)
+                t.path (empty_path);
+              else
+              {
+                // On Windows libs{} is an ad hoc group. The libs{} itself is
+                // the DLL and we add libi{} import library as its member.
+                //
+                if (tclass == "windows")
+                {
+                  e = "dll";
+                  add_adhoc_member<libi> (t);
+                }
+
+                md.libs_paths = derive_libs_paths (t, p, s);
+              }
+
+              break;
+            }
+          }
+
+          // Add VC's .pdb. Note that we are looking for the link.exe /DEBUG
+          // option.
+          //
+          if (!binless && ot != otype::a && tsys == "win32-msvc")
+          {
+            if (find_option ("/DEBUG", t, c_loptions, true) ||
+                find_option ("/DEBUG", t, x_loptions, true))
+            {
+              const target_type& tt (*bs.find_target_type ("pdb"));
+
+              // We call the target foo.{exe,dll}.pdb rather than just foo.pdb
+              // because we can have both foo.exe and foo.dll in the same
+              // directory.
+              //
+              file& pdb (add_adhoc_member<file> (t, tt, e));
+
+              // Note that the path is derived from the exe/dll path (so it
+              // will include the version in case of a dll).
+              //
+              if (pdb.path ().empty ())
+                pdb.derive_path (t.path (), "pdb");
+            }
+          }
+
+          // Add pkg-config's .pc file.
+          //
+          // Note that we do it regardless of whether we are installing or not
+          // for two reasons. Firstly, it is not easy to detect this situation
+          // here since the for_install hasn't yet been communicated by
+          // install_rule. Secondly, always having this member takes care of
+          // cleanup automagically. The actual generation happens in
+          // perform_update() below.
+          //
+          if (ot != otype::e)
+          {
+            file& pc (add_adhoc_member<file> (t,
+                                              (ot == otype::a
+                                               ? pca::static_type
+                                               : pcs::static_type)));
+
+            // Note that here we always use the lib name prefix, even on
+            // Windows with VC. The reason is the user needs a consistent name
+            // across platforms by which they can refer to the library. This
+            // is also the reason why we use the .static and .shared second-
+            // level extensions rather than a./.lib and .so/.dylib/.dll.
+            //
+            if (pc.path ().empty ())
+              pc.derive_path (nullptr, (p == nullptr ? "lib" : p), s);
+          }
+
+          // Add the Windows rpath emulating assembly directory as fsdir{}.
+          //
+          // Currently this is used in the backlinking logic and in the future
+          // could also be used for clean (though there we may want to clean
+          // old assemblies).
+          //
+          if (ot == otype::e && tclass == "windows")
+          {
+            // Note that here we cannot determine whether we will actually
+            // need one (for_install, library timestamps are not available at
+            // this point to call windows_rpath_timestamp()). So we may add
+            // the ad hoc target but actually not produce the assembly. So
+            // whomever relies on this must check if the directory actually
+            // exists (windows_rpath_assembly() does take care to clean it up
+            // if not used).
+            //
+#ifdef _WIN32
+            target& dir =
+#endif
+              add_adhoc_member (t,
+                                fsdir::static_type,
+                                path_cast<dir_path> (t.path () + ".dlls"),
+                                t.out,
+                                string () /* name */);
+
+            // By default our backlinking logic will try to symlink the
+            // directory and it can even be done on Windows using junctions.
+            // The problem is the Windows DLL assembly "logic" refuses to
+            // recognize a junction as a valid assembly for some reason. So we
+            // are going to resort to copy-link (i.e., a real directory with a
+            // bunch of links).
+            //
+            // Interestingly, the directory symlink works just fine under
+            // Wine. So we only resort to copy-link'ing if we are running on
+            // Windows.
+            //
+#ifdef _WIN32
+            dir.state[a].assign (ctx.var_backlink) = "copy";
+#endif
+          }
+        }
+      }
+
+      // Process prerequisites, pass 2: finish rule chaining but don't start
+      // matching anything yet since that may trigger recursive matching of
+      // bmi{} targets we haven't completed yet. Hairy, I know.
+      //
+
+      // Parallel prerequisites/prerequisite_targets loop.
+      //
+      size_t i (start);
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        const target*& pt (pts[i].target);
+        uintptr_t&     pd (pts[i++].data);
+
+        if (pt == nullptr)
+          continue;
+
+        // New mark:
+        //  1 - completion
+        //  2 - verification
+        //
+        uint8_t m (unmark (pt));
+
+        if (m == 3)                // obj/bmi or lib not to be cleaned
+        {
+          m = 1; // Just completion.
+
+          // Note that if this is a library not to be cleaned, we keep it
+          // marked for completion (see the next phase).
+        }
+        else if (m == 1 || m == 2) // Source/module chain.
+        {
+          bool mod (m == 2);
+
+          m = 1;
+
+          const target& rt (*pt);
+          bool group (!p.prerequisite.belongs (t)); // Group's prerequisite.
+
+          // If we have created a obj/bmi{} target group, pick one of its
+          // members; the rest would be primarily concerned with it.
+          //
+          pt =
+            group
+            ? &search (t, (mod ? tts.bmi : tts.obj), rt.dir, rt.out, rt.name)
+            : &rt;
+
+          const target_type& rtt (mod
+                                  ? (group ? bmi::static_type : tts.bmi)
+                                  : (group ? obj::static_type : tts.obj));
+
+          // If this obj*{} already has prerequisites, then verify they are
+          // "compatible" with what we are doing here. Otherwise, synthesize
+          // the dependency. Note that we may also end up synthesizing with
+          // someone beating us to it. In this case also verify.
+          //
+          bool verify (true);
+
+          // Note that we cannot use has_group_prerequisites() since the
+          // target is not yet matched. So we check the group directly. Of
+          // course, all of this is racy (see below).
+          //
+          if (!pt->has_prerequisites () &&
+              (!group || !rt.has_prerequisites ()))
+          {
+            prerequisites ps {p.as_prerequisite ()}; // Source.
+
+            // Add our lib*{} (see the export.* machinery for details) and
+            // bmi*{} (both original and chained; see module search logic)
+            // prerequisites.
+            //
+            // Note that we don't resolve lib{} to liba{}/libs{} here
+            // instead leaving it to whomever (e.g., the compile rule) will
+            // be needing *.export.*. One reason for doing it there is that
+            // the object target might be specified explicitly by the user
+            // in which case they will have to specify the set of lib{}
+            // prerequisites and it's much cleaner to do as lib{} rather
+            // than liba{}/libs{}.
+            //
+            // Initially, we were only adding imported libraries, but there
+            // is a problem with this approach: the non-imported library
+            // might depend on the imported one(s) which we will never "see"
+            // unless we start with this library.
+            //
+            // Note: have similar logic in make_module_sidebuild().
+            //
+            size_t j (start);
+            for (prerequisite_member p: group_prerequisite_members (a, t))
+            {
+              const target* pt (pts[j++]);
+
+              if (pt == nullptr) // Note: ad hoc is taken care of.
+                continue;
+
+              // NOTE: pt may be marked (even for a library -- see clean
+              // above). So watch out for a faux pas in this careful dance.
+              //
+              if (p.is_a<libx> () ||
+                  p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> () ||
+                  p.is_a<bmi> ()  || p.is_a (tts.bmi))
+              {
+                ps.push_back (p.as_prerequisite ());
+              }
+              else if (x_mod != nullptr && p.is_a (*x_mod)) // Chained module.
+              {
+                // Searched during pass 1 but can be NULL or marked.
+                //
+                if (pt != nullptr && i != j) // Don't add self (note: both +1).
+                {
+                  // This is sticky: pt might have come before us and if it
+                  // was a group, then we would have picked up a member. So
+                  // here we may have to "unpick" it.
+                  //
+                  bool group (j < i && !p.prerequisite.belongs (t));
+
+                  unmark (pt);
+                  ps.push_back (prerequisite (group ? *pt->group : *pt));
+                }
+              }
+            }
+
+            // Note: adding to the group, not the member.
+            //
+            verify = !rt.prerequisites (move (ps));
+
+            // Recheck that the target still has no prerequisites. If that's
+            // no longer the case, then verify the result is compatible with
+            // what we need.
+            //
+            // Note that there are scenarios where we will not detect this or
+            // the detection will be racy. For example, thread 1 adds the
+            // prerequisite to the group and then thread 2, which doesn't use
+            // the group, adds the prerequisite to the member. This could be
+            // triggered by something like this (undetectable):
+            //
+            // lib{foo}: cxx{foo}
+            // exe{foo}: cxx{foo}
+            //
+            // Or this (detection is racy):
+            //
+            // lib{bar}: cxx{foo}
+            // liba{baz}: cxx{foo}
+            //
+            // The current feeling, however, is that in non-contrived cases
+            // (i.e., the source file is the same) this should be harmless.
+            //
+            if (!verify && group)
+              verify = pt->has_prerequisites ();
+          }
+
+          if (verify)
+          {
+            // This gets a bit tricky. We need to make sure the source files
+            // are the same which we can only do by comparing the targets to
+            // which they resolve. But we cannot search ot's prerequisites --
+            // only the rule that matches can. Note, however, that if all this
+            // works out, then our next step is to match the obj*{} target. If
+            // things don't work out, then we fail, in which case searching
+            // and matching speculatively doesn't really hurt. So we start the
+            // async match here and finish this verification in the "harvest"
+            // loop below.
+            //
+            resolve_group (a, *pt); // Not matched yet so resolve group.
+
+            bool src (false);
+            for (prerequisite_member p1: group_prerequisite_members (a, *pt))
+            {
+              // Most of the time we will have just a single source so fast-
+              // path that case.
+              //
+              if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ())
+              {
+                src = true;
+                continue; // Check the rest of the prerequisites.
+              }
+
+              // Ignore some known target types (fsdir, headers, libraries,
+              // modules).
+              //
+              if (p1.is_a<fsdir> ()                                         ||
+                  p1.is_a<libx>  ()                                         ||
+                  p1.is_a<liba> () || p1.is_a<libs> () || p1.is_a<libux> () ||
+                  p1.is_a<bmi>  () || p1.is_a<bmix> ()                      ||
+                  (p.is_a (mod ? *x_mod : x_src) && x_header (p1))          ||
+                  (p.is_a<c> () && p1.is_a<h> ()))
+                continue;
+
+              fail << "synthesized dependency for prerequisite " << p
+                   << " would be incompatible with existing target " << *pt <<
+                info << "unexpected existing prerequisite type " << p1 <<
+                info << "specify corresponding " << rtt.name << "{} "
+                   << "dependency explicitly";
+            }
+
+            if (!src)
+              fail << "synthesized dependency for prerequisite " << p
+                   << " would be incompatible with existing target " << *pt <<
+                info << "no existing c/" << x_name << " source prerequisite" <<
+                info << "specify corresponding " << rtt.name << "{} "
+                   << "dependency explicitly";
+
+            m = 2; // Needs verification.
+          }
+        }
+        else // lib*{}
+        {
+          // If this is a static library, see if we need to link it whole.
+          // Note that we have to do it after match since we rely on the
+          // group link-up.
+          //
+          bool u;
+          if ((u = pt->is_a<libux> ()) || pt->is_a<liba> ())
+          {
+            const variable& var (ctx.var_pool["bin.whole"]); // @@ Cache.
+
+            // See the bin module for the lookup semantics discussion. Note
+            // that the variable is not overridable so we omit find_override()
+            // calls.
+            //
+            lookup l (p.prerequisite.vars[var]);
+
+            if (!l.defined ())
+              l = pt->find_original (var, true).first;
+
+            if (!l.defined ())
+            {
+              bool g (pt->group != nullptr);
+              l = bs.find_original (var,
+                                    &pt->type (),
+                                    &pt->name,
+                                    (g ? &pt->group->type () : nullptr),
+                                    (g ? &pt->group->name : nullptr)).first;
+            }
+
+            if (l ? cast<bool> (*l) : u)
+              pd |= lflag_whole;
+          }
+        }
+
+        mark (pt, m);
+      }
+
+      // Process prerequisites, pass 3: match everything and verify chains.
+      //
+
+      // Wait with unlocked phase to allow phase switching.
+      //
+      wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true);
+
+      i = start;
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        bool adhoc (pts[i].adhoc);
+        const target*& pt (pts[i++]);
+
+        uint8_t m;
+
+        if (pt == nullptr)
+        {
+          // Handle ad hoc prerequisites.
+          //
+          if (!adhoc)
+            continue;
+
+          pt = &p.search (t);
+          m = 1; // Mark for completion.
+        }
+        else if ((m = unmark (pt)) != 0)
+        {
+          // If this is a library not to be cleaned, we can finally blank it
+          // out.
+          //
+          if (skip (pt))
+          {
+            pt = nullptr;
+            continue;
+          }
+        }
+
+        match_async (a, *pt, ctx.count_busy (), t[a].task_count);
+        mark (pt, m);
+      }
+
+      wg.wait ();
+
+      // The "harvest" loop: finish matching the targets we have started. Note
+      // that we may have bailed out early (thus the parallel i/n for-loop).
+      //
+      i = start;
+      for (prerequisite_member p: group_prerequisite_members (a, t))
+      {
+        const target*& pt (pts[i++]);
+
+        // Skipped or not marked for completion.
+        //
+        uint8_t m;
+        if (pt == nullptr || (m = unmark (pt)) == 0)
+          continue;
+
+        build2::match (a, *pt);
+
+        // Nothing else to do if not marked for verification.
+        //
+        if (m == 1)
+          continue;
+
+        // Finish verifying the existing dependency (which is now matched)
+        // compared to what we would have synthesized.
+        //
+        bool mod (x_mod != nullptr && p.is_a (*x_mod));
+
+        // Note: group already resolved in the previous loop.
+
+        for (prerequisite_member p1: group_prerequisite_members (a, *pt))
+        {
+          if (p1.is_a (mod ? *x_mod : x_src) || p1.is_a<c> ())
+          {
+            // Searching our own prerequisite is ok, p1 must already be
+            // resolved.
+            //
+            const target& tp (p.search (t));
+            const target& tp1 (p1.search (*pt));
+
+            if (&tp != &tp1)
+            {
+              bool group (!p.prerequisite.belongs (t));
+
+              const target_type& rtt (mod
+                                      ? (group ? bmi::static_type : tts.bmi)
+                                      : (group ? obj::static_type : tts.obj));
+
+              fail << "synthesized dependency for prerequisite " << p << " "
+                   << "would be incompatible with existing target " << *pt <<
+                info << "existing prerequisite " << p1 << " does not match "
+                   << p <<
+                info << p1 << " resolves to target " << tp1 <<
+                info << p << " resolves to target " << tp <<
+                info << "specify corresponding " << rtt.name << "{} "
+                   << "dependency explicitly";
+            }
+
+            break;
+          }
+        }
+      }
+
+      md.binless = binless;
+      md.start = start;
+
+      switch (a)
+      {
+      case perform_update_id: return [this] (action a, const target& t)
+        {
+          return perform_update (a, t);
+        };
+      case perform_clean_id: return [this] (action a, const target& t)
+        {
+          return perform_clean (a, t);
+        };
+      default: return noop_recipe; // Configure update.
+      }
+    }
+
+    // Append to args what is needed to link library l (static if la, with
+    // flags lf) and its dependencies as enumerated by process_libraries().
+    //
+    void link_rule::
+    append_libraries (strings& args,
+                      const file& l, bool la, lflags lf,
+                      const scope& bs, action a, linfo li) const
+    {
+      struct append_data
+      {
+        strings&             args;
+        const file&          l;
+        action               a;
+        linfo                li;
+        compile_target_types tts;
+      } st {args, l, a, li, compile_types (li.type)};
+
+      auto impf = [] (const file&, bool la) {return la;};
+
+      auto libf = [&st, this] (const file* const* chain,
+                               const string& name,
+                               lflags flags,
+                               bool)
+      {
+        const bool archive (st.li.type == otype::a);
+        const file* cur (chain != nullptr ? *chain : nullptr);
+
+        // Not a target, just a name (-lfoo or foo.lib). Pass it along unless
+        // the result is a static library, which we don't link libraries to.
+        //
+        if (cur == nullptr)
+        {
+          if (!archive)
+            st.args.push_back (name);
+
+          return;
+        }
+
+        // The utility/non-utility case is tricky. Consider these two
+        // scenarios:
+        //
+        // exe -> (libu1-e -> libu1-e) -> (liba) -> libu-a -> (liba1)
+        // exe -> (liba) -> libu1-a -> libu1-a -> (liba1) -> libu-a1
+        //
+        // Libraries that should be linked are in '()'. That is, we need to
+        // link the initial sequence of utility libraries and then, after
+        // encountering a first non-utility, only link non-utilities (because
+        // they already contain their utility's object files). So for a
+        // utility library we walk the chain backwards, up to the NULL entry,
+        // and bail out if anything in it is not a utility library.
+        //
+        const bool utility (cur->is_a<libux> ());
+
+        if (utility)
+        {
+          for (const file* const* q (chain - 1); *q != nullptr; --q)
+          {
+            if (!(*q)->is_a<libux> ())
+              return;
+          }
+        }
+
+        // We may end up trying to link a non-utility library to a static
+        // library via a utility library (direct linking is taken care of by
+        // perform_update()). So we cut it off here.
+        //
+        if (archive && !utility)
+          return;
+
+        if (cur->mtime () == timestamp_unreal) // Binless.
+          return;
+
+        if (archive)
+        {
+          // Linking a utility library to a static library.
+          //
+          // Note that utility library prerequisites of utility libraries are
+          // automatically handled by process_libraries(). So all we have to
+          // do is implement the "thin archive" logic.
+          //
+          for (const target* m: cur->prerequisite_targets[st.a])
+          {
+            if (m == nullptr)
+              continue;
+
+            // @@ MODHDR: hbmix{} has no objx{}
+            //
+            if (modules && m->is_a<bmix> ())
+              m = find_adhoc_member (*m, st.tts.obj);
+
+            const file* o (m->is_a<objx> ());
+            if (o == nullptr)
+              continue;
+
+            // We could have dependency diamonds with utility libraries.
+            // Repeats will be handled by the linker (in fact, it could be
+            // required to repeat them to satisfy all the symbols) but here
+            // we have to suppress duplicates ourselves.
+            //
+            string op (relative (o->path ()).string ());
+
+            if (find (st.args.begin (), st.args.end (), op) == st.args.end ())
+              st.args.push_back (move (op));
+          }
+
+          return;
+        }
+
+        // Linking a library to a shared library or executable.
+        //
+        // On Windows a shared library is a DLL with the import library as an
+        // ad hoc group member. MinGW though can link directly to DLLs (see
+        // search_library() for details).
+        //
+        if (tclass == "windows" && cur->is_a<libs> ())
+        {
+          if (const libi* il = find_adhoc_member<libi> (*cur))
+            cur = il;
+        }
+
+        string lp (relative (cur->path ()).string ());
+
+        if (!(flags & lflag_whole))
+        {
+          st.args.push_back (move (lp));
+          return;
+        }
+
+        // Link the archive whole; the spelling depends on the linker.
+        //
+        if (tsys == "win32-msvc")
+          st.args.push_back ("/WHOLEARCHIVE:" + lp); // From VC14U2 only.
+        else if (tsys == "darwin")
+          st.args.push_back ("-Wl,-force_load," + lp);
+        else
+        {
+          st.args.push_back ("-Wl,--whole-archive");
+          st.args.push_back (move (lp));
+          st.args.push_back ("-Wl,--no-whole-archive");
+        }
+      };
+
+      auto optf = [&st, this] (const file& f, const string& m,
+                               bool com, bool exp)
+      {
+        // Don't try to pass any loptions when linking a static library.
+        //
+        if (st.li.type == otype::a)
+          return;
+
+        // If we need an interface value, then use the group (lib{}).
+        //
+        const target* g (exp && f.is_a<libs> () ? f.group : &f);
+
+        if (g == nullptr)
+          return;
+
+        const variable& var (
+          com
+          ? (exp ? c_export_loptions : c_loptions)
+          : (m == x
+             ? (exp ? x_export_loptions : x_loptions)
+             : f.ctx.var_pool[m + (exp ? ".export.loptions" : ".loptions")]));
+
+        append_options (st.args, *g, var);
+      };
+
+      process_libraries (
+        a, bs, li, sys_lib_dirs, l, la, lf, impf, libf, optf, true);
+    }
+
+    // Hash into cs the libraries and link options that l brings in and set
+    // update to true if any of these libraries is newer than mt.
+    //
+    void link_rule::
+    hash_libraries (sha256& cs,
+                    bool& update, timestamp mt,
+                    const file& l, bool la, lflags lf,
+                    const scope& bs, action a, linfo li) const
+    {
+      struct hash_data
+      {
+        sha256&         cs;
+        const dir_path& out_root;
+        bool&           update;
+        timestamp       mt;
+        linfo           li;
+      } hd {cs, bs.root_scope ()->out_path (), update, mt, li};
+
+      auto impf = [] (const file&, bool la) {return la;};
+
+      auto libf = [&hd, this] (const file* const* chain, const string& name,
+                               lflags flags, bool)
+      {
+        const bool archive (hd.li.type == otype::a);
+        const file* cur (chain != nullptr ? *chain : nullptr);
+
+        // Just a name rather than a target (not linked to static libraries).
+        //
+        if (cur == nullptr)
+        {
+          if (!archive)
+            hd.cs.append (name);
+
+          return;
+        }
+
+        // As in append_libraries(), only the initial utility sequence counts.
+        //
+        const bool utility (cur->is_a<libux> ());
+
+        if (utility)
+        {
+          for (const file* const* q (chain - 1); *q != nullptr; --q)
+            if (!(*q)->is_a<libux> ())
+              return;
+        }
+
+        // Nothing special is needed for linking a utility library to a
+        // static library: the library timestamp serves as a proxy for the
+        // timestamps (and the set) of its object files. We do, however, need
+        // to cut off the static to static linking as in append_libraries().
+        //
+        if (archive && !utility)
+          return;
+
+        if (cur->mtime () == timestamp_unreal) // Binless.
+          return;
+
+        // Check if this library renders us out of date.
+        //
+        if (!hd.update)
+          hd.update = cur->newer (hd.mt);
+
+        // On Windows a shared library is a DLL with the import library as an
+        // ad hoc group member. MinGW though can link directly to DLLs (see
+        // search_library() for details).
+        //
+        if (tclass == "windows" && cur->is_a<libs> ())
+        {
+          if (const libi* il = find_adhoc_member<libi> (*cur))
+            cur = il;
+        }
+
+        hd.cs.append (flags);
+        hash_path (hd.cs, cur->path (), hd.out_root);
+      };
+
+      auto optf = [&hd, this] (const file& f, const string& m,
+                               bool com, bool exp)
+      {
+        if (hd.li.type == otype::a) // No loptions for a static library.
+          return;
+
+        const target* g (exp && f.is_a<libs> () ? f.group : &f); // Interface.
+
+        if (g == nullptr)
+          return;
+
+        const variable& var (
+          com
+          ? (exp ? c_export_loptions : c_loptions)
+          : (m == x
+             ? (exp ? x_export_loptions : x_loptions)
+             : f.ctx.var_pool[m + (exp ? ".export.loptions" : ".loptions")]));
+
+        hash_options (hd.cs, *g, var);
+      };
+
+      process_libraries (
+        a, bs, li, sys_lib_dirs, l, la, lf, impf, libf, optf, true);
+    }
+
+    void link_rule::
+    rpath_libraries (strings& args,
+                     const target& t,
+                     const scope& bs,
+                     action a,
+                     linfo li,
+                     bool link) const
+    {
+      // Use -rpath-link only on targets that support it (Linux, *BSD). Note
+      // that we don't really need it for top-level libraries.
+      //
+      if (link)
+      {
+        if (tclass != "linux" && tclass != "bsd")
+          return;
+      }
+
+      auto imp = [link] (const file& l, bool la)
+      {
+        // If we are not rpath-link'ing, then we only need to rpath interface
+        // libraries (they will include rpath's for their implementations)
+        // Otherwise, we have to do this recursively. In both cases we also
+        // want to see through utility libraries.
+        //
+        // The rpath-link part is tricky: ideally we would like to get only
+        // implementations and only of shared libraries. We are not interested
+        // in interfaces because we are linking their libraries explicitly.
+        // However, in our model there is no such thing as "implementation
+        // only"; it is either interface or interface and implementation. So
+        // we are going to rpath-link all of them which should be harmless
+        // except for some noise on the command line.
+        //
+        //
+        return (link ? !la : false) || l.is_a<libux> ();
+      };
+
+      // Package the data to keep within the 2-pointer small std::function
+      // optimization limit.
+      //
+      struct
+      {
+        strings& args;
+        bool     link;
+      } d {args, link};
+
+      auto lib = [&d, this] (const file* const* lc,
+                             const string& f,
+                             lflags,
+                             bool sys)
+      {
+        const file* l (lc != nullptr ? *lc : nullptr);
+
+        // We don't rpath system libraries. Why, you may ask? There are many
+        // good reasons and I have them written on a napkin somewhere...
+        //
+        if (sys)
+          return;
+
+        if (l != nullptr)
+        {
+          if (!l->is_a<libs> ())
+            return;
+
+          if (l->mtime () == timestamp_unreal) // Binless.
+            return;
+        }
+        else
+        {
+          // This is an absolute path and we need to decide whether it is
+          // a shared or static library. Doesn't seem there is anything
+          // better than checking for a platform-specific extension (maybe
+          // we should cache it somewhere).
+          //
+          size_t p (path::traits_type::find_extension (f));
+
+          if (p == string::npos)
+            return;
+
+          ++p; // Skip dot.
+
+          bool c (true);
+          const char* e;
+
+          if      (tclass == "windows") {e = "dll"; c = false;}
+          else if (tsys == "darwin")     e = "dylib";
+          else                           e = "so";
+
+          if ((c
+               ? f.compare (p, string::npos, e)
+               : casecmp (f.c_str () + p, e)) != 0)
+            return;
+        }
+
+        // Ok, if we are here then it means we have a non-system, shared
+        // library and its absolute path is in f.
+        //
+        string o (d.link ? "-Wl,-rpath-link," : "-Wl,-rpath,");
+
+        size_t p (path::traits_type::rfind_separator (f));
+        assert (p != string::npos);
+
+        o.append (f, 0, (p != 0 ? p : 1)); // Don't include trailing slash.
+        d.args.push_back (move (o));
+      };
+
+      // In case we don't have the "small function object" optimization.
+      //
+      const function<bool (const file&, bool)> impf (imp);
+      const function<
+        void (const file* const*, const string&, lflags, bool)> libf (lib);
+
+      for (const prerequisite_target& pt: t.prerequisite_targets[a])
+      {
+        if (pt == nullptr)
+          continue;
+
+        bool la;
+        const file* f;
+
+        if ((la = (f = pt->is_a<liba>  ())) ||
+            (la = (f = pt->is_a<libux> ())) ||
+            (      f = pt->is_a<libs>  ()))
+        {
+          if (!link && !la)
+          {
+            // Top-level shared library dependency.
+            //
+            if (!f->path ().empty ()) // Not binless.
+            {
+              // It is either matched or imported so should be a cc library.
+              //
+              if (!cast_false<bool> (f->vars[c_system]))
+                args.push_back (
+                  "-Wl,-rpath," + f->path ().directory ().string ());
+            }
+          }
+
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             *f, la, pt.data,
+                             impf, libf, nullptr);
+        }
+      }
+    }
+
+    // Filter link.exe noise (declared here, defined in msvc.cxx).
+    //
+    void
+    msvc_filter_link (ifdstream&, const file&, otype);
+
+    // Translate target CPU to the complete link.exe/lib.exe /MACHINE option.
+    //
+    const char*
+    msvc_machine (const string& cpu); // Defined in msvc.cxx.
+
+    target_state link_rule::
+    perform_update (action a, const target& xt) const
+    {
+      tracer trace (x, "link_rule::perform_update");
+
+      const file& t (xt.as<file> ());
+      const path& tp (t.path ());
+
+      context& ctx (t.ctx);
+
+      const scope& bs (t.base_scope ());
+      const scope& rs (*bs.root_scope ());
+
+      match_data& md (t.data<match_data> ());
+
+      // Unless the outer install rule signalled that this is update for
+      // install, signal back that we've performed plain update.
+      //
+      if (!md.for_install)
+        md.for_install = false;
+
+      bool for_install (*md.for_install);
+
+      ltype lt (link_type (t));
+      otype ot (lt.type);
+      linfo li (link_info (bs, ot));
+      compile_target_types tts (compile_types (ot));
+
+      bool binless (md.binless);
+      assert (ot != otype::e || !binless); // Sanity check.
+
+      // Determine if we are out-of-date.
+      //
+      bool update (false);
+      bool scratch (false);
+      timestamp mt (binless ? timestamp_unreal : t.load_mtime ());
+
+      // Update prerequisites. We determine if any relevant non-ad hoc ones
+      // render us out-of-date manually below.
+      //
+      // Note that execute_prerequisites() blanks out all the ad hoc
+      // prerequisites so we don't need to worry about them from now on.
+      //
+      target_state ts;
+
+      if (optional<target_state> s =
+          execute_prerequisites (a,
+                                 t,
+                                 mt,
+                                 [] (const target&, size_t) {return false;}))
+        ts = *s;
+      else
+      {
+        // An ad hoc prerequisite renders us out-of-date. Let's update from
+        // scratch for good measure.
+        //
+        scratch = update = true;
+        ts = target_state::changed;
+      }
+
+      // Check for the for_install variable on each prerequisite and blank out
+      // those that don't match. Note that we have to do it after updating
+      // prerequisites to keep the dependency counts straight.
+      //
+      if (const variable* var_fi = ctx.var_pool.find ("for_install"))
+      {
+        // Parallel prerequisites/prerequisite_targets loop.
+        //
+        size_t i (md.start);
+        for (prerequisite_member p: group_prerequisite_members (a, t))
+        {
+          const target*& pt (t.prerequisite_targets[a][i++]);
+
+          if (pt == nullptr)
+            continue;
+
+          if (lookup l = p.prerequisite.vars[var_fi])
+          {
+            if (cast<bool> (l) != for_install)
+            {
+              l5 ([&]{trace << "excluding " << *pt << " due to for_install";});
+              pt = nullptr;
+            }
+          }
+        }
+      }
+
+      // (Re)generate pkg-config's .pc file. While the target itself might be
+      // up-to-date from a previous run, there is no guarantee that .pc exists
+      // or is also up-to-date. So to keep things simple we just regenerate it
+      // unconditionally.
+      //
+      // Also, if you are wondering why don't we just always produce this .pc,
+      // install or no install, the reason is unless and until we are updating
+      // for install, we have no idea where-to things will be installed.
+      //
+      if (for_install && lt.library () && !lt.utility)
+        pkgconfig_save (a, t, lt.static_library (), binless);
+
+      // If we have no binary to build then we are done.
+      //
+      if (binless)
+      {
+        t.mtime (timestamp_unreal);
+        return ts;
+      }
+
+      // Open the dependency database (do it before messing with Windows
+      // manifests to diagnose missing output directory).
+      //
+      depdb dd (tp + ".d");
+
+      // If targeting Windows, take care of the manifest.
+      //
+      path manifest; // Manifest itself (msvc) or compiled object file.
+      timestamp rpath_timestamp = timestamp_nonexistent; // DLLs timestamp.
+
+      if (lt.executable () && tclass == "windows")
+      {
+        // First determine if we need to add our rpath emulating assembly. The
+        // assembly itself is generated later, after updating the target. Omit
+        // it if we are updating for install.
+        //
+        if (!for_install && cast_true<bool> (t["bin.rpath.auto"]))
+          rpath_timestamp = windows_rpath_timestamp (t, bs, a, li);
+
+        auto p (windows_manifest (t, rpath_timestamp != timestamp_nonexistent));
+        path& mf (p.first);
+        timestamp mf_mt (p.second);
+
+        if (tsys == "mingw32")
+        {
+          // Compile the manifest into the object file with windres. While we
+          // are going to synthesize an .rc file to pipe to windres' stdin, we
+          // will still use .manifest to check if everything is up-to-date.
+          //
+          manifest = mf + ".o";
+
+          if (mf_mt == timestamp_nonexistent || mf_mt > mtime (manifest))
+          {
+            path of (relative (manifest));
+
+            const process_path& rc (cast<process_path> (rs["bin.rc.path"]));
+
+            // @@ Would be good to add this to depdb (e.g., rc changes).
+            //
+            const char* args[] = {
+              rc.recall_string (),
+              "--input-format=rc",
+              "--output-format=coff",
+              "-o", of.string ().c_str (),
+              nullptr};
+
+            if (verb >= 3)
+              print_process (args);
+
+            if (!ctx.dry_run)
+            {
+              auto_rmfile rm (of);
+
+              try
+              {
+                process pr (rc, args, -1);
+
+                try
+                {
+                  ofdstream os (move (pr.out_fd));
+
+                  // 1 is resource ID, 24 is RT_MANIFEST. We also need to
+                  // escape Windows path backslashes.
+                  //
+                  os << "1 24 \"";
+
+                  const string& s (mf.string ());
+                  for (size_t i (0), j;; i = j + 1)
+                  {
+                    j = s.find ('\\', i);
+                    os.write (s.c_str () + i,
+                              (j == string::npos ? s.size () : j) - i);
+
+                    if (j == string::npos)
+                      break;
+
+                    os.write ("\\\\", 2);
+                  }
+
+                  os << "\"" << endl;
+
+                  os.close ();
+                  rm.cancel ();
+                }
+                catch (const io_error& e)
+                {
+                  if (pr.wait ()) // Ignore if child failed.
+                    fail << "unable to pipe resource file to " << args[0]
+                         << ": " << e;
+                }
+
+                run_finish (args, pr);
+              }
+              catch (const process_error& e)
+              {
+                error << "unable to execute " << args[0] << ": " << e;
+
+                if (e.child)
+                  exit (1);
+
+                throw failed ();
+              }
+            }
+
+            update = true; // Manifest changed, force update.
+          }
+        }
+        else
+        {
+          manifest = move (mf); // Save for link.exe's /MANIFESTINPUT.
+
+          if (mf_mt == timestamp_nonexistent || mf_mt > mt)
+            update = true; // Manifest changed, force update.
+        }
+      }
+
+      // Check/update the dependency database.
+      //
+      // First should come the rule name/version.
+      //
+      if (dd.expect (rule_id) != nullptr)
+        l4 ([&]{trace << "rule mismatch forcing update of " << t;});
+
+      lookup ranlib;
+
+      // Then the linker checksum (ar/ranlib or the compiler).
+      //
+      if (lt.static_library ())
+      {
+        ranlib = rs["bin.ranlib.path"];
+
+        const char* rl (
+          ranlib
+          ? cast<string> (rs["bin.ranlib.checksum"]).c_str ()
+          : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+
+        if (dd.expect (cast<string> (rs["bin.ar.checksum"])) != nullptr)
+          l4 ([&]{trace << "ar mismatch forcing update of " << t;});
+
+        if (dd.expect (rl) != nullptr)
+          l4 ([&]{trace << "ranlib mismatch forcing update of " << t;});
+      }
+      else
+      {
+        // For VC we use link.exe directly.
+        //
+        const string& cs (
+          cast<string> (
+            rs[tsys == "win32-msvc"
+               ? ctx.var_pool["bin.ld.checksum"]
+               : x_checksum]));
+
+        if (dd.expect (cs) != nullptr)
+          l4 ([&]{trace << "linker mismatch forcing update of " << t;});
+      }
+
+      // Next check the target. While it might be incorporated into the linker
+      // checksum, it also might not (e.g., VC link.exe).
+      //
+      if (dd.expect (ctgt.string ()) != nullptr)
+        l4 ([&]{trace << "target mismatch forcing update of " << t;});
+
+      // Start building the command line. While we don't yet know whether we
+      // will really need it, we need to hash it to find out. So the options
+      // are to either replicate the exact process twice, first for hashing
+      // then for building or to go ahead and start building and hash the
+      // result. The first approach is probably more efficient while the
+      // second is simpler. Let's go with the simpler for now (actually it's
+      // kind of a hybrid).
+      //
+      cstrings args {nullptr}; // Reserve one for config.bin.ar/config.x.
+
+      // Storage.
+      //
+      string arg1, arg2;
+      strings sargs;
+
+      if (lt.static_library ())
+      {
+        if (tsys == "win32-msvc")
+        {
+          // lib.exe has /LIBPATH but it's not clear/documented what it's used
+          // for. Perhaps for link-time code generation (/LTCG)? If that's the
+          // case, then we may need to pass *.loptions.
+          //
+          args.push_back ("/NOLOGO");
+
+          // Add /MACHINE.
+          //
+          args.push_back (msvc_machine (cast<string> (rs[x_target_cpu])));
+        }
+        else
+        {
+          // If the user asked for ranlib, don't try to do its function with
+          // -s. Some ar implementations (e.g., the LLVM one) don't support
+          // leading '-'.
+          //
+          arg1 = ranlib ? "rc" : "rcs";
+
+          // For utility libraries use thin archives if possible.
+          //
+          // Thin archives are supported by GNU ar since binutils 2.19.1 and
+          // LLVM ar since LLVM 3.8.0. Note that strictly speaking thin
+          // archives also have to be supported by the linker but it is
+          // probably safe to assume that the two came from the same version
+          // of binutils/LLVM.
+          //
+          if (lt.utility)
+          {
+            const string& id (cast<string> (rs["bin.ar.id"]));
+
+            for (bool g (id == "gnu"); g || id == "llvm"; ) // Breakout loop.
+            {
+              auto mj (cast<uint64_t> (rs["bin.ar.version.major"]));
+              if (mj <  (g ? 2 : 3)) break;
+              if (mj == (g ? 2 : 3))
+              {
+                auto mi (cast<uint64_t> (rs["bin.ar.version.minor"]));
+                if (mi  < (g ? 18 : 8)) break;
+                if (mi == 18 && g)
+                {
+                  auto pa (cast<uint64_t> (rs["bin.ar.version.patch"]));
+                  if (pa < 1) break;
+                }
+              }
+
+              arg1 += 'T';
+              break;
+            }
+          }
+
+          args.push_back (arg1.c_str ());
+        }
+
+        append_options (args, t, c_aoptions);
+        append_options (args, t, x_aoptions);
+      }
+      else
+      {
+        if (tsys == "win32-msvc")
+        {
+          // We are using link.exe directly so don't pass the compiler
+          // options.
+        }
+        else
+        {
+          append_options (args, t, c_coptions);
+          append_options (args, t, x_coptions);
+          append_options (args, tstd);
+        }
+
+        append_options (args, t, c_loptions);
+        append_options (args, t, x_loptions);
+
+        // Extra system library dirs (last).
+        //
+        // @@ /LIBPATH:<path>, not /LIBPATH <path>
+        //
+        assert (sys_lib_dirs_extra <= sys_lib_dirs.size ());
+        append_option_values (
+          args,
+          cclass == compiler_class::msvc ? "/LIBPATH:" : "-L",
+          sys_lib_dirs.begin () + sys_lib_dirs_extra, sys_lib_dirs.end (),
+          [] (const dir_path& d) {return d.string ().c_str ();});
+
+        // Handle soname/rpath.
+        //
+        if (tclass == "windows")
+        {
+          // Limited emulation for Windows with no support for user-defined
+          // rpath/rpath-link.
+          //
+          lookup l;
+
+          if ((l = t["bin.rpath"]) && !l->empty ())
+            fail << ctgt << " does not support rpath";
+
+          if ((l = t["bin.rpath_link"]) && !l->empty ())
+            fail << ctgt << " does not support rpath-link";
+        }
+        else
+        {
+          // Set soname.
+          //
+          if (lt.shared_library ())
+          {
+            const libs_paths& paths (md.libs_paths);
+            const string& leaf (paths.effect_soname ().leaf ().string ());
+
+            if (tclass == "macos")
+            {
+              // With Mac OS 10.5 (Leopard) Apple finally caved in and gave us
+              // a way to emulate vanilla -rpath.
+              //
+              // It may seem natural to do something different on update for
+              // install. However, if we don't make it @rpath, then the user
+              // won't be able to use config.bin.rpath for installed libraries.
+              //
+              arg1 = "-install_name";
+              arg2 = "@rpath/" + leaf;
+            }
+            else
+              arg1 = "-Wl,-soname," + leaf;
+
+            if (!arg1.empty ())
+              args.push_back (arg1.c_str ());
+
+            if (!arg2.empty ())
+              args.push_back (arg2.c_str ());
+          }
+
+          // Add rpaths. We used to first add the ones specified by the user
+          // so that they take precedence. But that caused problems if we have
+          // old versions of the libraries sitting in the rpath location
+          // (e.g., installed libraries). And if you think about this, it's
+          // probably correct to prefer libraries that we explicitly imported
+          // to the ones found via rpath.
+          //
+          // Note also that if this is update for install, then we don't add
+          // rpath of the imported libraries (i.e., we assume they are also
+          // installed). But we add -rpath-link for some platforms.
+          //
+          if (cast_true<bool> (t[for_install
+                                 ? "bin.rpath_link.auto"
+                                 : "bin.rpath.auto"]))
+            rpath_libraries (sargs, t, bs, a, li, for_install /* link */);
+
+          lookup l;
+
+          if ((l = t["bin.rpath"]) && !l->empty ())
+            for (const dir_path& p: cast<dir_paths> (l))
+              sargs.push_back ("-Wl,-rpath," + p.string ());
+
+          if ((l = t["bin.rpath_link"]) && !l->empty ())
+          {
+            // Only certain targets support -rpath-link (Linux, *BSD).
+            //
+            if (tclass != "linux" && tclass != "bsd")
+              fail << ctgt << " does not support rpath-link";
+
+            for (const dir_path& p: cast<dir_paths> (l))
+              sargs.push_back ("-Wl,-rpath-link," + p.string ());
+          }
+        }
+      }
+
+      // All the options should now be in. Hash them and compare with the db.
+      //
+      {
+        sha256 cs;
+
+        for (size_t i (1); i != args.size (); ++i)
+          cs.append (args[i]);
+
+        for (size_t i (0); i != sargs.size (); ++i)
+          cs.append (sargs[i]);
+
+        // @@ Note that we don't hash output options so if one of the ad hoc
+        //    members that we manage gets renamed, we will miss a rebuild.
+
+        if (dd.expect (cs.string ()) != nullptr)
+          l4 ([&]{trace << "options mismatch forcing update of " << t;});
+      }
+
+      // Finally, hash and compare the list of input files.
+      //
+      // Should we capture actual file names or their checksum? The only good
+      // reason for capturing actual files is diagnostics: we will be able to
+      // pinpoint exactly what is causing the update. On the other hand, the
+      // checksum is faster and simpler. And we like simple.
+      //
+      const file* def (nullptr); // Cached if present.
+      {
+        sha256 cs;
+
+        for (const prerequisite_target& p: t.prerequisite_targets[a])
+        {
+          const target* pt (p.target);
+
+          if (pt == nullptr)
+            continue;
+
+          // If this is bmi*{}, then obj*{} is its ad hoc member.
+          //
+          if (modules)
+          {
+            if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{}
+              pt = find_adhoc_member (*pt, tts.obj);
+          }
+
+          const file* f;
+          bool la (false), ls (false);
+
+          // We link utility libraries to everything except other utility
+          // libraries. In case of linking to liba{} we follow the "thin
+          // archive" lead and "see through" to their object file
+          // prerequisites (recursively, until we encounter a non-utility).
+          //
+          if ((f = pt->is_a<objx> ())           ||
+              (!lt.utility &&
+               (la = (f = pt->is_a<libux> ()))) ||
+              (!lt.static_library () &&
+               ((la = (f = pt->is_a<liba>  ())) ||
+                (ls = (f = pt->is_a<libs>  ())))))
+          {
+            // Link all the dependent interface libraries (shared) or interface
+            // and implementation (static), recursively.
+            //
+            // Also check if any of them render us out of date. The tricky
+            // case is, say, a utility library (static) that depends on a
+            // shared library. When the shared library is updated, there is no
+            // reason to re-archive the utility but those who link the utility
+            // have to "see through" the changes in the shared library.
+            //
+            if (la || ls)
+            {
+              hash_libraries (cs, update, mt, *f, la, p.data, bs, a, li);
+              f = nullptr; // Timestamp checked by hash_libraries().
+            }
+            else
+              hash_path (cs, f->path (), rs.out_path ());
+          }
+          else if ((f = pt->is_a<bin::def> ()))
+          {
+            if (tclass == "windows" && !lt.static_library ())
+            {
+              // At least link.exe only allows a single .def file.
+              //
+              if (def != nullptr)
+                fail << "multiple module definition files specified for " << t;
+
+              hash_path (cs, f->path (), rs.out_path ());
+              def = f;
+            }
+            else
+              f = nullptr; // Not an input.
+          }
+          else
+            f = pt->is_a<exe> (); // Consider executable mtime (e.g., linker).
+
+          // Check if this input renders us out of date.
+          //
+          if (f != nullptr)
+            update = update || f->newer (mt);
+        }
+
+        // Treat it as input for both MinGW and VC (mtime checked above).
+        //
+        if (!manifest.empty ())
+          hash_path (cs, manifest, rs.out_path ());
+
+        // Treat *.libs variable values as inputs, not options.
+        //
+        if (!lt.static_library ())
+        {
+          hash_options (cs, t, c_libs);
+          hash_options (cs, t, x_libs);
+        }
+
+        if (dd.expect (cs.string ()) != nullptr)
+          l4 ([&]{trace << "file set mismatch forcing update of " << t;});
+      }
+
+      // If any of the above checks resulted in a mismatch (different linker,
+      // options or input file set), or if the database is newer than the
+      // target (interrupted update) then force the target update. Also note
+      // this situation in the "from scratch" flag.
+      //
+      if (dd.writing () || dd.mtime > mt)
+        scratch = update = true;
+
+      dd.close ();
+
+      // If nothing changed, then we are done.
+      //
+      if (!update)
+        return ts;
+
+      // Ok, so we are updating. Finish building the command line.
+      //
+      string in, out, out1, out2, out3; // Storage.
+
+      // Translate paths to relative (to working directory) ones. This results
+      // in easier to read diagnostics.
+      //
+      path relt (relative (tp));
+
+      const process_path* ld (nullptr);
+      if (lt.static_library ())
+      {
+        ld = &cast<process_path> (rs["bin.ar.path"]);
+
+        if (tsys == "win32-msvc")
+        {
+          out = "/OUT:" + relt.string ();
+          args.push_back (out.c_str ());
+        }
+        else
+          args.push_back (relt.string ().c_str ());
+      }
+      else
+      {
+        // The options are usually similar enough to handle executables
+        // and shared libraries together.
+        //
+        if (tsys == "win32-msvc")
+        {
+          // Using link.exe directly.
+          //
+          ld = &cast<process_path> (rs["bin.ld.path"]);
+          args.push_back ("/NOLOGO");
+
+          if (ot == otype::s)
+            args.push_back ("/DLL");
+
+          // Add /MACHINE.
+          //
+          args.push_back (msvc_machine (cast<string> (rs[x_target_cpu])));
+
+          // Unless explicitly enabled with /INCREMENTAL, disable incremental
+          // linking (it is implicitly enabled if /DEBUG is specified). The
+          // reason is the .ilk file: its name cannot be changed and if we
+          // have, say, foo.exe and foo.dll, then they will end up stomping on
+          // each other's .ilk's.
+          //
+          // So the idea is to disable it by default but let the user request
+          // it explicitly if they are sure their project doesn't suffer from
+          // the above issue. We can also have something like 'incremental'
+          // config initializer keyword for this.
+          //
+          // It might also be a good idea to ask Microsoft to add an option.
+          //
+          if (!find_option ("/INCREMENTAL", args, true))
+            args.push_back ("/INCREMENTAL:NO");
+
+          if (ctype == compiler_type::clang)
+          {
+            // According to Clang's MSVC.cpp, we shall link libcmt.lib (static
+            // multi-threaded runtime) unless -nostdlib or -nostartfiles is
+            // specified.
+            //
+            if (!find_options ({"-nostdlib", "-nostartfiles"}, t, c_coptions) &&
+                !find_options ({"-nostdlib", "-nostartfiles"}, t, x_coptions))
+              args.push_back ("/DEFAULTLIB:libcmt.lib");
+          }
+
+          // If you look at the list of libraries Visual Studio links by
+          // default, it includes everything and a couple of kitchen sinks
+          // (winspool32.lib, ole32.lib, odbc32.lib, etc) while we want to
+          // keep our low-level build as pure as possible. However, there seem
+          // to be fairly essential libraries that are not linked by link.exe
+          // by default (use /VERBOSE:LIB to see the list). For example, MinGW
+          // by default links advapi32, shell32, user32, and kernel32. And so
+          // we follow suit and make sure those are linked.  advapi32 and
+          // kernel32 are already on the default list and we only need to add
+          // the other two.
+          //
+          // The way we are going to do it is via the /DEFAULTLIB option
+          // rather than specifying the libraries as normal inputs (as VS
+          // does). This way the user can override our actions with the
+          // /NODEFAULTLIB option.
+          //
+          args.push_back ("/DEFAULTLIB:shell32.lib");
+          args.push_back ("/DEFAULTLIB:user32.lib");
+
+          // Take care of the manifest (will be empty for the DLL).
+          //
+          if (!manifest.empty ())
+          {
+            out3 = "/MANIFESTINPUT:";
+            out3 += relative (manifest).string ();
+            args.push_back ("/MANIFEST:EMBED");
+            args.push_back (out3.c_str ());
+          }
+
+          if (def != nullptr)
+          {
+            in = "/DEF:" + relative (def->path ()).string ();
+            args.push_back (in.c_str ());
+          }
+
+          if (ot == otype::s)
+          {
+            // On Windows libs{} is the DLL and an ad hoc group member is the
+            // import library.
+            //
+            // This will also create the .exp export file. Its name will be
+            // derived from the import library by changing the extension.
+            // Lucky for us -- there is no option to name it.
+            //
+            const file& imp (*find_adhoc_member<libi> (t));
+
+            out2 = "/IMPLIB:";
+            out2 += relative (imp.path ()).string ();
+            args.push_back (out2.c_str ());
+          }
+
+          // If we have /DEBUG then name the .pdb file. It is an ad hoc group
+          // member.
+          //
+          if (find_option ("/DEBUG", args, true))
+          {
+            const file& pdb (
+              *find_adhoc_member<file> (t, *bs.find_target_type ("pdb")));
+
+            out1 = "/PDB:";
+            out1 += relative (pdb.path ()).string ();
+            args.push_back (out1.c_str ());
+          }
+
+          // @@ An executable can have an import library and VS seems to
+          //    always name it. I wonder what would trigger its generation?
+          //    Could it be the presence of export symbols? Yes, link.exe will
+          //    generate the import library iff there are exported symbols.
+          //    Which means there could be a DLL without an import library
+          //    (which we currently don't handle very well).
+          //
+          out = "/OUT:" + relt.string ();
+          args.push_back (out.c_str ());
+        }
+        else
+        {
+          switch (cclass)
+          {
+          case compiler_class::gcc:
+            {
+              ld = &cpath;
+
+              // Add the option that triggers building a shared library and
+              // take care of any extras (e.g., import library).
+              //
+              if (ot == otype::s)
+              {
+                if (tclass == "macos")
+                  args.push_back ("-dynamiclib");
+                else
+                  args.push_back ("-shared");
+
+                if (tsys == "mingw32")
+                {
+                  // On Windows libs{} is the DLL and an ad hoc group member
+                  // is the import library.
+                  //
+                  const file& imp (*find_adhoc_member<libi> (t));
+                  out = "-Wl,--out-implib=" + relative (imp.path ()).string ();
+                  args.push_back (out.c_str ());
+                }
+              }
+
+              args.push_back ("-o");
+              args.push_back (relt.string ().c_str ());
+
+              // For MinGW the .def file is just another input.
+              //
+              if (def != nullptr)
+              {
+                in = relative (def->path ()).string ();
+                args.push_back (in.c_str ());
+              }
+
+              break;
+            }
+          case compiler_class::msvc: assert (false);
+          }
+        }
+      }
+
+      args[0] = ld->recall_string ();
+
+      // Append input files noticing the position of the first.
+      //
+#ifdef _WIN32
+      size_t args_input (args.size ());
+#endif
+
+      // The same logic as during hashing above. See also a similar loop
+      // inside append_libraries().
+      //
+      for (const prerequisite_target& p: t.prerequisite_targets[a])
+      {
+        const target* pt (p.target);
+
+        if (pt == nullptr)
+          continue;
+
+        if (modules)
+        {
+          if (pt->is_a<bmix> ()) // @@ MODHDR: hbmix{} has no objx{}
+            pt = find_adhoc_member (*pt, tts.obj);
+        }
+
+        const file* f;
+        bool la (false), ls (false);
+
+        if ((f = pt->is_a<objx> ())           ||
+            (!lt.utility &&
+             (la = (f = pt->is_a<libux> ()))) ||
+            (!lt.static_library () &&
+             ((la = (f = pt->is_a<liba>  ())) ||
+              (ls = (f = pt->is_a<libs>  ())))))
+        {
+          if (la || ls)
+            append_libraries (sargs, *f, la, p.data, bs, a, li);
+          else
+            sargs.push_back (relative (f->path ()).string ()); // string()&&
+        }
+      }
+
+      // For MinGW manifest is an object file.
+      //
+      if (!manifest.empty () && tsys == "mingw32")
+        sargs.push_back (relative (manifest).string ());
+
+      // Shallow-copy sargs to args. Why not do it as we go along pushing into
+      // sargs? Because of potential reallocations in sargs.
+      //
+      for (const string& a: sargs)
+        args.push_back (a.c_str ());
+
+      if (!lt.static_library ())
+      {
+        append_options (args, t, c_libs);
+        append_options (args, t, x_libs);
+      }
+
+      args.push_back (nullptr);
+
+      // Cleanup old (versioned) libraries. Let's do it even for dry-run to
+      // keep things simple.
+      //
+      if (lt.shared_library ())
+      {
+        const libs_paths& paths (md.libs_paths);
+        const path& p (paths.clean);
+
+        if (!p.empty ())
+        try
+        {
+          if (verb >= 4) // Seeing this with -V doesn't really add any value.
+            text << "rm " << p;
+
+          auto rm = [&paths, this] (path&& m, const string&, bool interm)
+          {
+            if (!interm)
+            {
+              // Filter out paths that have one of the current paths as a
+              // prefix.
+              //
+              auto test = [&m] (const path& p)
+              {
+                const string& s (p.string ());
+                return s.empty () || m.string ().compare (0, s.size (), s) != 0;
+              };
+
+              if (test (*paths.real)   &&
+                  test ( paths.interm) &&
+                  test ( paths.soname) &&
+                  test ( paths.load)   &&
+                  test ( paths.link))
+              {
+                try_rmfile (m);
+                try_rmfile (m + ".d");
+
+                if (tsys == "win32-msvc")
+                {
+                  try_rmfile (m.base () += ".ilk");
+                  try_rmfile (m += ".pdb");
+                }
+              }
+            }
+            return true;
+          };
+
+          // Note: doesn't follow symlinks.
+          //
+          path_search (p, rm, dir_path () /* start */, path_match_flags::none);
+        }
+        catch (const system_error&) {} // Ignore errors.
+      }
+      else if (lt.static_library ())
+      {
+        // We use relative paths to the object files which means we may end
+        // up with different ones depending on CWD and some implementations
+        // treat them as different archive members. So remove the file to
+        // be sure. Note that we ignore errors leaving it to the archiver
+        // to complain.
+        //
+        if (mt != timestamp_nonexistent)
+          try_rmfile (relt, true);
+      }
+
+      if (verb == 1)
+        text << (lt.static_library () ? "ar " : "ld ") << t;
+      else if (verb == 2)
+        print_process (args);
+
+      // Do any necessary fixups to the command line to make it runnable.
+      //
+      // Notice the split in the diagnostics: at verbosity level 1 we print
+      // the "logical" command line while at level 2 and above -- what we are
+      // actually executing.
+      //
+      // On Windows we need to deal with the command line length limit. The
+      // best workaround seems to be passing (part of) the command line in an
+      // "options file" ("response file" in Microsoft's terminology). Both
+      // Microsoft's link.exe/lib.exe as well as GNU g??.exe/ar.exe support
+      // the same @<file> notation (and with a compatible subset of the
+      // content format; see below). Note also that GCC is smart enough to use
+      // an options file to call the underlying linker if we called it with
+      // @<file>. We will also assume that any other linker that we might be
+      // using supports this notation.
+      //
+      // Note that this is a limitation of the host platform, not the target
+      // (and Wine, where these lines are a bit blurred, does not have this
+      // length limitation).
+      //
+#ifdef _WIN32
+      auto_rmfile trm;
+      string targ;
+      {
+        // Calculate the would-be command line length similar to how process'
+        // implementation does it.
+        //
+        auto quote = [s = string ()] (const char* a) mutable -> const char*
+        {
+          return process::quote_argument (a, s);
+        };
+
+        size_t n (0);
+        for (const char* a: args)
+        {
+          if (a != nullptr)
+          {
+            if (n != 0)
+              n++; // For the space separator.
+
+            n += strlen (quote (a));
+          }
+        }
+
+        if (n > 32766) // 32768 - "Unicode terminating null character".
+        {
+          // Use the .t extension (for "temporary").
+          //
+          const path& f ((trm = auto_rmfile (relt + ".t")).path);
+
+          try
+          {
+            ofdstream ofs (f);
+
+            // Both Microsoft and GNU support a space-separated list of
+            // potentially-quoted arguments. GNU also supports backslash-
+            // escaping (whether Microsoft supports it is unclear; but it
+            // definitely doesn't need it for backslashes themselves, for
+            // example, in paths).
+            //
+            bool e (tsys != "win32-msvc"); // Assume GNU if not MSVC.
+            string b;
+
+            for (size_t i (args_input), n (args.size () - 1); i != n; ++i)
+            {
+              const char* a (args[i]);
+
+              if (e) // We will most likely have backslashes so just do it.
+              {
+                for (b.clear (); *a != '\0'; ++a)
+                {
+                  if (*a != '\\')
+                    b += *a;
+                  else
+                    b += "\\\\";
+                }
+
+                a = b.c_str ();
+              }
+
+              ofs << (i != args_input ? " " : "") << quote (a);
+            }
+
+            ofs << '\n';
+            ofs.close ();
+          }
+          catch (const io_error& e)
+          {
+            fail << "unable to write " << f << ": " << e;
+          }
+
+          // Replace input arguments with @file.
+          //
+          targ = '@' + f.string ();
+          args.resize (args_input);
+          args.push_back (targ.c_str());
+          args.push_back (nullptr);
+
+          //@@ TODO: leave .t file if linker failed and verb > 2?
+        }
+      }
+#endif
+
+      if (verb > 2)
+        print_process (args);
+
+      // Remove the target file if any of the subsequent (after the linker)
+      // actions fail or if the linker fails but does not clean up its mess
+      // (like link.exe). If we don't do that, then we will end up with a
+      // broken build that is up-to-date.
+      //
+      auto_rmfile rm;
+
+      if (!ctx.dry_run)
+      {
+        rm = auto_rmfile (relt);
+
+        try
+        {
+          // VC tools (both lib.exe and link.exe) send diagnostics to stdout.
+          // Also, link.exe likes to print various gratuitous messages. So for
+          // link.exe we redirect stdout to a pipe, filter that noise out, and
+          // send the rest to stderr.
+          //
+          // For lib.exe (and any other insane linker that may try to pull off
+          // something like this) we are going to redirect stdout to stderr.
+          // For sane compilers this should be harmless.
+          //
+          bool filter (tsys == "win32-msvc" && !lt.static_library ());
+
+          process pr (*ld, args.data (), 0, (filter ? -1 : 2));
+
+          if (filter)
+          {
+            try
+            {
+              ifdstream is (
+                move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit);
+
+              msvc_filter_link (is, t, ot);
+
+              // If anything remains in the stream, send it all to stderr.
+              // Note that the eof check is important: if the stream is at
+              // eof, this and all subsequent writes to the diagnostics stream
+              // will fail (and you won't see a thing).
+              //
+              if (is.peek () != ifdstream::traits_type::eof ())
+                diag_stream_lock () << is.rdbuf ();
+
+              is.close ();
+            }
+            catch (const io_error&) {} // Assume exits with error.
+          }
+
+          run_finish (args, pr);
+        }
+        catch (const process_error& e)
+        {
+          error << "unable to execute " << args[0] << ": " << e;
+
+          // In a multi-threaded program that fork()'ed but did not exec(), it
+          // is unwise to try to do any kind of cleanup (like unwinding the
+          // stack and running destructors).
+          //
+          if (e.child)
+          {
+            rm.cancel ();
+#ifdef _WIN32
+            trm.cancel ();
+#endif
+            exit (1);
+          }
+
+          throw failed ();
+        }
+
+        // VC link.exe creates an import library and .exp file for an
+        // executable if any of its object files export any symbols (think a
+        // unit test linking libus{}). And, no, there is no way to suppress
+        // it. Well, there is a way: create a .def file with an empty EXPORTS
+        // section, pass it to lib.exe to create a dummy .exp (and .lib), and
+        // then pass this empty .exp to link.exe. Wanna go this way? Didn't
+        // think so. Having no way to disable this, the next simplest thing
+        // seems to be just cleaning the mess up.
+        //
+        // Note also that if at some point we decide to support such "shared
+        // executables" (-rdynamic, etc), then it will probably have to be a
+        // different target type (exes{}?) since it will need a different set
+        // of object files (-fPIC so probably objs{}), etc.
+        //
+        if (lt.executable () && tsys == "win32-msvc")
+        {
+          path b (relt.base ());
+          try_rmfile (b + ".lib", true /* ignore_errors */);
+          try_rmfile (b + ".exp", true /* ignore_errors */);
+        }
+      }
+
+      if (ranlib)
+      {
+        const process_path& rl (cast<process_path> (ranlib));
+
+        const char* args[] = {
+          rl.recall_string (),
+          relt.string ().c_str (),
+          nullptr};
+
+        if (verb >= 2)
+          print_process (args);
+
+        if (!ctx.dry_run)
+          run (rl, args);
+      }
+
+      // For Windows generate (or clean up) rpath-emulating assembly.
+      //
+      if (tclass == "windows")
+      {
+        if (lt.executable ())
+          windows_rpath_assembly (t, bs, a, li,
+                                  cast<string> (rs[x_target_cpu]),
+                                  rpath_timestamp,
+                                  scratch);
+      }
+
+      if (lt.shared_library ())
+      {
+        // For shared libraries we may need to create a bunch of symlinks (or
+        // fallback to hardlinks/copies on Windows).
+        //
+        auto ln = [&ctx] (const path& f, const path& l)
+        {
+          if (verb >= 3)
+            text << "ln -sf " << f << ' ' << l;
+
+          if (ctx.dry_run)
+            return;
+
+          try
+          {
+            try
+            {
+              // The -f part.
+              //
+              if (file_exists (l, false /* follow_symlinks */))
+                try_rmfile (l);
+
+              mkanylink (f, l, true /* copy */, true /* relative */);
+            }
+            catch (system_error& e)
+            {
+              throw pair<entry_type, system_error> (entry_type::symlink,
+                                                    move (e));
+            }
+          }
+          catch (const pair<entry_type, system_error>& e)
+          {
+            const char* w (e.first == entry_type::regular ? "copy"     :
+                           e.first == entry_type::symlink ? "symlink"  :
+                           e.first == entry_type::other   ? "hardlink" :
+                           nullptr);
+
+            fail << "unable to make " << w << ' ' << l << ": " << e.second;
+          }
+        };
+
+        const libs_paths& paths (md.libs_paths);
+
+        const path& lk (paths.link);
+        const path& ld (paths.load);
+        const path& so (paths.soname);
+        const path& in (paths.interm);
+
+        const path* f (paths.real);
+
+        if (!in.empty ()) {ln (*f, in); f = &in;}
+        if (!so.empty ()) {ln (*f, so); f = &so;}
+        if (!ld.empty ()) {ln (*f, ld); f = &ld;}
+        if (!lk.empty ()) {ln (*f, lk);}
+      }
+      else if (lt.static_library ())
+      {
+        // Apple ar (from cctools) for some reason truncates fractional
+        // seconds when running on APFS (HFS has a second resolution so it's
+        // not an issue there). This can lead to object files being newer than
+        // the archive, which is naturally bad news. Filed as bug 49604334,
+        // reportedly fixed in Xcode 11 beta 5.
+        //
+        // Note that this block is not inside #ifdef __APPLE__ because we
+        // could be cross-compiling, theoretically. We also make sure we use
+        // Apple's ar (which is (un)recognized as 'generic') instead of, say,
+        // llvm-ar.
+        //
+        if (tsys == "darwin" && cast<string> (rs["bin.ar.id"]) == "generic")
+        {
+          if (!ctx.dry_run)
+            touch (ctx, tp, false /* create */, verb_never);
+        }
+      }
+
+      if (!ctx.dry_run)
+      {
+        rm.cancel ();
+        dd.check_mtime (tp);
+      }
+
+      // Should we go to the filesystem and get the new mtime? We know the
+      // file has been modified, so instead just use the current clock time.
+      // It has the advantage of having the subseconds precision. Plus, in
+      // case of dry-run, the file won't be modified.
+      //
+      t.mtime (system_clock::now ());
+      return target_state::changed;
+    }
+
+    target_state link_rule::
+    perform_clean (action a, const target& xt) const
+    {
+      const file& t (xt.as<file> ());
+
+      ltype lt (link_type (t));
+      const match_data& md (t.data<match_data> ());
+
+      clean_extras extras;
+      clean_adhoc_extras adhoc_extras;
+
+      if (md.binless)
+        ; // Clean prerequisites/members.
+      else
+      {
+        if (tclass != "windows")
+          ; // Everything is the default.
+        else if (tsys == "mingw32")
+        {
+          if (lt.executable ())
+          {
+            extras = {".d", ".dlls/", ".manifest.o", ".manifest"};
+          }
+
+          // For shared and static library it's the default.
+        }
+        else
+        {
+          // Assuming MSVC or alike.
+          //
+          if (lt.executable ())
+          {
+            // Clean up .ilk in case the user enabled incremental linking
+            // (notice that the .ilk extension replaces .exe).
+            //
+            extras = {".d", ".dlls/", ".manifest", "-.ilk"};
+          }
+          else if (lt.shared_library ())
+          {
+            // Clean up .ilk and .exp.
+            //
+            // Note that .exp is based on the .lib, not .dll name. And with
+            // versioning their bases may not be the same.
+            //
+            extras = {".d", "-.ilk"};
+            adhoc_extras.push_back ({libi::static_type, {"-.exp"}});
+          }
+
+          // For static library it's the default.
+        }
+
+        if (extras.empty ())
+          extras = {".d"}; // Default.
+
+#ifdef _WIN32
+        extras.push_back (".t"); // Options file (see perform_update()).
+#endif
+        // For shared libraries we may have a bunch of symlinks that we need
+        // to remove.
+        //
+        if (lt.shared_library ())
+        {
+          const libs_paths& lp (md.libs_paths);
+
+          auto add = [&extras] (const path& p)
+          {
+            if (!p.empty ())
+              extras.push_back (p.string ().c_str ());
+          };
+
+          add (lp.link);
+          add (lp.load);
+          add (lp.soname);
+          add (lp.interm);
+        }
+      }
+
+      return perform_clean_extra (a, t, extras, adhoc_extras);
+    }
+  }
+}
diff --git a/libbuild2/cc/link-rule.hxx b/libbuild2/cc/link-rule.hxx
new file mode 100644
index 0000000..2a296a7
--- /dev/null
+++ b/libbuild2/cc/link-rule.hxx
@@ -0,0 +1,188 @@
+// file      : libbuild2/cc/link-rule.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_LINK_RULE_HXX
+#define LIBBUILD2_CC_LINK_RULE_HXX
+
+#include <set>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/rule.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    class LIBBUILD2_CC_SYMEXPORT link_rule: public rule, virtual common
+    {
+    public:
+      link_rule (data&&);
+
+      struct match_result
+      {
+        bool seen_x   = false;
+        bool seen_c   = false;
+        bool seen_cc  = false;
+        bool seen_obj = false;
+        bool seen_lib = false;
+      };
+
+      match_result
+      match (action, const target&, const target*, otype, bool) const;
+
+      virtual bool
+      match (action, target&, const string&) const override;
+
+      virtual recipe
+      apply (action, target&) const override;
+
+      target_state
+      perform_update (action, const target&) const;
+
+      target_state
+      perform_clean (action, const target&) const;
+
+    private:
+      friend class install_rule;
+      friend class libux_install_rule;
+
+      // Shared library paths.
+      //
+      struct libs_paths
+      {
+        // If any (except real) is empty, then it is the same as the next
+        // one. Except for load and intermediate, for which empty indicates
+        // that it is not used.
+        //
+        // Note that the paths must form a "hierarchy" with subsequent paths
+        // adding extra information as suffixes. This is relied upon by the
+        // clean pattern (see below).
+        //
+        // The libs{} path is always the real path. On Windows what we link
+        // to is the import library and the link path is empty.
+        //
+        path        link;   // What we link: libfoo.so
+        path        load;   // What we load (with dlopen() or similar)
+        path        soname; // SONAME:       libfoo-1.so, libfoo.so.1
+        path        interm; // Intermediate: libfoo.so.1.2
+        const path* real;   // Real:         libfoo.so.1.2.3
+
+        inline const path&
+        effect_link () const {return link.empty () ? effect_soname () : link;}
+
+        inline const path&
+        effect_soname () const {return soname.empty () ? *real : soname;}
+
+        // Cleanup pattern used to remove previous versions. If empty, no
+        // cleanup is performed. The above (current) names are automatically
+        // filtered out.
+        //
+        path clean;
+      };
+
+      libs_paths
+      derive_libs_paths (file&, const char*, const char*) const;
+
+      struct match_data
+      {
+        // The "for install" condition is signalled to us by install_rule when
+        // it is matched for the update operation. It also verifies that if we
+        // have already been executed, then it was for install.
+        //
+        // This has an interesting implication: it means that this rule cannot
+        // be used to update targets during match. Specifically, we cannot be
+        // executed for group resolution purposes (not a problem) nor as part
+        // of the generated source update. The latter case can be a problem:
+        // imagine a code generator that itself may need to be updated before
+        // it can be used to re-generate some out-of-date source code. As an
+        // aside, note that even if we were somehow able to communicate the
+        // "for install" in this case, the result of such an update may not
+        // actually be "usable" (e.g., not runnable because of the missing
+        // rpaths). There is another prominent case where the result may not
+        // be usable: cross-compilation.
+        //
+        // So the current (admittedly fuzzy) thinking is that a project shall
+        // not try to use its own build for update since it may not be usable
+        // (because of cross-compilations, being "for install", etc). Instead,
+        // it should rely on another, "usable" build of itself (this, BTW, is
+        // related to bpkg's build-time vs run-time dependencies).
+        //
+        optional<bool> for_install;
+
+        bool binless; // Binary-less library.
+        size_t start; // Parallel prerequisites/prerequisite_targets start.
+
+        link_rule::libs_paths libs_paths;
+      };
+
+      // Library handling.
+      //
+      void
+      append_libraries (strings&,
+                        const file&, bool, lflags,
+                        const scope&, action, linfo) const;
+
+      void
+      hash_libraries (sha256&,
+                      bool&, timestamp,
+                      const file&, bool, lflags,
+                      const scope&, action, linfo) const;
+
+      void
+      rpath_libraries (strings&,
+                       const target&,
+                       const scope&, action, linfo,
+                       bool) const;
+
+      // Windows rpath emulation (windows-rpath.cxx).
+      //
+      struct windows_dll
+      {
+        const string& dll;
+        const string* pdb; // NULL if none.
+        string pdb_storage; // NOTE(review): presumably backs *pdb -- confirm.
+
+        bool operator< (const windows_dll& y) const {return dll < y.dll;}
+      };
+
+      using windows_dlls = std::set<windows_dll>; // Ordered/unique by dll.
+
+      timestamp
+      windows_rpath_timestamp (const file&,
+                               const scope&,
+                               action, linfo) const;
+
+      windows_dlls
+      windows_rpath_dlls (const file&, const scope&, action, linfo) const;
+
+      void
+      windows_rpath_assembly (const file&, const scope&, action, linfo,
+                              const string&,
+                              timestamp,
+                              bool) const;
+
+      // Windows-specific (windows-manifest.cxx).
+      //
+      pair<path, timestamp>
+      windows_manifest (const file&, bool rpath_assembly) const;
+
+      // pkg-config's .pc file generation (pkgconfig.cxx).
+      //
+      void
+      pkgconfig_save (action, const file&, bool, bool) const;
+
+    private:
+      const string rule_id;
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_LINK_RULE_HXX
diff --git a/libbuild2/cc/module.cxx b/libbuild2/cc/module.cxx
new file mode 100644
index 0000000..3113b5c
--- /dev/null
+++ b/libbuild2/cc/module.cxx
@@ -0,0 +1,781 @@
+// file      : libbuild2/cc/module.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/module.hxx>
+
+#include <iomanip> // left, setw()
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/target.hxx> // pc*
+
+#include <libbuild2/config/utility.hxx>
+#include <libbuild2/install/utility.hxx>
+
+#include <libbuild2/cc/guess.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    void config_module::
+    guess (scope& rs, const location& loc, const variable_map&)
+    {
+      tracer trace (x, "guess_init");
+
+      bool cc_loaded (cast_false<bool> (rs["cc.core.guess.loaded"]));
+
+      // Adjust module priority (compiler). Also order cc module before us
+      // (we don't want to use priorities for that in case someone manages
+      // to slot in-between).
+      //
+      if (!cc_loaded)
+        config::save_module (rs, "cc", 250);
+
+      config::save_module (rs, x, 250);
+
+      auto& vp (rs.ctx.var_pool.rw (rs));
+
+      // Must already exist.
+      //
+      const variable& config_c_poptions (vp["config.cc.poptions"]);
+      const variable& config_c_coptions (vp["config.cc.coptions"]);
+      const variable& config_c_loptions (vp["config.cc.loptions"]);
+
+      // config.x
+      //
+
+      // Normally we will have a persistent configuration and computing the
+      // default value every time will be a waste. So try without a default
+      // first.
+      //
+      auto p (config::omitted (rs, config_x));
+
+      if (!p.first)
+      {
+        // If there is a config.x value for one of the modules that can hint
+        // us the toolchain, load its .guess module. This makes sure that the
+        // order in which we load the modules is unimportant and that the user
+        // can specify the toolchain using any of the config.x values.
+        //
+        if (!cc_loaded)
+        {
+          for (const char* const* pm (x_hinters); *pm != nullptr; ++pm)
+          {
+            string m (*pm);
+
+            // Must be the same as in module's init().
+            //
+            const variable& v (vp.insert<path> ("config." + m, true));
+
+            if (rs[v].defined ())
+            {
+              load_module (rs, rs, m + ".guess", loc);
+              cc_loaded = true;
+              break;
+            }
+          }
+        }
+
+        // If cc.core.guess is already loaded then use its toolchain id and
+        // (optional) pattern to guess an appropriate default (e.g., for {gcc,
+        // *-4.9} we will get g++-4.9).
+        //
+        path d;
+
+        if (cc_loaded)
+          d = guess_default (x_lang,
+                             cast<string> (rs["cc.id"]),
+                             cast<string> (rs["cc.pattern"]));
+        else
+        {
+          d = path (x_default);
+
+          if (d.empty ())
+            fail << "not built with default " << x_lang << " compiler" <<
+              info << "use config." << x << " to specify";
+        }
+
+        // If this value was hinted, save it as commented out so that if the
+        // user changes the source of the pattern, this one will get updated
+        // as well.
+        //
+        p = config::required (rs,
+                              config_x,
+                              d,
+                              false,
+                              cc_loaded ? config::save_commented : 0);
+      }
+
+      // Figure out which compiler we are dealing with, its target, etc.
+      //
+      ci_ = &build2::cc::guess (
+        x,
+        x_lang,
+        cast<path> (*p.first),
+        cast_null<string> (config::omitted (rs, config_x_id).first),
+        cast_null<string> (config::omitted (rs, config_x_version).first),
+        cast_null<string> (config::omitted (rs, config_x_target).first),
+        cast_null<strings> (rs[config_c_poptions]),
+        cast_null<strings> (rs[config_x_poptions]),
+        cast_null<strings> (rs[config_c_coptions]),
+        cast_null<strings> (rs[config_x_coptions]),
+        cast_null<strings> (rs[config_c_loptions]),
+        cast_null<strings> (rs[config_x_loptions]));
+
+      const compiler_info& ci (*ci_);
+
+      // Split/canonicalize the target. First see if the user asked us to
+      // use config.sub.
+      //
+      target_triplet tt;
+      {
+        string ct;
+
+        if (config_sub)
+        {
+          ct = run<string> (3,
+                            *config_sub,
+                            ci.target.c_str (),
+                            [] (string& l, bool) {return move (l);});
+          l5 ([&]{trace << "config.sub target: '" << ct << "'";});
+        }
+
+        try
+        {
+          tt = target_triplet (ct.empty () ? ci.target : ct);
+          l5 ([&]{trace << "canonical target: '" << tt.string () << "'; "
+                        << "class: " << tt.class_;});
+        }
+        catch (const invalid_argument& e)
+        {
+          // This is where we suggest that the user specifies --config-sub to
+          // help us out.
+          //
+          fail << "unable to parse " << x_lang << " compiler target '"
+               << ci.target << "': " << e <<
+            info << "consider using the --config-sub option";
+        }
+      }
+
+      // Assign values to variables that describe the compiler.
+      //
+      rs.assign (x_id) = ci.id.string ();
+      rs.assign (x_id_type) = to_string (ci.id.type);
+      rs.assign (x_id_variant) = ci.id.variant;
+
+      rs.assign (x_class) = to_string (ci.class_);
+
+      rs.assign (x_version) = ci.version.string;
+      rs.assign (x_version_major) = ci.version.major;
+      rs.assign (x_version_minor) = ci.version.minor;
+      rs.assign (x_version_patch) = ci.version.patch;
+      rs.assign (x_version_build) = ci.version.build;
+
+      // Also enter as x.target.{cpu,vendor,system,version,class} for
+      // convenience of access.
+      //
+      rs.assign (x_target_cpu)     = tt.cpu;
+      rs.assign (x_target_vendor)  = tt.vendor;
+      rs.assign (x_target_system)  = tt.system;
+      rs.assign (x_target_version) = tt.version;
+      rs.assign (x_target_class)   = tt.class_;
+
+      rs.assign (x_target) = move (tt);
+
+      rs.assign (x_pattern) = ci.pattern;
+
+      if (!x_stdlib.alias (c_stdlib))
+        rs.assign (x_stdlib) = ci.x_stdlib;
+
+      new_ = p.second;
+
+      // Load cc.core.guess.
+      //
+      if (!cc_loaded)
+      {
+        // Prepare configuration hints.
+        //
+        variable_map h (rs.ctx);
+
+        // Note that all these variables have already been registered.
+        //
+        h.assign ("config.cc.id") = cast<string> (rs[x_id]);
+        h.assign ("config.cc.hinter") = string (x);
+        h.assign ("config.cc.target") = cast<target_triplet> (rs[x_target]);
+
+        if (!ci.pattern.empty ())
+          h.assign ("config.cc.pattern") = ci.pattern;
+
+        h.assign (c_runtime) = ci.runtime;
+        h.assign (c_stdlib) = ci.c_stdlib;
+
+        load_module (rs, rs, "cc.core.guess", loc, false, h);
+      }
+      else
+      {
+        // If cc.core.guess is already loaded, verify its configuration
+        // matched ours since it could have been loaded by another c-family
+        // module.
+        //
+        const auto& h (cast<string> (rs["cc.hinter"]));
+
+        auto check = [&loc, &h, this] (const auto& cv,
+                                       const auto& xv,
+                                       const char* what,
+                                       bool error = true)
+        {
+          if (cv != xv)
+          {
+            diag_record dr (error ? fail (loc) : warn (loc));
+
+            dr << h << " and " << x << " module " << what << " mismatch" <<
+            info << h << " is '" << cv << "'" <<
+            info << x << " is '" << xv << "'" <<
+            info << "consider explicitly specifying config." << h
+                 << " and config." << x;
+          }
+        };
+
+        check (cast<string> (rs["cc.id"]),
+               cast<string> (rs[x_id]),
+               "toolchain");
+
+        // We used to not require that patterns match assuming that if the
+        // toolchain id and target are the same, then where exactly the tools
+        // come from doesn't really matter. But in most cases it will be the
+        // g++-7 vs gcc kind of mistakes. So now we warn since even if
+        // intentional, it is still probably a bad idea.
+        //
+        check (cast<string> (rs["cc.pattern"]),
+               cast<string> (rs[x_pattern]),
+               "toolchain pattern",
+               false);
+
+        check (cast<target_triplet> (rs["cc.target"]),
+               cast<target_triplet> (rs[x_target]),
+               "target");
+
+        check (cast<string> (rs["cc.runtime"]),
+               ci.runtime,
+               "runtime");
+
+        check (cast<string> (rs["cc.stdlib"]),
+               ci.c_stdlib,
+               "c standard library");
+      }
+    }
+
+#ifndef _WIN32 // Paths used by init() below for the /usr/local heuristics.
+    static const dir_path usr_inc     ("/usr/include");
+    static const dir_path usr_loc_lib ("/usr/local/lib");
+    static const dir_path usr_loc_inc ("/usr/local/include");
+#  ifdef __APPLE__ // What init() below treats as Apple's /usr/include.
+    static const dir_path a_usr_inc (
+      "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include");
+#  endif
+#endif
+
+    void config_module::
+    init (scope& rs, const location& loc, const variable_map&)
+    {
+      tracer trace (x, "config_init");
+
+      const compiler_info& ci (*ci_); // Note: ci_ must be set by now.
+      const target_triplet& tt (cast<target_triplet> (rs[x_target]));
+
+      // config.x.std overrides x.std
+      //
+      {
+        lookup l (config::omitted (rs, config_x_std).first);
+
+        const string* v;
+        if (l.defined ())
+        {
+          v = cast_null<string> (l);
+          rs.assign (x_std) = v;
+        }
+        else
+          v = cast_null<string> (rs[x_std]);
+
+        // Translate x_std value (if any) to the compiler option(s) (if any).
+        //
+        tstd = translate_std (ci, rs, v);
+      }
+
+      // Extract system header/library search paths from the compiler and
+      // determine if we need any additional search paths.
+      //
+      dir_paths lib_dirs;
+      dir_paths inc_dirs;
+
+      switch (ci.class_)
+      {
+      case compiler_class::gcc:
+        {
+          lib_dirs = gcc_library_search_paths (ci.path, rs);
+          inc_dirs = gcc_header_search_paths (ci.path, rs);
+          break;
+        }
+      case compiler_class::msvc:
+        {
+          lib_dirs = msvc_library_search_paths (ci.path, rs);
+          inc_dirs = msvc_header_search_paths (ci.path, rs);
+          break;
+        }
+      }
+
+      sys_lib_dirs_extra = lib_dirs.size (); // Extras (if any) start here.
+      sys_inc_dirs_extra = inc_dirs.size (); // Ditto.
+
+#ifndef _WIN32
+      // Add /usr/local/{include,lib}. We definitely shouldn't do this if we
+      // are cross-compiling. But even if the build and target are the same,
+      // it's possible the compiler uses some carefully crafted sysroot and by
+      // adding /usr/local/* we will just mess things up. So the heuristic
+      // that we will use is this: if the compiler's system include directories
+      // contain /usr[/local]/include then we add /usr/local/*.
+      //
+      // Note that similar to GCC we also check for the directory existence.
+      // Failing that, we can end up with some bizarre yo-yo'ing cases where
+      // uninstall removes the directories which in turn triggers a rebuild
+      // on the next invocation.
+      //
+      {
+        auto& is (inc_dirs);
+        auto& ls (lib_dirs);
+
+        bool ui  (find (is.begin (), is.end (), usr_inc)     != is.end ());
+        bool uli (find (is.begin (), is.end (), usr_loc_inc) != is.end ());
+
+#ifdef __APPLE__
+        // On Mac OS starting from 10.14 there is no longer /usr/include.
+        // Instead we get the following:
+        //
+        // Homebrew GCC 9:
+        //
+        // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include
+        //
+        // Apple Clang 10.0.1:
+        //
+        // /Library/Developer/CommandLineTools/usr/include
+        // /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include
+        //
+        // What exactly all this means is anyone's guess, of course. So for
+        // now we will assume that anything that is or resolves (like that
+        // MacOSX10.14.sdk symlink) to:
+        //
+        // /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include
+        //
+        // Is Apple's /usr/include.
+        //
+        if (!ui && !uli)
+        {
+          for (const dir_path& d: inc_dirs)
+          {
+            // Both Clang and GCC skip non-existent paths but let's handle
+            // (and ignore) directories that cause any errors, for good
+            // measure.
+            //
+            try
+            {
+              if (d == a_usr_inc || dir_path (d).realize () == a_usr_inc)
+              {
+                ui = true;
+                break;
+              }
+            }
+            catch (...) {} // Deliberately ignored (see above).
+          }
+        }
+#endif
+        if (ui || uli)
+        {
+          bool ull (find (ls.begin (), ls.end (), usr_loc_lib) != ls.end ());
+
+          // Many platforms don't search in /usr/local/lib by default (but do
+          // for headers in /usr/local/include). So add it as the last option.
+          //
+          if (!ull && exists (usr_loc_lib, true /* ignore_error */))
+            ls.push_back (usr_loc_lib);
+
+          // FreeBSD is at least consistent: it searches in neither. Quoting
+          // its wiki: "FreeBSD can't even find libraries that it installed."
+          // So let's help it a bit.
+          //
+          if (!uli && exists (usr_loc_inc, true /* ignore_error */))
+            is.push_back (usr_loc_inc);
+        }
+      }
+#endif
+
+      // If this is a new value (e.g., we are configuring), then print the
+      // report at verbosity level 2 and up (-v). Otherwise, at level 3.
+      //
+      if (verb >= (new_ ? 2 : 3))
+      {
+        diag_record dr (text);
+
+        {
+          dr << x << ' ' << project (rs) << '@' << rs << '\n'
+             << "  " << left << setw (11) << x << ci.path << '\n'
+             << "  id         " << ci.id << '\n'
+             << "  version    " << ci.version.string << '\n'
+             << "  major      " << ci.version.major << '\n'
+             << "  minor      " << ci.version.minor << '\n'
+             << "  patch      " << ci.version.patch << '\n';
+        }
+
+        if (!ci.version.build.empty ())
+        {
+          dr << "  build      " << ci.version.build << '\n';
+        }
+
+        {
+          const string& ct (tt.string ()); // Canonical target.
+
+          dr << "  signature  " << ci.signature << '\n'
+             << "  checksum   " << ci.checksum << '\n'
+             << "  target     " << ct;
+
+          if (ct != ci.original_target)
+            dr << " (" << ci.original_target << ")";
+
+          dr << "\n  runtime    " << ci.runtime
+             << "\n  stdlib     " << ci.x_stdlib;
+
+          if (!x_stdlib.alias (c_stdlib))
+            dr << "\n  c stdlib   " << ci.c_stdlib;
+        }
+
+        if (!tstd.empty ())
+        {
+          dr << "\n  std       "; // One less space.
+          for (const string& o: tstd) dr << ' ' << o;
+        }
+
+        if (!ci.pattern.empty ()) // Note: bin_pattern printed by bin
+        {
+          dr << "\n  pattern    " << ci.pattern;
+        }
+
+        if (verb >= 3 && !inc_dirs.empty ())
+        {
+          dr << "\n  inc dirs";
+          for (size_t i (0); i != inc_dirs.size (); ++i)
+          {
+            if (i == sys_inc_dirs_extra) // Separate the extra paths.
+              dr << "\n    --";
+            dr << "\n    " << inc_dirs[i];
+          }
+        }
+
+        if (verb >= 3 && !lib_dirs.empty ())
+        {
+          dr << "\n  lib dirs";
+          for (size_t i (0); i != lib_dirs.size (); ++i)
+          {
+            if (i == sys_lib_dirs_extra) // Separate the extra paths.
+              dr << "\n    --";
+            dr << "\n    " << lib_dirs[i];
+          }
+        }
+      }
+
+      rs.assign (x_path) = process_path (ci.path, false /* init */);
+      rs.assign (x_sys_lib_dirs) = move (lib_dirs);
+      rs.assign (x_sys_inc_dirs) = move (inc_dirs);
+
+      rs.assign (x_signature) = ci.signature;
+      rs.assign (x_checksum) = ci.checksum;
+
+      // config.x.{p,c,l,a}options
+      // config.x.libs
+      //
+      // These are optional. We also merge them into the corresponding
+      // x.* variables.
+      //
+      // The merging part gets a bit tricky if this module has already
+      // been loaded in one of the outer scopes. By doing the straight
+      // append we would just be repeating the same options over and
+      // over. So what we are going to do is only append to a value if
+      // it came from this scope. Then the usage for merging becomes:
+      //
+      // x.coptions = <overridable options> # Note: '='.
+      // using x
+      // x.coptions += <overriding options> # Note: '+='.
+      //
+      rs.assign (x_poptions) += cast_null<strings> (
+        config::optional (rs, config_x_poptions));
+
+      rs.assign (x_coptions) += cast_null<strings> (
+        config::optional (rs, config_x_coptions));
+
+      rs.assign (x_loptions) += cast_null<strings> (
+        config::optional (rs, config_x_loptions));
+
+      rs.assign (x_aoptions) += cast_null<strings> (
+        config::optional (rs, config_x_aoptions));
+
+      rs.assign (x_libs) += cast_null<strings> (
+        config::optional (rs, config_x_libs));
+
+      // config.x.importable_headers
+      //
+      // It's still fuzzy whether specifying (or maybe tweaking) this list in
+      // the configuration will be a common thing to do so for now we use
+      // omitted. It's also probably too early to think whether we should have
+      // the cc.* version and what the semantics should be.
+      //
+      if (x_importable_headers != nullptr)
+      {
+        lookup l (config::omitted (rs, *config_x_importable_headers).first);
+
+        // @@ MODHDR: if(modules) ?
+        //
+        rs.assign (x_importable_headers) += cast_null<strings> (l);
+      }
+
+      // Load cc.core.config (unless already loaded).
+      //
+      if (!cast_false<bool> (rs["cc.core.config.loaded"]))
+      {
+        variable_map h (rs.ctx);
+
+        if (!ci.bin_pattern.empty ())
+          h.assign ("config.bin.pattern") = ci.bin_pattern;
+
+        load_module (rs, rs, "cc.core.config", loc, false, h);
+      }
+    }
+
+    void module::
+    init (scope& rs, const location& loc, const variable_map&)
+    {
+      tracer trace (x, "init");
+
+      // Load cc.core. Besides other things, this will load bin (core) plus
+      // extra bin.* modules we may need.
+      //
+      if (!cast_false<bool> (rs["cc.core.loaded"]))
+        load_module (rs, rs, "cc.core", loc);
+
+      // Process, sort, and cache (in this->import_hdr) importable headers.
+      // Keep the cache NULL if unused or empty.
+      //
+      // @@ MODHDR TODO: support exclusion entries (e.g., -<stdio.h>)?
+      //
+      if (modules && x_importable_headers != nullptr)
+      {
+        strings* ih (cast_null<strings> (rs.assign (x_importable_headers)));
+
+        if (ih != nullptr && !ih->empty ())
+        {
+          // Translate <>-style header names to absolute paths using the
+          // compiler's include search paths. Otherwise complete and normalize
+          // since when searching in this list we always use the absolute and
+          // normalized header target path.
+          //
+          for (string& h: *ih)
+          {
+            if (h.empty ())
+              continue;
+
+            path f;
+            if (h.front () == '<' && h.back () == '>')
+            {
+              h.pop_back ();
+              h.erase (0, 1);
+
+              for (const dir_path& d: sys_inc_dirs)
+              {
+                if (file_exists ((f = d, f /= h),
+                                 true /* follow_symlinks */,
+                                 true /* ignore_errors */))
+                  goto found;
+              }
+
+              // What should we do if not found? While we can fail, this could
+              // be too drastic if, for example, the header is "optional" and
+              // may or may not be present/used. So for now let's restore the
+              // original form to aid debugging (it can't possibly match any
+              // absolute path).
+              //
+              h.insert (0, 1, '<');
+              h.push_back ('>');
+              continue;
+
+            found:
+              ; // Fall through.
+            }
+            else
+            {
+              f = path (move (h));
+
+              if (f.relative ())
+                f.complete ();
+            }
+
+            // @@ MODHDR: should we use the more elaborate but robust
+            //            normalize/realize scheme so that we get the same
+            //            path? Feels right.
+            f.normalize ();
+            h = move (f).string ();
+          }
+
+          sort (ih->begin (), ih->end ());
+          import_hdr = ih;
+        }
+      }
+
+      // Register target types and configure their "installability".
+      //
+      bool install_loaded (cast_false<bool> (rs["install.loaded"]));
+
+      {
+        using namespace install;
+
+        rs.insert_target_type (x_src);
+
+        auto insert_hdr = [&rs, install_loaded] (const target_type& tt)
+        {
+          rs.insert_target_type (tt);
+
+          // Install headers into install.include.
+          //
+          if (install_loaded)
+            install_path (rs, tt, dir_path ("include"));
+        };
+
+        // Note: module (x_mod) is in x_hdr.
+        //
+        for (const target_type* const* ht (x_hdr); *ht != nullptr; ++ht)
+          insert_hdr (**ht);
+
+        // Also register the C header for C-derived languages.
+        //
+        if (*x_hdr != &h::static_type)
+          insert_hdr (h::static_type);
+
+        rs.insert_target_type<pca> ();
+        rs.insert_target_type<pcs> ();
+
+        if (install_loaded)
+          install_path<pc> (rs, dir_path ("pkgconfig"));
+      }
+
+      // Register rules.
+      //
+      {
+        using namespace bin;
+
+        auto& r (rs.rules);
+
+        // We register for configure so that we detect unresolved imports
+        // during configuration rather than later, e.g., during update.
+        //
+        const compile_rule& cr (*this);
+        const link_rule&    lr (*this);
+
+        r.insert<obje> (perform_update_id,    x_compile, cr);
+        r.insert<obje> (perform_clean_id,     x_compile, cr);
+        r.insert<obje> (configure_update_id,  x_compile, cr);
+
+        r.insert<obja> (perform_update_id,    x_compile, cr);
+        r.insert<obja> (perform_clean_id,     x_compile, cr);
+        r.insert<obja> (configure_update_id,  x_compile, cr);
+
+        r.insert<objs> (perform_update_id,   x_compile, cr);
+        r.insert<objs> (perform_clean_id,    x_compile, cr);
+        r.insert<objs> (configure_update_id, x_compile, cr);
+
+        if (modules)
+        {
+          r.insert<bmie> (perform_update_id,    x_compile, cr);
+          r.insert<bmie> (perform_clean_id,     x_compile, cr);
+          r.insert<bmie> (configure_update_id,  x_compile, cr);
+
+          r.insert<hbmie> (perform_update_id,    x_compile, cr);
+          r.insert<hbmie> (perform_clean_id,     x_compile, cr);
+          r.insert<hbmie> (configure_update_id,  x_compile, cr);
+
+          r.insert<bmia> (perform_update_id,    x_compile, cr);
+          r.insert<bmia> (perform_clean_id,     x_compile, cr);
+          r.insert<bmia> (configure_update_id,  x_compile, cr);
+
+          r.insert<hbmia> (perform_update_id,    x_compile, cr);
+          r.insert<hbmia> (perform_clean_id,     x_compile, cr);
+          r.insert<hbmia> (configure_update_id,  x_compile, cr);
+
+          r.insert<bmis> (perform_update_id,   x_compile, cr);
+          r.insert<bmis> (perform_clean_id,    x_compile, cr);
+          r.insert<bmis> (configure_update_id, x_compile, cr);
+
+          r.insert<hbmis> (perform_update_id,   x_compile, cr);
+          r.insert<hbmis> (perform_clean_id,    x_compile, cr);
+          r.insert<hbmis> (configure_update_id, x_compile, cr);
+        }
+
+        r.insert<libue> (perform_update_id,    x_link, lr);
+        r.insert<libue> (perform_clean_id,     x_link, lr);
+        r.insert<libue> (configure_update_id,  x_link, lr);
+
+        r.insert<libua> (perform_update_id,    x_link, lr);
+        r.insert<libua> (perform_clean_id,     x_link, lr);
+        r.insert<libua> (configure_update_id,  x_link, lr);
+
+        r.insert<libus> (perform_update_id,    x_link, lr);
+        r.insert<libus> (perform_clean_id,     x_link, lr);
+        r.insert<libus> (configure_update_id,  x_link, lr);
+
+        r.insert<exe>  (perform_update_id,    x_link, lr);
+        r.insert<exe>  (perform_clean_id,     x_link, lr);
+        r.insert<exe>  (configure_update_id,  x_link, lr);
+
+        r.insert<liba> (perform_update_id,    x_link, lr);
+        r.insert<liba> (perform_clean_id,     x_link, lr);
+        r.insert<liba> (configure_update_id,  x_link, lr);
+
+        r.insert<libs> (perform_update_id,   x_link, lr);
+        r.insert<libs> (perform_clean_id,    x_link, lr);
+        r.insert<libs> (configure_update_id, x_link, lr);
+
+        // Note that while libu*{} are not installable, we need to see through
+        // them in case they depend on stuff that we need to install (see the
+        // install rule implementations for details).
+        //
+        if (install_loaded)
+        {
+          const install_rule&  ir (*this);
+
+          r.insert<exe>  (perform_install_id,   x_install,   ir);
+          r.insert<exe>  (perform_uninstall_id, x_uninstall, ir);
+
+          r.insert<liba> (perform_install_id,   x_install,   ir);
+          r.insert<liba> (perform_uninstall_id, x_uninstall, ir);
+
+          r.insert<libs> (perform_install_id,   x_install,   ir);
+          r.insert<libs> (perform_uninstall_id, x_uninstall, ir);
+
+          const libux_install_rule& lr (*this); // Note: shadows lr above.
+
+          r.insert<libue> (perform_install_id,   x_install,   lr);
+          r.insert<libue> (perform_uninstall_id, x_uninstall, lr);
+
+          r.insert<libua> (perform_install_id,   x_install,   lr);
+          r.insert<libua> (perform_uninstall_id, x_uninstall, lr);
+
+          r.insert<libus> (perform_install_id,   x_install,   lr);
+          r.insert<libus> (perform_uninstall_id, x_uninstall, lr);
+        }
+      }
+    }
+  }
+}
diff --git a/libbuild2/cc/module.hxx b/libbuild2/cc/module.hxx
new file mode 100644
index 0000000..43670c3
--- /dev/null
+++ b/libbuild2/cc/module.hxx
@@ -0,0 +1,103 @@
+// file      : libbuild2/cc/module.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_MODULE_HXX
+#define LIBBUILD2_CC_MODULE_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/module.hxx>
+#include <libbuild2/variable.hxx>
+
+#include <libbuild2/cc/common.hxx>
+
+#include <libbuild2/cc/compile-rule.hxx>
+#include <libbuild2/cc/link-rule.hxx>
+#include <libbuild2/cc/install-rule.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    struct compiler_info;
+
+    class LIBBUILD2_CC_SYMEXPORT config_module: public module_base,
+                                                public virtual config_data
+    {
+    public:
+      explicit
+      config_module (config_data&& d) : config_data (move (d)) {}
+
+      // We split the configuration process into two parts: guessing the
+      // compiler information and the actual configuration. This allows one to
+      // adjust configuration (say the standard or enabled experimental
+      // features) based on the compiler information by first loading the
+      // guess module.
+      //
+      void
+      guess (scope&, const location&, const variable_map&);
+
+      void
+      init (scope&, const location&, const variable_map&);
+
+      // Translate the x.std value (if any) to the standard-selecting
+      // option(s) (if any). May also check/set x.features.* variables on the
+      // root scope.
+      //
+      virtual strings
+      translate_std (const compiler_info&, scope&, const string*) const = 0;
+
+      strings tstd;                  // Result of translate_std(); see init().
+      size_t sys_lib_dirs_extra = 0; // First extra path (size if none).
+      size_t sys_inc_dirs_extra = 0; // First extra path (size if none).
+
+      const compiler_info* ci_ = nullptr; // Null until set; init() derefs it.
+
+    private:
+      // Defined in gcc.cxx.
+      //
+      dir_paths
+      gcc_header_search_paths (const process_path&, scope&) const;
+
+      dir_paths
+      gcc_library_search_paths (const process_path&, scope&) const;
+
+      // Defined in msvc.cxx.
+      //
+      dir_paths
+      msvc_header_search_paths (const process_path&, scope&) const;
+
+      dir_paths
+      msvc_library_search_paths (const process_path&, scope&) const;
+
+    private:
+      bool new_ = false; // See guess() and init() for details.
+    };
+
+    class LIBBUILD2_CC_SYMEXPORT module: public module_base,
+                                         public virtual common,
+                                         link_rule,          // The rule bases
+                                         compile_rule,       // are private;
+                                         install_rule,       // init() registers
+                                         libux_install_rule  // *this as each.
+    {
+    public:
+      explicit
+      module (data&& d)                  // NOTE(review): the same d is moved
+          : common (move (d)),           // into every base; presumably only
+            link_rule (move (d)),        // the virtual base (common) really
+            compile_rule (move (d)),     // consumes it -- confirm.
+            install_rule (move (d), *this),
+            libux_install_rule (move (d), *this) {}
+
+      void
+      init (scope&, const location&, const variable_map&);
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_MODULE_HXX
diff --git a/libbuild2/cc/msvc.cxx b/libbuild2/cc/msvc.cxx
new file mode 100644
index 0000000..d802b98
--- /dev/null
+++ b/libbuild2/cc/msvc.cxx
@@ -0,0 +1,502 @@
+// file      : libbuild2/cc/msvc.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <cstring> // strcmp()
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+#include <libbuild2/cc/common.hxx>
+#include <libbuild2/cc/module.hxx>
+
+using std::strcmp;
+
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    // Map the target triplet CPU to the lib.exe/link.exe /MACHINE option.
+    //
+    const char*
+    msvc_machine (const string& cpu)
+    {
+      if (cpu == "i386" || cpu == "i686") return "/MACHINE:x86";
+      if (cpu == "x86_64")                return "/MACHINE:x64";
+      if (cpu == "arm")                   return "/MACHINE:ARM";
+      if (cpu == "arm64")                 return "/MACHINE:ARM64";
+
+      // Not a CPU that we know how to translate.
+      //
+      fail << "unable to translate CPU " << cpu << " to /MACHINE";
+
+      return nullptr;
+    }
+
+    // Sanitize cl.exe options (in place) by dropping overridden duplicates.
+    //
+    void
+    msvc_sanitize_cl (cstrings& args)
+    {
+      // VC is trying to be "helpful" and warn about one command line option
+      // overriding another. For example:
+      //
+      // cl : Command line warning D9025 : overriding '/W1' with '/W2'
+      //
+      // So we have to sanitize the command line and suppress duplicates of
+      // certain options.
+      //
+      // Note also that it is theoretically possible we will treat an option's
+      // argument as an option. Oh, well, nobody is perfect in the Microsoft
+      // land.
+
+      // We want to keep the last option seen at the position (relative to
+      // other options) that it was encountered. If we were to iterate forward
+      // and keep positions of the encountered options, then we would have had
+      // to adjust some of them once we remove a duplicate. So instead we are
+      // going to iterate backwards, in which case we don't even need to keep
+      // positions, just flags. Note that args[0] is cl.exe itself in which we
+      // are conveniently not interested (and empty args are simply a noop).
+      //
+      bool W (false); // /WN /Wall /w
+
+      for (size_t i (args.size ()); i-- > 1; ) // Indexes size-1 down to 1.
+      {
+        auto erase = [&args, &i] ()
+        {
+          args.erase (args.begin () + i);
+        };
+
+        const char* a (args[i]);
+
+        if (*a != '/' && *a != '-') // Not an option.
+          continue;
+
+        ++a;
+
+        // /WN /Wall /w
+        //
+        if ((a[0] == 'W' && digit (a[1]) && a[2] == '\0') || // WN
+            (a[0] == 'W' && strcmp (a + 1, "all") == 0)   || // Wall
+            (a[0] == 'w' && a[1] == '\0'))                   // w
+        {
+          if (W)
+            erase (); // A later one has already been seen: drop this one.
+          else
+            W = true;
+        }
+      }
+    }
+
+    // Sense whether this is a diagnostics line returning the position of the
+    // NNNN code in XNNNN (where X is f) and npos otherwise.
+    //
+    size_t
+    msvc_sense_diag (const string& l, char f)
+    {
+      // Note that while the C-numbers seem to all be in the ' CNNNN:' form,
+      // the D ones can be ' DNNNN :', for example:
+      //
+      // cl : Command line warning D9025 : overriding '/W3' with '/W4'
+      //
+      // So we start at the first colon and from then on consider both colons
+      // and spaces as potential code terminators.
+      //
+      const size_t n (l.size ());
+
+      for (size_t p (l.find (':')); p != string::npos; )
+      {
+        if (p >= 6           &&
+            l[p - 6] == ' '  &&
+            l[p - 5] == f    &&
+            digit (l[p - 4]) &&
+            digit (l[p - 3]) &&
+            digit (l[p - 2]) &&
+            digit (l[p - 1]))
+          return p - 4; // Start of the error code.
+
+        p = (p + 1 != n ? l.find_first_of (": ", p + 1) : string::npos);
+      }
+
+      return string::npos;
+    }
+
+    // Filter cl.exe and link.exe noise.
+    //
+    void
+    msvc_filter_cl (ifdstream& is, const path& src)
+    {
+      // While it appears VC always prints the source name (even if the
+      // file does not exist), let's do a sanity check. Also handle the
+      // command line errors/warnings which come before the file name.
+      //
+      for (string l; !eof (getline (is, l)); )
+      {
+        if (l == src.leaf ().string ())
+          break; // The source name line: swallow it and we are done.
+
+        diag_stream_lock () << l << endl;
+
+        // Keep going only past the command line (D) diagnostics.
+        //
+        if (msvc_sense_diag (l, 'D') == string::npos)
+          break;
+      }
+    }
+
+    void
+    msvc_filter_link (ifdstream& is, const file& t, otype lt)
+    {
+      // Swallow the lines that we recognize as noise and stop at (after
+      // printing) the first one that we don't. Messages can be translated.
+      //
+      for (string ln; getline (is, ln); )
+      {
+        // "   Creating library foo\foo.dll.lib and object foo\foo.dll.exp"
+        //
+        // This one can also be printed when linking an executable if any of
+        // its object files export symbols.
+        //
+        if (ln.compare (0, 3, "   ") == 0)
+        {
+          // For a shared library use the actual import library name (we
+          // override it). Otherwise derive the name from the executable
+          // (by default .lib/.exp replace the .exe extension).
+          //
+          path il;
+          if (lt == otype::s)
+            il = find_adhoc_member<libi> (t)->path ().leaf ();
+          else
+            il = t.path ().leaf ().base () + ".lib";
+
+          const string el (il.base ().string () + ".exp");
+          if (ln.find (il.string ()) != string::npos &&
+              ln.find (el)           != string::npos)
+            continue;
+        }
+
+        // Note: /INCREMENTAL reportedly triggers extra messages sometimes.
+        //
+        diag_stream_lock () << ln << endl;
+        break;
+      }
+    }
+
+    // Extract system header search paths from MSVC.
+    //
+    dir_paths config_module::
+    msvc_header_search_paths (const process_path&, scope&) const
+    {
+      // There don't appear to be any paths built into the compiler: they all
+      // come from the INCLUDE environment variable.
+
+      // @@ VC: how are we going to do this? E.g., cl-14 does this internally.
+      //    cl.exe /Be prints INCLUDE.
+      //
+      //    Is it even worth the trouble? INCLUDE normally covers the system
+      //    headers and it's highly unlikely that an imported library lists
+      //    one of those directories in its pkg-config Cflags value. Let's
+      //    wait and see (until then, no paths are returned).
+      //
+      return {};
+    }
+
+    // Extract system library search paths from MSVC.
+    //
+    dir_paths config_module::
+    msvc_library_search_paths (const process_path&, scope&) const
+    {
+      // There don't appear to be any paths built into the linker: they all
+      // come from the LIB environment variable.
+
+      // @@ VC: how are we going to do this? E.g., cl-14 does this internally.
+      //    cl.exe /Be prints LIB.
+      //
+      //    Is it even worth the trouble? LIB normally covers the system
+      //    libraries and it's highly unlikely we will see an explicit import
+      //    for a library from one of those directories. Let's wait and see.
+      //
+      return {};
+    }
+
+    // Inspect the file and determine if it is static or import library.
+    // Return otype::e if it is neither (which we quietly ignore).
+    //
+    static otype
+    library_type (const process_path& ld, const path& l)
+    {
+      // There are several reasonably reliable methods to tell whether it is a
+      // static or import library. One is lib.exe /LIST -- if there aren't any
+      // .obj members, then it is most likely an import library (it can also
+      // be an empty static library in which case there won't be any members).
+      // For an import library /LIST will print a bunch of .dll members.
+      //
+      // Another approach is dumpbin.exe (link.exe /DUMP) with /ARCHIVEMEMBERS
+      // (similar to /LIST) and /LINKERMEMBER (looking for __impl__ symbols or
+      // _IMPORT_DESCRIPTOR_).
+      //
+      // Note also, that apparently it is possible to have a hybrid library.
+      //
+      // While the lib.exe approach is probably the simplest, the problem is
+      // it will require us loading the bin.ar module even if we are not
+      // building any static libraries. On the other hand, if we are searching
+      // for libraries then we have bin.ld. So we will use the link.exe /DUMP
+      // /ARCHIVEMEMBERS.
+      //
+      const char* args[] = {ld.recall_string (),
+                            "/DUMP",               // Must come first.
+                            "/NOLOGO",
+                            "/ARCHIVEMEMBERS",
+                            l.string ().c_str (),
+                            nullptr};
+
+      if (verb >= 3)
+        print_process (args);
+
+      // Link.exe seems to always dump everything to stdout but just in case
+      // redirect stderr to stdout.
+      //
+      process pr (run_start (ld,
+                             args,
+                             0     /* stdin */,
+                             -1    /* stdout */,
+                             false /* error */));
+
+      bool obj (false), dll (false); // Seen an .obj/.dll member.
+      string s;                      // Last line read (see run_finish()).
+
+      try
+      {
+        ifdstream is (
+          move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+
+        while (getline (is, s))
+        {
+          // Detect the one error we should let through.
+          //
+          if (s.compare (0, 18, "unable to execute ") == 0)
+            break;
+
+          // The lines we are interested in seem to have this form (though
+          // presumably the "Archive member name at" part can be translated):
+          //
+          // Archive member name at 746: [...]hello.dll[/][ ]*
+          // Archive member name at 8C70: [...]hello.lib.obj[/][ ]*
+          //
+          size_t n (s.size ());
+
+          for (; n != 0 && s[n - 1] == ' '; --n) ; // Skip trailing spaces.
+
+          if (n >= 7) // At least ": X.obj" or ": X.dll".
+          {
+            --n; // Index of the last (non-space) character.
+
+            if (s[n] == '/') // Skip trailing slash if one is there.
+              --n;
+
+            n -= 3; // Beginning of extension.
+
+            if (s[n] == '.')
+            {
+              // Make sure there is ": ".
+              //
+              size_t p (s.rfind (':', n - 1));
+
+              if (p != string::npos && s[p + 1] == ' ')
+              {
+                const char* e (s.c_str () + n + 1); // Past the dot.
+
+                if (casecmp (e, "obj", 3) == 0)
+                  obj = true;
+
+                if (casecmp (e, "dll", 3) == 0)
+                  dll = true;
+              }
+            }
+          }
+        }
+      }
+      catch (const io_error&)
+      {
+        // Presumably the child process failed. Let run_finish() deal with
+        // that.
+      }
+
+      if (!run_finish (args, pr, false, s))
+        return otype::e;
+
+      if (obj && dll)
+      {
+        warn << l << " looks like hybrid static/import library, ignoring";
+        return otype::e;
+      }
+
+      if (!obj && !dll)
+      {
+        warn << l << " looks like empty static or import library, ignoring";
+        return otype::e;
+      }
+
+      return obj ? otype::a : otype::s;
+    }
+
+    template <typename T>
+    static T*
+    msvc_search_library (const process_path& ld, // Passed to library_type().
+                         const dir_path& d,      // Directory to look in.
+                         const prerequisite_key& p,
+                         otype lt,               // Required library type.
+                         const char* pfx,        // Name prefix, can be "".
+                         const char* sfx,        // Name suffix, can be "".
+                         bool exist,
+                         tracer& trace)
+    {
+      // Probe <pfx><name><sfx>[.<ext>] in d (similar to search_library()).
+      //
+      assert (p.scope != nullptr);
+
+      const optional<string>& ext (p.tk.ext);
+      const string& name (*p.tk.name);
+
+      // Assemble the file path: d/<pfx><name><sfx>[.<e>].
+      //
+      path f (d);
+
+      if (*pfx != '\0')
+      {
+        f /= pfx;
+        f += name;
+      }
+      else
+        f /= name;
+
+      if (*sfx != '\0')
+        f += sfx;
+
+      const string& e (!ext || p.is_a<lib> () // Only for liba/libs.
+                       ? string ("lib")       // Default extension.
+                       : *ext);
+
+      if (!e.empty ())
+      {
+        f += '.';
+        f += e;
+      }
+
+      // Check if the file exists and, only then, if it is of the expected type.
+      //
+      timestamp mt (mtime (f));
+
+      if (mt != timestamp_nonexistent && library_type (ld, f) == lt)
+      {
+        // Enter the target (t is set by insert_library()).
+        //
+        T* t;
+        common::insert_library (p.scope->ctx, t, name, d, e, exist, trace);
+
+        t->mtime (mt);
+        t->path (move (f)); // f is not used past this point.
+
+        return t;
+      }
+
+      return nullptr; // No such file or not the library type we want.
+    }
+
+    liba* common::
+    msvc_search_static (const process_path& ld,
+                        const dir_path& d,
+                        const prerequisite_key& p,
+                        bool exist) const
+    {
+      tracer trace (x, "msvc_search_static");
+
+      // Prefix/suffix pairs to try, in this order:
+      //
+      //      foo.lib
+      //   libfoo.lib
+      //      foolib.lib
+      //      foo_static.lib
+      //
+      static const char* const names[][2] = {
+        {"",    ""},
+        {"lib", ""},
+        {"",    "lib"},
+        {"",    "_static"}};
+
+      for (const auto& n: names)
+      {
+        if (liba* a = msvc_search_library<liba> (
+              ld, d, p, otype::a, n[0], n[1], exist, trace))
+          return a; // The first match wins.
+      }
+
+      return nullptr;
+    }
+
+    libs* common::
+    msvc_search_shared (const process_path& ld,
+                        const dir_path& d,
+                        const prerequisite_key& pk,
+                        bool exist) const
+    {
+      tracer trace (x, "msvc_search_shared");
+
+      assert (pk.scope != nullptr);
+
+      libs* s (nullptr); // Result, set by search() below.
+
+      auto search = [&s, &ld, &d, &pk, exist, &trace] (
+        const char* pf, const char* sf) -> bool
+      {
+        if (libi* i = msvc_search_library<libi> (
+              ld, d, pk, otype::s, pf, sf, exist, trace)) // Import library.
+        {
+          ulock l (
+            insert_library (
+              pk.scope->ctx, s, *pk.tk.name, d, nullopt, exist, trace));
+
+          if (!exist) // NOTE(review): presumably exist is lookup-only; confirm.
+          {
+            if (l.owns_lock ())
+            {
+              s->member = i; // We are first.
+              l.unlock ();
+            }
+            else
+              assert (find_adhoc_member<libi> (*s) == i); // Someone else was.
+
+            // Presumably there is a DLL somewhere, we just don't know where.
+            //
+            s->mtime (i->mtime ()); // Use the import library's timestamp.
+            s->path (path ());      // Empty path.
+          }
+        }
+
+        return s != nullptr;
+      };
+
+      // Try (import library names):
+      //      foo.lib
+      //   libfoo.lib
+      //      foodll.lib
+      //
+      return
+        search ("",    "")    ||
+        search ("lib", "")    ||
+        search ("",    "dll") ? s : nullptr;
+    }
+  }
+}
diff --git a/libbuild2/cc/parser+module.test.testscript b/libbuild2/cc/parser+module.test.testscript
new file mode 100644
index 0000000..d51ac0a
--- /dev/null
+++ b/libbuild2/cc/parser+module.test.testscript
@@ -0,0 +1,147 @@
+# file      : libbuild2/cc/parser+module.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license   : MIT; see accompanying LICENSE file
+
+# Test C++ module constructs.
+#
+
+# NOTE: header unit imports are currently parsed but produce no output.
+#
+
+: import
+:
+$* <<EOI >>EOI
+import foo;
+import foo.bar;
+import foo.bar.baz;
+EOI
+
+: import-header
+:
+$* <<EOI
+import "foo.h";
+import <sys/foo.h>;
+__import "/usr/include/stdio.h";
+EOI
+
+: module-implementation
+:
+$* <<EOI >>EOI
+module foo;
+EOI
+
+: module-interface
+:
+$* <<EOI >>EOI
+export module foo;
+EOI
+
+: export-imported
+:
+$* <<EOI >>EOO
+export import foo;
+export import "foo.h";
+export import <sys/foo.h>;
+EOI
+export import foo;
+EOO
+
+: non-module
+:
+$* <<EOI
+#pragma import module foo;
+#pragma export module foo;
+#pragma module foo;
+export namespace bar {int fox ();}
+EOI
+
+: attribute
+:
+$* <<EOI >>EOO
+import foo [[export({import})]];
+import "foo.h" [[export({import})]];
+module bar [[module({module})]];
+EOI
+import foo;
+module bar;
+EOO
+
+: import-duplicate
+:
+$* <<EOI >>EOO
+import foo;
+import bar.baz;
+import foo;
+import bar . baz;
+EOI
+import foo;
+import bar.baz;
+EOO
+
+: brace-missing
+:
+$* <<EOI 2>>EOE != 0
+export
+{
+  class foo
+  {
+  //};
+  module foo;
+}
+EOI
+stdin:8:1: error: {}-imbalance detected
+EOE
+
+: brace-stray
+:
+$* <<EOI 2>>EOE != 0
+export
+{
+  class foo
+  {
+  };}
+}
+module foo;
+EOI
+stdin:6:1: error: {}-imbalance detected
+EOE
+
+: import-missing-name
+:
+$* <<EOI 2>>EOE != 0
+import ;
+EOI
+stdin:1:8: error: module or header name expected instead of ';'
+EOE
+
+: module-missing-name
+:
+$* <<EOI 2>>EOE != 0
+module ;
+EOI
+stdin:1:1: error: module declaration expected after leading module marker
+EOE
+
+: import-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+import foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE
+
+: module-missing-semi
+:
+$* <<EOI 2>>EOE != 0
+export module foo
+EOI
+stdin:2:1: error: ';' expected instead of <end of file>
+EOE
+
+: import-missing-header
+:
+$* <<EOI 2>>EOE != 0
+import <foo.h;
+EOI
+stdin:2:1: error: closing '>' expected after header name
+EOE
diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx
new file mode 100644
index 0000000..179043e
--- /dev/null
+++ b/libbuild2/cc/parser.cxx
@@ -0,0 +1,263 @@
+// file      : libbuild2/cc/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/parser.hxx>
+
+#include <libbuild2/cc/lexer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    using type = token_type;
+
+    unit parser::
+    parse (ifdstream& is, const path& name)
+    {
+      lexer l (is, name);
+      l_ = &l; // Only valid for the duration of this call.
+
+      unit u;
+      u_ = &u; // Ditto.
+
+      // If the source has errors then we want the compiler to issue the
+      // diagnostics. However, the errors could as likely be because we are
+      // mis-parsing things. Initially, as a middle ground, we were going to
+      // issue warnings. But the problem with this approach is that they are
+      // easy to miss. So for now we fail. And it turns out we don't mis-
+      // parse much.
+      //
+      size_t bb (0); // {}-balance.
+
+      token t;
+      for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; )
+      {
+        // Break to stop, continue to continue, set n to false if the
+        // next token already extracted.
+        //
+        n = true;
+
+        switch (t.type)
+        {
+        case type::lcbrace:
+          {
+            ++bb;
+            continue;
+          }
+        case type::rcbrace:
+          {
+            if (bb-- == 0) // Wraps around so bb != 0 is detected below.
+              break; // Imbalance.
+
+            continue;
+          }
+        case type::identifier:
+          {
+            // Constructs we need to recognize:
+            //
+            //           module                              ;
+            // [export]  import <module-name> [<attributes>] ;
+            // [export]  import <header-name> [<attributes>] ;
+            // [export]  module <module-name> [<attributes>] ;
+            //
+            // Additionally, when include is translated to an import, it's
+            // normally replaced with the special __import keyword since it
+            // may appear in C context.
+            //
+            const string& id (t.value); // Reference: tracks t (see export).
+
+            if (bb == 0) // Only at the top level (outside of any braces).
+            {
+              if      (id == "import" || id == "__import")
+              {
+                parse_import (t, false);
+              }
+              else if (id == "module")
+              {
+                parse_module (t, false);
+              }
+              else if (id == "export")
+              {
+                if (l_->next (t) == type::identifier) // id now refers to it.
+                {
+                  if      (id == "module") parse_module (t, true);
+                  else if (id == "import") parse_import (t, true);
+                  else n = false; // Something else (e.g., export namespace).
+                }
+                else
+                  n = false; // Re-examine this token (it could be a brace).
+              }
+            }
+            continue;
+          }
+        default: continue;
+        }
+
+        break; // Only reached via the imbalance break above.
+      }
+
+      if (bb != 0)
+        /*warn*/ fail (t) << "{}-imbalance detected";
+
+      if (module_marker_ && u.module_info.name.empty ())
+        fail (*module_marker_) << "module declaration expected after "
+                               << "leading module marker";
+
+      checksum = l.checksum ();
+      return u;
+    }
+
+    void parser::
+    parse_import (token& t, bool ex)
+    {
+      // enter: import keyword
+      // leave: semi
+
+      string un;    // Unit (module or header) name.
+      unit_type ut; // Set by every case below that does not fail.
+      switch (l_->next (t)) // Start of module/header name.
+      {
+      case type::less:
+      case type::string:
+        {
+          un = parse_header_name (t);
+          ut = unit_type::module_header;
+          break;
+        }
+      case type::identifier:
+        {
+          un = parse_module_name (t);
+          ut = unit_type::module_iface;
+          break;
+        }
+      default:
+        fail (t) << "module or header name expected instead of " << t << endf;
+      }
+
+      // Should be {}-balanced.
+      //
+      for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+      if (t.type != type::semi)
+        fail (t) << "';' expected instead of " << t;
+
+      // For now we skip header units (see a comment on module type/info
+      // string serialization in compile rule for details). Note that
+      // currently parse_header_name() always returns empty name.
+      //
+      if (ut == unit_type::module_header)
+        return;
+
+      // Ignore duplicates. We don't expect a large number of (direct)
+      // imports so vector/linear search is probably more efficient than a
+      // set.
+      //
+      auto& is (u_->module_info.imports);
+
+      auto i (find_if (is.begin (), is.end (),
+                       [&un] (const module_import& i)
+                       {
+                         return i.name == un;
+                       }));
+
+      if (i == is.end ())
+        is.push_back (module_import {ut, move (un), ex, 0});
+      else
+        i->exported = i->exported || ex; // Exported if any duplicate is.
+    }
+
+    void parser::
+    parse_module (token& t, bool ex)
+    {
+      // enter: module keyword
+      // leave: semi
+
+      location l (get_location (t)); // Of the module keyword.
+
+      l_->next (t); // Either semi (marker) or the start of the module name.
+
+      // Handle the leading 'module;' marker (p0713).
+      //
+      // Note that we don't bother diagnosing invalid/duplicate markers
+      // leaving that to the compiler.
+      //
+      if (!ex && t.type == type::semi)
+      {
+        module_marker_ = move (l); // Checked at the end of parse().
+        return;
+      }
+
+      // Otherwise it should be the start of the module name.
+      //
+      string n (parse_module_name (t));
+
+      // Should be {}-balanced.
+      //
+      for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+
+      if (t.type != type::semi)
+        fail (t) << "';' expected instead of " << t;
+
+      if (!u_->module_info.name.empty ())
+        fail (l) << "multiple module declarations";
+
+      u_->type = ex ? unit_type::module_iface : unit_type::module_impl;
+      u_->module_info.name = move (n);
+    }
+
+    string parser::
+    parse_module_name (token& t)
+    {
+      // enter: first token of module name
+      // leave: token after module name
+
+      string r;
+
+      // Grammar: <identifier>[ . <identifier>]*
+      //
+      while (true)
+      {
+        if (t.type != type::identifier)
+          fail (t) << "module name expected instead of " << t;
+
+        r += t.value;
+
+        if (l_->next (t) != type::dot)
+          return r; // Not a dot so this is the token after the name.
+
+        r += '.';
+
+        l_->next (t); // Move on to what should be the next identifier.
+      }
+    }
+
+    string parser::
+    parse_header_name (token& t)
+    {
+      // enter: first token of header name, either string or less
+      // leave: token after header name
+
+      string n; // Currently always returned empty (see the note below).
+
+      // NOTE: actual name is a TODO if/when we need it.
+      //
+      if (t.type == type::string)
+        /*n = move (t.value)*/;
+      else
+      {
+        while (l_->next (t) != type::greater) // Skip until the closing '>'.
+        {
+          if (t.type == type::eos)
+            fail (t) << "closing '>' expected after header name" << endf;
+        }
+      }
+
+      l_->next (t); // Move past the string or the closing '>'.
+      return n;
+    }
+  }
+}
diff --git a/libbuild2/cc/parser.hxx b/libbuild2/cc/parser.hxx
new file mode 100644
index 0000000..324b62a
--- /dev/null
+++ b/libbuild2/cc/parser.hxx
@@ -0,0 +1,55 @@
+// file      : libbuild2/cc/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_PARSER_HXX
+#define LIBBUILD2_CC_PARSER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Extract translation unit information from a preprocessed C/C++ source.
+    //
+    struct token;
+    class lexer;
+
+    class parser
+    {
+    public:
+      unit
+      parse (ifdstream&, const path& name); // Also sets checksum (below).
+
+    private:
+      void
+      parse_import (token&, bool); // True if preceded by export.
+
+      void
+      parse_module (token&, bool); // True if preceded by export.
+
+      string
+      parse_module_name (token&);
+
+      string
+      parse_header_name (token&);
+
+    public:
+      string checksum; // Translation unit checksum.
+
+    private:
+      lexer* l_; // Points to a local in parse(), only valid during the call.
+      unit* u_;  // Ditto.
+
+      optional<location> module_marker_; // Location of leading 'module;'.
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_PARSER_HXX
diff --git a/libbuild2/cc/parser.test.cxx b/libbuild2/cc/parser.test.cxx
new file mode 100644
index 0000000..82c68d1
--- /dev/null
+++ b/libbuild2/cc/parser.test.cxx
@@ -0,0 +1,67 @@
+// file      : libbuild2/cc/parser.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/cc/parser.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    // Usage: argv[0] [<file>]
+    //
+    int
+    main (int argc, char* argv[])
+    {
+      try
+      {
+        // Parse the specified file or, if none, stdin.
+        //
+        const bool in (argc <= 1);
+        const char* const file (in ? "stdin" : argv[1]);
+
+        ifdstream is;
+        if (in)
+          is.open (fddup (stdin_fd ()));
+        else
+          is.open (file);
+
+        parser p;
+        unit u (p.parse (is, path (file)));
+
+        // Print the imports followed by the module declaration, if any, in
+        // the form they would have in the source.
+        //
+        for (const module_import& i: u.module_info.imports)
+          cout << (i.exported ? "export import " : "import ")
+               << i.name << ';' << endl;
+
+        const bool iface (u.type == unit_type::module_iface);
+        if (iface || u.type == unit_type::module_impl)
+          cout << (iface ? "export " : "")
+               << "module " << u.module_info.name << ';' << endl;
+      }
+      catch (const failed&)
+      {
+        return 1;
+      }
+
+      return 0;
+    }
+  }
+}
+
+int
+main (int argc, char* argv[])
+{
+  return build2::cc::main (argc, argv); // The driver proper (see above).
+}
diff --git a/libbuild2/cc/pkgconfig.cxx b/libbuild2/cc/pkgconfig.cxx
new file mode 100644
index 0000000..0669b02
--- /dev/null
+++ b/libbuild2/cc/pkgconfig.cxx
@@ -0,0 +1,1550 @@
+// file      : libbuild2/cc/pkgconfig.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+// In order not to complicate the bootstrap procedure with libpkgconf building
+// exclude functionality that involves reading of .pc files.
+//
+#ifndef BUILD2_BOOTSTRAP
+#  include <libpkgconf/libpkgconf.h>
+#endif
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/install/utility.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+#include <libbuild2/cc/target.hxx>  // pc
+#include <libbuild2/cc/utility.hxx>
+
+#include <libbuild2/cc/common.hxx>
+#include <libbuild2/cc/compile-rule.hxx>
+#include <libbuild2/cc/link-rule.hxx>
+
+#ifndef BUILD2_BOOTSTRAP
+
+// Note that the libpkgconf library doesn't provide the version macro that we
+// could use to compile the code conditionally against different API versions.
+// Thus, we need to sense the pkgconf_client_new() function signature
+// ourselves to call it properly.
+//
+namespace details
+{
+  void*
+  pkgconf_cross_personality_default (); // Lookup stand-in, never called.
+}
+
+using namespace details; // Make the stand-in visible to global lookup.
+
+template <typename H> // Selected if f takes (handler, data).
+static inline pkgconf_client_t*
+call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*),
+                         H error_handler,
+                         void* error_handler_data)
+{
+  return f (error_handler, error_handler_data);
+}
+
+template <typename H, typename P> // Selected if f takes a third argument.
+static inline pkgconf_client_t*
+call_pkgconf_client_new (pkgconf_client_t* (*f) (H, void*, P),
+                         H error_handler,
+                         void* error_handler_data)
+{
+  return f (error_handler,
+            error_handler_data,
+            ::pkgconf_cross_personality_default ()); // Passed as the third.
+}
+
+#endif
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+#ifndef BUILD2_BOOTSTRAP
+
+  // Load package information from a .pc file. Filter out the -I/-L options
+  // that refer to system directories.
+  //
+  // Note that the prerequisite package .pc files search order is as follows:
+  //
+  // - in directory of the specified file
+  // - in pc_dirs directories (in the natural order)
+  //
+  class pkgconf
+  {
+  public:
+    using path_type = build2::path;
+
+    path_type path; // The .pc file this object was loaded from.
+
+  public:
+    explicit
+    pkgconf (path_type,
+             const dir_paths& pc_dirs,
+             const dir_paths& sys_lib_dirs,  // Same order as the definition.
+             const dir_paths& sys_inc_dirs);
+
+    // Create a special empty object. Querying package information on such
+    // an object is illegal.
+    //
+    pkgconf () = default;
+
+    ~pkgconf ();
+
+    // Movable-only type.
+    //
+    pkgconf (pkgconf&& p)
+        : path (move (p.path)),
+          client_ (p.client_),
+          pkg_ (p.pkg_)
+    {
+      p.client_ = nullptr; // Leave the source empty.
+      p.pkg_ = nullptr;
+    }
+
+    pkgconf&
+    operator= (pkgconf&& p)
+    {
+      if (this != &p)
+      {
+        this->~pkgconf ();
+        new (this) pkgconf (move (p)); // Assume noexcept move-construction.
+      }
+      return *this;
+    }
+
+    pkgconf (const pkgconf&) = delete;
+    pkgconf& operator= (const pkgconf&) = delete;
+
+    strings
+    cflags (bool stat) const; // If stat is true, then for static linking.
+
+    strings
+    libs (bool stat) const;   // If stat is true, then for static linking.
+
+    string
+    variable (const char*) const; // Empty string if there is no such variable.
+
+    string
+    variable (const string& s) const {return variable (s.c_str ());}
+
+  private:
+    // Keep them as raw pointers not to deal with API thread-unsafety in
+    // deleters and introducing additional mutex locks.
+    //
+    pkgconf_client_t* client_ = nullptr;
+    pkgconf_pkg_t* pkg_ = nullptr;
+  };
+
+  // Currently the library is not thread-safe, even on the pkgconf_client_t
+  // level (see issue #128 for details).
+  //
+  // @@ An update: seems that the obvious thread-safety issues are fixed.
+  //    However, let's keep mutex locking for now not to introduce potential
+  //    issues before we make sure that there are no other ones.
+  //
+  static mutex pkgconf_mutex;
+
+  // The package dependency traversal depth limit.
+  //
+  static const int pkgconf_max_depth = 100;
+
+  // Normally the error_handler() callback can be called multiple times to
+  // report a single error (once per message line), to produce a multi-line
+  // message like this:
+  //
+  //   Package foo was not found in the pkg-config search path.\n
+  //   Perhaps you should add the directory containing `foo.pc'\n
+  //   to the PKG_CONFIG_PATH environment variable\n
+  //   Package 'foo', required by 'bar', not found\n
+  //
+  // For the above example callback will be called 4 times. To suppress all the
+  // junk we will use PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS to get just:
+  //
+  //   Package 'foo', required by 'bar', not found\n
+  //
+  static const int pkgconf_flags = PKGCONF_PKG_PKGF_SIMPLIFY_ERRORS;
+
+  static bool
+  pkgconf_error_handler (const char* msg, const pkgconf_client_t*, const void*)
+  {
+    error << runtime_error (msg); // Sanitize the message.
+    return true; // NOTE(review): presumably means "handled"; confirm.
+  }
+
+  // Fragment list deleter (see cflags(), libs()). Note: thread-safe.
+  //
+  struct fragments_deleter
+  {
+    void operator() (pkgconf_list_t* f) const {pkgconf_fragment_free (f);}
+  };
+
+  // Convert fragments to strings. Skip the -I/-L options that refer to system
+  // directories.
+  //
+  static strings
+  to_strings (const pkgconf_list_t& frags,
+              char type, // 'I' or 'L' (see the assert below).
+              const pkgconf_list_t& sysdirs)
+  {
+    assert (type == 'I' || type == 'L');
+
+    strings r; // Result.
+
+    auto add = [&r] (const pkgconf_fragment_t* frag) // Append -<type><data>.
+    {
+      string s;
+      if (frag->type != '\0') // Otherwise only the data is added.
+      {
+        s += '-';
+        s += frag->type;
+      }
+
+      s += frag->data;
+      r.push_back (move (s));
+    };
+
+    // Option that is separated from its value, for example:
+    //
+    // -I /usr/lib
+    //
+    const pkgconf_fragment_t* opt (nullptr);
+
+    pkgconf_node_t *node;
+    PKGCONF_FOREACH_LIST_ENTRY(frags.head, node)
+    {
+      auto frag (static_cast<const pkgconf_fragment_t*> (node->data));
+
+      // Add the separated option and directory, unless the latter is a system
+      // one.
+      //
+      if (opt != nullptr)
+      {
+        // Note that we should restore the directory path that was
+        // (mis)interpreted as an option, for example:
+        //
+        // -I -Ifoo
+        //
+        // In the above example option '-I' is followed by directory '-Ifoo',
+        // which is represented by libpkgconf library as fragment 'foo' with
+        // type 'I'.
+        //
+        if (!pkgconf_path_match_list (
+              frag->type == '\0'
+              ? frag->data
+              : (string ({'-', frag->type}) + frag->data).c_str (),
+              &sysdirs))
+        {
+          add (opt);
+          add (frag);
+        }
+
+        opt = nullptr; // The pending option is consumed either way.
+        continue;
+      }
+
+      // Skip the -I/-L option if it refers to a system directory.
+      //
+      if (frag->type == type)
+      {
+        // The option is separated from a value, that will (presumably) follow.
+        //
+        if (*frag->data == '\0')
+        {
+          opt = frag; // Decide on the next iteration.
+          continue;
+        }
+
+        if (pkgconf_path_match_list (frag->data, &sysdirs))
+          continue; // System directory, drop.
+      }
+
+      add (frag);
+    }
+
+    if (opt != nullptr) // Add the dangling option.
+      add (opt);
+
+    return r;
+  }
+
+  // Note that some libpkgconf functions can potentially return NULL, failing
+  // to allocate the required memory block. However, we will not check the
+  // returned value for NULL as the library doesn't do so, prior to filling the
+  // allocated structures. So such a code complication on our side would be
+  // useless. Also, for some functions the NULL result has a special semantics,
+  // for example "not found".
+  //
+  pkgconf::
+  pkgconf (path_type p,
+           const dir_paths& pc_dirs,
+           const dir_paths& sys_lib_dirs,
+           const dir_paths& sys_inc_dirs)
+      : path (move (p))
+  {
+    auto add_dirs = [] (pkgconf_list_t& dir_list,
+                        const dir_paths& dirs,
+                        bool suppress_dups,
+                        bool cleanup = false)
+    {
+      if (cleanup) // Drop whatever is already in the list.
+      {
+        pkgconf_path_free (&dir_list);
+        dir_list = PKGCONF_LIST_INITIALIZER;
+      }
+
+      for (const auto& d: dirs)
+        pkgconf_path_add (d.string ().c_str (), &dir_list, suppress_dups);
+    };
+
+    mlock l (pkgconf_mutex); // Held until the end of the constructor.
+
+    // Initialize the client handle.
+    //
+    unique_ptr<pkgconf_client_t, void (*) (pkgconf_client_t*)> c (
+      call_pkgconf_client_new (&pkgconf_client_new,
+                               pkgconf_error_handler,
+                               nullptr /* handler_data */),
+      [] (pkgconf_client_t* c) {pkgconf_client_free (c);}); // If we fail.
+
+    pkgconf_client_set_flags (c.get (), pkgconf_flags);
+
+    // Note that the system header and library directory lists are
+    // automatically pre-filled by the pkgconf_client_new() call (see above).
+    // We will re-create these lists from scratch.
+    //
+    add_dirs (c->filter_libdirs,
+              sys_lib_dirs,
+              false /* suppress_dups */,
+              true  /* cleanup */);
+
+    add_dirs (c->filter_includedirs,
+              sys_inc_dirs,
+              false /* suppress_dups */,
+              true  /* cleanup */);
+
+    // Note that the loaded file directory is added to the (yet empty) search
+    // list. Also note that loading of the prerequisite packages is delayed
+    // until flags retrieval, and their file directories are not added to the
+    // search list.
+    //
+    pkg_ = pkgconf_pkg_find (c.get (), path.string ().c_str ());
+
+    if (pkg_ == nullptr)
+      fail << "package '" << path << "' not found or invalid";
+
+    // Add the .pc file search directories.
+    //
+    assert (c->dir_list.length == 1); // Package file directory (see above).
+    add_dirs (c->dir_list, pc_dirs, true /* suppress_dups */);
+
+    client_ = c.release (); // From now on freed by the destructor.
+  }
+
+  pkgconf::
+  ~pkgconf ()
+  {
+    if (client_ == nullptr) // Empty object, nothing to release.
+      return;
+
+    assert (pkg_ != nullptr);
+
+    mlock guard (pkgconf_mutex); // Hold the lock for both calls.
+    pkgconf_pkg_unref (client_, pkg_);
+    pkgconf_client_free (client_);
+  }
+
+  strings pkgconf::
+  cflags (bool stat) const
+  {
+    assert (client_ != nullptr); // Must not be empty.
+
+    mlock l (pkgconf_mutex);
+
+    // Besides the public package dependencies also walk through the private
+    // ones (Requires.private) while collecting the flags. Note that we do
+    // this for both static and shared linking.
+    //
+    int fl (pkgconf_flags | PKGCONF_PKG_PKGF_SEARCH_PRIVATE);
+
+    // For the static linking also collect flags from Cflags.private in
+    // addition to those from Cflags.
+    //
+    if (stat)
+      fl |= PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS;
+
+    pkgconf_client_set_flags (client_, fl);
+
+    pkgconf_list_t fs = PKGCONF_LIST_INITIALIZER; // Aggregate initialization.
+
+    if (pkgconf_pkg_cflags (client_, pkg_, &fs, pkgconf_max_depth) !=
+        PKGCONF_PKG_ERRF_OK)
+      throw failed (); // Assume the diagnostics is issued.
+
+    // Make sure the fragments are freed once they have been converted to
+    // strings.
+    //
+    unique_ptr<pkgconf_list_t, fragments_deleter> fd (&fs);
+    return to_strings (fs, 'I', client_->filter_includedirs);
+  }
+
+  strings pkgconf::
+  libs (bool stat) const
+  {
+    assert (client_ != nullptr); // Must not be empty.
+
+    mlock l (pkgconf_mutex);
+
+    // For the static linking additionally collect flags from the private
+    // dependency packages (Requires.private) and from the Libs.private
+    // value.
+    //
+    int fl (pkgconf_flags);
+
+    if (stat)
+      fl |= (PKGCONF_PKG_PKGF_SEARCH_PRIVATE |
+             PKGCONF_PKG_PKGF_MERGE_PRIVATE_FRAGMENTS);
+
+    pkgconf_client_set_flags (client_, fl);
+
+    pkgconf_list_t fs = PKGCONF_LIST_INITIALIZER; // Aggregate initialization.
+
+    if (pkgconf_pkg_libs (client_, pkg_, &fs, pkgconf_max_depth) !=
+        PKGCONF_PKG_ERRF_OK)
+      throw failed (); // Assume the diagnostics is issued.
+
+    unique_ptr<pkgconf_list_t, fragments_deleter> fd (&fs); // Auto-deleter.
+    return to_strings (fs, 'L', client_->filter_libdirs);
+  }
+
+  string pkgconf::
+  variable (const char* name) const
+  {
+    assert (client_ != nullptr); // Querying an empty object is illegal.
+    mlock guard (pkgconf_mutex);
+    if (const char* v = pkgconf_tuple_find (client_, &pkg_->vars, name))
+      return string (v);
+    return string (); // No such variable.
+  }
+
+#endif
+
+  namespace cc
+  {
+    using namespace bin;
+
+    // In pkg-config backslashes, spaces, etc are escaped with a backslash.
+    //
+    static string
+    escape (const string& s)
+    {
+      // In the result every backslash and space of the original string is
+      // preceded with a backslash while all other characters are copied
+      // unchanged. For example:
+      //
+      //   C:\Program Files  ->  C:\\Program\ Files
+      //
+      string r;
+
+      for (const char c: s)
+      {
+        // Only backslashes and spaces need escaping.
+        //
+        const bool esc (c == '\\' || c == ' ');
+
+        if (esc)
+          r += '\\';
+
+        r += c;
+      }
+
+      return r;
+    }
+
+    // Try to find a .pc file in the pkgconfig/ subdirectory of libd, trying
+    // several names derived from stem. If not found, return false. If found,
+    // load poptions, loptions, libs, and modules, set the corresponding
+    // *.export.* variables and add prerequisites on targets, and return true.
+    // Note that we assume the targets are locked so that all of this is
+    // MT-safe.
+    //
+    // System library search paths (those extracted from the compiler) are
+    // passed in top_sysd while the user-provided (via -L) in top_usrd.
+    //
+    // Note that scope and link order should be "top-level" from the
+    // search_library() POV.
+    //
+    // Also note that the bootstrapped version of build2 will not search for
+    // .pc files, always returning false (see above for the reasoning).
+    //
+#ifndef BUILD2_BOOTSTRAP
+
+    // Iterate over pkgconf directories that correspond to the specified
+    // library directory, passing them to the callback function for as long as
+    // it returns false (not found). Return true if the callback returned
+    // true.
+    //
+    bool common::
+    pkgconfig_search (const dir_path& d, const pkgconfig_callback& f) const
+    {
+      // Start with pkgconfig/ in the library directory itself: even where
+      // this is not the canonical place, .pc files of autotools-based
+      // packages installed by the user often still end up there.
+      //
+      dir_path pd (d);
+      pd /= "pkgconfig";
+
+      if (exists (pd) && f (move (pd)))
+        return true;
+
+      // Then try the platform-specific locations. On FreeBSD .pc files go
+      // to libdata/pkgconfig/ (next to lib/), not to lib/pkgconfig/.
+      //
+      if (tsys == "freebsd")
+      {
+        (pd = d) /= "..";
+        pd /= "libdata";
+        pd /= "pkgconfig";
+        if (exists (pd) && f (move (pd)))
+          return true;
+      }
+
+      return false;
+    }
+
+    // Search for the .pc files in the pkgconf directories that correspond to
+    // the specified library directory. If found, return static (first) and
+    // shared (second) library .pc files. If common is false, then only
+    // consider our .static/.shared files.
+    //
+    pair<path, path> common::
+    pkgconfig_search (const dir_path& libd,
+                      const optional<project_name>& proj,
+                      const string& stem,
+                      bool common) const
+    {
+      // When it comes to looking for .pc files we have to decide where to
+      // search (which directory(ies)) as well as what to search for (which
+      // names). Suffix is our ".shared" or ".static" extension (or empty).
+      //
+      auto search_dir = [&proj, &stem] (const dir_path& dir,
+                                        const string& sfx) -> path
+      {
+        path f;
+
+        // See if there is a corresponding .pc file. About half of them are
+        // called foo.pc and half libfoo.pc (and one of the pkg-config's
+        // authors suggests that some of you should call yours foolib.pc, just
+        // to keep things interesting, you know).
+        //
+        // Given the (general) import in the form <proj>%lib{<stem>}, we will
+        // first try lib<stem>.pc, then <stem>.pc. Maybe it also makes sense
+        // to try <proj>.pc, just in case. Though, according to pkg-config
+        // docs, the .pc file should correspond to a library, not project. But
+        // then you get something like zlib which calls it zlib.pc. So let's
+        // just do it.
+        //
+        f = dir;
+        f /= "lib";
+        f += stem;
+        f += sfx;
+        f += ".pc";
+        if (exists (f))
+          return f;
+
+        f = dir;
+        f /= stem;
+        f += sfx;
+        f += ".pc";
+        if (exists (f))
+          return f;
+
+        if (proj)
+        {
+          f = dir;
+          f /= proj->string ();
+          f += sfx;
+          f += ".pc";
+          if (exists (f))
+            return f;
+        }
+
+        return path ();
+      };
+
+      // Return true (and so stop the iteration) if a .pc file is found.
+      //
+      // Note that we rely on the "small function object" optimization here.
+      //
+      struct data
+      {
+        path a;      // Static library .pc file (or the common one).
+        path s;      // Shared library .pc file (or the common one).
+        bool common; // Also consider the common (unsuffixed) .pc file.
+      } d {path (), path (), common};
+
+      auto check = [&d, &search_dir] (dir_path&& p) -> bool
+      {
+        // First look for static/shared-specific files.
+        //
+        d.a = search_dir (p, ".static");
+        d.s = search_dir (p, ".shared");
+
+        if (!d.a.empty () || !d.s.empty ())
+          return true;
+
+        // Then the common.
+        //
+        if (d.common)
+          d.a = d.s = search_dir (p, "");
+
+        return !d.a.empty ();
+      };
+
+      pair<path, path> r;
+
+      if (pkgconfig_search (libd, check))
+      {
+        r.first  = move (d.a);
+        r.second = move (d.s);
+      }
+
+      return r;
+    }
+
+    bool common::
+    pkgconfig_load (action a,
+                    const scope& s,
+                    lib& lt,
+                    liba* at,
+                    libs* st,
+                    const optional<project_name>& proj,
+                    const string& stem,
+                    const dir_path& libd,
+                    const dir_paths& top_sysd,
+                    const dir_paths& top_usrd) const
+    {
+      assert (at != nullptr || st != nullptr);
+
+      // Locate the .pc file(s), also considering the common (unsuffixed) one.
+      //
+      const pair<path, path> pf (pkgconfig_search (libd, proj, stem, true));
+      const bool r (!pf.first.empty () || !pf.second.empty ());
+
+      if (r)
+        pkgconfig_load (a, s, lt, at, st, pf, libd, top_sysd, top_usrd);
+      return r;
+    }
+
+    void common::
+    pkgconfig_load (action a,
+                    const scope& s,
+                    lib& lt,
+                    liba* at,
+                    libs* st,
+                    const pair<path, path>& paths,
+                    const dir_path& libd,
+                    const dir_paths& top_sysd,
+                    const dir_paths& top_usrd) const
+    {
+      tracer trace (x, "pkgconfig_load");
+
+      assert (at != nullptr || st != nullptr);
+
+      const path& ap (paths.first);
+      const path& sp (paths.second);
+
+      assert (!ap.empty () || !sp.empty ());
+
+      // Extract --cflags and set them as lib?{}:export.poptions. Note that we
+      // still pass --static in case this is pkgconf which has Cflags.private.
+      //
+      auto parse_cflags = [&trace, this] (target& t,
+                                          const pkgconf& pc,
+                                          bool la)
+      {
+        strings pops;
+
+        bool arg (false);
+        for (auto& o: pc.cflags (la))
+        {
+          if (arg)
+          {
+            // Can only be an argument for -I, -D, -U options.
+            //
+            pops.push_back (move (o));
+            arg = false;
+            continue;
+          }
+
+          size_t n (o.size ());
+
+          // We only keep -I, -D and -U.
+          //
+          if (n >= 2 &&
+              o[0] == '-' &&
+              (o[1] == 'I' || o[1] == 'D' || o[1] == 'U'))
+          {
+            pops.push_back (move (o));
+            arg = (n == 2);
+            continue;
+          }
+
+          l4 ([&]{trace << "ignoring " << pc.path << " --cflags option "
+                        << o;});
+        }
+
+        if (arg)
+          fail << "argument expected after " << pops.back () <<
+            info << "while parsing pkg-config --cflags " << pc.path;
+
+        if (!pops.empty ())
+        {
+          auto p (t.vars.insert (c_export_poptions));
+
+          // The only way we could already have this value is if this same
+          // library was also imported as a project (as opposed to installed).
+          // Unlikely but possible. In this case the values were set by the
+          // export stub and we shouldn't touch them.
+          //
+          if (p.second)
+            p.first.get () = move (pops);
+        }
+      };
+
+      // Parse --libs into loptions/libs (interface and implementation). If
+      // ps is not NULL, add each resolved library target as a prerequisite.
+      //
+      auto parse_libs = [a, &s, &top_sysd, this] (target& t,
+                                                  bool binless,
+                                                  const pkgconf& pc,
+                                                  bool la,
+                                                  prerequisites* ps)
+      {
+        strings lops;
+        vector<name> libs;
+
+        // Normally we will have zero or more -L's followed by one or more
+        // -l's, with the first one being the library itself, unless the
+        // library is binless. But sometimes we may have other linker options,
+        // for example, -Wl,... or -pthread. It's probably a bad idea to
+        // ignore them. Also, theoretically, we could have just the library
+        // name/path.
+        //
+        // The tricky part, of course, is to know whether what follows after
+        // an option we don't recognize is its argument or another option or
+        // library. What we do at the moment is stop recognizing just library
+        // names (without -l) after seeing an unknown option.
+        //
+        bool arg (false), first (true), known (true), have_L (false);
+        for (auto& o: pc.libs (la))
+        {
+          if (arg)
+          {
+            // Can only be an argument for an loption.
+            //
+            lops.push_back (move (o));
+            arg = false;
+            continue;
+          }
+
+          size_t n (o.size ());
+
+          // See if this is -L.
+          //
+          if (n >= 2 && o[0] == '-' && o[1] == 'L')
+          {
+            have_L = true;
+            lops.push_back (move (o));
+            arg = (n == 2);
+            continue;
+          }
+
+          // See if that's -l or just the library name/path.
+          //
+          if ((known && o[0] != '-') ||
+              (n > 2 && o[0] == '-' && o[1] == 'l'))
+          {
+            // Unless binless, the first one is the library itself, which we
+            // skip. Note that we don't verify this and theoretically it could
+            // be some other library, but we haven't encountered such a beast
+            // yet.
+            //
+            if (first)
+            {
+              first = false;
+
+              if (!binless)
+                continue;
+            }
+
+            // @@ If by some reason this is the library itself (doesn't go
+            //    first or libpkgconf parsed libs in some bizarre way) we will
+            //    hang trying to lock its target inside search_library() (or
+            //    fail an assertion if run serially) as by now it is already
+            //    locked. To be safe we probably shouldn't rely on the position
+            //    and filter out all occurrences of the library itself (by
+            //    name?) and complain if none were encountered.
+            //
+            libs.push_back (name (move (o)));
+            continue;
+          }
+
+          // Otherwise we assume it is some other loption.
+          //
+          known = false;
+          lops.push_back (move (o));
+        }
+
+        if (arg)
+          fail << "argument expected after " << lops.back () <<
+            info << "while parsing pkg-config --libs " << pc.path;
+
+        // Space-separated list of escaped library flags.
+        //
+        auto lflags = [&pc, la] () -> string
+        {
+          string r;
+          for (const auto& o: pc.libs (la))
+          {
+            if (!r.empty ())
+              r += ' ';
+            r += escape (o);
+          }
+          return r;
+        };
+
+        if (first && !binless)
+          fail << "library expected in '" << lflags () << "'" <<
+            info << "while parsing pkg-config --libs " << pc.path;
+
+        // Resolve -lfoo into the library file path using our import installed
+        // machinery (i.e., we are going to call search_library() that will
+        // probably call us again, and so on).
+        //
+        // The reason we do it is the link order. For general libraries it
+        // shouldn't matter if we imported them via an export stub, direct
+        // import installed, or via a .pc file (which we could have generated
+        // from the export stub). The exception is "runtime libraries" (which
+        // are really the extension of libc) such as -lm, -ldl, -lpthread,
+        // etc. Those we will detect and leave as -l*.
+        //
+        // If we managed to resolve all the -l's (sans runtime), then we can
+        // omit -L's for nice and tidy command line.
+        //
+        bool all (true);
+        optional<dir_paths> usrd; // Populate lazily.
+
+        for (name& n: libs)
+        {
+          string& l (n.value);
+
+          // These ones are common/standard/POSIX.
+          //
+          if (l[0] != '-'      || // e.g., shell32.lib
+              l == "-lm"       ||
+              l == "-ldl"      ||
+              l == "-lrt"      ||
+              l == "-lpthread")
+            continue;
+
+          // Note: these lists are most likely incomplete.
+          //
+          if (tclass == "linux")
+          {
+            // Some extras from libc (see libc6-dev) and other places.
+            //
+            if (l == "-lanl"     ||
+                l == "-lcrypt"   ||
+                l == "-lnsl"     ||
+                l == "-lresolv"  ||
+                l == "-lgcc")
+              continue;
+          }
+          else if (tclass == "macos")
+          {
+            if (l == "-lSystem")
+              continue;
+          }
+
+          // Prepare user search paths by entering the -L paths from the .pc
+          // file.
+          //
+          if (have_L && !usrd)
+          {
+            usrd = dir_paths ();
+
+            for (auto i (lops.begin ()); i != lops.end (); ++i)
+            {
+              const string& o (*i);
+
+              if (o.size () >= 2 && o[0] == '-' && o[1] == 'L')
+              {
+                string p;
+
+                if (o.size () == 2)
+                  p = *++i; // We've verified it's there.
+                else
+                  p = string (o, 2);
+
+                dir_path d (move (p));
+
+                if (d.relative ())
+                  fail << "relative -L directory in '" << lflags () << "'" <<
+                    info << "while parsing pkg-config --libs " << pc.path;
+
+                usrd->push_back (move (d));
+              }
+            }
+          }
+
+          // @@ OUT: for now we assume out is undetermined, just like in
+          // resolve_library().
+          //
+          dir_path out;
+          string name (l, 2); // Sans -l.
+
+          prerequisite_key pk {
+            nullopt, {&lib::static_type, &out, &out, &name, nullopt}, &s};
+
+          if (const target* lt = search_library (a, top_sysd, usrd, pk))
+          {
+            // We used to pick a member but that doesn't seem right since the
+            // same target could be used with different link orders.
+            //
+            n.dir = lt->dir;
+            n.type = lib::static_type.name;
+            n.value = lt->name;
+
+            if (ps != nullptr)
+              ps->push_back (prerequisite (*lt));
+          }
+          else
+            // If we couldn't find the library, then leave it as -l.
+            //
+            all = false;
+        }
+
+        // If all the -l's resolved and there were no other options, then drop
+        // all the -L's. If we have unknown options, then leave them in to be
+        // safe.
+        //
+        if (all && known)
+          lops.clear ();
+
+        if (!lops.empty ())
+        {
+          if (cclass == compiler_class::msvc)
+          {
+            // Translate -L to /LIBPATH.
+            //
+            for (auto i (lops.begin ()); i != lops.end (); )
+            {
+              string& o (*i);
+              size_t n (o.size ());
+
+              if (n >= 2 && o[0] == '-' && o[1] == 'L')
+              {
+                o.replace (0, 2, "/LIBPATH:");
+
+                if (n == 2)
+                {
+                  o += *++i; // We've verified it's there.
+                  i = lops.erase (i);
+                  continue;
+                }
+              }
+
+              ++i;
+            }
+          }
+
+          auto p (t.vars.insert (c_export_loptions));
+
+          if (p.second)
+            p.first.get () = move (lops);
+        }
+
+        // Set even if empty (export override).
+        //
+        {
+          auto p (t.vars.insert (c_export_libs));
+
+          if (p.second)
+            p.first.get () = move (libs);
+        }
+      };
+
+      // On Windows pkg-config will escape backslashes in paths. In fact, it
+      // may escape things even on non-Windows platforms, for example,
+      // spaces. So we use a slightly modified version of next_word().
+      //
+      auto next = [] (const string& s, size_t& b, size_t& e) -> string
+      {
+        string r;
+        size_t n (s.size ());
+
+        if (b != e)
+          b = e;
+
+        // Skip leading delimiters.
+        //
+        for (; b != n && s[b] == ' '; ++b) ;
+
+        if (b == n)
+        {
+          e = n;
+          return r;
+        }
+
+        // Find first trailing delimiter while taking care of escapes.
+        //
+        r = s[b];
+        for (e = b + 1; e != n && s[e] != ' '; ++e)
+        {
+          if (s[e] == '\\')
+          {
+            if (++e == n)
+              fail << "dangling escape in pkg-config output '" << s << "'";
+          }
+
+          r += s[e];
+        }
+
+        return r;
+      };
+
+      // Parse modules and add them to the prerequisites.
+      //
+      auto parse_modules = [&trace, &next, &s, this]
+        (const pkgconf& pc, prerequisites& ps)
+      {
+        string mstr (pc.variable ("cxx_modules"));
+
+        string m;
+        for (size_t b (0), e (0); !(m = next (mstr, b, e)).empty (); )
+        {
+          // The format is <name>=<path>.
+          //
+          size_t p (m.find ('='));
+          if (p == string::npos ||
+              p == 0            || // Empty name.
+              p == m.size () - 1)  // Empty path.
+            fail << "invalid module information in '" << mstr << "'" <<
+              info << "while parsing pkg-config --variable=cxx_modules "
+                   << pc.path;
+
+          string mn (m, 0, p);
+          path mp (m, p + 1, string::npos);
+          path mf (mp.leaf ());
+
+          // Extract module properties, if any.
+          //
+          string pp (pc.variable ("cxx_module_preprocessed." + mn));
+          string se (pc.variable ("cxx_module_symexport." + mn));
+
+          // For now there are only C++ modules.
+          //
+          auto tl (
+            s.ctx.targets.insert_locked (
+              *x_mod,
+              mp.directory (),
+              dir_path (),
+              mf.base ().string (),
+              mf.extension (),
+              true, // Implied.
+              trace));
+
+          target& mt (tl.first);
+
+          // If the target already exists, then setting its variables is not
+          // MT-safe. So currently we only do it if we have the lock (and thus
+          // nobody can see this target yet) assuming that this has already
+          // been done otherwise.
+          //
+          // @@ This is not quite correct, though: this target could already
+          //    exist but for a "different purpose" (e.g., it could be used as
+          //    a header).
+          //
+          // @@ Could setting it in the rule-specific vars help? (But we
+          //    are not matching a rule for it.) Note that we are setting
+          //    it on the module source, not bmi*{}! So rule-specific vars
+          //    don't seem to be the answer here.
+          //
+          if (tl.second.owns_lock ())
+          {
+            mt.vars.assign (c_module_name) = move (mn);
+
+            // Set module properties. Note that if unspecified we should still
+            // set them to their default values since the hosting project may
+            // have them set to incompatible value.
+            //
+            {
+              value& v (mt.vars.assign (x_preprocessed)); // NULL
+              if (!pp.empty ()) v = move (pp);
+            }
+
+            {
+              mt.vars.assign (x_symexport) = (se == "true");
+            }
+
+            tl.second.unlock ();
+          }
+
+          ps.push_back (prerequisite (mt));
+        }
+      };
+
+      // For now we only populate prerequisites for lib{}. To do it for
+      // liba{} would require weeding out duplicates that are already in
+      // lib{}.
+      //
+      prerequisites prs;
+
+      pkgconf apc;
+      pkgconf spc;
+
+      // Create the .pc files search directory list.
+      //
+      dir_paths pc_dirs;
+
+      // Note that we rely on the "small function object" optimization here.
+      //
+      auto add_pc_dir = [&pc_dirs] (dir_path&& d) -> bool
+      {
+        pc_dirs.emplace_back (move (d));
+        return false;
+      };
+
+      pkgconfig_search (libd, add_pc_dir);
+      for (const dir_path& d: top_usrd) pkgconfig_search (d, add_pc_dir);
+      for (const dir_path& d: top_sysd) pkgconfig_search (d, add_pc_dir);
+
+      bool pa (at != nullptr && !ap.empty ());
+      if (pa || sp.empty ())
+        apc = pkgconf (ap, pc_dirs, sys_lib_dirs, sys_inc_dirs);
+
+      bool ps (st != nullptr && !sp.empty ());
+      if (ps || ap.empty ())
+        spc = pkgconf (sp, pc_dirs, sys_lib_dirs, sys_inc_dirs);
+
+      // Sort out the interface dependencies (which we are setting on lib{}).
+      // If we have the shared .pc variant, then we use that.  Otherwise --
+      // static but extract without the --static option (see also the saving
+      // logic).
+      //
+      pkgconf& ipc (ps ? spc : apc); // Interface package info.
+
+      parse_libs (
+        lt,
+        (ps ? st->mtime () : at->mtime ()) == timestamp_unreal /* binless */,
+        ipc,
+        false,
+        &prs);
+
+      if (pa)
+      {
+        parse_cflags (*at, apc, true);
+        parse_libs (*at, at->path ().empty (), apc, true, nullptr);
+      }
+
+      if (ps)
+        parse_cflags (*st, spc, false);
+
+      // For now we assume static and shared variants export the same set of
+      // modules. While technically possible, having a different set will
+      // most likely lead to all sorts of trouble (at least for installed
+      // libraries) and life is short.
+      //
+      if (modules)
+        parse_modules (ipc, prs);
+
+      assert (!lt.has_prerequisites ());
+      if (!prs.empty ())
+        lt.prerequisites (move (prs));
+
+      // Bless the library group with a "trust me it exists" timestamp. Failed
+      // that, if we add it as a prerequisite (like we do above), the fallback
+      // file rule won't match.
+      //
+      lt.mtime (mtime (ipc.path));
+    }
+
+#else
+
+    pair<path, path> common::
+    pkgconfig_search (const dir_path&,
+                      const optional<project_name>&,
+                      const string&,
+                      bool) const
+    {
+      return pair<path, path> (); // Bootstrap: no .pc search, nothing found.
+    }
+
+    bool common::
+    pkgconfig_load (action,
+                    const scope&,
+                    lib&,
+                    liba*,
+                    libs*,
+                    const optional<project_name>&,
+                    const string&,
+                    const dir_path&,
+                    const dir_paths&,
+                    const dir_paths&) const
+    {
+      return false; // Bootstrap: .pc files are never searched for or loaded.
+    }
+
+    void common::
+    pkgconfig_load (action,
+                    const scope&,
+                    lib&,
+                    liba*,
+                    libs*,
+                    const pair<path, path>&,
+                    const dir_path&,
+                    const dir_paths&,
+                    const dir_paths&) const
+    {
+      assert (false); // Should never be called.
+    }
+
+#endif
+
+    void link_rule::
+    pkgconfig_save (action a, const file& l, bool la, bool binless) const
+    {
+      tracer trace (x, "pkgconfig_save");
+
+      context& ctx (l.ctx);
+
+      const scope& bs (l.base_scope ());
+      const scope& rs (*bs.root_scope ());
+
+      auto* t (find_adhoc_member<pc> (l));
+      assert (t != nullptr);
+
+      // By default we assume things go into install.{include, lib}.
+      //
+      using install::resolve_dir;
+
+      dir_path idir (resolve_dir (l, cast<dir_path> (l["install.include"])));
+      dir_path ldir (resolve_dir (l, cast<dir_path> (l["install.lib"])));
+
+      const path& p (t->path ());
+
+      if (verb >= 2)
+        text << "cat >" << p;
+
+      if (ctx.dry_run)
+        return;
+
+      auto_rmfile arm (p);
+
+      try
+      {
+        ofdstream os (p);
+
+        {
+          const project_name& n (project (rs));
+
+          if (n.empty ())
+            fail << "no project name in " <<  rs;
+
+          lookup vl (rs.vars[ctx.var_version]);
+          if (!vl)
+            fail << "no version variable in project " << n <<
+              info << "while generating " << p;
+
+          const string& v (cast<string> (vl));
+
+          os << "Name: " << n << endl;
+          os << "Version: " << v << endl;
+
+          // This one is required so make something up if unspecified.
+          //
+          os << "Description: ";
+          if (const string* s = cast_null<string> (rs[ctx.var_project_summary]))
+            os << *s << endl;
+          else
+            os << n << ' ' << v << endl;
+
+          if (const string* u = cast_null<string> (rs[ctx.var_project_url]))
+            os << "URL: " << *u << endl;
+        }
+
+        auto save_poptions = [&l, &os] (const variable& var)
+        {
+          if (const strings* v = cast_null<strings> (l[var]))
+          {
+            for (auto i (v->begin ()), e (v->end ()); i != e; ++i)
+            {
+              const string& o (*i);
+              size_t n (o.size ());
+
+              // Filter out -I (both -I<dir> and -I <dir> forms), taking care
+              // not to step past the end if the separate <dir> is missing.
+              //
+              if (n >= 2 && o[0] == '-' && o[1] == 'I')
+              {
+                if (n == 2 && ++i == e)
+                  break;
+                continue;
+              }
+
+              os << ' ' << escape (o);
+            }
+          }
+        };
+
+        // Given a library save its -l-style library name.
+        //
+        auto save_library = [&os, this] (const file& l)
+        {
+          // If available (it may not, in case of import-installed libraries),
+          // use the .pc file name to derive the -l library name (in case of
+          // the shared library, l.path() may contain version).
+          //
+          string n;
+
+          auto strip_lib = [&n] () // Drop a leading "lib" (if more follows).
+          {
+            if (n.size () > 3 &&
+                path::traits_type::compare (n.c_str (), 3, "lib", 3) == 0)
+              n.erase (0, 3);
+          };
+
+          if (auto* t = find_adhoc_member<pc> (l))
+          {
+            // We also want to strip the lib prefix unless it is part of the
+            // target name while keeping custom library prefix/suffix, if any.
+            //
+            n = t->path ().leaf ().base ().base ().string ();
+
+            if (path::traits_type::compare (n.c_str (), n.size (),
+                                       l.name.c_str (), l.name.size ()) != 0)
+              strip_lib ();
+          }
+          else
+          {
+            // Derive -l-name from the file name in a fuzzy, platform-specific
+            // manner.
+            //
+            n = l.path ().leaf ().base ().string ();
+
+            if (cclass != compiler_class::msvc)
+              strip_lib ();
+          }
+
+          os << " -l" << n; // Note: the name is written as is (not escaped).
+        };
+
+        // @@ TODO: support whole archive?
+        //
+
+        // Cflags.
+        //
+        os << "Cflags:";
+        os << " -I" << escape (idir.string ());
+        save_poptions (c_export_poptions);
+        save_poptions (x_export_poptions);
+        os << endl;
+
+        // Libs.
+        //
+        // While we generate split shared/static .pc files, in case of static
+        // we still want to sort things out into Libs/Libs.private. This is
+        // necessary to distinguish between interface and implementation
+        // dependencies if we don't have the shared variant (see the load
+        // logic for details).
+        //
+        //@@ TODO: would be nice to weed out duplicates. But is it always
+        //   safe? Think linking archives: will have to keep duplicates in
+        //   the second position, not first. Gets even trickier with
+        //   Libs.private split.
+        //
+        {
+          os << "Libs:";
+
+          // While we don't need it for a binless library itself, it may be
+          // necessary to resolve its binfull dependencies.
+          //
+          os << " -L" << escape (ldir.string ());
+
+          // Now process ourselves as if we were being linked to something (so
+          // pretty similar to link_rule::append_libraries()).
+          //
+          bool priv (false);
+          auto imp = [&priv] (const file&, bool la) {return priv && la;};
+
+          auto lib = [&os, &save_library] (const file* const* c,
+                                           const string& p,
+                                           lflags,
+                                           bool)
+          {
+            const file* l (c != nullptr ? *c : nullptr);
+
+            if (l != nullptr)
+            {
+              if (l->is_a<libs> () || l->is_a<liba> ()) // See through libux.
+                save_library (*l);
+            }
+            else
+              os << ' ' << p; // Something "system'y", pass as is.
+          };
+
+          auto opt = [] (const file&,
+                         const string&,
+                         bool, bool)
+          {
+            //@@ TODO: should we filter -L similar to -I?
+            //@@ TODO: how will the Libs/Libs.private work?
+            //@@ TODO: remember to use escape()
+
+            /*
+            // If we need an interface value, then use the group (lib{}).
+            //
+            if (const target* g = exp && l.is_a<libs> () ? l.group : &l)
+            {
+              const variable& var (
+                com
+                ? (exp ? c_export_loptions : c_loptions)
+                : (t == x
+                   ? (exp ? x_export_loptions : x_loptions)
+                   : var_pool[t + (exp ? ".export.loptions" : ".loptions")]));
+
+              append_options (args, *g, var);
+            }
+            */
+          };
+
+          // Pretend we are linking an executable using what would be normal,
+          // system-default link order.
+          //
+          linfo li {otype::e, la ? lorder::a_s : lorder::s_a};
+
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             l, la, 0, // Link flags.
+                             imp, lib, opt, !binless);
+          os << endl;
+
+          if (la)
+          {
+            os << "Libs.private:";
+
+            priv = true;
+            process_libraries (a, bs, li, sys_lib_dirs,
+                               l, la, 0, // Link flags.
+                               imp, lib, opt, false);
+            os << endl;
+          }
+        }
+
+        // If we have modules, list them in the modules variable. We also save
+        // some extra info about them (yes, the rabbit hole runs deep). This
+        // code is pretty similar to compiler::search_modules().
+        //
+        if (modules)
+        {
+          struct module
+          {
+            string name;
+            path file;
+
+            string pp;
+            bool symexport;
+          };
+          vector<module> modules;
+
+          for (const target* pt: l.prerequisite_targets[a])
+          {
+            // @@ UTL: we need to (recursively) see through libu*{} (and
+            //    also in search_modules()).
+            //
+            if (pt != nullptr && pt->is_a<bmix> ())
+            {
+              // What we have is a binary module interface. What we need is
+              // a module interface source it was built from. We assume it's
+              // the first mxx{} target that we see.
+              //
+              const target* mt (nullptr);
+              for (const target* t: pt->prerequisite_targets[a])
+              {
+                if ((mt = t->is_a (*x_mod)))
+                  break;
+              }
+
+              // Can/should there be a bmi{} without mxx{}? Can't think of a
+              // reason.
+              //
+              assert (mt != nullptr);
+
+              path p (install::resolve_file (mt->as<file> ()));
+
+              if (p.empty ()) // Not installed.
+                continue;
+
+              string pp;
+              if (const string* v = cast_null<string> ((*mt)[x_preprocessed]))
+                pp = *v;
+
+              modules.push_back (
+                module {
+                  cast<string> (pt->state[a].vars[c_module_name]),
+                  move (p),
+                  move (pp),
+                  symexport
+                });
+            }
+          }
+
+          if (!modules.empty ())
+          {
+            os << endl
+               << "cxx_modules =";
+
+            // Module names shouldn't require escaping.
+            //
+            for (const module& m: modules)
+              os << ' ' << m.name << '=' << escape (m.file.string ());
+
+            os << endl;
+
+            // Module-specific properties. The format is:
+            //
+            // <lang>_module_<property>.<module> = <value>
+            //
+            for (const module& m: modules)
+            {
+              if (!m.pp.empty ())
+                os << "cxx_module_preprocessed." << m.name << " = " << m.pp
+                   << endl;
+
+              if (m.symexport)
+                os << "cxx_module_symexport." << m.name << " = true" << endl;
+            }
+          }
+        }
+
+        os.close ();
+        arm.cancel ();
+      }
+      catch (const io_error& e)
+      {
+        fail << "unable to write " << p << ": " << e;
+      }
+    }
+  }
+}
diff --git a/libbuild2/cc/target.cxx b/libbuild2/cc/target.cxx
new file mode 100644
index 0000000..a438898
--- /dev/null
+++ b/libbuild2/cc/target.cxx
@@ -0,0 +1,101 @@
+// file      : libbuild2/cc/target.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/target.hxx>
+
+#include <libbuild2/context.hxx>
+
+using namespace std;
+
+namespace build2
+{
+  namespace cc
+  {
+    const target_type cc::static_type
+    {
+      "cc",
+      &file::static_type,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      &target_search,
+      false
+    };
+
+    extern const char h_ext_def[] = "h";
+
+    const target_type h::static_type
+    {
+      "h",
+      &cc::static_type,
+      &target_factory<h>,
+      nullptr, /* fixed_extension */
+      &target_extension_var<h_ext_def>,
+      &target_pattern_var<h_ext_def>,
+      nullptr,
+      &file_search,
+      false
+    };
+
+    extern const char c_ext_def[] = "c";
+
+    const target_type c::static_type
+    {
+      "c",
+      &cc::static_type,
+      &target_factory<c>,
+      nullptr, /* fixed_extension */
+      &target_extension_var<c_ext_def>,
+      &target_pattern_var<c_ext_def>,
+      nullptr,
+      &file_search,
+      false
+    };
+
+    const target_type pc::static_type
+    {
+      "pc",
+      &file::static_type,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      &target_search,
+      false
+    };
+
+    extern const char pca_ext[] = "static.pc"; // VC14 rejects constexpr.
+
+    const target_type pca::static_type
+    {
+      "pca",
+      &pc::static_type,
+      &target_factory<pca>,
+      &target_extension_fix<pca_ext>,
+      nullptr, /* default_extension */
+      &target_pattern_fix<pca_ext>,
+      &target_print_0_ext_verb, // Fixed extension, no use printing.
+      &file_search,
+      false
+    };
+
+    extern const char pcs_ext[] = "shared.pc"; // VC14 rejects constexpr.
+
+    const target_type pcs::static_type
+    {
+      "pcs",
+      &pc::static_type,
+      &target_factory<pcs>,
+      &target_extension_fix<pcs_ext>,
+      nullptr, /* default_extension */
+      &target_pattern_fix<pcs_ext>,
+      &target_print_0_ext_verb, // Fixed extension, no use printing.
+      &file_search,
+      false
+    };
+  }
+}
diff --git a/libbuild2/cc/target.hxx b/libbuild2/cc/target.hxx
new file mode 100644
index 0000000..885bf68
--- /dev/null
+++ b/libbuild2/cc/target.hxx
@@ -0,0 +1,96 @@
+// file      : libbuild2/cc/target.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_TARGET_HXX
+#define LIBBUILD2_CC_TARGET_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+
+#include <libbuild2/cc/export.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // This is an abstract base target for all c-common header/source files.
+    // We use this arrangement during rule matching to detect "unknown" (to
+    // this rule) source/header files that it cannot handle but should not
+    // ignore either. For example, a C link rule that sees a C++ source file.
+    //
+    class LIBBUILD2_CC_SYMEXPORT cc: public file
+    {
+    public:
+      using file::file;
+
+    public:
+      static const target_type static_type;
+      virtual const target_type& dynamic_type () const = 0;
+    };
+
+    // There is hardly a c-family compilation without a C header inclusion.
+    // As a result, this target type is registered for any c-family module.
+    //
+    class LIBBUILD2_CC_SYMEXPORT h: public cc
+    {
+    public:
+      using cc::cc;
+
+    public:
+      static const target_type static_type;
+      virtual const target_type& dynamic_type () const {return static_type;}
+    };
+
+    // This one we define in cc but the target type is only registered by the
+    // c module. This way we can implement rule chaining without jumping
+    // through too many hoops (like resolving target type dynamically) but
+    // also without relaxing things too much (i.e., the user still won't be
+    // able to refer to c{} without loading the c module).
+    //
+    class LIBBUILD2_CC_SYMEXPORT c: public cc
+    {
+    public:
+      using cc::cc;
+
+    public:
+      static const target_type static_type;
+      virtual const target_type& dynamic_type () const {return static_type;}
+    };
+
+    // pkg-config file targets.
+    //
+    class LIBBUILD2_CC_SYMEXPORT pc: public file
+    {
+    public:
+      using file::file;
+
+    public:
+      static const target_type static_type;
+    };
+
+    class LIBBUILD2_CC_SYMEXPORT pca: public pc // .static.pc
+    {
+    public:
+      using pc::pc;
+
+    public:
+      static const target_type static_type;
+      virtual const target_type& dynamic_type () const {return static_type;}
+    };
+
+    class LIBBUILD2_CC_SYMEXPORT pcs: public pc // .shared.pc
+    {
+    public:
+      using pc::pc;
+
+    public:
+      static const target_type static_type;
+      virtual const target_type& dynamic_type () const {return static_type;}
+    };
+  }
+}
+
+#endif // LIBBUILD2_CC_TARGET_HXX
diff --git a/libbuild2/cc/types.hxx b/libbuild2/cc/types.hxx
new file mode 100644
index 0000000..280dcbf
--- /dev/null
+++ b/libbuild2/cc/types.hxx
@@ -0,0 +1,116 @@
+// file      : libbuild2/cc/types.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_TYPES_HXX
+#define LIBBUILD2_CC_TYPES_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target-type.hxx>
+
+namespace build2
+{
+  namespace cc
+  {
+    // Translation unit information.
+    //
+    // We use absolute and normalized header path as the header unit module
+    // name.
+    //
+    // Note that our terminology doesn't exactly align with the (current)
+    // standard where a header unit is not a module (that is, you either
+    // import a "module [interface translation unit]" or a "[synthesized]
+    // header [translation] unit"). On the other hand, lots of the underlying
+    // mechanics suggest that a header unit is module-like; they end up having
+    // BMIs (which stand for "binary module interface"), etc. In a sense, a
+    // header unit is an "interface unit" for (a part of) the global module
+    // (maybe a partition).
+    //
+    enum class unit_type
+    {
+      non_modular,
+      module_iface,
+      module_impl,
+      module_header
+    };
+
+    struct module_import
+    {
+      unit_type  type;      // Either module_iface or module_header.
+      string     name;
+      bool       exported;  // True if re-exported (export import M;).
+      size_t     score;     // Match score (see compile::search_modules()).
+    };
+
+    using module_imports = vector<module_import>;
+
+    struct module_info
+    {
+      string         name;     // Empty if non-modular.
+      module_imports imports;  // Imported modules.
+    };
+
+    struct unit
+    {
+      unit_type               type = unit_type::non_modular;
+      build2::cc::module_info module_info;
+    };
+
+    // Compiler language.
+    //
+    enum class lang {c, cxx};
+
+    inline ostream&
+    operator<< (ostream& os, lang l)
+    {
+      return os << (l == lang::c ? "C" : "C++");
+    }
+
+    // Compile/link output type (executable, static, or shared).
+    //
+    enum class otype {e, a, s};
+
+    struct ltype
+    {
+      otype type;
+      bool  utility; // True for utility libraries.
+
+      bool executable ()     const {return type == otype::e && !utility;}
+      bool library ()        const {return type != otype::e ||  utility;}
+      bool static_library () const {return type == otype::a ||  utility;}
+      bool shared_library () const {return type == otype::s && !utility;}
+      bool member_library () const {return type != otype::e;}
+    };
+
+    // Compile target types.
+    //
+    struct compile_target_types
+    {
+      const target_type& obj;
+      const target_type& bmi;
+      const target_type& hbmi;
+    };
+
+    // Library link order.
+    //
+    enum class lorder {a, s, a_s, s_a};
+
+    // Link information: output type and link order.
+    //
+    struct linfo
+    {
+      otype  type;
+      lorder order;
+    };
+
+    // Prerequisite link flags.
+    //
+    using lflags = uintptr_t; // To match prerequisite_target::data.
+
+    const lflags lflag_whole = 0x00000001U; // Link whole liba{}/libu*{}.
+  }
+}
+
+#endif // LIBBUILD2_CC_TYPES_HXX
diff --git a/libbuild2/cc/utility.cxx b/libbuild2/cc/utility.cxx
new file mode 100644
index 0000000..07f3b2e
--- /dev/null
+++ b/libbuild2/cc/utility.cxx
@@ -0,0 +1,114 @@
+// file      : libbuild2/cc/utility.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/cc/utility.hxx>
+
+#include <libbuild2/file.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx> // search()
+
+#include <libbuild2/bin/rule.hxx>
+#include <libbuild2/bin/target.hxx>
+
+using namespace std;
+
+namespace build2
+{
+  namespace cc
+  {
+    using namespace bin;
+
+    const dir_path module_dir ("cc");
+    const dir_path modules_sidebuild_dir (dir_path (module_dir) /= "modules");
+
+    lorder
+    link_order (const scope& bs, otype ot)
+    {
+      // Name of the variable to look up in bs, selected by ot. Initialized to
+      // suppress 'may be used uninitialized' warning from MinGW GCC 5.4.0.
+      //
+      const char* var (nullptr);
+
+      switch (ot)
+      {
+      case otype::e: var = "bin.exe.lib";  break;
+      case otype::a: var = "bin.liba.lib"; break;
+      case otype::s: var = "bin.libs.lib"; break;
+      }
+
+      const auto& v (cast<strings> (bs[var])); // v[0] is read unchecked.
+      return v[0] == "shared"
+        ? v.size () > 1 && v[1] == "static" ? lorder::s_a : lorder::s
+        : v.size () > 1 && v[1] == "shared" ? lorder::a_s : lorder::a;
+    }
+
+    const target*
+    link_member (const bin::libx& x, action a, linfo li, bool exist)
+    {
+      if (x.is_a<libul> ())
+      {
+        // For libul{} that is linked to an executable the member choice
+        // should be dictated by the members of lib{} this libul{} is
+        // "primarily" for. If both are being built, then it seems natural to
+        // prefer static over shared since it could be faster (but I am sure
+        // someone will probably want this configurable).
+        //
+        if (li.type == otype::e)
+        {
+          // Utility libraries are project-local which means the primary
+          // target should be in the same project as us.
+          //
+          li.type = lib_rule::build_members (x.root_scope ()).a
+            ? otype::a
+            : otype::s;
+        }
+
+        const target_type& tt (li.type == otype::a
+                               ? libua::static_type
+                               : libus::static_type);
+
+        // Called by the compile rule during execute.
+        //
+        return x.ctx.phase == run_phase::match && !exist
+          ? &search (x, tt, x.dir, x.out, x.name)
+          : search_existing (x.ctx, tt, x.dir, x.out, x.name);
+      }
+      else
+      {
+        assert (!exist); // Existing-only lookup is for libul{} only.
+
+        const lib& l (x.as<lib> ());
+
+        // Make sure group members are resolved.
+        //
+        group_view gv (resolve_members (a, l));
+        assert (gv.members != nullptr);
+
+        lorder lo (li.order);
+
+        bool ls (true); // Link the shared (true) or static (false) member.
+        switch (lo)
+        {
+        case lorder::a:
+        case lorder::a_s:
+          ls = false; // Fall through.
+        case lorder::s:
+        case lorder::s_a:
+          {
+            if (ls ? l.s == nullptr : l.a == nullptr)
+            {
+              if (lo == lorder::a_s || lo == lorder::s_a)
+                ls = !ls; // Preferred member missing: use the other.
+              else
+                fail << (ls ? "shared" : "static") << " variant of " << l
+                     << " is not available";
+            }
+          }
+        }
+
+        return ls ? static_cast<const target*> (l.s) : l.a;
+      }
+    }
+  }
+}
diff --git a/libbuild2/cc/utility.hxx b/libbuild2/cc/utility.hxx
new file mode 100644
index 0000000..3ee07bd
--- /dev/null
+++ b/libbuild2/cc/utility.hxx
@@ -0,0 +1,73 @@
+// file      : libbuild2/cc/utility.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_CC_UTILITY_HXX
+#define LIBBUILD2_CC_UTILITY_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/types.hxx>
+
+namespace build2
+{
+  struct variable;
+
+  namespace cc
+  {
+    // To form the complete path do:
+    //
+    //   root.out_path () / root.root_extra->build_dir / module_dir
+    //
+    extern const dir_path module_dir;             // cc/
+    extern const dir_path modules_sidebuild_dir;  // cc/modules/
+
+    // Compile output type.
+    //
+    otype
+    compile_type (const target&, unit_type);
+
+    compile_target_types
+    compile_types (otype);
+
+    // Link output type.
+    //
+    ltype
+    link_type (const target&);
+
+    // Library link order.
+    //
+    // The reason we pass scope and not the target is because this function is
+    // called not only for exe/lib but also for obj as part of the library
+    // meta-information protocol implementation. Normally the bin.*.lib values
+    // will be project-wide. With this scheme they can be customized on the
+    // per-directory basis but not per-target which means all exe/lib in the
+    // same directory have to have the same link order.
+    //
+    lorder
+    link_order (const scope& base, otype);
+
+    inline linfo
+    link_info (const scope& base, otype ot)
+    {
+      return linfo {ot, link_order (base, ot)};
+    }
+
+    // Given the link order return the library member to link. That is, liba{}
+    // or libs{} for lib{} and libua{} or libus{} for libul{}.
+    //
+    // If existing is true, then only return the member target if it exists
+    // (currently only used and supported for utility libraries).
+    //
+    const target*
+    link_member (const bin::libx&, action, linfo, bool existing = false);
+  }
+}
+
+#include <libbuild2/cc/utility.ixx>
+
+#endif // LIBBUILD2_CC_UTILITY_HXX
diff --git a/libbuild2/cc/utility.ixx b/libbuild2/cc/utility.ixx
new file mode 100644
index 0000000..1509bf2
--- /dev/null
+++ b/libbuild2/cc/utility.ixx
@@ -0,0 +1,73 @@
+// file      : libbuild2/cc/utility.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+  namespace cc
+  {
+    inline otype
+    compile_type (const target& t, unit_type u)
+    {
+      using namespace bin;
+
+      auto test = [&t, u] (const auto& h, const auto& i, const auto& o)
+      {
+        return t.is_a (u == unit_type::module_header ? h :
+                       u == unit_type::module_iface  ? i :
+                       o); // Otherwise (non_modular, module_impl): obj*{}.
+      };
+
+      return
+        test (hbmie::static_type, bmie::static_type, obje::static_type) ? otype::e :
+        test (hbmia::static_type, bmia::static_type, obja::static_type) ? otype::a :
+        otype::s; // Neither the e nor the a variant matched.
+    }
+
+    inline ltype
+    link_type (const target& t)
+    {
+      using namespace bin;
+
+      bool u (false); // Set as a side effect of the libu*{} tests below.
+      otype o (
+        t.is_a<exe>  () || (u = t.is_a<libue> ()) ? otype::e :
+        t.is_a<liba> () || (u = t.is_a<libua> ()) ? otype::a :
+        t.is_a<libs> () || (u = t.is_a<libus> ()) ? otype::s :
+        static_cast<otype> (0xFF)); // No match: not a valid otype value.
+
+      return ltype {o, u};
+    }
+
+    inline compile_target_types
+    compile_types (otype t)
+    {
+      using namespace bin;
+
+      const target_type* o (nullptr); // obj*{}
+      const target_type* i (nullptr); // bmi*{}
+      const target_type* h (nullptr); // hbmi*{}
+
+      switch (t)
+      {
+      case otype::e:
+        o = &obje::static_type;
+        i = &bmie::static_type;
+        h = &hbmie::static_type;
+        break;
+      case otype::a:
+        o = &obja::static_type;
+        i = &bmia::static_type;
+        h = &hbmia::static_type;
+        break;
+      case otype::s:
+        o = &objs::static_type;
+        i = &bmis::static_type;
+        h = &hbmis::static_type;
+        break;
+      }
+
+      return compile_target_types {*o, *i, *h}; // Order: obj, bmi, hbmi.
+    }
+  }
+}
diff --git a/libbuild2/cc/windows-manifest.cxx b/libbuild2/cc/windows-manifest.cxx
new file mode 100644
index 0000000..8d67f0c
--- /dev/null
+++ b/libbuild2/cc/windows-manifest.cxx
@@ -0,0 +1,143 @@
+// file      : libbuild2/cc/windows-manifest.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/cc/link-rule.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    // Translate the compiler target CPU (i386, i686, or x86_64) to the
+    // processorArchitecture manifest attribute value, failing for others.
+    //
+    const char*
+    windows_manifest_arch (const string& tcpu)
+    {
+      const char* pa (tcpu == "i386" || tcpu == "i686"  ? "x86"   :
+                      tcpu == "x86_64"                  ? "amd64" :
+                      nullptr); // Unsupported CPU.
+
+      if (pa == nullptr)
+        fail << "unable to translate CPU " << tcpu << " to manifest "
+             << "processor architecture";
+
+      return pa;
+    }
+
+    // Generate a Windows manifest and if necessary create/update the manifest
+    // file corresponding to the exe{} target. Return the manifest file path
+    // and its timestamp if unchanged or timestamp_nonexistent otherwise.
+    //
+    pair<path, timestamp> link_rule::
+    windows_manifest (const file& t, bool rpath_assembly) const
+    {
+      tracer trace (x, "link_rule::windows_manifest");
+
+      const scope& rs (t.root_scope ());
+
+      const char* pa (windows_manifest_arch (cast<string> (rs[x_target_cpu])));
+
+      string m;
+
+      m += "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n";
+      m += "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n";
+      m += "          manifestVersion='1.0'>\n";
+
+      // Program name, version, etc.
+      //
+      string name (t.path ().leaf ().string ());
+
+      m += "  <assemblyIdentity name='"; m += name; m += "'\n";
+      m += "                    type='win32'\n";
+      m += "                    processorArchitecture='"; m += pa; m += "'\n";
+      m += "                    version='0.0.0.0'/>\n";
+
+      // Our rpath-emulating assembly.
+      //
+      if (rpath_assembly)
+      {
+        m += "  <dependency>\n";
+        m += "    <dependentAssembly>\n";
+        m += "      <assemblyIdentity name='"; m += name; m += ".dlls'\n";
+        m += "                        type='win32'\n";
+        m += "                        processorArchitecture='"; m += pa; m += "'\n";
+        m += "                        language='*'\n";
+        m += "                        version='0.0.0.0'/>\n";
+        m += "    </dependentAssembly>\n";
+        m += "  </dependency>\n";
+      }
+
+      // UAC information. Without it Windows will try to guess, which, as you
+      // can imagine, doesn't end well.
+      //
+      m += "  <trustInfo xmlns='urn:schemas-microsoft-com:asm.v3'>\n";
+      m += "    <security>\n";
+      m += "      <requestedPrivileges>\n";
+      m += "        <requestedExecutionLevel level='asInvoker' uiAccess='false'/>\n";
+      m += "      </requestedPrivileges>\n";
+      m += "    </security>\n";
+      m += "  </trustInfo>\n";
+
+      m += "</assembly>\n";
+
+      // If the manifest file exists, compare to its content. If nothing
+      // changed (common case), then we can avoid any further updates.
+      //
+      // The potentially faster alternative would be to hash it and store an
+      // entry in depdb. This, however, gets a bit complicated since we will
+      // need to avoid a race between the depdb and .manifest updates.
+      //
+      path mf (t.path () + ".manifest");
+
+      timestamp mt (mtime (mf));
+
+      if (mt != timestamp_nonexistent)
+      {
+        try
+        {
+          ifdstream is (mf);
+          if (is.read_text () == m)
+            return make_pair (move (mf), mt);
+        }
+        catch (const io_error&)
+        {
+          // Whatever the reason we failed for, let's rewrite the file.
+        }
+      }
+
+      if (verb >= 3)
+        text << "cat >" << mf;
+
+      if (!t.ctx.dry_run)
+      {
+        auto_rmfile rm (mf);
+
+        try
+        {
+          ofdstream os (mf);
+          os << m;
+          os.close ();
+          rm.cancel ();
+
+        }
+        catch (const io_error& e)
+        {
+          fail << "unable to write to " << mf << ": " << e;
+        }
+      }
+
+      return make_pair (move (mf), timestamp_nonexistent);
+    }
+  }
+}
diff --git a/libbuild2/cc/windows-rpath.cxx b/libbuild2/cc/windows-rpath.cxx
new file mode 100644
index 0000000..5583315
--- /dev/null
+++ b/libbuild2/cc/windows-rpath.cxx
@@ -0,0 +1,400 @@
+// file      : libbuild2/cc/windows-rpath.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <errno.h> // E*
+
+#include <libbuild2/scope.hxx>
+#include <libbuild2/context.hxx>
+#include <libbuild2/variable.hxx>
+#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/bin/target.hxx>
+
+#include <libbuild2/cc/link-rule.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+  namespace cc
+  {
+    // Provide limited emulation of the rpath functionality on Windows using a
+    // side-by-side assembly. In a nutshell, the idea is to create an assembly
+    // with links to all the prerequisite DLLs.
+    //
+    // Note that currently our assemblies contain all the DLLs that the
+    // executable depends on, recursively. The alternative approach could be
+    // to also create assemblies for DLLs. This appears to be possible (but we
+    // will have to use the resource ID 2 for such a manifest). And it will
+    // probably be necessary for DLLs that are loaded dynamically with
+    // LoadLibrary(). The tricky part is how such nested assemblies will be
+    // found. Since we are effectively (from the loader's point of view)
+    // copying the DLLs, we will also have to copy their assemblies (because
+    // the loader looks for them in the same directory as the DLL). It's not
+    // clear how well such nested assemblies are supported (e.g., in Wine).
+    //
+    // What if the DLL is in the same directory as the executable, will it
+    // still be found even if there is an assembly? On the other hand,
+    // handling it as any other won't hurt us much.
+    //
+    using namespace bin;
+
+    // Return the greatest (newest) timestamp of all the DLLs that we will be
+    // adding to the assembly or timestamp_nonexistent if there aren't any.
+    //
+    timestamp link_rule::
+    windows_rpath_timestamp (const file& t,
+                             const scope& bs,
+                             action a,
+                             linfo li) const
+    {
+      timestamp r (timestamp_nonexistent); // Newest DLL timestamp so far.
+
+      // We need to collect all the DLLs, so go into implementation of both
+      // shared and static (in case they depend on shared).
+      //
+      auto imp = [] (const file&, bool) {return true;};
+
+      auto lib = [&r] (const file* const* lc,
+                       const string& f,
+                       lflags,
+                       bool sys)
+      {
+        const file* l (lc != nullptr ? *lc : nullptr); // NULL: only path f.
+
+        // We don't rpath system libraries.
+        //
+        if (sys)
+          return;
+
+        // Skip static libraries (and shared ones without a path).
+        //
+        if (l != nullptr)
+        {
+          // This can be an "undiscovered" DLL (see search_library()).
+          //
+          if (!l->is_a<libs> () || l->path ().empty ()) // Also covers binless.
+            return;
+        }
+        else
+        {
+          // This is an absolute path and we need to decide whether it is
+          // a shared or static library.
+          //
+          // @@ This is so broken: we don't link to DLLs, we link to .lib or
+          //    .dll.a! Should we even bother? Maybe only for "our" DLLs
+          //    (.dll.lib/.dll.a)? But the DLL can also be in a different
+          //    directory (lib/../bin).
+          //
+          //    Though this can happen on MinGW with direct DLL link...
+          //
+          size_t p (path::traits_type::find_extension (f));
+
+          if (p == string::npos || casecmp (f.c_str () + p + 1, "dll") != 0)
+            return;
+        }
+
+        // Ok, this is a DLL.
+        //
+        timestamp t (l != nullptr
+                     ? l->load_mtime ()
+                     : mtime (f.c_str ()));
+
+        if (t > r)
+          r = t;
+      };
+
+      for (const prerequisite_target& pt: t.prerequisite_targets[a])
+      {
+        if (pt == nullptr || pt.adhoc)
+          continue;
+
+        bool la; // True if f is liba{} or libux{}.
+        const file* f;
+
+        if ((la = (f = pt->is_a<liba>  ())) ||
+            (la = (f = pt->is_a<libux> ())) || // See through.
+            (     f = pt->is_a<libs>  ()))
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             *f, la, pt.data,
+                             imp, lib, nullptr, true);
+      }
+
+      return r;
+    }
+
+    // Like *_timestamp() but actually collect the DLLs (and weed out the
+    // duplicates) together with their .pdb files, if any.
+    //
+    auto link_rule::
+    windows_rpath_dlls (const file& t,
+                        const scope& bs,
+                        action a,
+                        linfo li) const -> windows_dlls
+    {
+      windows_dlls r; // Result: one entry per distinct DLL.
+
+      auto imp = [] (const file&, bool) {return true;};
+
+      auto lib = [&r, &bs] (const file* const* lc,
+                            const string& f,
+                            lflags,
+                            bool sys)
+      {
+        const file* l (lc != nullptr ? *lc : nullptr); // NULL: only path f.
+
+        if (sys)
+          return;
+
+        if (l != nullptr)
+        {
+          if (l->is_a<libs> () && !l->path ().empty ()) // Also covers binless.
+          {
+            // Get .pdb if there is one.
+            //
+            const target_type* tt (bs.find_target_type ("pdb"));
+            const target* pdb (tt != nullptr
+                               ? find_adhoc_member (*l, *tt)
+                               : nullptr);
+            r.insert (
+              windows_dll {
+                f,
+                pdb != nullptr ? &pdb->as<file> ().path ().string () : nullptr,
+                string ()
+              });
+          }
+        }
+        else
+        {
+          size_t p (path::traits_type::find_extension (f));
+
+          if (p != string::npos && casecmp (f.c_str () + p + 1, "dll") == 0)
+          {
+            // See if we can find a corresponding .pdb.
+            //
+            windows_dll wd {f, nullptr, string ()};
+            string& pdb (wd.pdb_storage);
+
+            // First try "our" naming: foo.dll.pdb.
+            //
+            pdb = f + ".pdb";
+
+            if (!exists (path (pdb)))
+            {
+              // Then try the usual naming: foo.pdb.
+              //
+              pdb = string (f, 0, p) + ".pdb";
+
+              if (!exists (path (pdb)))
+                pdb.clear ();
+            }
+
+            // Insert first, then point pdb at the copy stored in r (elements
+            // are const, hence the cast) rather than at the soon-gone local.
+            //
+            auto i (r.insert (move (wd)));
+            if (i.second && !i.first->pdb_storage.empty ())
+              const_cast<windows_dll&> (*i.first).pdb = &i.first->pdb_storage;
+          }
+        }
+      };
+
+      for (const prerequisite_target& pt: t.prerequisite_targets[a])
+      {
+        if (pt == nullptr || pt.adhoc)
+          continue;
+
+        bool la; // True if f is liba{} or libux{}.
+        const file* f;
+
+        if ((la = (f = pt->is_a<liba>  ())) ||
+            (la = (f = pt->is_a<libux> ())) || // See through.
+            (      f = pt->is_a<libs>  ()))
+          process_libraries (a, bs, li, sys_lib_dirs,
+                             *f, la, pt.data,
+                             imp, lib, nullptr, true);
+      }
+
+      return r;
+    }
+
+    const char*
+    windows_manifest_arch (const string& tcpu); // windows-manifest.cxx
+
+    // The ts argument should be the DLLs timestamp returned by *_timestamp().
+    //
+    // The scratch argument should be true if the DLL set has changed and we
+    // need to regenerate everything from scratch. Otherwise, we try to avoid
+    // unnecessary work by comparing the DLLs timestamp against the assembly
+    // manifest file.
+    //
+    void link_rule::
+    windows_rpath_assembly (const file& t,
+                            const scope& bs,
+                            action a,
+                            linfo li,
+                            const string& tcpu,
+                            timestamp ts,
+                            bool scratch) const
+    {
+      // Assembly paths and name.
+      //
+      dir_path ad (path_cast<dir_path> (t.path () + ".dlls"));
+      string an (ad.leaf ().string ());
+      path am (ad / path (an + ".manifest"));
+
+      // First check if we actually need to do anything. Since most of the
+      // time we won't, we don't want to combine it with the *_dlls() call
+      // below which allocates memory, etc.
+      //
+      if (!scratch)
+      {
+        // The corner case here is when _timestamp() returns nonexistent
+        // signalling that there aren't any DLLs but the assembly manifest
+        // file exists. This, however, can only happen if we somehow managed
+        // to transition from the "have DLLs" state to "no DLLs" without going
+        // through the "from scratch" update. Actually this can happen when
+        // switching to update-for-install.
+        //
+        if (ts != timestamp_nonexistent && ts <= mtime (am))
+          return;
+      }
+
+      // Next collect the set of DLLs that will be in our assembly. We need to
+      // do this recursively which means we may end up with duplicates. Also,
+      // it is possible that there aren't/no longer are any DLLs which means
+      // we just need to clean things up.
+      //
+      bool empty (ts == timestamp_nonexistent);
+
+      windows_dlls dlls;
+      if (!empty)
+        dlls = windows_rpath_dlls (t, bs, a, li);
+
+      // Clean the assembly directory and make sure it exists. Maybe it would
+      // have been faster to overwrite the existing manifest rather than
+      // removing the old one and creating a new one. But this is definitely
+      // simpler.
+      //
+      {
+        rmdir_status s (rmdir_r (t.ctx, ad, empty, 3));
+
+        if (empty)
+          return;
+
+        if (s == rmdir_status::not_exist)
+          mkdir (ad, 3);
+      }
+
+      // Symlink or copy the DLLs.
+      //
+      {
+        const scope& as (t.weak_scope ()); // Amalgamation.
+
+        auto link = [&as] (const path& f, const path& l) // Make l refer to f.
+        {
+          auto print = [&f, &l] (const char* cmd) // Trace cmd at verb >= 3.
+          {
+            if (verb >= 3)
+              text << cmd << ' ' << f << ' ' << l;
+          };
+
+          // Let mkanylink() pick the mechanism: first a symlink, if that
+          // fails (e.g., "Windows happens") a hard link, and if that doesn't
+          // work out either (e.g., not on the same filesystem) a copy. The
+          // entry type it returns (or throws) tells us which one it was.
+          //
+          // The symlink target is made relative if f is in the amalgamation's
+          // out directory. This way if the amalgamation is moved as a whole,
+          // the links will remain valid.
+          //
+          try
+          {
+            switch (mkanylink (f, l,
+                               true                   /* copy */,
+                               f.sub (as.out_path ()) /* relative */))
+            {
+            case entry_type::regular: print ("cp");    break;
+            case entry_type::symlink: print ("ln -s"); break;
+            case entry_type::other:   print ("ln");    break;
+            default:                  assert (false);
+            }
+          }
+          catch (const pair<entry_type, system_error>& e)
+          {
+            const char* w (nullptr); // Kind of link that failed (for diag).
+            switch (e.first)
+            {
+            case entry_type::regular: print ("cp");    w = "copy";     break;
+            case entry_type::symlink: print ("ln -s"); w = "symlink";  break;
+            case entry_type::other:   print ("ln");    w = "hardlink"; break;
+            default:                  assert (false);
+            }
+
+            fail << "unable to make " << w << ' ' << l << ": " << e.second;
+          }
+        };
+
+        for (const windows_dll& wd: dlls)
+        {
+          //@@ Would be nice to avoid copying. Perhaps reuse buffers
+          //   by adding path::assign() and traits::leaf().
+          //
+          path dp (wd.dll);     // DLL path.
+          path dn (dp.leaf ()); // DLL name.
+
+          link (dp, ad / dn);
+
+          // Link .pdb if there is one.
+          //
+          if (wd.pdb != nullptr)
+          {
+            path pp (*wd.pdb);
+            link (pp, ad / pp.leaf ());
+          }
+        }
+      }
+
+      if (verb >= 3)
+        text << "cat >" << am;
+
+      if (t.ctx.dry_run)
+        return;
+
+      auto_rmfile rm (am);
+
+      try
+      {
+        ofdstream os (am);
+
+        const char* pa (windows_manifest_arch (tcpu));
+
+        os << "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n"
+           << "<assembly xmlns='urn:schemas-microsoft-com:asm.v1'\n"
+           << "          manifestVersion='1.0'>\n"
+           << "  <assemblyIdentity name='" << an << "'\n"
+           << "                    type='win32'\n"
+           << "                    processorArchitecture='" << pa << "'\n"
+           << "                    version='0.0.0.0'/>\n";
+
+
+
+        for (const windows_dll& wd: dlls)
+          os << "  <file name='" << path (wd.dll).leaf () << "'/>\n";
+
+        os << "</assembly>\n";
+
+        os.close ();
+        rm.cancel ();
+      }
+      catch (const io_error& e)
+      {
+        fail << "unable to write to " << am << ": " << e;
+      }
+    }
+  }
+}
-- 
cgit v1.1