// file : libbuild2/cc/guess.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/cc/guess.hxx> // Bootstrap build is always performed in the VC's command prompt and thus // doesn't require the VC search functionality. // #if defined(_WIN32) && !defined(BUILD2_BOOTSTRAP) # include <libbutl/win32-utility.hxx> # include <unknwn.h> // IUnknown # include <stdlib.h> // _MAX_PATH # include <oleauto.h> // SysFreeString() # include <guiddef.h> // CLSID, IID # include <objbase.h> // CoInitializeEx(), CoCreateInstance(), etc. // MinGW may lack some macro definitions used in msvc-setup.h (see below), so // we provide them if that's the case. // # ifndef MAXUINT # define MAXUINT UINT_MAX # endif // MinGW's sal.h (Microsoft's Source Code Annotation Language) may not contain // all the in/out annotation macros. // # ifndef _In_z_ # define _In_z_ # endif # ifndef _In_opt_z_ # define _In_opt_z_ # endif # ifndef _Out_opt_ # define _Out_opt_ # endif # ifndef _Deref_out_opt_ # define _Deref_out_opt_ # endif # ifndef _Out_writes_to_ # define _Out_writes_to_(X, Y) # endif # ifndef _Deref_out_range_ # define _Deref_out_range_(X, Y) # endif # ifndef _Outptr_result_maybenull_ # define _Outptr_result_maybenull_ # endif # ifndef _Reserved_ # define _Reserved_ # endif // API for enumerating Visual Studio setup instances and querying information // about them (see the LICENSE file for details). // # include <libbuild2/cc/msvc-setup.h> # include <libbuild2/filesystem.hxx> #endif #include <cstring> // strlen(), strchr(), strstr() #include <libbuild2/diagnostics.hxx> using namespace std; namespace build2 { namespace cc { using std::to_string; string to_string (compiler_type t) { string r; switch (t) { case compiler_type::clang: r = "clang"; break; case compiler_type::gcc: r = "gcc"; break; case compiler_type::msvc: r = "msvc"; break; case compiler_type::icc: r = "icc"; break; } return r; } compiler_id:: compiler_id (const std::string& id) { using std::string; size_t p (id.find ('-')); if (id.compare (0, p, "gcc" ) == 0) type = compiler_type::gcc; else if (id.compare (0, p, "clang") == 0) type = compiler_type::clang; else if (id.compare (0, p, "msvc" ) == 0) type = compiler_type::msvc; else if (id.compare (0, p, "icc" ) == 0) type = compiler_type::icc; else throw invalid_argument ( "invalid compiler type '" + string (id, 0, p) + "'"); if (p != string::npos) { variant.assign (id, p + 1, string::npos); if (variant.empty ()) throw invalid_argument ("empty compiler variant"); } } string compiler_id:: string () const { std::string r (to_string (type)); if (!variant.empty ()) { r += '-'; r += variant; } return r; } string to_string (compiler_class c) { string r; switch (c) { case compiler_class::gcc: r = "gcc"; break; case compiler_class::msvc: r = "msvc"; break; } return r; } // Standard library detection for GCC-class compilers. // // The src argument should detect the standard library based on the // preprocessor macros and output the result in the stdlib:="XXX" form. // static string stdlib (lang xl, const process_path& xp, const strings& x_mo, const strings* c_po, const strings* x_po, const strings* c_co, const strings* x_co, const char* src) { cstrings args {xp.recall_string ()}; if (c_po != nullptr) append_options (args, *c_po); if (x_po != nullptr) append_options (args, *x_po); if (c_co != nullptr) append_options (args, *c_co); if (x_co != nullptr) append_options (args, *x_co); append_options (args, x_mo); args.push_back ("-x"); switch (xl) { case lang::c: args.push_back ("c"); break; case lang::cxx: args.push_back ("c++"); break; } args.push_back ("-E"); args.push_back ("-"); // Read stdin. args.push_back (nullptr); // The source we are going to preprocess may contains #include's which // may fail to resolve if, for example, there is no standard library // (-nostdinc/-nostdinc++). So we are going to suppress diagnostics and // assume the error exit code means no standard library (of course it // could also be because there is something wrong with the compiler or // options but that we simply leave to blow up later). // process pr (run_start (3 /* verbosity */, xp, args, -1 /* stdin */, -1 /* stdout */, false /* error */)); string l, r; try { // Here we have to simultaneously write to stdin and read from stdout // with both operations having the potential to block. For now we // assume that src fits into the pipe's buffer. // ofdstream os (move (pr.out_fd)); ifdstream is (move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); os << src << endl; os.close (); while (!eof (getline (is, l))) { size_t p (l.find_first_not_of (' ')); if (p != string::npos && l.compare (p, 9, "stdlib:=\"") == 0) { p += 9; r = string (l, p, l.size () - p - 1); // One for closing \". break; } } is.close (); } catch (const io_error&) { // Presumably the child process failed. Let run_finish() deal with // that. } if (!run_finish_code (args.data (), pr, l)) r = "none"; if (r.empty ()) fail << "unable to determine " << xl << " standard library"; return r; } // C standard library detection on POSIX (i.e., non-Windows) systems. // Notes: // // - We place platform macro-based checks (__FreeBSD__, __APPLE__, etc) // after library macro-based ones in case a non-default libc is used. // static const char* c_stdlib_src = "#if !defined(__STDC_HOSTED__) || __STDC_HOSTED__ == 1 \n" "# include <stddef.h> /* Forces defining __KLIBC__ for klibc. */ \n" "# include <limits.h> /* Includes features.h for glibc. */ \n" "# include <sys/types.h> /* Includes sys/cdefs.h for bionic. */ \n" " /* Includes sys/features.h for newlib. */ \n" " /* Includes features.h for uclibc. */ \n" "# if defined(__KLIBC__) \n" " stdlib:=\"klibc\" \n" "# elif defined(__BIONIC__) \n" " stdlib:=\"bionic\" \n" "# elif defined(__NEWLIB__) \n" " stdlib:=\"newlib\" \n" "# elif defined(__UCLIBC__) \n" " stdlib:=\"uclibc\" \n" "# elif defined(__dietlibc__) /* Also has to be defined manually by */ \n" " stdlib:=\"dietlibc\" /* or some wrapper. */ \n" "# elif defined(__MUSL__) /* This libc refuses to define __MUSL__ */ \n" " stdlib:=\"musl\" /* so it has to be defined by user. */ \n" "# elif defined(__GLIBC__) /* Check for glibc last since some libc's */ \n" " stdlib:=\"glibc\" /* pretend to be it. */ \n" "# elif defined(__FreeBSD__) \n" " stdlib:=\"freebsd\" \n" "# elif defined(__NetBSD__) \n" " stdlib:=\"netbsd\" \n" "# elif defined(__APPLE__) \n" " stdlib:=\"apple\" \n" "# elif defined(__EMSCRIPTEN__) \n" " stdlib:=\"emscripten\" \n" "# else \n" " stdlib:=\"other\" \n" "# endif \n" "#else \n" " stdlib:=\"none\" \n" "#endif \n"; // Pre-guess the compiler type and optionally variant based on the // compiler executable name and also return the start of that name in the // path (used to derive the toolchain pattern). Return invalid type and // npos if can't make a guess (for example, because the compiler name is a // generic 'c++'). // struct pre_guess_result { compiler_type type; optional<string> variant; size_t pos; }; static inline ostream& operator<< (ostream& os, const pre_guess_result& r) { os << r.type; if (r.variant && !r.variant->empty ()) os << '-' << *r.variant; return os; } static pre_guess_result pre_guess (lang xl, const path& xc, const optional<compiler_id>& xi) { tracer trace ("cc::pre_guess"); // Analyze the last path component only. // const string& s (xc.string ()); size_t s_p (path::traits_type::find_leaf (s)); size_t s_n (s.size ()); using type = compiler_type; // If the user specified the compiler id, then only check the stem for // that compiler. // auto check = [&xi, &s, s_p, s_n] (type t, const char* stem, const char* v = nullptr) -> optional<pre_guess_result> { if (!xi || (xi->type == t && (v == nullptr || xi->variant == v))) { size_t p (find_stem (s, s_p, s_n, stem)); if (p != string::npos) { if (v == nullptr && xi) v = xi->variant.c_str (); return pre_guess_result { t, v != nullptr ? optional<string> (v) : nullopt, p}; } } return nullopt; }; // Warn if the user specified a C compiler instead of C++ or vice versa. // lang o; // Other language. const char* as (nullptr); // Actual stem. const char* es (nullptr); // Expected stem. switch (xl) { case lang::c: { // Try more specific variants first. Keep msvc last since 'cl' is // very generic. // if (auto r = check (type::msvc, "clang-cl", "clang" )) return *r; if (auto r = check (type::clang, "clang" )) return *r; if (auto r = check (type::gcc, "gcc" )) return *r; if (auto r = check (type::icc, "icc" )) return *r; if (auto r = check (type::clang, "emcc", "emscripten")) return *r; if (auto r = check (type::msvc, "cl" )) return *r; if (check (type::clang, as = "clang++")) es = "clang"; else if (check (type::gcc, as = "g++") ) es = "gcc"; else if (check (type::icc, as = "icpc") ) es = "icc"; else if (check (type::clang, as = "em++") ) es = "emcc"; else if (check (type::msvc, as = "c++") ) es = "cc"; o = lang::cxx; break; } case lang::cxx: { // Try more specific variants first. Keep msvc last since 'cl' is // very generic. // if (auto r = check (type::msvc, "clang-cl", "clang" )) return *r; if (auto r = check (type::clang, "clang++" )) return *r; if (auto r = check (type::gcc, "g++" )) return *r; if (auto r = check (type::icc, "icpc" )) return *r; if (auto r = check (type::clang, "em++", "emscripten")) return *r; if (auto r = check (type::msvc, "cl" )) return *r; if (check (type::clang, as = "clang")) es = "clang++"; else if (check (type::gcc, as = "gcc") ) es = "g++"; else if (check (type::icc, as = "icc") ) es = "icpc"; else if (check (type::clang, as = "emcc") ) es = "em++"; else if (check (type::msvc, as = "cc") ) es = "c++"; o = lang::c; break; } } if (es != nullptr) warn << xc << " looks like a " << o << " compiler" << info << "should it be '" << es << "' instead of '" << as << "'?"; // If the user specified the id, then continue as if we pre-guessed. // if (xi) return pre_guess_result {xi->type, xi->variant, string::npos}; l4 ([&]{trace << "unable to guess compiler type of " << xc;}); return pre_guess_result {invalid_compiler_type, nullopt, string::npos}; } // Return the latest MSVC and Platform SDK installation information if // both are discovered on the system and nullopt otherwise. In particular, // don't fail on the underlying COM/OS errors returning nullopt instead. // This way a broken VC setup will be silently ignored. // // Note that Visual Studio versions prior to 15.0 are not supported. // struct msvc_info { dir_path msvc_dir; // VC directory (...\Tools\MSVC\<ver>\). dir_path psdk_dir; // Platfor SDK version (under Include/, Lib/, etc). string psdk_ver; // Platfor SDK directory (...\Windows Kits\<ver>\). }; #if defined(_WIN32) && !defined(BUILD2_BOOTSTRAP) static inline void msvc_info_deleter (void* p) { delete static_cast<msvc_info*> (p); } // We more or less follow the logic in the Clang 'simplementation (see // MSVC.cpp for details) but don't use the high level APIs (bstr_t, // com_ptr_t, etc) and the VC extensions (__uuidof(), class uuid // __declspecs, etc) that are poorly supported by MinGW GCC and Clang. // struct com_deleter { void operator() (IUnknown* p) const {if (p != nullptr) p->Release ();} }; struct bstr_deleter { void operator() (BSTR p) const {if (p != nullptr) SysFreeString (p);} }; // We don't use the __uuidof keyword (see above) and so define the // class/interface ids manually. // static const CLSID msvc_setup_config_clsid { 0x177F0C4A, 0x1CD3, 0x4DE7, {0xA3, 0x2C, 0x71, 0xDB, 0xBB, 0x9F, 0xA3, 0x6D}}; static const IID msvc_setup_config_iid { 0x26AAB78C, 0x4A60, 0x49D6, {0xAF, 0x3B, 0x3C, 0x35, 0xBC, 0x93, 0x36, 0x5D}}; static const IID msvc_setup_helper_iid { 0x42B21B78, 0x6192, 0x463E, {0x87, 0xBF, 0xD5, 0x77, 0x83, 0x8F, 0x1D, 0x5C}}; // If cl is not empty, then find an installation that contains this cl.exe // path. // static optional<msvc_info> find_msvc (const path& cl = path ()) { using namespace butl; msvc_info r; // Try to obtain the MSVC directory. // { // Initialize the COM library for use by the current thread. // if (CoInitializeEx (nullptr /* pvReserved */, COINIT_APARTMENTTHREADED) != S_OK) return nullopt; auto uninitializer (make_guard ([] () {CoUninitialize ();})); // Obtain the VS information retrieval interface. Failed that, assume // there is no VS installed. // unique_ptr<ISetupConfiguration2, com_deleter> sc; { ISetupConfiguration2* p; if (CoCreateInstance (msvc_setup_config_clsid, nullptr /* pUnkOuter */, CLSCTX_ALL, msvc_setup_config_iid, reinterpret_cast<LPVOID*> (&p)) != S_OK) return nullopt; sc.reset (p); } // Obtain the VS instance enumerator interface. // unique_ptr<IEnumSetupInstances, com_deleter> ei; { IEnumSetupInstances* p; if (sc->EnumAllInstances (&p) != S_OK) return nullopt; ei.reset (p); } // If we search for the latest VS then obtain an interface that helps // with the VS version parsing. // unique_ptr<ISetupHelper, com_deleter> sh; if (cl.empty ()) { ISetupHelper* p; if (sc->QueryInterface (msvc_setup_helper_iid, reinterpret_cast<LPVOID*> (&p)) != S_OK) return nullopt; sh.reset (p); } using vs_ptr = unique_ptr<ISetupInstance, com_deleter>; // Return the Visual Studio instance VC directory path or the empty // path on error. // auto vc_dir = [] (const vs_ptr& vs) { // Note: we cannot use bstr_t due to the Clang 9.0 bug #42842. // BSTR p; if (vs->ResolvePath (L"VC", &p) != S_OK) return dir_path (); unique_ptr<wchar_t, bstr_deleter> deleter (p); // Convert BSTR to the NULL-terminated character string and then to // a path. Bail out if anything goes wrong. // dir_path r; try { int n (WideCharToMultiByte (CP_ACP, 0 /* dwFlags */, p, -1, /*cchWideChar */ nullptr /* lpMultiByteStr */, 0 /* cbMultiByte */, 0 /* lpDefaultChar */, 0 /* lpUsedDefaultChar */)); if (n != 0) // Note: must include the terminating NULL character. { vector<char> ps (n); if (WideCharToMultiByte (CP_ACP, 0, p, -1, ps.data (), n, 0, 0) != 0) r = dir_path (ps.data ()); } } catch (const invalid_path&) {} if (r.relative ()) // Also covers the empty directory case. return dir_path (); return r; }; // Iterate over the VS instances and pick the latest or containing // cl.exe, if its path is specified. Bail out if any COM interface // function call fails. // vs_ptr vs; unsigned long long vs_ver (0); // VS version numeric representation. HRESULT hr; for (ISetupInstance* p; (hr = ei->Next (1, &p, nullptr /* pceltFetched */)) == S_OK; ) { vs_ptr i (p); if (!cl.empty ()) // Searching for VS containing cl.exe. { dir_path d (vc_dir (i)); if (d.empty ()) return nullopt; if (cl.sub (d)) { vs = move (i); r.msvc_dir = move (d); // Save not to query repeatedly. break; } } else // Searching for the latest VS. { BSTR iv; // For example, 16.3.29324.140. if (i->GetInstallationVersion (&iv) != S_OK) return nullopt; unique_ptr<wchar_t, bstr_deleter> deleter (iv); assert (sh != nullptr); unsigned long long v; if (sh->ParseVersion (iv, &v) != S_OK) return nullopt; if (vs == nullptr || v > vs_ver) { vs = move (i); vs_ver = v; } } } // Bail out if no VS instance is found or we didn't manage to iterate // through them successfully. // if (vs == nullptr || (hr != S_FALSE && hr != S_OK)) return nullopt; // Note: we may already have the directory (search by cl.exe case). // if (r.msvc_dir.empty ()) { assert (cl.empty ()); r.msvc_dir = vc_dir (vs); if (r.msvc_dir.empty ()) return nullopt; } // Read the VC version from the file and bail out on error. // string vc_ver; // For example, 14.23.28105. path vp ( r.msvc_dir / path ("Auxiliary\\Build\\Microsoft.VCToolsVersion.default.txt")); try { ifdstream is (vp); vc_ver = trim (is.read_text ()); } catch (const io_error&) {} // Make sure that the VC version directory exists. // if (!vc_ver.empty ()) try { ((r.msvc_dir /= "Tools") /= "MSVC") /= vc_ver; if (!dir_exists (r.msvc_dir)) r.msvc_dir.clear (); } catch (const invalid_path&) {} catch (const system_error&) {} if (r.msvc_dir.empty ()) return nullopt; } // Try to obtain the latest Platform SDK directory and version. // { // Read the Platform SDK directory path from the registry. Failed // that, assume there is no Platform SDK installed. // HKEY h; if (RegOpenKeyExA ( HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots", 0 /* ulOptions */, KEY_READ, &h) != ERROR_SUCCESS) return nullopt; DWORD t; // Reserve space for the terminating NULL character. // DWORD n (_MAX_PATH + 1); char buf[_MAX_PATH + 1]; LSTATUS st (RegQueryValueExA (h, "KitsRoot10", nullptr, &t, reinterpret_cast<LPBYTE> (buf), &n)); // Unlikely to fail, but we can't do much if that's the case. // RegCloseKey (h); // Note that the value length includes the terminating NULL character // and so cannot be zero. // if (st != ERROR_SUCCESS || t != REG_SZ || n == 0) return nullopt; try { r.psdk_dir = dir_path (buf); if (r.psdk_dir.relative ()) // Also covers the empty directory case. return nullopt; // Obtain the latest Platform SDK version as the lexicographically // greatest sub-directory name in the <psdk-dir>/Include directory. // for (const dir_entry& de: dir_iterator (r.psdk_dir / dir_path ("Include"), false /* ignore_dangling */)) { if (de.type () == entry_type::directory) { const string& v (de.path ().string ()); if (v.compare (0, 3, "10.") == 0 && v > r.psdk_ver) r.psdk_ver = v; } } } catch (const invalid_path&) {return nullopt;} catch (const system_error&) {return nullopt;} if (r.psdk_ver.empty ()) return nullopt; } return r; } #endif // Guess the compiler type and variant by running it. If the pre argument // is not empty, then only "confirm" the pre-guess. Return empty result if // unable to guess. // // If the compiler has both type and variant signatures (say, like // clang-emscripten), then the variant goes to signature and type goes to // type_signature. Otherwise, type_signature is not used. // struct guess_result { compiler_id id; string signature; string type_signature; string checksum; process_path path; // Optional additional information (for example, msvc_info). // static void null_info_deleter (void* p) { assert (p == nullptr); } using info_ptr = unique_ptr<void, void (*) (void*)>; info_ptr info = {nullptr, null_info_deleter}; guess_result () = default; guess_result (compiler_id i, string&& s, string&& ts = {}) : id (move (i)), signature (move (s)), type_signature (move (ts)) {} bool empty () const {return id.empty ();} }; // Note: allowed to change pre if succeeds. // static guess_result guess (const char* xm, lang xl, const path& xc, const strings& x_mo, const optional<compiler_id>& xi, pre_guess_result& pre, sha256& cs) { tracer trace ("cc::guess"); assert (!xi || (xi->type == pre.type && xi->variant == *pre.variant)); using type = compiler_type; const type invalid = invalid_compiler_type; const type& pt (pre.type); const optional<string>& pv (pre.variant); using info_ptr = guess_result::info_ptr; guess_result r; process_path xp; info_ptr search_info (nullptr, guess_result::null_info_deleter); { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << " to override"; }); // Normally we just search in PATH but in some situations we may need // to fallback to an ad hoc search method. And the tricky question in // this case is what should the recall path be. It's natural to make // it the same as effective (which happens automatically if we use the // fallback directory mechanism of run_search()) so that any command // lines that we print are re-runnable by the user. // // On the other hand, passing the effective path (which would normally // be absolute) to recursive instances of the build system (e.g., when // running tests) will inhibit the ad hoc search which may supply // other parts of the "environment" necessary to use the compiler. The // good example of this is MSVC cl.exe which doesn't have any default // header/library search paths (and which are normally supplied by the // INCLUDE/LIB environment variables or explicitly via the command // line). // // So a sensible strategy here would be to use the effective path if // that's all that's required for the compiler to function (as, for // example, is the case for Clang targeting MSVC) and use the initial // path otherwise, thus triggering the same ad hoc search in any // recursive instances. // // The main drawback of the latter, of course, is that the commands we // print are no longer re-runnable (even though we may have supplied // the rest of the "environment" explicitly on the command line). Plus // we would need to save whatever environment variables we used to // form the fallback path in case of hermetic configuration. // // An alternative strategy is to try and obtain the corresponding // "environment" in case of the effective (absolute) path similar to // how it is done in case of the ad hoc search. // dir_path fb; // Fallback search directory. #ifdef _WIN32 // If we are running in the Visual Studio command prompt, add the // potentially bundled Clang directory as a fallback (for some reason // the Visual Studio prompts don't add it to PATH themselves). // if (xc.simple () && (pt == type::clang || (pt == type::msvc && pv && *pv == "clang"))) { if (optional<string> v = getenv ("VCINSTALLDIR")) { try { fb = ((dir_path (move (*v)) /= "Tools") /= "Llvm") /= "bin"; } catch (const invalid_path&) { // Ignore it. } } } #endif // Only search in PATH (specifically, omitting the current // executable's directory on Windows). // // Note that the process_path instance will be cached (as part of // compiler_info) so init is false. // xp = run_try_search (xc, false /* init */, fb, true /* path_only */); #if defined(_WIN32) && !defined(BUILD2_BOOTSTRAP) // If we pre-guessed MSVC or Clang (including clang-cl) try the search // and if not found, try to locate the MSVC installation and fallback // on that. // if (xp.empty ()) { if (xc.simple () && (pt == type::clang || (pt == type::msvc && (!pv || *pv == "clang")))) { if (optional<msvc_info> mi = find_msvc ()) { try { if (pt == type::msvc && !pv) { // With MSVC you get a compiler binary per target (i.e., // there is nothing like -m32/-m64 or /MACHINE). Targeting // 64-bit seems like as good of a default as any. // fb = ((dir_path (mi->msvc_dir) /= "bin") /= "Hostx64") /= "x64"; search_info = info_ptr ( new msvc_info (move (*mi)), msvc_info_deleter); } else { // Get to ...\VC\Tools\ from ...\VC\Tools\MSVC\<ver>\. // fb = (dir_path (mi->msvc_dir) /= "..") /= ".."; fb.normalize (); (fb /= "Llvm") /= "bin"; // Note that in this case we drop msvc_info and extract it // directly from Clang later. } xp = run_try_search (xc, false, fb, true); } catch (const invalid_path&) { // Ignore it. } } } } else { // We try to find the matching installation only for MSVC (for Clang // we extract this information from the compiler). // if (xc.absolute () && (pt == type::msvc && !pv)) { if (optional<msvc_info> mi = find_msvc (xc)) { search_info = info_ptr ( new msvc_info (move (*mi)), msvc_info_deleter); } } } #endif if (xp.empty ()) run_search_fail (xc); } // Run the compiler with the specified option (-v, --version, etc; can // also be NULL) calling the specified function on each trimmed output // line (see build2::run() for details). // // Note that we suppress all the compiler errors because we may be // trying an unsupported option (but still consider the exit code). // cstrings args {xp.recall_string ()}; append_options (args, x_mo); args.push_back (nullptr); // Placeholder for the option. args.push_back (nullptr); process_env env (xp); // For now let's assume that all the platforms other than Windows // recognize LC_ALL. // #ifndef _WIN32 const char* evars[] = {"LC_ALL=C", nullptr}; env.vars = evars; #endif string cache; auto run = [&cs, &env, &args, &cache] ( const char* o, auto&& f, bool checksum = false) -> guess_result { args[args.size () - 2] = o; cache.clear (); return build2::run<guess_result> ( 3 /* verbosity */, env, args.data (), forward<decltype (f)> (f), false /* error */, false /* ignore_exit */, checksum ? &cs : nullptr); }; // Start with -v. This will cover gcc and clang (including clang-cl and // Emscripten clang). // // While icc also writes what may seem like something we can use to // detect it: // // icpc version 16.0.2 (gcc version 4.9.0 compatibility) // // That first word is actually the executable name. So if we rename // icpc to foocpc, we will get: // // foocpc version 16.0.2 (gcc version 4.9.0 compatibility) // // In fact, if someone renames icpc to g++, there will be no way for // us to detect this. Oh, well, their problem. // if (r.empty () && (pt == invalid || pt == type::gcc || pt == type::clang || (pt == type::msvc && pv && *pv == "clang"))) { auto f = [&xi, &pt, &cache] (string& l, bool last) -> guess_result { if (xi) { //@@ TODO: what about type_signature? Or do we just assume that // the variant version will be specified along with type // version? Do we even have this ability? // The signature line is first in Clang and last in GCC. // return (xi->type != type::gcc || last ? guess_result (*xi, move (l)) : guess_result ()); } size_t p; // The gcc -v output will have a last line in the form: // // "gcc version X.Y[.Z][...] ..." // // The "version" word can probably be translated. For example: // // gcc version 3.4.4 // gcc version 4.2.1 // gcc version 4.8.2 (GCC) // gcc version 4.8.5 (Ubuntu 4.8.5-2ubuntu1~14.04.1) // gcc version 4.9.2 (Ubuntu 4.9.2-0ubuntu1~14.04) // gcc version 5.1.0 (Ubuntu 5.1.0-0ubuntu11~14.04.1) // gcc version 6.0.0 20160131 (experimental) (GCC) // gcc version 9.3-win32 20200320 (GCC) // if (cache.empty ()) { if (last && l.compare (0, 4, "gcc ") == 0) return guess_result (compiler_id {type::gcc, ""}, move (l)); } // The Apple clang -v output will have a line (currently first) in // the form: // // "Apple (LLVM|clang) version X.Y.Z ..." // // Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn) // Apple clang version 4.0 (tags/Apple/clang-421.0.60) (based on LLVM 3.1svn) // Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn) // Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn) // Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn) // Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn) // Apple LLVM version 6.0 (clang-600.0.57) (based on LLVM 3.5svn) // Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn) // Apple LLVM version 7.0.0 (clang-700.0.53) // Apple LLVM version 7.0.0 (clang-700.1.76) // Apple LLVM version 7.0.2 (clang-700.1.81) // Apple LLVM version 7.3.0 (clang-703.0.16.1) // Apple clang version 12.0.0 (clang-1200.0.32.27) // // Note that the gcc/g++ "aliases" for clang/clang++ also include // this line but it is (currently) preceded by "Configured with: // ...". // // Check for Apple clang before the vanilla one since the above line // also includes "clang". // if (cache.empty ()) { if (l.compare (0, 6, "Apple ") == 0 && (l.compare (6, 5, "LLVM ") == 0 || l.compare (6, 6, "clang ") == 0)) return guess_result (compiler_id {type::clang, "apple"}, move (l)); } // Emscripten emcc -v prints its own version and the clang version, // for example: // // emcc (...) 2.0.8 // clang version 12.0.0 (...) // // The order, however is not guaranteed (see Emscripten issue // #12654). So things are going to get hairy. // if (l.compare (0, 5, "emcc ") == 0) { if (cache.empty ()) { // Cache the emcc line and continue in order to get the clang // line. // cache = move (l); return guess_result (); } else if (cache.find ("clang ") != string::npos) { return guess_result (compiler_id {type::clang, "emscripten"}, move (l), move (cache)); } } // The vanilla clang -v output will have a first line in the form: // // "[... ]clang version X.Y.Z[-...] ..." // // The "version" word can probably be translated. For example: // // FreeBSD clang version 3.4.1 (tags/RELEASE_34/dot1-final 208032) 20140512 // Ubuntu clang version 3.5.0-4ubuntu2~trusty2 (tags/RELEASE_350/final) (based on LLVM 3.5.0) // Ubuntu clang version 3.6.0-2ubuntu1~trusty1 (tags/RELEASE_360/final) (based on LLVM 3.6.0) // clang version 3.7.0 (tags/RELEASE_370/final) // // The clang-cl output is exactly the same, which means the only way // to distinguish it is based on the executable name. // // We must also watch out for potential misdetections, for example: // // Configured with: ../gcc/configure CC=clang CXX=clang++ ... // if ((p = l.find ("clang ")) != string::npos && (p == 0 || l[p - 1] == ' ')) { if (cache.empty ()) { // Cache the clang line and continue in order to get the variant // line, if any. // cache = move (l); return guess_result (); } else if (cache.compare (0, 5, "emcc ") == 0) { return guess_result (compiler_id {type::clang, "emscripten"}, move (cache), move (l)); } } if (last) { if (cache.find ("clang ") != string::npos) { return guess_result (pt == type::msvc ? compiler_id {type::msvc, "clang"} : compiler_id {type::clang, ""}, move (cache)); } } return guess_result (); }; // The -v output contains other information (such as the compiler // build configuration for gcc or the selected gcc installation for // clang) which makes sense to include into the compiler checksum. So // ask run() to calculate it for every line of the -v ouput. // r = run ("-v", f, true /* checksum */); if (r.empty ()) { if (xi) { // Fallback to --version below in case this GCC/Clang-like // compiler doesn't support -v. // //fail << "unable to obtain " << xc << " signature with -v"; } cs.reset (); } else { // If this is clang-apple and pre-guess was gcc then change it so // that we don't issue any warnings. // if (r.id.type == type::clang && r.id.variant == "apple" && pt == type::gcc) { pre.type = type::clang; pre.variant = "apple"; } } } // Next try --version to detect icc. As well as obtain signature for // GCC/Clang-like compilers in case -v above didn't work. // if (r.empty () && (pt == invalid || pt == type::icc || pt == type::gcc || pt == type::clang)) { auto f = [&xi] (string& l, bool) -> guess_result { // Assume the first line is the signature. // if (xi) return guess_result (*xi, move (l)); // The first line has the " (ICC) " in it, for example: // // icpc (ICC) 9.0 20060120 // icpc (ICC) 11.1 20100414 // icpc (ICC) 12.1.0 20110811 // icpc (ICC) 14.0.0 20130728 // icpc (ICC) 15.0.2 20150121 // icpc (ICC) 16.0.2 20160204 // icc (ICC) 16.0.2 20160204 // if (l.find (" (ICC) ") != string::npos) return guess_result (compiler_id {type::icc, ""}, move (l)); return guess_result (); }; r = run ("--version", f); if (r.empty ()) { if (xi) fail << "unable to obtain " << xc << " signature with --version"; } } // Finally try to run it without any options to detect msvc. // if (r.empty () && (pt == invalid || pt == type::msvc)) { auto f = [&xi] (string& l, bool) -> guess_result { // Assume the first line is the signature. // if (xi) return guess_result (*xi, move (l)); // Check for "Microsoft (R)" and "C/C++" in the first line as a // signature since all other words/positions can be translated. For // example: // // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.6030 for 80x86 // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 14.00.50727.762 for 80x86 // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86 // Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64 // Microsoft (R) C/C++ Optimizing Compiler Version 17.00.50727.1 for x86 // Microsoft (R) C/C++ Optimizing Compiler Version 18.00.21005.1 for x86 // Microsoft (R) C/C++ Optimizing Compiler Version 19.00.23026 for x86 // Microsoft (R) C/C++ Optimizing Compiler Version 19.10.24629 for x86 // // In the recent versions the architecture is either "x86", "x64", // or "ARM". // if (l.find ("Microsoft (R)") != string::npos && l.find ("C/C++") != string::npos) return guess_result (compiler_id {type::msvc, ""}, move (l)); return guess_result (); }; // One can pass extra options/arguments to cl.exe with the CL and _CL_ // environment variables. However, if such extra options are passed // without anything to compile, then cl.exe no longer prints usage and // exits successfully but instead issues an error and fails. So we are // going to unset these variables for our test (interestingly, only CL // seem to cause the problem but let's unset both, for good measure). // // This is also the reason why we don't pass the mode options. // const char* evars[] = {"CL=", "_CL_=", nullptr}; r = build2::run<guess_result> (3, process_env (xp, evars), f, false); if (r.empty ()) { if (xi) fail << "unable to obtain " << xc << " signature"; } } if (!r.empty ()) { if (pt != invalid && (pt != r.id.type || (pv && *pv != r.id.variant))) { l4 ([&]{trace << "compiler type guess mismatch" << ", pre-guessed " << pre << ", determined " << r.id;}); r = guess_result (); } else { l5 ([&]{trace << xc << " is " << r.id << ": '" << r.signature << "'";}); r.path = move (xp); if (search_info != nullptr && r.info == nullptr) r.info = move (search_info); } } else l4 ([&]{trace << "unable to determine compiler type of " << xc;}); // Warn if the absolute compiler path looks like a ccache wrapper. // // The problem with ccache is that it pretends to be real GCC (i.e., // it's --version output is indistinguishable from real GCC's) but does // not handle all valid GCC modes, in particular -fdirectives-only. As a // poor man's solution we check if the absolute compiler path contains // any mentioning of ccache (for example, /usr/lib64/ccache/g++ on // Fedora). // if (!r.empty ()) { if (r.id.type == compiler_type::gcc || r.id.type == compiler_type::clang) { if (strstr (r.path.effect_string (), "ccache") != nullptr) warn << r.path << " looks like a ccache wrapper" << info << "ccache cannot be used as a " << xl << " compiler" << info << "use config." << xm << " to override"; } } return r; } // Try to derive the toolchain pattern. // // The s argument is the stem to look for in the leaf of the path. The ls // and rs arguments are the left/right separator characters. If either is // NULL, then the stem should be the prefix/suffix of the leaf, // respectively. Note that a path that is equal to stem is not considered // a pattern. // // Note that the default right separator includes digits to handle cases // like clang++37 (FreeBSD). // static string pattern (const path& xc, const char* s, const char* ls = "-_.", const char* rs = "-_.0123456789") { string r; size_t sn (strlen (s)); if (xc.size () > sn) { string l (xc.leaf ().string ()); size_t ln (l.size ()); size_t b; if (ln >= sn && (b = l.find (s)) != string::npos) { // Check left separators. // if (b == 0 || (ls != nullptr && strchr (ls, l[b - 1]) != nullptr)) { // Check right separators. // size_t e (b + sn); if (e == ln || (rs != nullptr && strchr (rs, l[e]) != nullptr)) { l.replace (b, sn, "*", 1); path p (xc.directory ()); p /= l; r = move (p).string (); } } } } return r; } static compiler_version msvc_compiler_version (string v) { compiler_version r; // Split the version into components. // size_t b (0), e (b); auto next = [&v, &b, &e] (const char* m) -> uint64_t { try { if (next_word (v, b, e, '.')) return stoull (string (v, b, e - b)); } catch (const invalid_argument&) {} catch (const out_of_range&) {} fail << "unable to extract MSVC " << m << " version from '" << v << "'" << endf; }; r.major = next ("major"); r.minor = next ("minor"); r.patch = next ("patch"); if (next_word (v, b, e, '.')) r.build.assign (v, b, e - b); r.string = move (v); return r; } static string msvc_runtime_version (const compiler_version& v) { // Mapping of compiler versions to runtime versions: // // Note that VC 15 has runtime version 14.1 but the DLLs are still // called *140.dll (they are said to be backwards-compatible). // // And VC 16 seems to have the runtime version 14.1 (and not 14.2, as // one might expect; DLLs are still *140.dll but there are now _1 and _2 // variants for, say, msvcp140.dll). We will, however, call it 14.2 // (which is the version of the "toolset") in our target triplet. // // year ver cl crt/dll toolset // // 2019 16.X 19.2X 14.2/140 14.2X // 2017 15.9 19.16 14.1/140 14.16 // 2017 15.8 19.15 14.1/140 // 2017 15.7 19.14 14.1/140 // 2017 15.6 19.13 14.1/140 // 2017 15.5 19.12 14.1/140 // 2017 15.3 19.11 14.1/140 // 2017 15 19.10 14.1/140 // 2015 14 19.00 14.0/140 // 2013 12 18.00 12.0/120 // 2012 11 17.00 11.0/110 // 2010 10 16.00 10.0/100 // 2008 9 15.00 9.0/90 // 2005 8 14.00 8.0/80 // 2003 7.1 13.10 7.1/71 // // _MSC_VER is the numeric cl version, e.g., 1921 for 19.21. // /**/ if (v.major == 19 && v.minor >= 20) return "14.2"; else if (v.major == 19 && v.minor >= 10) return "14.1"; else if (v.major == 19 && v.minor == 0) return "14.0"; else if (v.major == 18 && v.minor == 0) return "12.0"; else if (v.major == 17 && v.minor == 0) return "11.0"; else if (v.major == 16 && v.minor == 0) return "10.0"; else if (v.major == 15 && v.minor == 0) return "9.0"; else if (v.major == 14 && v.minor == 0) return "8.0"; else if (v.major == 13 && v.minor == 10) return "7.1"; fail << "unable to map MSVC compiler version '" << v.string << "' to runtime version" << endf; } void msvc_extract_header_search_dirs (const strings&, dir_paths&); // msvc.cxx void msvc_extract_library_search_dirs (const strings&, dir_paths&); // msvc.cxx // Return the MSVC system header search paths (i.e., what the Visual // Studio command prompt puts into INCLUDE) including any paths from the // compiler mode and their count. // // Note that currently we don't add any ATL/MFC or WinRT paths (but could // do that probably first checking if they exist/empty). // static pair<dir_paths, size_t> msvc_hdr (const msvc_info& mi, const strings& mo) { dir_paths r; // Extract /I paths and similar from the compiler mode. // msvc_extract_header_search_dirs (mo, r); size_t rn (r.size ()); r.push_back (dir_path (mi.msvc_dir) /= "include"); // This path structure only appeared in Platform SDK 10 (if anyone wants // to use anything older, they will just have to use the MSVC command // prompt). // if (!mi.psdk_ver.empty ()) { dir_path d ((dir_path (mi.psdk_dir) /= "Include") /= mi.psdk_ver); r.push_back (dir_path (d) /= "ucrt" ); r.push_back (dir_path (d) /= "shared"); r.push_back (dir_path (d) /= "um" ); } return make_pair (move (r), rn); } // Return the MSVC system module search paths (i.e., what the Visual // Studio command prompt puts into IFCPATH) including any paths from the // compiler mode and their count. // static pair<dir_paths, size_t> msvc_mod (const msvc_info& mi, const strings&, const char* cpu) { //@@ TODO: mode. dir_paths r; r.push_back ((dir_path (mi.msvc_dir) /= "ifc") /= cpu); return make_pair (move (r), size_t (0)); } // Return the MSVC system library search paths (i.e., what the Visual // Studio command prompt puts into LIB) including any paths from the // compiler mode and their count. // static pair<dir_paths, size_t> msvc_lib (const msvc_info& mi, const strings& mo, const char* cpu) { dir_paths r; // Extract /LIBPATH paths from the compiler mode. // msvc_extract_library_search_dirs (mo, r); size_t rn (r.size ()); r.push_back ((dir_path (mi.msvc_dir) /= "lib") /= cpu); // This path structure only appeared in Platform SDK 10 (if anyone wants // to use anything older, they will just have to use the MSVC command // prompt). // if (!mi.psdk_ver.empty ()) { dir_path d ((dir_path (mi.psdk_dir) /= "Lib") /= mi.psdk_ver); r.push_back ((dir_path (d) /= "ucrt") /= cpu); r.push_back ((dir_path (d) /= "um" ) /= cpu); } return make_pair (move (r), rn); } // Return the MSVC binutils search paths (i.e., what the Visual Studio // command prompt puts into PATH). // static string msvc_bin (const msvc_info& mi, const char* cpu) { string r; // Seeing that we only do 64-bit on Windows, let's always use 64-bit // MSVC tools (link.exe, etc). In case of the Platform SDK, it's unclear // what the CPU signifies (host, target, both). // r = (((dir_path (mi.msvc_dir) /= "bin") /= "Hostx64") /= cpu). representation (); r += path::traits_type::path_separator; r += (((dir_path (mi.psdk_dir) /= "bin") /= mi.psdk_ver) /= cpu). representation (); return r; } const char* msvc_cpu (const string&); // msvc.cxx // Note that LIB, LINK, and _LINK_ are technically link.exe's variables // but we include them in case linking is done via the compiler without // loading bin.ld. BTW, the same applies to rc.exe INCLUDE. // // See also the note on environment and caching below if adding any new // variables. // static const char* msvc_env[] = {"INCLUDE", "IFCPATH", "CL", "_CL_", "LIB", "LINK", "_LINK_", nullptr}; static compiler_info guess_msvc (const char* xm, lang xl, const path& xc, const string* xv, const string* xt, const strings& x_mo, const strings*, const strings*, const strings*, const strings*, const strings*, const strings*, guess_result&& gr, sha256&) { // Extract the version. The signature line has the following format // though language words can be translated and even rearranged (see // examples above). // // "Microsoft (R) C/C++ Optimizing Compiler Version A.B.C[.D] for CPU" // // The CPU keywords (based on the above samples) appear to be: // // "80x86" // "x86" // "x64" // "ARM" // "ARM64" // compiler_version ver; { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".version to override"; }); // Treat the custom version as just a tail of the signature. // const string& s (xv == nullptr ? gr.signature : *xv); // Some overrides for testing. // //string s; //s = "Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.30729.01 for 80x86"; //s = "Compilador de optimizacion de C/C++ de Microsoft (R) version 16.00.30319.01 para x64"; //s = "Compilateur d'optimisation Microsoft (R) C/C++ version 19.16.27026.1 pour x64"; // Scan the string as words and look for the version. // size_t b (0), e (0); while (next_word (s, b, e, ' ', ',')) { // The third argument to find_first_not_of() is the length of the // first argument, not the length of the interval to check. So to // limit it to [b, e) we are also going to compare the result to the // end of the word position (first space). In fact, we can just // check if it is >= e. // if (s.find_first_not_of ("1234567890.", b, 11) >= e) break; } if (b == e) fail << "unable to extract MSVC version from '" << s << "'"; ver = msvc_compiler_version (string (s, b, e - b)); } // Figure out the target architecture. // string t, ot; if (xt == nullptr) { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".target to override"; }); const string& s (gr.signature); // Scan the string as words and look for the CPU. // string cpu; for (size_t b (0), e (0), n; (n = next_word (s, b, e, ' ', ',')) != 0; ) { if (s.compare (b, n, "x64", 3) == 0 || s.compare (b, n, "x86", 3) == 0 || s.compare (b, n, "ARM64", 5) == 0 || s.compare (b, n, "ARM", 3) == 0 || s.compare (b, n, "80x86", 5) == 0) { cpu.assign (s, b, n); break; } } if (cpu.empty ()) fail << "unable to extract MSVC target CPU from " << "'" << s << "'"; // Now we need to map x86, x64, ARM, and ARM64 to the target // triplets. The problem is, there aren't any established ones so we // got to invent them ourselves. Based on the discussion in // <libbutl/target-triplet.hxx>, we need something in the // CPU-VENDOR-OS-ABI form. // // The CPU part is fairly straightforward with x86 mapped to 'i386' // (or maybe 'i686'), x64 to 'x86_64', ARM to 'arm' (it could also // include the version, e.g., 'amrv8'), and ARM64 to 'aarch64'. // // The (toolchain) VENDOR is also straightforward: 'microsoft'. Why // not omit it? Two reasons: firstly, there are other compilers with // the otherwise same target, for example Intel C/C++, and it could be // useful to distinguish between them. Secondly, by having all four // components we remove any parsing ambiguity. // // OS-ABI is where things are not as clear cut. The OS part shouldn't // probably be just 'windows' since we have Win32 and WinCE. And // WinRT. And Universal Windows Platform (UWP). So perhaps the // following values for OS: 'win32', 'wince', 'winrt', 'winup'. // // For 'win32' the ABI part could signal the Microsoft C/C++ runtime // by calling it 'msvc'. And seeing that the runtimes are incompatible // from version to version, we should probably add the 'X.Y' version // at the end (so we essentially mimic the DLL name, for example, // msvcr120.dll). Some suggested we also encode the runtime type // (those pesky /M* options) though I am not sure: the only // "redistributable" runtime is multi-threaded release DLL. // // The ABI part for the other OS values needs thinking. For 'winrt' // and 'winup' it probably makes sense to encode the WINAPI_FAMILY // macro value (perhaps also with the version). Some of its values: // // WINAPI_FAMILY_APP Windows 10 // WINAPI_FAMILY_PC_APP Windows 8.1 // WINAPI_FAMILY_PHONE_APP Windows Phone 8.1 // // For 'wince' we may also want to add the OS version, for example, // 'wince4.2'. // // Putting it all together, Visual Studio 2015 will then have the // following target triplets: // // x86 i386-microsoft-win32-msvc14.0 // x64 x86_64-microsoft-win32-msvc14.0 // ARM arm-microsoft-winup-??? // ARM64 aarch64-microsoft-win32-msvc14.0 // if (cpu == "ARM") fail << "cl.exe ARM/WinRT/UWP target is not yet supported"; else { if (cpu == "x64") t = "x86_64-microsoft-win32-msvc"; else if (cpu == "x86" || cpu == "80x86") t = "i386-microsoft-win32-msvc"; else if (cpu == "ARM64") t = "aarch64-microsoft-win32-msvc"; else assert (false); t += msvc_runtime_version (ver); } ot = t; } else ot = t = *xt; target_triplet tt (t); // Shouldn't fail. // If we have the MSVC installation information, then this means we are // running out of the Visual Studio command prompt and will have to // supply PATH/INCLUDE/LIB/IFCPATH equivalents ourselves. // optional<pair<dir_paths, size_t>> lib_dirs; optional<pair<dir_paths, size_t>> hdr_dirs; optional<pair<dir_paths, size_t>> mod_dirs; string bpat; if (const msvc_info* mi = static_cast<msvc_info*> (gr.info.get ())) { const char* cpu (msvc_cpu (tt.cpu)); lib_dirs = msvc_lib (*mi, x_mo, cpu); hdr_dirs = msvc_hdr (*mi, x_mo); mod_dirs = msvc_mod (*mi, x_mo, cpu); bpat = msvc_bin (*mi, cpu); } // Derive the toolchain pattern. // // If the compiler name is/starts with 'cl' (e.g., cl.exe, cl-14), // then replace it with '*' and use it as a pattern for lib, link, // etc. // string cpat (pattern (xc, "cl", nullptr, ".-")); if (bpat.empty ()) bpat = cpat; // Binutils pattern is the same as toolchain. // Runtime and standard library. // string rt ("msvc"); string csl ("msvc"); string xsl; switch (xl) { case lang::c: xsl = csl; break; case lang::cxx: xsl = "msvcp"; break; } return compiler_info { move (gr.path), move (gr.id), compiler_class::msvc, move (ver), nullopt, move (gr.signature), "", move (t), move (ot), move (cpat), move (bpat), move (rt), move (csl), move (xsl), move (lib_dirs), move (hdr_dirs), move (mod_dirs), msvc_env, nullptr}; } // See "Environment Variables Affecting GCC". // // Note that we also check below that the following variables are not set // since they would interfere with what we are doing. // // DEPENDENCIES_OUTPUT // SUNPRO_DEPENDENCIES // // Note also that we include (some) linker's variables in case linking is // done via the compiler without loading bin.ld (to do this precisely we // would need to detect which linker is being used at which point we might // as well load bin.ld). // // See also the note on environment and caching below if adding any new // variables. // static const char* gcc_c_env[] = { "CPATH", "C_INCLUDE_PATH", "LIBRARY_PATH", "LD_RUN_PATH", "SOURCE_DATE_EPOCH", "GCC_EXEC_PREFIX", "COMPILER_PATH", nullptr}; static const char* gcc_cxx_env[] = { "CPATH", "CPLUS_INCLUDE_PATH", "LIBRARY_PATH", "LD_RUN_PATH", "SOURCE_DATE_EPOCH", "GCC_EXEC_PREFIX", "COMPILER_PATH", nullptr}; // Note that Clang recognizes a whole family of *_DEPLOYMENT_TARGET // variables (as does ld64). // static const char* macos_env[] = { "SDKROOT", "MACOSX_DEPLOYMENT_TARGET", nullptr}; static compiler_info guess_gcc (const char* xm, lang xl, const path& xc, const string* xv, const string* xt, const strings& x_mo, const strings* c_po, const strings* x_po, const strings* c_co, const strings* x_co, const strings*, const strings*, guess_result&& gr, sha256&) { tracer trace ("cc::guess_gcc"); const process_path& xp (gr.path); // Extract the version. The signature line has the following format // though language words can be translated and even rearranged (see // examples above). // // "gcc version X.Y[.Z][...]" // compiler_version ver; { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".version to override"; }); // Treat the custom version as just a tail of the signature. // const string& s (xv == nullptr ? gr.signature : *xv); // Scan the string as words and look for one that looks like a // version. // size_t b (0), e (0); while (next_word (s, b, e)) { // The third argument to find_first_not_of() is the length of the // first argument, not the length of the interval to check. So to // limit it to [b, e) we are also going to compare the result to the // end of the word position (first space). In fact, we can just // check if it is >= e. // size_t p (s.find_first_not_of ("1234567890.", b, 11)); if (p >= e || (p > b && (s[p] == '-' || s[p] == '+'))) break; } if (b == e) fail << "unable to extract GCC version from '" << s << "'"; // Split the version into components by parsing it as semantic-like // version. // try { semantic_version v (string (s, b, e - b), ".-+"); ver.major = v.major; ver.minor = v.minor; ver.patch = v.patch; ver.build = move (v.build); } catch (const invalid_argument& e) { fail << "unable to extract GCC version from '" << s << "': " << e; } ver.string.assign (s, b, string::npos); } // Figure out the target architecture. This is actually a lot trickier // than one would have hoped. // // There is the -dumpmachine option but gcc doesn't adjust it per the // compile options (e.g., -m32). However, starting with 4.6 it has the // -print-multiarch option which gives (almost) the right answer. The // "almost" part has to do with it not honoring the -arch option (which // is really what this compiler is building for). To get to that, we // would have to resort to a hack like this: // // gcc -v -E - 2>&1 | grep cc1 // .../cc1 ... -mtune=generic -march=x86-64 // // Also, -print-multiarch will print am empty line if the compiler // actually wasn't built with multi-arch support. // // So for now this is what we are going to do for the time being: First // try -print-multiarch. If that works out (recent gcc configure with // multi-arch support), then use the result. Otherwise, fallback to // -dumpmachine (older gcc or not multi-arch). // string t, ot; if (xt == nullptr) { cstrings args {xp.recall_string ()}; if (c_co != nullptr) append_options (args, *c_co); if (x_co != nullptr) append_options (args, *x_co); append_options (args, x_mo); args.push_back ("-print-multiarch"); // Note: position relied upon. args.push_back (nullptr); // The output of both -print-multiarch and -dumpmachine is a single // line containing just the target triplet. We don't expect any // localization so no need for LC_ALL. // auto f = [] (string& l, bool) {return move (l);}; t = run<string> (3, xp, args.data (), f, false); if (t.empty ()) { l5 ([&]{trace << xc << " doesn's support -print-multiarch, " << "falling back to -dumpmachine";}); args[args.size () - 2] = "-dumpmachine"; t = run<string> (3, xp, args.data (), f, false); } if (t.empty ()) fail << "unable to extract target architecture from " << xc << " using -print-multiarch or -dumpmachine output" << info << "use config." << xm << ".target to override"; ot = t; } else ot = t = *xt; // Parse the target into triplet (for further tests) ignoring any // failures. // target_triplet tt; try {tt = target_triplet (t);} catch (const invalid_argument&) {} // Derive the toolchain pattern. Try cc/c++ as a fallback. // string pat (pattern (xc, xl == lang::c ? "gcc" : "g++")); if (pat.empty ()) pat = pattern (xc, xl == lang::c ? "cc" : "c++"); // Runtime and standard library. // // GCC always uses libgcc (even on MinGW). Even with -nostdlib GCC's // documentation says that you should usually specify -lgcc. // string rt ("libgcc"); string csl ( tt.system == "mingw32" ? "msvc" : stdlib (xl, xp, x_mo, c_po, x_po, c_co, x_co, c_stdlib_src)); string xsl; switch (xl) { case lang::c: xsl = csl; break; case lang::cxx: { // While GCC only supports it's own C++ standard library (libstdc++) // we still run the test to detect the "none" case (-nostdinc++). // const char* src = "#include <bits/c++config.h> \n" "stdlib:=\"libstdc++\" \n"; xsl = stdlib (xl, xp, x_mo, c_po, x_po, c_co, x_co, src); break; } } // Environment. // if (getenv ("DEPENDENCIES_OUTPUT")) fail << "GCC DEPENDENCIES_OUTPUT environment variable is set"; if (getenv ("SUNPRO_DEPENDENCIES")) fail << "GCC SUNPRO_DEPENDENCIES environment variable is set"; const char* const* c_env (nullptr); switch (xl) { case lang::c: c_env = gcc_c_env; break; case lang::cxx: c_env = gcc_cxx_env; break; } const char* const* p_env (tt.system == "darwin" ? macos_env : nullptr); return compiler_info { move (gr.path), move (gr.id), compiler_class::gcc, move (ver), nullopt, move (gr.signature), move (gr.checksum), // Calculated on whole -v output. move (t), move (ot), move (pat), "", move (rt), move (csl), move (xsl), nullopt, nullopt, nullopt, c_env, p_env}; } struct clang_msvc_info: msvc_info { string triple; // cc1 -triple value string msvc_ver; // Compiler version from triple. string msvc_comp_ver; // cc1 -fms-compatibility-version value }; static clang_msvc_info guess_clang_msvc (lang xl, const process_path& xp, const strings& x_mo, const strings* c_co, const strings* x_co, bool cl) { tracer trace ("cc::guess_clang_msvc"); cstrings args {xp.recall_string ()}; if (c_co != nullptr) append_options (args, *c_co); if (x_co != nullptr) append_options (args, *x_co); append_options (args, x_mo); if (cl) { switch (xl) { case lang::c: args.push_back ("/TC"); break; case lang::cxx: args.push_back ("/TP"); break; } } else { args.push_back ("-x"); switch (xl) { case lang::c: args.push_back ("c"); break; case lang::cxx: args.push_back ("c++"); break; } } args.push_back ("-v"); args.push_back ("-E"); args.push_back ("-"); // Read stdin. args.push_back (nullptr); // The diagnostics we are interested in goes to stderr but we also get a // few lines of the preprocessed boilerplate at the end. // process pr (run_start (3 /* verbosity */, xp, args, -2 /* stdin (/dev/null) */, -1 /* stdout */, false /* error (2>&1) */)); clang_msvc_info r; string l; try { // The overall structure of the output is as follows (with some // fragments that we are not interested in replaced with `...`): // // clang version 9.0.0 (tags/RELEASE_900/final) // ... // ... // InstalledDir: C:\Program Files\LLVM\bin // "C:\\Program Files\\LLVM\\bin\\clang++.exe" -cc1 -triple x86_64-pc-windows-msvc19.23.28105 -fms-compatibility-version=19.23.28105 ..." // clang -cc1 version 9.0.0 based upon LLVM 9.0.0 default target x86_64-pc-windows-msvc // #include "..." search starts here: // #include <...> search starts here: // C:\Program Files\LLVM\lib\clang\9.0.0\include // C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.23.28105\include // C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt // ... // End of search list. // ... // ... // // Notice also that the version in the target triple and in the // ...VC\Tools\MSVC\ subdirectory are not exactly the same (and "how // the same" the are guaranteed to be is anyone's guess). // ifdstream is (move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); for (bool in_include (false); !eof (getline (is, l)); ) { l6 ([&]{trace << "examining line '" << l << "'";}); if (r.triple.empty ()) { size_t b, e; if ((b = l.find ("-triple ")) != string::npos && (e = l.find (' ', b += 8)) != string::npos) { r.triple.assign (l, b, e - b); if ((b = r.triple.find ("-msvc")) == string::npos) fail << "no MSVC version in Clang target " << r.triple; r.msvc_ver.assign (r.triple, b += 5, string::npos); if ((b = l.find ("-fms-compatibility-version=")) != string::npos && (e = l.find (' ', b += 27)) != string::npos) { r.msvc_comp_ver.assign (l, b, e - b); } else r.msvc_comp_ver = r.msvc_ver; l5 ([&]{trace << "MSVC target " << r.triple << ", version " << r.msvc_ver << ", compatibility version " << r.msvc_comp_ver;}); } continue; } // Note: similar logic to gcc_header_search_paths(). // if (!in_include) in_include = l.find ("#include <...>") != string::npos; else { if (l[0] != ' ') // End of header search paths. break; try { dir_path d (move (trim (l))); l6 ([&]{trace << "examining directory " << d;}); auto b (d.begin ()), e (d.end ()); if (r.msvc_dir.empty ()) { // Look for the "Tools\MSVC\<ver>\include" component sequence. // auto i (find_if (b, e, [] (const string& n) { return icasecmp (n, "Tools") == 0; })); if (i != e && (++i != e && icasecmp (*i, "MSVC") == 0) && (++i != e ) && (++i != e && icasecmp (*i, "include") == 0)) { r.msvc_dir = dir_path (b, i); l5 ([&]{trace << "MSVC directory " << r.msvc_dir;}); } } if (r.psdk_dir.empty ()) { // Look for the "Windows Kits\<ver>\Include" component // sequence. // // Note that the path structure differs between 10 and pre-10 // versions: // // ...\Windows Kits\10\Include\10.0.18362.0\... // ...\Windows Kits\8.1\Include\... // auto i (find_if (b, e, [] (const string& n) { return icasecmp (n, "Windows Kits") == 0; })), j (i); if (i != e && (++i != e ) && (++i != e && icasecmp (*i, "Include") == 0)) { r.psdk_dir = dir_path (b, i); if (*++j == "10" && ++i != e) r.psdk_ver = *i; l5 ([&]{trace << "Platform SDK directory " << r.psdk_dir << ", version '" << r.psdk_ver << "'";}); } } } catch (const invalid_path&) { // Skip this path. } if (!r.msvc_dir.empty () && !r.psdk_dir.empty ()) break; } } is.close (); } catch (const io_error&) { // Presumably the child process failed. Let run_finish() deal with // that. } if (!run_finish_code (args.data (), pr, l)) fail << "unable to extract MSVC information from " << xp; if (const char* w = ( r.triple.empty () ? "MSVC target" : r.msvc_ver.empty () ? "MSVC version" : r.msvc_comp_ver.empty () ? "MSVC compatibility version" : r.msvc_dir.empty () ? "MSVC directory" : r.psdk_dir.empty () ? "Platform SDK directory": nullptr)) fail << "unable to extract " << w << " from " << xp; return r; } // These are derived from gcc_* plus the sparse documentation (clang(1)) // and source code. // // See also the note on environment and caching below if adding any new // variables. // static const char* clang_c_env[] = { "CPATH", "C_INCLUDE_PATH", "LIBRARY_PATH", "LD_RUN_PATH", "COMPILER_PATH", nullptr}; static const char* clang_cxx_env[] = { "CPATH", "CPLUS_INCLUDE_PATH", "LIBRARY_PATH", "LD_RUN_PATH", "COMPILER_PATH", nullptr}; static compiler_info guess_clang (const char* xm, lang xl, const path& xc, const string* xv, const string* xt, const strings& x_mo, const strings* c_po, const strings* x_po, const strings* c_co, const strings* x_co, const strings* c_lo, const strings* x_lo, guess_result&& gr, sha256& cs) { // This function handles vanilla Clang, including its clang-cl variant, // as well as Apple and Emscripten variants. // // The clang-cl variant appears to be a very thin wrapper over the // standard clang/clang++ drivers. In addition to the cl options, it // mostly accepts standard Clang options with a few exceptions (notably // -x). It also has /clang:<arg> to pass things down to the driver // (which for some reason doesn't work for -x). // bool cl (gr.id.type == compiler_type::msvc); bool apple (gr.id.variant == "apple"); bool emscr (gr.id.variant == "emscripten"); const process_path& xp (gr.path); // Extract the version. Here we will try to handle both vanilla and // Apple Clang since the signature lines are fairly similar. They have // the following format though language words can probably be translated // and even rearranged (see examples above). // // "[... ]clang version A.B.C[( |-)...]" // "Apple (clang|LLVM) version A.B[.C] ..." // // We will also reuse this code to parse the Emscripten version which // is quite similar: // // emcc (...) 2.0.8 // auto extract_version = [] (const string& s, bool patch, const char* what) -> compiler_version { compiler_version ver; size_t b (0), e (0); while (next_word (s, b, e, ' ', '-')) { // The third argument to find_first_not_of() is the length of the // first argument, not the length of the interval to check. So to // limit it to [b, e) we are also going to compare the result to the // end of the word position (first space). In fact, we can just // check if it is >= e. // if (s.find_first_not_of ("1234567890.", b, 11) >= e) break; } if (b == e) fail << "unable to extract " << what << " version from '" << s << "'" << endf; ver.string.assign (s, b, string::npos); // Split the version into components. // size_t vb (b), ve (b); auto next = [&s, what, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t { try { if (next_word (s, e, vb, ve, '.')) return stoull (string (s, vb, ve - vb)); if (opt) return 0; } catch (const invalid_argument&) {} catch (const out_of_range&) {} fail << "unable to extract " << what << ' ' << m << " version from '" << string (s, b, e - b) << "'" << endf; }; ver.major = next ("major", false); ver.minor = next ("minor", false); ver.patch = next ("patch", patch); if (e != s.size ()) ver.build.assign (s, e + 1, string::npos); return ver; }; compiler_version ver; { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".version to override"; }); // Treat the custom version as just a tail of the signature. // // @@ TODO: should we have type_version here (and suggest that // in diag_frame above? // const string& s (xv != nullptr ? *xv : emscr ? gr.type_signature : gr.signature); // Some overrides for testing. // //s = "clang version 3.7.0 (tags/RELEASE_370/final)"; // //gr.id.variant = "apple"; //s = "Apple LLVM version 7.3.0 (clang-703.0.16.1)"; //s = "Apple clang version 3.1 (tags/Apple/clang-318.0.58) (based on LLVM 3.1svn)"; // Scan the string as words and look for one that looks like a // version. Use '-' as a second delimiter to handle versions like // "3.6.0-2ubuntu1~trusty1". // ver = extract_version (s, apple, "Clang"); } optional<compiler_version> var_ver; if (apple) { // Map Apple to vanilla Clang version, preserving the original as the // variant version. // var_ver = move (ver); // Apple no longer discloses the mapping so it's a guesswork and we // better be conservative. For details see: // // https://gist.github.com/yamaya/2924292 // // Specifically, we now look in the libc++'s __config file for the // _LIBCPP_VERSION and use the previous version as a conservative // estimate (note that there could be multiple __config files with // potentially different versions so compile with -v to see which one // gets picked up). // // Note that this is Apple Clang version and not XCode version. // // 4.2 -> 3.2svn // 5.0 -> 3.3svn // 5.1 -> 3.4svn // 6.0 -> 3.5svn // 6.1.0 -> 3.6svn // 7.0.0 -> 3.7 // 7.3.0 -> 3.8 // 8.0.0 -> 3.9 // 8.1.0 -> ? // 9.0.0 -> 4.0 // 9.1.0 -> 5.0 // 10.0.0 -> 6.0 // 11.0.0 -> 7.0 // 11.0.3 -> 8.0 (yes, seriously!) // 12.0.0 -> 9.0 // 12.0.5 -> 10.0 (yes, seriously!) // 13.0.0 -> 11.0 // uint64_t mj (var_ver->major); uint64_t mi (var_ver->minor); uint64_t pa (var_ver->patch); if (mj >= 13) {mj = 11; mi = 0;} else if (mj == 12 && (mi > 0 || pa >= 5)) {mj = 10; mi = 0;} else if (mj == 12) {mj = 9; mi = 0;} else if (mj == 11 && (mi > 0 || pa >= 3)) {mj = 8; mi = 0;} else if (mj == 11) {mj = 7; mi = 0;} else if (mj == 10) {mj = 6; mi = 0;} else if (mj == 9 && mi >= 1) {mj = 5; mi = 0;} else if (mj == 9) {mj = 4; mi = 0;} else if (mj == 8) {mj = 3; mi = 9;} else if (mj == 7 && mi >= 3) {mj = 3; mi = 8;} else if (mj == 7) {mj = 3; mi = 7;} else if (mj == 6 && mi >= 1) {mj = 3; mi = 5;} else if (mj == 6) {mj = 3; mi = 4;} else if (mj == 5 && mi >= 1) {mj = 3; mi = 3;} else if (mj == 5) {mj = 3; mi = 2;} else if (mj == 4 && mi >= 2) {mj = 3; mi = 1;} else {mj = 3; mi = 0;} ver = compiler_version { to_string (mj) + '.' + to_string (mi) + ".0", mj, mi, 0, ""}; } else if (emscr) { // Extract Emscripten version. // auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".version to override"; }); var_ver = extract_version (gr.signature, false, "Emscripten"); // The official Emscripten distributions routinely use unreleased // Clang snapshots which nevertheless have the next release version // (which means it's actually somewhere between the previous release // and the next release). On the other hand, distributions like Debian // package it to use their Clang package which normally has the // accurate version. So here we will try to detect the former and // similar to the Apple case we will conservatively adjust it to the // previous release. // if (gr.type_signature.find ("googlesource") != string::npos) { if (ver.patch != 0) ver.patch--; else if (ver.minor != 0) ver.minor--; else ver.major--; } } // Figure out the target architecture. // // Unlike gcc, clang doesn't have -print-multiarch. Its -dumpmachine, // however, respects the compile options (e.g., -m32). // string t, ot; if (xt == nullptr) { cstrings args {xp.recall_string ()}; if (c_co != nullptr) append_options (args, *c_co); if (x_co != nullptr) append_options (args, *x_co); append_options (args, x_mo); args.push_back (cl ? "/clang:-dumpmachine" : "-dumpmachine"); args.push_back (nullptr); // The output of -dumpmachine is a single line containing just the // target triplet. Again, we don't expect any localization so no need // for LC_ALL. // auto f = [] (string& l, bool) {return move (l);}; t = run<string> (3, xp, args.data (), f, false); if (t.empty ()) fail << "unable to extract target architecture from " << xc << " using -dumpmachine output" << info << "use config." << xm << ".target to override"; ot = t; } else ot = t = *xt; // Parse the target into triplet (for further tests) ignoring any // failures. // target_triplet tt; try {tt = target_triplet (t);} catch (const invalid_argument&) {} // For Clang on Windows targeting MSVC we remap the target to match // MSVC's. // optional<pair<dir_paths, size_t>> lib_dirs; string bpat; if (tt.system == "windows-msvc") { // Note that currently there is no straightforward way to determine // the VC version Clang is using. See: // // http://lists.llvm.org/pipermail/cfe-dev/2017-December/056240.html // // So we have to sniff this information out from Clang's -v output // (plus a couple of other useful bits like the VC installation // directory and Platform SDK). // clang_msvc_info mi (guess_clang_msvc (xl, xp, x_mo, c_co, x_co, cl)); // Keep the CPU and replace the rest. // tt.vendor = "microsoft"; tt.system = "win32-msvc"; tt.version = msvc_runtime_version (msvc_compiler_version (mi.msvc_ver)); t = tt.representation (); // Add the MSVC information to the signature and checksum. // if (cs.empty ()) cs.append (gr.signature); cs.append (mi.msvc_ver); cs.append (mi.msvc_dir.string ()); cs.append (mi.psdk_ver); cs.append (mi.psdk_dir.string ()); gr.signature += " MSVC version "; gr.signature += mi.msvc_ver; const char* cpu (msvc_cpu (tt.cpu)); // Come up with the system library search paths. Ideally we would want // to extract this from Clang and -print-search-paths would have been // the natural way for Clang to report it. But no luck. // lib_dirs = msvc_lib (mi, x_mo, cpu); // Binutils search paths. // // We shouldn't need them if we are running from the command prompt // and omitting them in this case would also result in tidier command // lines. However, reliably detecting this and making sure the result // matches Clang's is complex. So let's keep it simple for now. // bpat = msvc_bin (mi, cpu); // If this is clang-cl, then use the MSVC compatibility version as its // primary version. // if (cl) { var_ver = move (ver); ver = msvc_compiler_version (mi.msvc_comp_ver); } } // Derive the compiler toolchain pattern. // string cpat; if (cl) ; else if (emscr) { cpat = pattern (xc, xl == lang::c ? "emcc" : "em++"); // Emscripten provides the emar/emranlib wrappers (over llvm-*). // bpat = pattern (xc, xl == lang::c ? "cc" : "++", "m"); } else { // Try clang/clang++, the gcc/g++ alias, as well as cc/c++. // cpat = pattern (xc, xl == lang::c ? "clang" : "clang++"); if (cpat.empty ()) cpat = pattern (xc, xl == lang::c ? "gcc" : "g++"); if (cpat.empty ()) cpat = pattern (xc, xl == lang::c ? "cc" : "c++"); } // Runtime and standard library. // // Clang can use libgcc, its own compiler-rt, or, on Windows targeting // MSVC, the VC's runtime. As usual, there is no straightforward way // to query this and silence on the mailing list. See: // // http://lists.llvm.org/pipermail/cfe-dev/2018-January/056494.html // // So for now we will just look for --rtlib (note: linker option) and if // none specified, assume some platform-specific defaults. // string rt; { auto find_rtlib = [] (const strings* ops) -> const string* { return ops != nullptr ? find_option_prefix ("--rtlib=", *ops, false) : nullptr; }; const string* o; if ((o = find_rtlib (&x_mo)) != nullptr || (o = find_rtlib (x_lo)) != nullptr || (o = find_rtlib (c_lo)) != nullptr) { rt = string (*o, 8); } else if (tt.system == "win32-msvc") rt = "msvc"; else if (tt.system == "linux-gnu" || tt.system == "freebsd" || tt.system == "netbsd") rt = "libgcc"; else /* Mac OS, etc. */ rt = "compiler-rt"; } string csl ( tt.system == "win32-msvc" || tt.system == "mingw32" ? "msvc" : stdlib (xl, xp, x_mo, c_po, x_po, c_co, x_co, c_stdlib_src)); string xsl; switch (xl) { case lang::c: xsl = csl; break; case lang::cxx: { // All Clang versions that we care to support have __has_include() // so we use it to determine which standard library is available. // // Note that we still include the corresponding headers to verify // things are usable. For the "other" case we include some // standard header to detect the "none" case (e.g, -nostdinc++). // const char* src = "#if __has_include(<__config>) \n" " #include <__config> \n" " stdlib:=\"libc++\" \n" "#elif __has_include(<bits/c++config.h>) \n" " #include <bits/c++config.h> \n" " stdlib:=\"libstdc++\" \n" "#else \n" " #include <cstddef> \n" " stdlib:=\"other\" \n" "#endif \n"; xsl = tt.system == "win32-msvc" ? "msvcp" : stdlib (xl, xp, x_mo, c_po, x_po, c_co, x_co, src); break; } } // Environment. // // Note that "Emscripten Compiler Frontend (emcc)" has a long list of // environment variables with little explanation. So someone will need // to figure out what's important (some of them are clearly for // debugging of emcc itself). // const char* const* c_env (nullptr); const char* const* p_env (nullptr); if (tt.system == "win32-msvc") c_env = msvc_env; else { switch (xl) { case lang::c: c_env = clang_c_env; break; case lang::cxx: c_env = clang_cxx_env; break; } if (tt.system == "darwin") p_env = macos_env; } return compiler_info { move (gr.path), move (gr.id), cl ? compiler_class::msvc : compiler_class::gcc, move (ver), move (var_ver), move (gr.signature), move (gr.checksum), // Calculated on whole -v output. move (t), move (ot), move (cpat), move (bpat), move (rt), move (csl), move (xsl), move (lib_dirs), nullopt, nullopt, c_env, p_env}; } static compiler_info guess_icc (const char* xm, lang xl, const path& xc, const string* xv, const string* xt, const strings& x_mo, const strings* c_po, const strings* x_po, const strings* c_co, const strings* x_co, const strings*, const strings*, guess_result&& gr, sha256&) { //@@ TODO: this should be reviewed/revised if/when we get access // to more recent ICC versions. const process_path& xp (gr.path); // Extract the version. If the version has the fourth component, then // the signature line (extracted with --version) won't include it. So we // will have to get a more elaborate line with -V. We will also have to // do it to get the compiler target that respects the -m option: icc // doesn't support -print-multiarch like gcc and its -dumpmachine // doesn't respect -m like clang. In fact, its -dumpmachine is // completely broken as it appears to print the compiler's host and not // the target (e.g., .../bin/ia32/icpc prints x86_64-linux-gnu). // // Some examples of the signature lines from -V output: // // Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047 // Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116 // Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010 // Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074 // Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064 // Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427 // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 16.0.2.181 Build 20160204 // Intel(R) C++ Intel(R) 64 Compiler for applications running on IA-32, Version 16.0.2.181 Build 20160204 // Intel(R) C++ Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204 // Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) MIC Architecture, Version 16.0.2.181 Build 20160204 // // We should probably also assume the language words can be translated // and even rearranged. Thus pass LC_ALL=C. // process_env env (xp); #ifndef _WIN32 const char* evars[] = {"LC_ALL=C", nullptr}; env.vars = evars; #endif auto f = [] (string& l, bool) { return l.compare (0, 5, "Intel") == 0 && (l[5] == '(' || l[5] == ' ') ? move (l) : string (); }; if (xv == nullptr) { string& s (gr.signature); s.clear (); // The -V output is sent to STDERR. // // @@ TODO: running without the mode options. // s = run<string> (3, env, "-V", f, false); if (s.empty ()) fail << "unable to extract signature from " << xc << " -V output"; if (s.find (xl == lang::c ? " C " : " C++ ") == string::npos) fail << xc << " does not appear to be the Intel " << xl << " compiler" << info << "extracted signature: '" << s << "'"; } // Scan the string as words and look for the version. It consist of only // digits and periods and contains at least one period. // compiler_version ver; { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".version to override"; }); // Treat the custom version as just a tail of the signature. // const string& s (xv == nullptr ? gr.signature : *xv); // Some overrides for testing. // //s = "Intel(R) C++ Compiler for 32-bit applications, Version 9.1 Build 20070215Z Package ID: l_cc_c_9.1.047"; //s = "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 10.1 Build 20071116"; //s = "Intel(R) C++ Compiler for applications running on IA-32, Version 10.1 Build 20071116 Package ID: l_cc_p_10.1.010"; //s = "Intel C++ Intel 64 Compiler Professional for applications running on Intel 64, Version 11.0 Build 20081105 Package ID: l_cproc_p_11.0.074"; //s = "Intel(R) C++ Intel(R) 64 Compiler Professional for applications running on Intel(R) 64, Version 11.1 Build 20091130 Package ID: l_cproc_p_11.1.064"; //s = "Intel C++ Intel 64 Compiler XE for applications running on Intel 64, Version 12.0.4.191 Build 20110427"; size_t b (0), e (0); while (next_word (s, b, e, ' ', ',') != 0) { // The third argument to find_first_not_of() is the length of the // first argument, not the length of the interval to check. So to // limit it to [b, e) we are also going to compare the result to the // end of the word position (first space). In fact, we can just // check if it is >= e. Similar logic for find_first_of() except // that we add space to the list of character to make sure we don't // go too far. // if (s.find_first_not_of ("1234567890.", b, 11) >= e && s.find_first_of (". ", b, 2) < e) break; } if (b == e) fail << "unable to extract ICC version from '" << s << "'"; ver.string.assign (s, b, string::npos); // Split the version into components. // size_t vb (b), ve (b); auto next = [&s, b, e, &vb, &ve] (const char* m, bool opt) -> uint64_t { try { if (next_word (s, e, vb, ve, '.')) return stoull (string (s, vb, ve - vb)); if (opt) return 0; } catch (const invalid_argument&) {} catch (const out_of_range&) {} fail << "unable to extract ICC " << m << " version from '" << string (s, b, e - b) << "'" << endf; }; ver.major = next ("major", false); ver.minor = next ("minor", false); ver.patch = next ("patch", true); if (vb != ve && next_word (s, e, vb, ve, '.')) ver.build.assign (s, vb, ve - vb); if (e != s.size ()) { if (!ver.build.empty ()) ver.build += ' '; ver.build.append (s, e + 1, string::npos); } } // Figure out the target CPU by re-running the compiler with -V and // compile options (which may include, e.g., -m32). The output will // contain two CPU keywords: the first is the host and the second is the // target (hopefully this won't get rearranged by the translation). // // The CPU keywords (based on the above samples) appear to be: // // "32-bit" // "IA-32" // "Intel" "64" // "Intel(R)" "64" // "Intel(R)" "MIC" (-dumpmachine says: x86_64-k1om-linux) // // @@ TODO: why can't we combine it with the previous -V run? // string t, ot; if (xt == nullptr) { auto df = make_diag_frame ( [&xm](const diag_record& dr) { dr << info << "use config." << xm << ".target to override"; }); cstrings args {xp.recall_string ()}; if (c_co != nullptr) append_options (args, *c_co); if (x_co != nullptr) append_options (args, *x_co); append_options (args, x_mo); args.push_back ("-V"); args.push_back (nullptr); // The -V output is sent to STDERR. // t = run<string> (3, env, args.data (), f, false); if (t.empty ()) fail << "unable to extract target architecture from " << xc << " -V output"; string arch; for (size_t b (0), e (0), n; (n = next_word (t, b, e, ' ', ',')) != 0; ) { if (t.compare (b, n, "Intel(R)", 8) == 0 || t.compare (b, n, "Intel", 5) == 0) { if ((n = next_word (t, b, e, ' ', ',')) != 0) { if (t.compare (b, n, "64", 2) == 0) { arch = "x86_64"; } else if (t.compare (b, n, "MIC", 3) == 0) { arch = "x86_64"; // Plus "-k1om-linux" from -dumpmachine below. } } else break; } else if (t.compare (b, n, "IA-32", 5) == 0 || t.compare (b, n, "32-bit", 6) == 0) { arch = "i386"; } } if (arch.empty ()) fail << "unable to extract ICC target architecture from '" << t << "'"; // So we have the CPU but we still need the rest of the triplet. While // icc currently doesn't support cross-compilation (at least on Linux) // and we could have just used the build triplet (i.e., the // architecture on which we are running), who knows what will happen // in the future. So instead we are going to use -dumpmachine and // substitute the CPU. // // Note: no localication expected so running without LC_ALL. // // @@ TODO: running without the mode options. // { auto f = [] (string& l, bool) {return move (l);}; t = run<string> (3, xp, "-dumpmachine", f); } if (t.empty ()) fail << "unable to extract target architecture from " << xc << " using -dumpmachine output"; // The first component in the triplet is always CPU. // size_t p (t.find ('-')); if (p == string::npos) fail << "unable to parse ICC target architecture '" << t << "'"; t.swap (arch); t.append (arch, p, string::npos); ot = t; } else ot = t = *xt; // Parse the target into triplet (for further tests) ignoring any // failures. // target_triplet tt; try {tt = target_triplet (t);} catch (const invalid_argument&) {} // Derive the toolchain pattern. // string pat (pattern (xc, xl == lang::c ? "icc" : "icpc")); // Runtime and standard library. // // For now we assume that unless it is Windows, we are targeting // Linux/GCC. // string rt (tt.system == "win32-msvc" ? "msvc" : "libgcc"); string csl ( tt.system == "win32-msvc" ? "msvc" : stdlib (xl, xp, x_mo, c_po, x_po, c_co, x_co, c_stdlib_src)); string xsl; switch (xl) { case lang::c: xsl = csl; break; case lang::cxx: { xsl = tt.system == "win32-msvc" ? "msvcp" : "libstdc++"; break; } } return compiler_info { move (gr.path), move (gr.id), compiler_class::gcc, //@@ TODO: msvc on Windows? move (ver), nullopt, move (gr.signature), "", move (t), move (ot), move (pat), "", move (rt), move (csl), move (xsl), nullopt, nullopt, nullopt, nullptr, /* TODO */ nullptr}; } // Compiler checks can be expensive (we often need to run the compiler // several times) so we cache the result. // static global_cache<compiler_info> cache; const compiler_info& guess (const char* xm, lang xl, const string& ec, const path& xc, const string* xis, const string* xv, const string* xt, const strings& x_mo, const strings* c_po, const strings* x_po, const strings* c_co, const strings* x_co, const strings* c_lo, const strings* x_lo) { // First check the cache. // // Note that in case of MSVC (and Clang targeting MSVC) sys_*_dirs can // be affected by the environment (INCLUDE, LIB, and IFCPATH) which is // project-specific. So we have to include those into the key. But we // don't know yet know whether it's those compilers/targets. So it seems // we have no better choice than to include the project environment if // overridden. // // @@ We currently include config.{cc,x}.[pc]options into the key which // means any project-specific tweaks to these result in a different // key. Perhaps we should assume that any options that can affect the // result of what we are guessing (-m32, -stdlib=, etc) should be // specified as part of the mode? While definitely feels correct, // people will most likely specify these options else where as well. // string key; { sha256 cs; cs.append (static_cast<size_t> (xl)); cs.append (xc.string ()); if (!ec.empty ()) cs.append (ec); if (xis != nullptr) cs.append (*xis); append_options (cs, x_mo); if (c_po != nullptr) append_options (cs, *c_po); if (x_po != nullptr) append_options (cs, *x_po); if (c_co != nullptr) append_options (cs, *c_co); if (x_co != nullptr) append_options (cs, *x_co); if (c_lo != nullptr) append_options (cs, *c_lo); if (x_lo != nullptr) append_options (cs, *x_lo); key = cs.string (); if (const compiler_info* r = cache.find (key)) return *r; } // Parse the user-specified compiler id (config.x.id). // optional<compiler_id> xi; if (xis != nullptr) { try { xi = compiler_id (*xis); } catch (const invalid_argument& e) { fail << "invalid compiler id '" << *xis << "' " << "specified in variable config." << xm << ".id: " << e; } } pre_guess_result pre (pre_guess (xl, xc, xi)); // If we could pre-guess the type based on the excutable name, then // try the test just for that compiler. // guess_result gr; sha256 cs; if (pre.type != invalid_compiler_type) { gr = guess (xm, xl, xc, x_mo, xi, pre, cs); if (gr.empty ()) { warn << xc << " looks like " << pre << " but it is not" << info << "use config." << xm << " to override"; // Clear pre-guess. // pre.type = invalid_compiler_type; pre.variant = nullopt; pre.pos = string::npos; } } if (gr.empty ()) gr = guess (xm, xl, xc, x_mo, xi, pre, cs); if (gr.empty ()) fail << "unable to guess " << xl << " compiler type of " << xc << info << "use config." << xm << ".id to specify explicitly"; compiler_info (*gf) ( const char*, lang, const path&, const string*, const string*, const strings&, const strings*, const strings*, const strings*, const strings*, const strings*, const strings*, guess_result&&, sha256&) = nullptr; switch (gr.id.type) { case compiler_type::gcc: gf = &guess_gcc; break; case compiler_type::clang: gf = &guess_clang; break; case compiler_type::msvc: { gf = gr.id.variant == "clang" ? &guess_clang : &guess_msvc; break; } case compiler_type::icc: gf = &guess_icc; break; } compiler_info r (gf (xm, xl, xc, xv, xt, x_mo, c_po, x_po, c_co, x_co, c_lo, x_lo, move (gr), cs)); // By default use the signature line(s) to generate the checksum. // if (cs.empty ()) { cs.append (r.signature); if (!gr.type_signature.empty ()) cs.append (gr.type_signature); } r.checksum = cs.string (); // Derive binutils pattern unless this has already been done by the // compiler-specific code. // // When cross-compiling the whole toolchain is normally prefixed with // the target triplet, e.g., x86_64-w64-mingw32-{gcc,g++,ar,ld}. But // oftentimes it is not quite canonical (and sometimes -- outright // bogus). So instead we are going to first try to derive the prefix // using the pre-guessed position of the compiler name. Note that we // still want to try the target in case we could not pre-guess (think // x86_64-w64-mingw32-c++). // // BTW, for GCC we also get gcc-{ar,ranlib} (but not gcc-ld) which add // support for the LTO plugin though it seems more recent GNU binutils // (2.25) are able to load the plugin when needed automatically. So it // doesn't seem we should bother trying to support this on our end (one // way we could do it is by passing config.bin.{ar,ranlib} as hints). // // It's also normal for native (i.e., non-cross-compiler) builds of GCC // and Clang to not have binutils installed in the same directory and // instead relying on the system ones. In this case, if the compiler is // specified with the absolute path, the pattern will be the search // path. // if (r.bin_pattern.empty ()) { if (pre.pos != 0 && pre.pos != string::npos && !path::traits_type::is_separator (xc.string ()[pre.pos - 1])) { r.bin_pattern.assign (xc.string (), 0, pre.pos); r.bin_pattern += '*'; // '-' or similar is already there. } } if (r.bin_pattern.empty ()) { const string& t (r.target); size_t n (t.size ()); if (xc.size () > n + 1) { const string& l (xc.leaf ().string ()); if (l.size () > n + 1 && l.compare (0, n, t) == 0 && l[n] == '-') { path p (xc.directory ()); p /= t; p += "-*"; r.bin_pattern = move (p).string (); } } } // If we could not derive the pattern, then see if we can come up with a // search path. // if (r.bin_pattern.empty ()) { const path& p (r.path.recall.empty () ? xc : r.path.recall); if (!p.simple ()) r.bin_pattern = p.directory ().representation (); // Trailing slash. } return cache.insert (move (key), move (r)); } strings guess_default (lang xl, const string& cid, const string& pat, const strings& mode) { compiler_id id (cid); const char* s (nullptr); using type = compiler_type; switch (xl) { case lang::c: { switch (id.type) { case type::gcc: s = "gcc"; break; case type::clang: { if (id.variant == "emscripten") s = "emcc"; else s = "clang"; break; } case type::icc: s = "icc"; break; case type::msvc: { s = (id.variant == "clang" ? "clang-cl" : "cl"); break; } } break; } case lang::cxx: { switch (id.type) { case type::gcc: s = "g++"; break; case type::clang: { if (id.variant == "emscripten") s = "em++"; else s = "clang++"; break; } case type::icc: s = "icpc"; break; case type::msvc: { s = (id.variant == "clang" ? "clang-cl" : "cl"); break; } } break; } } strings r; r.reserve (mode.size () + 1); r.push_back (apply_pattern (s, pat)); r.insert (r.end (), mode.begin (), mode.end ()); return r; } // Table 23 [tab:headers.cpp]. // // In the future we will probably have to maintain per-standard additions. // static const char* std_importable[] = { "<algorithm>", "<any>", "<array>", "<atomic>", "<barrier>", "<bit>", "<bitset>", "<charconv>", "<chrono>", "<codecvt>", "<compare>", "<complex>", "<concepts>", "<condition_variable>", "<coroutine>", "<deque>", "<exception>", "<execution>", "<filesystem>", "<format>", "<forward_list>", "<fstream>", "<functional>", "<future>", "<initializer_list>", "<iomanip>", "<ios>", "<iosfwd>", "<iostream>", "<istream>", "<iterator>", "<latch>", "<limits>", "<list>", "<locale>", "<map>", "<memory>", "<memory_resource>", "<mutex>", "<new>", "<numbers>", "<numeric>", "<optional>", "<ostream>", "<queue>", "<random>", "<ranges>", "<ratio>", "<regex>", "<scoped_allocator>", "<semaphore>", "<set>", "<shared_mutex>", "<source_location>", "<span>", "<sstream>", "<stack>", "<stdexcept>", "<stop_token>", "<streambuf>", "<string>", "<string_view>", "<strstream>", "<syncstream>", "<system_error>", "<thread>", "<tuple>", "<typeindex>", "<typeinfo>", "<type_traits>", "<unordered_map>", "<unordered_set>", "<utility>", "<valarray>", "<variant>", "<vector>", "<version>" }; // Table 24 ([tab:headers.cpp.c]) // static const char* std_non_importable[] = { "<cassert>", "<cctype>", "<cerrno>", "<cfenv>", "<cfloat>", "<cinttypes>", "<climits>", "<clocale>", "<cmath>", "<csetjmp>", "<csignal>", "<cstdarg>", "<cstddef>", "<cstdint>", "<cstdio>", "<cstdlib>", "<cstring>", "<ctime>", "<cuchar>", "<cwchar>", "<cwctype>" }; void guess_std_importable_headers (const compiler_info& ci, const dir_paths& sys_hdr_dirs, importable_headers& hs) { hs.group_map.emplace (header_group_std, 0); hs.group_map.emplace (header_group_std_importable, 0); // For better performance we make compiler-specific assumptions. // // For example, we can assume that all these headers are found in the // same header search directory. This is at least the case for GCC's // libstdc++. // // Note also that some headers could be missing. For example, <format> // is currently not provided by GCC. Though entering missing headers // should be harmless. // pair<const path, importable_headers::groups>* p; auto add_groups = [&p] (bool imp) { if (imp) p->second.push_back (header_group_std_importable); // More specific. p->second.push_back (header_group_std); }; if (ci.id.type != compiler_type::gcc) { for (const char* f: std_importable) if ((p = hs.insert_angle (sys_hdr_dirs, f)) != nullptr) add_groups (true); for (const char* f: std_non_importable) if ((p = hs.insert_angle (sys_hdr_dirs, f)) != nullptr) add_groups (false); } else { p = hs.insert_angle (sys_hdr_dirs, std_importable[0]); assert (p != nullptr); add_groups (true); dir_path d (p->first.directory ()); auto add_header = [&hs, &d, &p, add_groups] (const char* f, bool imp) { path fp (d); fp.combine (f + 1, strlen (f) - 2, '\0'); // Assuming simple. p = &hs.insert_angle (move (fp), f); add_groups (imp); }; for (size_t i (1); i != sizeof (std_importable) / sizeof (std_importable[0]); ++i) add_header (std_importable[i], true); for (const char* f: std_non_importable) add_header (f, false); } } } }