// From ae02c68df6f26ff24b008ca047ae7750eeecedac Mon Sep 17 00:00:00 2001
// From: Boris Kolpackov
// Date: Wed, 27 Jan 2016 12:27:54 +0200
// Subject: Add support for parsing cpu-vendor-os target triplets
//
//  butl/buildfile           |   1 +
//  butl/triplet             |  97 ++++
//  butl/triplet.cxx         | 121 ++++
//  tests/buildfile          |   2 +-
//  tests/triplet/buildfile  |   7 +
//  tests/triplet/driver.cxx | 154 ++++
//  6 files changed, 381 insertions(+), 1 deletion(-)
//  create mode 100644 butl/triplet
//  create mode 100644 butl/triplet.cxx
//  create mode 100644 tests/triplet/buildfile
//  create mode 100644 tests/triplet/driver.cxx
//
// butl/buildfile (index 9c96aab..de51ad5, @@ -15,6 +15,7 @@ lib{butl}:) gains
// one line after the timestamp entry:
//
//   +{hxx cxx}{ triplet } \
//

// file      : butl/triplet -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BUTL_TRIPLET
#define BUTL_TRIPLET

#include <string>

namespace butl
{
  // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS
  // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus
  // some fields can sometimes be omitted. This looseness makes it hard to base
  // any kind of decisions on the triplet without canonicalizing it and then
  // splitting it into components. The way we are going to split it is like
  // this:
  //
  // CPU
  //
  // This one is reasonably straightforward. Note that we always expect at
  // least two components with the first being the CPU. In other words, we
  // don't try to guess what just 'mingw32' might mean like config.sub does.
  //
  // VENDOR
  //
  // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor
  // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu.
  // Just as we think vendor is pretty irrelevant and can be ignored, comes
  // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to
  // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably
  // toolchain) vendor.
  //
  // To make things more regular we also convert the information-free vendor
  // names 'pc', 'unknown' and 'none' to the empty name.
  //
  // OS/KERNEL-OS/OS-ABI
  //
  // This is where things get really messy and instead of trying to guess, we
  // call the entire thing SYSTEM. Except, in certain cases, we factor out the
  // trailing version, again, to make SYSTEM easier to compare to. For example,
  // *-darwin14.5.0 becomes 'darwin' and '14.5.0'.
  //
  // Again, to make things more regular, if the first component in SYSTEM is
  // none, then it is removed (so *-none-eabi becomes just 'eabi').
  //
  // Values for two-component systems (e.g., linux-gnu) that don't specify
  // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of
  // SYSTEM? The only way to handle this is to recognize their specific names
  // as special cases and this is what we do for some of the more common
  // ones. The alternative would be to first run such names through config.sub
  // which adds explicit VENDOR and this could be a reasonable fallback
  // strategy for (presumably less common) cases where we don't split things
  // correctly.
  //
  // Note also that the version splitting is only done for certain,
  // commonly-used targets.
  //
  // Some examples of canonicalization and splitting:
  //
  // x86_64-apple-darwin14.5.0        x86_64  apple      darwin         14.5.0
  // x86_64-unknown-freebsd10.2       x86_64             freebsd        10.2
  // i686-elf                         i686               elf
  // arm-eabi                         arm                eabi
  // arm-none-eabi                    arm                eabi
  // arm-none-linux-gnueabi           arm                linux-gnueabi
  // arm-softfloat-linux-gnu          arm     softfloat  linux-gnu
  // i686-pc-mingw32                  i686               mingw32
  // i686-w64-mingw32                 i686    w64        mingw32
  // i686-lfs-linux-gnu               i686    lfs        linux-gnu
  // x86_64-unknown-linux-gnu         x86_64             linux-gnu
  // x86_64-linux-gnux32              x86_64             linux-gnux32
  //
  // References:
  //
  // 1. The libtool repository contains the PLATFORM file that lists many known
  //    triplets.
  //
  // 2. LLVM has the Triple class with similar goals.
  //
  struct triplet
  {
    std::string cpu;     // First component; never empty after parsing.
    std::string vendor;  // Empty if absent or one of pc/none/unknown.
    std::string system;  // Everything after VENDOR, minus any split version.
    std::string version; // Trailing version of recognized systems, or empty.

    // Parse the triplet optionally returning the canonicalized string. Throw
    // std::invalid_argument if the triplet is not recognizable.
    //
    explicit
    triplet (const std::string&, std::string* canon = nullptr);
    triplet (const std::string& s, std::string& canon): triplet (s, &canon) {}
  };
}

#endif // BUTL_TRIPLET

// file      : butl/triplet.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#include <stdexcept> // invalid_argument

namespace butl
{
  // Split s into cpu, vendor, system, and version. If c is not NULL, it is
  // overwritten with the canonical form: the CPU, then the VENDOR (only if it
  // is kept), then the SYSTEM including its version, joined with '-'.
  //
  // Throws std::invalid_argument if there is no CPU, the VENDOR or SYSTEM is
  // empty, SYSTEM starts or ends with '-', or there are more than four
  // components.
  //
  triplet::
  triplet (const std::string& s, std::string* c)
  {
    using std::string;

    auto bad = [](const char* m) {throw std::invalid_argument (m);};

    // Find the first and the last components. The first is CPU and the last is
    // (part of) SYSTEM, that we know for sure.
    //
    string::size_type f (s.find ('-')), l (s.rfind ('-'));

    if (f == 0 || f == string::npos)
      bad ("missing cpu");

    cpu.assign (s, 0, f);

    if (c != nullptr)
      *c = cpu;

    // If we have something in between, then the first component after CPU is
    // VENDOR. Unless it is a first component of two-component system, as in
    // i686-linux-gnu.
    //
    if (f != l)
    {
      // [f, p) is VENDOR.
      //
      string::size_type p (s.find ('-', ++f)), n (p - f);

      if (n == 0)
        bad ("empty vendor");

      // Do we have all four components? If so, then we don't need to do any
      // special recognition of two-component systems.
      //
      if (l != p)
      {
        // The dash before the last one must be the one that ends VENDOR,
        // otherwise there are five or more components.
        //
        l = s.rfind ('-', --l);

        if (l != p)
          bad ("too many components");

        // Handle the none-* case here: skip the leading 'none-' of SYSTEM.
        //
        if (s.compare (l + 1, 5, "none-") == 0)
          l += 5;
      }
      else
      {
        // See if this is one of the well-known non-vendors.
        //
        if (s.compare (f, n, "linux") == 0 ||
            s.compare (f, n, "kfreebsd") == 0)
        {
          l = f - 1; // SYSTEM starts right after CPU.
          n = 0;     // No VENDOR.
        }
      }

      // Handle special VENDOR values: the information-free names are dropped.
      //
      if (n != 0)
      {
        if (s.compare (f, n, "pc") != 0 &&
            s.compare (f, n, "none") != 0 &&
            s.compare (f, n, "unknown") != 0)
        {
          vendor.assign (s, f, n);

          if (c != nullptr)
          {
            *c += '-';
            *c += vendor;
          }
        }
      }
    }

    // (l, npos) is SYSTEM
    //
    system.assign (s, ++l, string::npos);

    if (system.empty ())
      bad ("missing os/kernel/abi");

    if (system.front () == '-' || system.back () == '-')
      bad ("invalid os/kernel/abi");

    // The canonical string keeps the version as part of SYSTEM, so append it
    // before the version is split off below.
    //
    if (c != nullptr)
    {
      *c += '-';
      *c += system;
    }

    // Finally, extract VERSION for some recognized systems.
    //
    // The prefix length is derived from the name itself. Hard-coding it is
    // what previously broke netbsd, aix, and hpux: a length longer than the
    // name never matched a versioned system and, for the bare name, made the
    // assign() below throw std::out_of_range.
    //
    static const char* const versioned[] = {
      "darwin", "freebsd", "openbsd", "netbsd", "solaris", "aix", "hpux"};

    for (const char* name: versioned)
    {
      const string::size_type v (string::traits_type::length (name));

      if (system.compare (0, v, name) == 0)
      {
        // A match guarantees system.size () >= v, so this cannot throw.
        //
        version.assign (system, v, string::npos);
        system.resize (v);
        break;
      }
    }
  }
}

// tests/buildfile (index c14870b..57e632e, @@ -2,6 +2,6 @@):
//
//   -d = dir-iterator/ path/ prefix-map/
//   +d = dir-iterator/ path/ prefix-map/ triplet/
//
// tests/triplet/buildfile (new file mode 100644, index 0000000..237209e):
//
//   # file      : tests/triplet/buildfile
//   # copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
//   # license   : MIT; see accompanying LICENSE file
//
//   exe{driver}: cxx{driver} ../../butl/lib{butl}
//
//   include ../../butl/
//
assert (fail ("arm-pc--")); + assert (fail ("arm-pc-linux-")); + assert (fail ("arm-pc--gnu")); + + assert (test ("i686-elf", + "i686-elf", + "i686", "", "elf", "")); + + assert (test ("arm-eabi", + "arm-eabi", + "arm", "", "eabi", "")); + + assert (test ("arm-none-eabi", + "arm-eabi", + "arm", "", "eabi", "")); + + assert (test ("arm-none-linux-gnueabi", + "arm-linux-gnueabi", + "arm", "", "linux-gnueabi", "")); + + assert (test ("arm-softfloat-linux-gnu", + "arm-softfloat-linux-gnu", + "arm", "softfloat", "linux-gnu", "")); + + assert (test ("i686-pc-mingw32", + "i686-mingw32", + "i686", "", "mingw32", "")); + + assert (test ("i686-w64-mingw32", + "i686-w64-mingw32", + "i686", "w64", "mingw32", "")); + + assert (test ("i686-lfs-linux-gnu", + "i686-lfs-linux-gnu", + "i686", "lfs", "linux-gnu", "")); + + assert (test ("x86_64-unknown-linux-gnu", + "x86_64-linux-gnu", + "x86_64", "", "linux-gnu", "")); + + assert (test ("x86_64-linux-gnux32", + "x86_64-linux-gnux32", + "x86_64", "", "linux-gnux32", "")); + + // Removal of none-. + // + assert (test ("arm-none", + "arm-none", + "arm", "", "none", "")); + + assert (test ("arm-unknown-none-eabi", + "arm-eabi", + "arm", "", "eabi", "")); + + // Version extraction. 
+ // + assert (test ("x86_64-apple-darwin14.5.0", + "x86_64-apple-darwin14.5.0", + "x86_64", "apple", "darwin", "14.5.0")); + + assert (test ("x86_64-unknown-freebsd10.2", + "x86_64-freebsd10.2", + "x86_64", "", "freebsd", "10.2")); + + assert (test ("x86_64-pc-openbsd5.6", + "x86_64-openbsd5.6", + "x86_64", "", "openbsd", "5.6")); + + assert (test ("sparc-sun-solaris2.9", + "sparc-sun-solaris2.9", + "sparc", "sun", "solaris", "2.9")); +} + +static bool +test (const char* s, + const char* canon, + const char* cpu, + const char* vendor, + const char* system, + const char* version) +{ + string c; + triplet t (s, c); + + auto cmp = [] (const string& a, const char* e, const char* n) -> bool + { + if (a != e) + { + cerr << n << " actual: " << a << endl + << n << " expect: " << e << endl; + + return false; + } + + return true; + }; + + return + cmp (c, canon, "canonical") && + cmp (t.cpu, cpu, "cpu") && + cmp (t.vendor, vendor, "vendor") && + cmp (t.system, system, "system") && + cmp (t.version, version, "version"); +} + +static bool +fail (const char* s) +{ + try + { + triplet t (s); + cerr << "nofail: " << s << endl; + return false; + } + catch (invalid_argument& e) + { + //cerr << e.what () << endl; + } + + return true; +} -- cgit v1.1