diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2016-01-27 12:27:54 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2016-01-27 12:27:54 +0200 |
commit | ae02c68df6f26ff24b008ca047ae7750eeecedac (patch) | |
tree | 3aa59815cfae218923ae3f8b5f8f2f3e611ee3ee /butl | |
parent | 59fc5d5cc5341e0505216c17405af995116ebb3b (diff) |
Add support for parsing cpu-vendor-os target triplets
Diffstat (limited to 'butl')
-rw-r--r-- | butl/buildfile | 1 | ||||
-rw-r--r-- | butl/triplet | 97 | ||||
-rw-r--r-- | butl/triplet.cxx | 121 |
3 files changed, 219 insertions, 0 deletions
diff --git a/butl/buildfile b/butl/buildfile index 9c96aab..de51ad5 100644 --- a/butl/buildfile +++ b/butl/buildfile @@ -15,6 +15,7 @@ lib{butl}: \ {hxx ixx cxx}{ process } \ {hxx txx }{ string-table } \ {hxx cxx}{ timestamp } \ +{hxx cxx}{ triplet } \ {hxx }{ utility } \ {hxx }{ version } diff --git a/butl/triplet b/butl/triplet new file mode 100644 index 0000000..c18368c --- /dev/null +++ b/butl/triplet @@ -0,0 +1,97 @@ +// file : butl/triplet -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef BUTL_TRIPLET +#define BUTL_TRIPLET + +#include <string> + +namespace butl +{ + // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS + // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus + // some fields can sometimes be omitted. This looseness makes it hard to base + // any kind of decisions on the triplet without canonicalizing it and then + // splitting it into components. The way we are going to split it is like + // this: + // + // CPU + // + // This one is reasonably straightforward. Note that we always expect at + // least two components with the first being the CPU. In other words, we + // don't try to guess what just 'mingw32' might mean like config.sub does. + // + // VENDOR + // + // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor + // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu. + // Just as we think vendor is pretty irrelevant and can be ignored, comes + // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to + // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably + // toolchain) vendor. + // + // To make things more regular we also convert the information-free vendor + // names 'pc', 'unknown' and 'none' to the empty name. 
+ // + // OS/KERNEL-OS/OS-ABI + // + // This is where things get really messy and instead of trying to guess, we + // call the entire thing SYSTEM. Except, in certain cases, we factor out the + // trailing version, again, to make SYSTEM easier to compare to. For example, + // *-darwin14.5.0 becomes 'darwin' and '14.5.0'. + // + // Again, to make things more regular, if the first component in SYSTEM is + // none, then it is removed (so *-none-eabi becomes just 'eabi'). + // + // Values for two-component systems (e.g., linux-gnu) that don't specify + // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of + // SYSTEM? The only way to handle this is to recognize their specific names + // as special cases and this is what we do for some of the more common + // ones. The alternative would be to first run such names through config.sub + // which adds explicit VENDOR and this could be a reasonable fallback + // strategy for (presumably less common) cases where we don't split things + // correctly. + // + // Note also that the version splitting is only done for certain, + // commonly-used targets. + // + // Some examples of canonicalization and splitting: + // + // x86_64-apple-darwin14.5.0 x86_64 apple darwin 14.5.0 + // x86_64-unknown-freebsd10.2 x86_64 freebsd 10.2 + // i686-elf i686 elf + // arm-eabi arm eabi + // arm-none-eabi arm eabi + // arm-none-linux-gnueabi arm linux-gnueabi + // arm-softfloat-linux-gnu arm softfloat linux-gnu + // i686-pc-mingw32 i686 mingw32 + // i686-w64-mingw32 i686 w64 mingw32 + // i686-lfs-linux-gnu i686 lfs linux-gnu + // x86_64-unknown-linux-gnu x86_64 linux-gnu + // x86_64-linux-gnux32 x86_64 linux-gnux32 + // + // References: + // + // 1. The libtool repository contains the PLATFORM file that lists many known + // triplets. + // + // 2. LLVM has the Triple class with similar goals. 
// butl/triplet (continued): declaration of butl::triplet.
//
#include <string>
#include <stdexcept> // invalid_argument

namespace butl
{
  // A target triplet split into its components.
  //
  // After construction:
  //
  // cpu     - the first component (never empty).
  // vendor  - the vendor component or empty if it was omitted or was one of
  //           the information-free names 'pc', 'unknown', or 'none'.
  // system  - everything after CPU/VENDOR, less a leading 'none-' in the
  //           four-component form and less the version, if it was split off.
  // version - whatever follows one of the recognized system names (darwin,
  //           freebsd, openbsd, netbsd, solaris, aix, hpux); empty otherwise.
  //
  struct triplet
  {
    std::string cpu;
    std::string vendor;
    std::string system;
    std::string version;

    // Parse the triplet optionally returning the canonicalized string. Throw
    // std::invalid_argument if the triplet is not recognizable.
    //
    // The canonical string is cpu[-vendor]-system with the version (if any)
    // left in place. It is only stored once parsing has succeeded, so canon
    // is left untouched on failure and may refer to the string being parsed.
    //
    explicit
    triplet (const std::string&, std::string* canon = nullptr);
    triplet (const std::string& s, std::string& canon): triplet (s, &canon) {}
  };
}

// file      : butl/triplet.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

namespace butl
{
  triplet::
  triplet (const std::string& s, std::string* c)
  {
    using std::string;

    auto bad = [](const char* m) {throw std::invalid_argument (m);};

    // Find the first and the last components. The first is CPU and the last is
    // (part of) SYSTEM, that we know for sure.
    //
    string::size_type f (s.find ('-')), l (s.rfind ('-'));

    if (f == 0 || f == string::npos)
      bad ("missing cpu");

    cpu.assign (s, 0, f);

    // If we have something in between, then the first component after CPU is
    // VENDOR. Unless it is a first component of two-component system, as in
    // i686-linux-gnu.
    //
    if (f != l)
    {
      // [f, p) is VENDOR.
      //
      string::size_type p (s.find ('-', ++f)), n (p - f);

      if (n == 0)
        bad ("empty vendor");

      // Do we have all four components? If so, then we don't need to do any
      // special recognition of two-component systems.
      //
      if (l != p)
      {
        // Step back over the last component: the dash before it must be the
        // one that terminates VENDOR, otherwise there are five or more
        // components.
        //
        l = s.rfind ('-', --l);

        if (l != p)
          bad ("too many components");

        // Handle the none-* case here.
        //
        if (s.compare (l + 1, 5, "none-") == 0)
          l += 5;
      }
      else
      {
        // See if this is one of the well-known non-vendors.
        //
        if (s.compare (f, n, "linux") == 0 ||
            s.compare (f, n, "kfreebsd") == 0)
        {
          l = f - 1; // SYSTEM starts right after CPU.
          n = 0;     // No VENDOR.
        }
      }

      // Handle special VENDOR values.
      //
      if (n != 0                          &&
          s.compare (f, n, "pc") != 0     &&
          s.compare (f, n, "none") != 0   &&
          s.compare (f, n, "unknown") != 0)
        vendor.assign (s, f, n);
    }

    // (l, npos) is SYSTEM
    //
    system.assign (s, ++l, string::npos);

    if (system.empty ())
      bad ("missing os/kernel/abi");

    if (system.front () == '-' || system.back () == '-')
      bad ("invalid os/kernel/abi");

    // Build the canonical string in a local and only hand it over at the very
    // end. Writing into *c as we go would clobber s if the caller passed the
    // same string for both.
    //
    string canon;

    if (c != nullptr)
    {
      canon = cpu;

      if (!vendor.empty ())
      {
        canon += '-';
        canon += vendor;
      }

      canon += '-';
      canon += system; // Still includes the version at this point.
    }

    // Finally, extract VERSION for some recognized systems. The length of the
    // prefix to compare is derived from the name itself so the two cannot get
    // out of sync.
    //
    const char* const versioned[] = {
      "darwin", "freebsd", "openbsd", "netbsd", "solaris", "aix", "hpux"};

    for (const char* name: versioned)
    {
      const string::size_type v (std::char_traits<char>::length (name));

      if (system.compare (0, v, name) == 0)
      {
        version.assign (system, v, string::npos);
        system.resize (v);
        break;
      }
    }

    if (c != nullptr)
      c->swap (canon);
  }
}