aboutsummaryrefslogtreecommitdiff
path: root/butl
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-01-27 12:27:54 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-01-27 12:27:54 +0200
commitae02c68df6f26ff24b008ca047ae7750eeecedac (patch)
tree3aa59815cfae218923ae3f8b5f8f2f3e611ee3ee /butl
parent59fc5d5cc5341e0505216c17405af995116ebb3b (diff)
Add support for parsing cpu-vendor-os target triplets
Diffstat (limited to 'butl')
-rw-r--r--butl/buildfile1
-rw-r--r--butl/triplet97
-rw-r--r--butl/triplet.cxx121
3 files changed, 219 insertions, 0 deletions
diff --git a/butl/buildfile b/butl/buildfile
index 9c96aab..de51ad5 100644
--- a/butl/buildfile
+++ b/butl/buildfile
@@ -15,6 +15,7 @@ lib{butl}: \
{hxx ixx cxx}{ process } \
{hxx txx }{ string-table } \
{hxx cxx}{ timestamp } \
+{hxx cxx}{ triplet } \
{hxx }{ utility } \
{hxx }{ version }
diff --git a/butl/triplet b/butl/triplet
new file mode 100644
index 0000000..c18368c
--- /dev/null
+++ b/butl/triplet
@@ -0,0 +1,97 @@
+// file : butl/triplet -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_TRIPLET
+#define BUTL_TRIPLET
+
+#include <string>
+
+namespace butl
+{
+ // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS
+ // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus
+ // some fields can sometimes be omitted. This looseness makes it hard to base
+ // any kind of decisions on the triplet without canonicalizing it and then
+ // splitting it into components. The way we are going to split it is like
+ // this:
+ //
+ // CPU
+ //
+ // This one is reasonably straightforward. Note that we always expect at
+ // least two components with the first being the CPU. In other words, we
+ // don't try to guess what just 'mingw32' might mean like config.sub does.
+ //
+ // VENDOR
+ //
+ // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor
+ // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu.
+ // Just as we think vendor is pretty irrelevant and can be ignored, comes
+ // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to
+ // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably
+ // toolchain) vendor.
+ //
+ // To make things more regular we also convert the information-free vendor
+ // names 'pc', 'unknown' and 'none' to the empty name.
+ //
+ // OS/KERNEL-OS/OS-ABI
+ //
+ // This is where things get really messy and instead of trying to guess, we
+ // call the entire thing SYSTEM. Except, in certain cases, we factor out the
+ // trailing version, again, to make SYSTEM easier to compare to. For example,
+ // *-darwin14.5.0 becomes 'darwin' and '14.5.0'.
+ //
+ // Again, to make things more regular, if the first component in SYSTEM is
+ // none, then it is removed (so *-none-eabi becomes just 'eabi').
+ //
+ // Values for two-component systems (e.g., linux-gnu) that don't specify
+ // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of
+ // SYSTEM? The only way to handle this is to recognize their specific names
+ // as special cases and this is what we do for some of the more common
+ // ones. The alternative would be to first run such names through config.sub
+ // which adds explicit VENDOR and this could be a reasonable fallback
+ // strategy for (presumably less common) cases where we don't split things
+ // correctly.
+ //
+ // Note also that the version splitting is only done for certain,
+ // commonly-used targets.
+ //
+ // Some examples of canonicalization and splitting:
+ //
+ // x86_64-apple-darwin14.5.0 x86_64 apple darwin 14.5.0
+ // x86_64-unknown-freebsd10.2 x86_64 freebsd 10.2
+ // i686-elf i686 elf
+ // arm-eabi arm eabi
+ // arm-none-eabi arm eabi
+ // arm-none-linux-gnueabi arm linux-gnueabi
+ // arm-softfloat-linux-gnu arm softfloat linux-gnu
+ // i686-pc-mingw32 i686 mingw32
+ // i686-w64-mingw32 i686 w64 mingw32
+ // i686-lfs-linux-gnu i686 lfs linux-gnu
+ // x86_64-unknown-linux-gnu x86_64 linux-gnu
+ // x86_64-linux-gnux32 x86_64 linux-gnux32
+ //
+ // References:
+ //
+ // 1. The libtool repository contains the PLATFORM file that lists many known
+ // triplets.
+ //
+ // 2. LLVM has the Triple class with similar goals.
+ //
+ struct triplet
+ {
+ std::string cpu;     // Never empty.
+ std::string vendor;  // Empty if omitted or 'pc', 'none', or 'unknown'.
+ std::string system;  // Never empty; excludes the version if it was split off.
+ std::string version; // Empty unless split off the end of system.
+
+ // Parse the triplet, also storing its canonicalized string in canon if one is
+ // specified. Throw std::invalid_argument if the triplet is not recognizable.
+ //
+ explicit
+ triplet (const std::string&, std::string* canon = nullptr);
+ triplet (const std::string& s, std::string& canon): triplet (s, &canon) {}
+ };
+};
+
+#endif // BUTL_TRIPLET
diff --git a/butl/triplet.cxx b/butl/triplet.cxx
new file mode 100644
index 0000000..3208375
--- /dev/null
+++ b/butl/triplet.cxx
@@ -0,0 +1,121 @@
+// file : butl/triplet.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <butl/triplet>
+
+#include <stdexcept> // invalid_argument
+
+using namespace std;
+
+namespace butl
+{
+ // Parse the target triplet s into the cpu, vendor, system, and version
+ // members. If c is not NULL, then also store the canonicalized triplet in *c
+ // (CPU[-VENDOR]-SYSTEM with the version, if any, still part of SYSTEM). Throw
+ // std::invalid_argument if s is not recognizable, in which case *c may have
+ // been partially assigned.
+ //
+ triplet::
+ triplet (const string& s, string* c)
+ {
+   auto bad = [](const char* m) {throw invalid_argument (m);};
+
+   // Find the first and the last components. The first is CPU and the last is
+   // (part of) SYSTEM, that we know for sure.
+   //
+   string::size_type f (s.find ('-')), l (s.rfind ('-'));
+
+   if (f == 0 || f == string::npos)
+     bad ("missing cpu");
+
+   cpu.assign (s, 0, f);
+
+   if (c != nullptr)
+     *c = cpu;
+
+   // If we have something in between, then the first component after CPU is
+   // VENDOR. Unless it is the first component of a two-component system, as
+   // in i686-linux-gnu.
+   //
+   if (f != l)
+   {
+     // [f, p) is VENDOR. Note that p cannot be npos since there is at least
+     // one more '-' (the one at l) at or past f.
+     //
+     string::size_type p (s.find ('-', ++f)), n (p - f);
+
+     if (n == 0)
+       bad ("empty vendor");
+
+     // Do we have all four components? If so, then we don't need to do any
+     // special recognition of two-component systems.
+     //
+     if (l != p)
+     {
+       // Move l to the second-to-last '-' which must then be the one that
+       // terminates VENDOR.
+       //
+       l = s.rfind ('-', --l);
+
+       if (l != p)
+         bad ("too many components");
+
+       // Handle the none-* case here.
+       //
+       if (s.compare (l + 1, 5, "none-") == 0)
+         l += 5;
+     }
+     else
+     {
+       // See if this is one of the well-known non-vendors.
+       //
+       if (s.compare (f, n, "linux") == 0 ||
+           s.compare (f, n, "kfreebsd") == 0)
+       {
+         l = f - 1;
+         n = 0; // No VENDOR.
+       }
+     }
+
+     // Handle special VENDOR values.
+     //
+     if (n != 0)
+     {
+       if (s.compare (f, n, "pc") != 0 &&
+           s.compare (f, n, "none") != 0 &&
+           s.compare (f, n, "unknown") != 0)
+       {
+         vendor.assign (s, f, n);
+
+         if (c != nullptr)
+         {
+           *c += '-';
+           *c += vendor;
+         }
+       }
+     }
+   }
+
+   // (l, npos) is SYSTEM
+   //
+   system.assign (s, ++l, string::npos);
+
+   if (system.empty ())
+     bad ("missing os/kernel/abi");
+
+   if (system.front () == '-' || system.back () == '-')
+     bad ("invalid os/kernel/abi");
+
+   if (c != nullptr)
+   {
+     *c += '-';
+     *c += system;
+   }
+
+   // Finally, extract VERSION for some recognized systems.
+   //
+   // The prefix length is derived from the name itself rather than spelled
+   // out by hand: a length greater than that of the name never matches a
+   // versioned system (e.g., netbsd7.0) and, for a system that is exactly the
+   // name (e.g., aix), would make assign() below throw std::out_of_range.
+   //
+   static const char* const names[] = {
+     "darwin", "freebsd", "openbsd", "netbsd", "solaris", "aix", "hpux"};
+
+   for (const char* name: names)
+   {
+     string::size_type v (string::traits_type::length (name));
+
+     // Equal only if system starts with name, so v <= system.size ().
+     //
+     if (system.compare (0, v, name) == 0)
+     {
+       version.assign (system, v, string::npos);
+       system.resize (v);
+       break;
+     }
+   }
+ }
+}