aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-01-27 12:27:54 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-01-27 12:27:54 +0200
commitae02c68df6f26ff24b008ca047ae7750eeecedac (patch)
tree3aa59815cfae218923ae3f8b5f8f2f3e611ee3ee
parent59fc5d5cc5341e0505216c17405af995116ebb3b (diff)
Add support for parsing cpu-vendor-os target triplets
-rw-r--r--butl/buildfile1
-rw-r--r--butl/triplet97
-rw-r--r--butl/triplet.cxx121
-rw-r--r--tests/buildfile2
-rw-r--r--tests/triplet/buildfile7
-rw-r--r--tests/triplet/driver.cxx154
6 files changed, 381 insertions, 1 deletions
diff --git a/butl/buildfile b/butl/buildfile
index 9c96aab..de51ad5 100644
--- a/butl/buildfile
+++ b/butl/buildfile
@@ -15,6 +15,7 @@ lib{butl}: \
{hxx ixx cxx}{ process } \
{hxx txx }{ string-table } \
{hxx cxx}{ timestamp } \
+{hxx cxx}{ triplet } \
{hxx }{ utility } \
{hxx }{ version }
diff --git a/butl/triplet b/butl/triplet
new file mode 100644
index 0000000..c18368c
--- /dev/null
+++ b/butl/triplet
@@ -0,0 +1,97 @@
+// file : butl/triplet -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_TRIPLET
+#define BUTL_TRIPLET
+
+#include <string>
+
+namespace butl
+{
+  // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS
+  // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus
+  // some fields can sometimes be omitted. This looseness makes it hard to base
+  // any kind of decisions on the triplet without canonicalizing it and then
+  // splitting it into components. The way we are going to split it is like
+  // this:
+  //
+  // CPU
+  //
+  // This one is reasonably straightforward. Note that we always expect at
+  // least two components with the first being the CPU. In other words, we
+  // don't try to guess what just 'mingw32' might mean like config.sub does.
+  //
+  // VENDOR
+  //
+  // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor
+  // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu.
+  // Just as we think vendor is pretty irrelevant and can be ignored, comes
+  // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to
+  // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably
+  // toolchain) vendor.
+  //
+  // To make things more regular we also convert the information-free vendor
+  // names 'pc', 'unknown' and 'none' to the empty name.
+  //
+  // OS/KERNEL-OS/OS-ABI
+  //
+  // This is where things get really messy and instead of trying to guess, we
+  // call the entire thing SYSTEM. Except, in certain cases, we factor out the
+  // trailing version, again, to make SYSTEM easier to compare to. For example,
+  // *-darwin14.5.0 becomes 'darwin' and '14.5.0'.
+  //
+  // Again, to make things more regular, if the first component in SYSTEM is
+  // none, then it is removed (so *-none-eabi becomes just 'eabi').
+  //
+  // Values for two-component systems (e.g., linux-gnu) that don't specify
+  // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of
+  // SYSTEM? The only way to handle this is to recognize their specific names
+  // as special cases and this is what we do for some of the more common
+  // ones. The alternative would be to first run such names through config.sub
+  // which adds explicit VENDOR and this could be a reasonable fallback
+  // strategy for (presumably less common) cases where we don't split things
+  // correctly.
+  //
+  // Note also that the version splitting is only done for certain,
+  // commonly-used targets.
+  //
+  // Some examples of canonicalization and splitting (the columns are the
+  // input triplet followed by CPU, VENDOR, SYSTEM, and VERSION):
+  //
+  // x86_64-apple-darwin14.5.0   x86_64  apple      darwin         14.5.0
+  // x86_64-unknown-freebsd10.2  x86_64             freebsd        10.2
+  // i686-elf                    i686               elf
+  // arm-eabi                    arm                eabi
+  // arm-none-eabi               arm                eabi
+  // arm-none-linux-gnueabi      arm                linux-gnueabi
+  // arm-softfloat-linux-gnu     arm     softfloat  linux-gnu
+  // i686-pc-mingw32             i686               mingw32
+  // i686-w64-mingw32            i686    w64        mingw32
+  // i686-lfs-linux-gnu          i686    lfs        linux-gnu
+  // x86_64-unknown-linux-gnu    x86_64             linux-gnu
+  // x86_64-linux-gnux32         x86_64             linux-gnux32
+  //
+  // References:
+  //
+  // 1. The libtool repository contains the PLATFORM file that lists many known
+  //    triplets.
+  //
+  // 2. LLVM has the Triple class with similar goals.
+  //
+  struct triplet
+  {
+    std::string cpu;     // CPU component; never empty after parsing.
+    std::string vendor;  // VENDOR; empty if omitted or pc/unknown/none.
+    std::string system;  // SYSTEM without the trailing version, if split off.
+    std::string version; // Trailing SYSTEM version; empty if not split off.
+
+    // Parse the triplet optionally returning the canonicalized string. Throw
+    // std::invalid_argument if the triplet is not recognizable.
+    //
+    // The canonicalized string has the CPU[-VENDOR]-SYSTEM form with the
+    // information-free vendor dropped and with SYSTEM still including the
+    // version. If the parsing fails, the contents of canon are unspecified.
+    //
+    explicit
+    triplet (const std::string&, std::string* canon = nullptr);
+    triplet (const std::string& s, std::string& canon): triplet (s, &canon) {}
+  };
+}
+
+#endif // BUTL_TRIPLET
diff --git a/butl/triplet.cxx b/butl/triplet.cxx
new file mode 100644
index 0000000..3208375
--- /dev/null
+++ b/butl/triplet.cxx
@@ -0,0 +1,121 @@
+// file : butl/triplet.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <butl/triplet>
+
+#include <stdexcept> // invalid_argument
+
+using namespace std;
+
+namespace butl
+{
+  // Parse the target triplet s into the cpu, vendor, system, and version
+  // members. If c is not NULL, then also store the canonical form
+  // (CPU[-VENDOR]-SYSTEM, with SYSTEM still including any trailing version)
+  // in *c. Throw invalid_argument if s cannot be split; in this case the
+  // contents of *c are unspecified.
+  //
+  triplet::
+  triplet (const string& s, string* c)
+  {
+    auto bad = [](const char* m) {throw invalid_argument (m);};
+
+    // Find the first and the last components. The first is CPU and the last
+    // is (part of) SYSTEM, that we know for sure.
+    //
+    string::size_type f (s.find ('-')), l (s.rfind ('-'));
+
+    if (f == 0 || f == string::npos)
+      bad ("missing cpu");
+
+    cpu.assign (s, 0, f);
+
+    if (c != nullptr)
+      *c = cpu;
+
+    // If we have something in between, then the first component after CPU is
+    // VENDOR. Unless it is a first component of two-component system, as in
+    // i686-linux-gnu.
+    //
+    if (f != l)
+    {
+      // [f, p) is VENDOR.
+      //
+      string::size_type p (s.find ('-', ++f)), n (p - f);
+
+      if (n == 0)
+        bad ("empty vendor");
+
+      // Do we have all four components? If so, then we don't need to do any
+      // special recognition of two-component systems.
+      //
+      if (l != p)
+      {
+        // Step back to the second-last dash: it must be the one that ends
+        // VENDOR or we have more than four components.
+        //
+        l = s.rfind ('-', --l);
+
+        if (l != p)
+          bad ("too many components");
+
+        // Handle the none-* case here: skip over the leading 'none-' so that
+        // it does not become part of SYSTEM.
+        //
+        if (s.compare (l + 1, 5, "none-") == 0)
+          l += 5;
+      }
+      else
+      {
+        // See if this is one of the well-known non-vendors.
+        //
+        if (s.compare (f, n, "linux") == 0 ||
+            s.compare (f, n, "kfreebsd") == 0)
+        {
+          l = f - 1; // SYSTEM starts right after CPU.
+          n = 0;     // No VENDOR.
+        }
+      }
+
+      // Handle special VENDOR values.
+      //
+      if (n != 0)
+      {
+        if (s.compare (f, n, "pc") != 0 &&
+            s.compare (f, n, "none") != 0 &&
+            s.compare (f, n, "unknown") != 0)
+        {
+          vendor.assign (s, f, n);
+
+          if (c != nullptr)
+          {
+            *c += '-';
+            *c += vendor;
+          }
+        }
+      }
+    }
+
+    // (l, npos) is SYSTEM
+    //
+    system.assign (s, ++l, string::npos);
+
+    if (system.empty ())
+      bad ("missing os/kernel/abi");
+
+    if (system.front () == '-' || system.back () == '-')
+      bad ("invalid os/kernel/abi");
+
+    if (c != nullptr)
+    {
+      *c += '-';
+      *c += system;
+    }
+
+    // Finally, extract VERSION for some recognized systems.
+    //
+    // The prefix length is derived from the name itself so that the two
+    // cannot get out of sync. With a length that is too large a versioned
+    // name (say, netbsd7.0) never matches while the bare name (say, aix)
+    // does match and then makes assign() below throw out_of_range.
+    //
+    static const char* const versioned[] = {
+      "darwin", "freebsd", "openbsd", "netbsd", "solaris", "aix", "hpux"};
+
+    for (const char* name: versioned)
+    {
+      const string::size_type v (string::traits_type::length (name));
+
+      if (system.compare (0, v, name) == 0)
+      {
+        version.assign (system, v, string::npos);
+        system.resize (v);
+        break;
+      }
+    }
+  }
+}
diff --git a/tests/buildfile b/tests/buildfile
index c14870b..57e632e 100644
--- a/tests/buildfile
+++ b/tests/buildfile
@@ -2,6 +2,6 @@
# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-d = dir-iterator/ path/ prefix-map/
+d = dir-iterator/ path/ prefix-map/ triplet/
.: $d
include $d
diff --git a/tests/triplet/buildfile b/tests/triplet/buildfile
new file mode 100644
index 0000000..237209e
--- /dev/null
+++ b/tests/triplet/buildfile
@@ -0,0 +1,7 @@
+# file : tests/triplet/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# The test driver executable: built from driver.cxx and depends on the butl
+# library.
+#
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+
+# NOTE(review): presumably pulls in the library's buildfile so that the
+# lib{butl} prerequisite above is defined -- confirm.
+#
+include ../../butl/
diff --git a/tests/triplet/driver.cxx b/tests/triplet/driver.cxx
new file mode 100644
index 0000000..9d51fed
--- /dev/null
+++ b/tests/triplet/driver.cxx
@@ -0,0 +1,154 @@
+// file : tests/triplet/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+#include <stdexcept> // invalid_argument
+
+#include <butl/triplet>
+
+using namespace std;
+using namespace butl;
+
+// Return true if constructing a triplet from the specified string throws
+// invalid_argument. Otherwise print the string to cerr and return false.
+//
+static bool
+fail (const char*);
+
+// Parse the first argument as a triplet and return true if the resulting
+// canonical string and each of the components match the expected values.
+// Print the first mismatch (actual and expected) to cerr.
+//
+static bool
+test (const char*,
+ const char* canon,
+ const char* cpu,
+ const char* vendor,
+ const char* system,
+ const char* version);
+
+// Run the triplet parsing tests. Note that all the checks are wrapped in
+// assert() and are therefore compiled out if NDEBUG is defined.
+//
+int
+main ()
+{
+ // Strings that must be rejected with invalid_argument.
+ //
+ assert (fail (""));
+ assert (fail ("mingw32"));
+ assert (fail ("-"));
+ assert (fail ("arm-"));
+ assert (fail ("-mingw32"));
+ assert (fail ("a-b-c-d-e"));
+ assert (fail ("arm-pc--"));
+ assert (fail ("arm-pc-linux-"));
+ assert (fail ("arm-pc--gnu"));
+
+ // Canonicalization and splitting. The arguments are the input triplet, the
+ // expected canonical string, and then the expected cpu, vendor, system, and
+ // version.
+ //
+ assert (test ("i686-elf",
+ "i686-elf",
+ "i686", "", "elf", ""));
+
+ assert (test ("arm-eabi",
+ "arm-eabi",
+ "arm", "", "eabi", ""));
+
+ assert (test ("arm-none-eabi",
+ "arm-eabi",
+ "arm", "", "eabi", ""));
+
+ assert (test ("arm-none-linux-gnueabi",
+ "arm-linux-gnueabi",
+ "arm", "", "linux-gnueabi", ""));
+
+ assert (test ("arm-softfloat-linux-gnu",
+ "arm-softfloat-linux-gnu",
+ "arm", "softfloat", "linux-gnu", ""));
+
+ assert (test ("i686-pc-mingw32",
+ "i686-mingw32",
+ "i686", "", "mingw32", ""));
+
+ assert (test ("i686-w64-mingw32",
+ "i686-w64-mingw32",
+ "i686", "w64", "mingw32", ""));
+
+ assert (test ("i686-lfs-linux-gnu",
+ "i686-lfs-linux-gnu",
+ "i686", "lfs", "linux-gnu", ""));
+
+ assert (test ("x86_64-unknown-linux-gnu",
+ "x86_64-linux-gnu",
+ "x86_64", "", "linux-gnu", ""));
+
+ assert (test ("x86_64-linux-gnux32",
+ "x86_64-linux-gnux32",
+ "x86_64", "", "linux-gnux32", ""));
+
+ // Removal of none-.
+ //
+ assert (test ("arm-none",
+ "arm-none",
+ "arm", "", "none", ""));
+
+ assert (test ("arm-unknown-none-eabi",
+ "arm-eabi",
+ "arm", "", "eabi", ""));
+
+ // Version extraction.
+ //
+ assert (test ("x86_64-apple-darwin14.5.0",
+ "x86_64-apple-darwin14.5.0",
+ "x86_64", "apple", "darwin", "14.5.0"));
+
+ assert (test ("x86_64-unknown-freebsd10.2",
+ "x86_64-freebsd10.2",
+ "x86_64", "", "freebsd", "10.2"));
+
+ assert (test ("x86_64-pc-openbsd5.6",
+ "x86_64-openbsd5.6",
+ "x86_64", "", "openbsd", "5.6"));
+
+ assert (test ("sparc-sun-solaris2.9",
+ "sparc-sun-solaris2.9",
+ "sparc", "sun", "solaris", "2.9"));
+}
+
+// Parse s as a triplet and check the canonical string as well as every
+// component against the expected values. Return true if they all match.
+// Otherwise print the first mismatching field (actual then expected) to cerr
+// and return false. Any exception thrown by the parser is propagated.
+//
+static bool
+test (const char* s,
+      const char* canon,
+      const char* cpu,
+      const char* vendor,
+      const char* system,
+      const char* version)
+{
+  string actual_canon;
+  triplet parsed (s, actual_canon);
+
+  // Check a single field reporting the mismatch, if any, under its name.
+  //
+  auto same = [] (const string& actual,
+                  const char* expect,
+                  const char* what) -> bool
+  {
+    if (actual == expect)
+      return true;
+
+    cerr << what << " actual: " << actual << endl
+         << what << " expect: " << expect << endl;
+
+    return false;
+  };
+
+  // Stop at the first mismatch so that only it gets reported.
+  //
+  if (!same (actual_canon, canon, "canonical"))
+    return false;
+
+  if (!same (parsed.cpu, cpu, "cpu"))
+    return false;
+
+  if (!same (parsed.vendor, vendor, "vendor"))
+    return false;
+
+  if (!same (parsed.system, system, "system"))
+    return false;
+
+  return same (parsed.version, version, "version");
+}
+
+// Return true if parsing s as a triplet throws invalid_argument. Otherwise
+// print the offending string to cerr and return false. Any other exception
+// is propagated.
+//
+static bool
+fail (const char* s)
+{
+  try
+  {
+    triplet t (s);
+  }
+  catch (const invalid_argument&)
+  {
+    // Expected: the triplet was rejected (the message itself is not checked).
+    //
+    return true;
+  }
+
+  cerr << "nofail: " << s << endl;
+  return false;
+}