aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-01-09 17:03:04 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-01-09 21:44:23 +0300
commitca97c1e1527e721cae3a1114864da2110b15406a (patch)
tree4db188edbc01e3ebcb74920e52cc6bf8b6846af8
parent13fd92f4c8e4f68bf4f9cc0b7de95e7ee7fe9d82 (diff)
Workaround libc++'s basic_string<line_char> crash
-rw-r--r--build2/test/script/parser.cxx4
-rw-r--r--build2/test/script/regex103
-rw-r--r--build2/test/script/regex.cxx67
-rw-r--r--build2/variable11
-rw-r--r--unit-tests/test/script/parser/regex.test14
-rw-r--r--unit-tests/test/script/regex/driver.cxx45
6 files changed, 153 insertions, 91 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index e5f634c..da072dc 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -37,7 +37,7 @@ namespace std
#endif
ostringstream os;
- os << runtime_error (d); // Purify the description.
+ os << runtime_error (d); // Sanitize the description.
string s (os.str ());
if (s.find (' ') != string::npos)
@@ -1328,7 +1328,7 @@ namespace build2
// Create a special empty object.
//
regex_parts ()
- : introducer ('\0'), flags (regex::char_flags()) {}
+ : introducer ('\0'), flags (regex::char_flags ()) {}
regex_parts (string v, char i, regex::char_flags f)
: value (move (v)), introducer (i), flags (f) {}
diff --git a/build2/test/script/regex b/build2/test/script/regex
index 7dacfee..7708410 100644
--- a/build2/test/script/regex
+++ b/build2/test/script/regex
@@ -8,6 +8,7 @@
#include <list>
#include <regex>
#include <locale>
+#include <cstdint> // uintptr_t
#include <type_traits> // make_unsigned, is_unsigned
#include <unordered_set>
@@ -57,37 +58,45 @@ namespace build2
struct line_char
{
- /*
+ // Steal last two bits from the pointer to store the type.
+ //
private:
- size_t data_;
+ std::uintptr_t data_;
- // Steal two bits from the pointer (need to static assert).
- //
public:
line_type
- type () const;
+ type () const {return static_cast<line_type> (data_ & 0x3);}
int
- special () const; // Stored as (shifted) int16.
+ special () const
+ {
+ // Stored as (shifted) int16_t. Perform steps reversed to those
+ // that are described in the comment for the corresponding ctor.
+ // Note that the intermediate cast to uint16_t is required to
+ // portably preserve the -1 special character.
+ //
+ return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2));
+ }
const char_string*
- literal () const;
+ literal () const
+ {
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_string*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
const char_regex*
- regex () const;
- */
-
- line_type type;
-
- union
+ regex () const
{
- int special; // 0 (nul character)
- // -1 (EOF)
- // [()|.*+?{\}0123456789,=!] (excluding [])
-
- const char_string* literal;
- const char_regex* regex;
- };
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_regex*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
static const line_char nul;
static const line_char eof;
@@ -96,7 +105,12 @@ namespace build2
//
line_char () = default;
- // Create a special character.
+ // Create a special character. The argument value must be one of the
+ // following ones:
+ //
+ // 0 (nul character)
+ // -1 (EOF)
+ // [()|.*+?{\}0123456789,=!] (excluding [])
//
// Note that the constructor is implicit to allow basic_regex to
// implicitly construct line_chars from special char literals (in
@@ -121,7 +135,13 @@ namespace build2
explicit
line_char (const char_string* s) // Assume already pooled.
- : type (line_type::literal), literal (s) {}
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_string address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (s) |
+ static_cast <std::uintptr_t> (line_type::literal)) {}
// Create a regex character.
//
@@ -130,7 +150,13 @@ namespace build2
explicit
line_char (const char_regex* r) // Assume already pooled.
- : type (line_type::regex), regex (r) {}
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_regex address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (r) |
+ static_cast <std::uintptr_t> (line_type::regex)) {}
// Provide basic_regex with the ability to use line_char in a context
// where a char value is expected (e.g., as a function argument).
@@ -145,7 +171,7 @@ namespace build2
//
operator char () const
{
- return type == line_type::special ? special : '\a'; // BELL.
+ return type () == line_type::special ? special () : '\a'; // BELL.
}
// Return true if the character is a syntax (special) one.
@@ -160,8 +186,8 @@ namespace build2
explicit
operator T () const
{
- assert (type == line_type::special);
- return static_cast<T> (special);
+ assert (type () == line_type::special);
+ return static_cast<T> (special ());
}
};
@@ -234,16 +260,16 @@ namespace build2
bool
operator== (const line_char& l, const T& r)
{
- return
- l.type == line_type::special && static_cast<T> (l.special) == r;
+ return l.type () == line_type::special &&
+ static_cast<T> (l.special ()) == r;
}
template <typename T, typename = typename line_char_cmp<T>::type>
bool
operator== (const T& l, const line_char& r)
{
- return
- r.type == line_type::special && static_cast<T> (r.special) == l;
+ return r.type () == line_type::special &&
+ static_cast<T> (r.special ()) == l;
}
template <typename T, typename = typename line_char_cmp<T>::type>
@@ -264,16 +290,16 @@ namespace build2
bool
operator< (const line_char& l, const T& r)
{
- return
- l.type == line_type::special && static_cast<T> (l.special) < r;
+ return l.type () == line_type::special &&
+ static_cast<T> (l.special ()) < r;
}
template <typename T, typename = typename line_char_cmp<T>::type>
bool
operator< (const T& l, const line_char& r)
{
- return
- r.type == line_type::special && l < static_cast<T> (r.special);
+ return r.type () == line_type::special &&
+ l < static_cast<T> (r.special ());
}
template <typename T, typename = typename line_char_cmp<T>::type>
@@ -419,9 +445,10 @@ namespace std
bool
is (mask m, char_type c) const
{
- return m == (c.type == line_type::special && build2::digit (c.special)
- ? digit
- : 0);
+ return m ==
+ (c.type () == line_type::special && build2::digit (c.special ())
+ ? digit
+ : 0);
}
const char_type*
@@ -454,7 +481,7 @@ namespace std
char
narrow (char_type c, char def) const
{
- return c.type == line_type::special ? c.special : def;
+ return c.type () == line_type::special ? c.special () : def;
}
const char_type*
diff --git a/build2/test/script/regex.cxx b/build2/test/script/regex.cxx
index ebf28f0..bbf3f00 100644
--- a/build2/test/script/regex.cxx
+++ b/build2/test/script/regex.cxx
@@ -16,14 +16,41 @@ namespace build2
{
namespace regex
{
+ static_assert (alignof (char_string) % 4 == 0,
+ "inappropriate allignment for char_string");
+
+ static_assert (alignof (char_regex) % 4 == 0,
+ "inappropriate allignment for char_regex");
+
+ static_assert (sizeof (uintptr_t) > sizeof (int16_t),
+ "inappropriate uintptr_t size");
+
const line_char line_char::nul (0);
const line_char line_char::eof (-1);
// line_char
//
+ // We package the special character into uintptr_t with the following
+ // steps:
+ //
+ // - narrow down int value to int16_t (preserves all the valid values)
+ //
+ // - convert to uint16_t (bitwise representation stays the same, but no
+ // need to bother with signed value widening, leftmost bits loss on
+ // left shift, etc)
+ //
+ // - convert to uintptr_t (storage type)
+ //
+ // - shift left by two bits (the operation is fully reversible as
+ // uintptr_t is wider then uint16_t)
+ //
line_char::
line_char (int c)
- : type (line_type::special), special (c)
+ : data_ (
+ (static_cast <uintptr_t> (
+ static_cast<uint16_t> (
+ static_cast<int16_t> (c))) << 2) |
+ static_cast <uintptr_t> (line_type::special))
{
// @@ How can we allow anything for basic_regex but only subset
// for our own code?
@@ -81,19 +108,22 @@ namespace build2
bool
operator== (const line_char& l, const line_char& r)
{
- if (l.type == r.type)
+ line_type lt (l.type ());
+ line_type rt (r.type ());
+
+ if (lt == rt)
{
bool res (true);
- switch (l.type)
+ switch (lt)
{
- case line_type::special: res = l.special == r.special; break;
+ case line_type::special: res = l.special () == r.special (); break;
case line_type::regex: assert (false); break;
// Note that we use pointers (rather than vales) comparison
// assuming that the strings must belong to the same pool.
//
- case line_type::literal: res = l.literal == r.literal; break;
+ case line_type::literal: res = l.literal () == r.literal (); break;
}
return res;
@@ -101,10 +131,10 @@ namespace build2
// Match literal with regex.
//
- if (l.type == line_type::literal && r.type == line_type::regex)
- return regex_match (*l.literal, *r.regex);
- else if (r.type == line_type::literal && l.type == line_type::regex)
- return regex_match (*r.literal, *l.regex);
+ if (lt == line_type::literal && rt == line_type::regex)
+ return regex_match (*l.literal (), *r.regex ());
+ else if (rt == line_type::literal && lt == line_type::regex)
+ return regex_match (*r.literal (), *l.regex ());
return false;
}
@@ -115,15 +145,18 @@ namespace build2
if (l == r)
return false;
- if (l.type != r.type)
- return l.type < r.type;
+ line_type lt (l.type ());
+ line_type rt (r.type ());
+
+ if (lt != rt)
+ return lt < rt;
bool res (false);
- switch (l.type)
+ switch (lt)
{
- case line_type::special: res = l.special < r.special; break;
- case line_type::literal: res = *l.literal < *r.literal; break;
+ case line_type::special: res = l.special () < r.special (); break;
+ case line_type::literal: res = *l.literal () < *r.literal (); break;
case line_type::regex: assert (false); break;
}
@@ -229,7 +262,7 @@ namespace std
{
const char_type& c (*b++);
- *m++ = c.type == line_type::special && build2::digit (c.special)
+ *m++ = c.type () == line_type::special && build2::digit (c.special ())
? digit
: 0;
}
@@ -286,11 +319,11 @@ namespace std
{
assert (radix == 8 || radix == 10 || radix == 16);
- if (c.type != line_type::special)
+ if (c.type () != line_type::special)
return -1;
const char digits[] = "0123456789ABCDEF";
- const char* d (string::traits_type::find (digits, radix, c.special));
+ const char* d (string::traits_type::find (digits, radix, c.special ()));
return d != nullptr ? d - digits : -1;
}
}
diff --git a/build2/variable b/build2/variable
index 6aec85b..2206fe0 100644
--- a/build2/variable
+++ b/build2/variable
@@ -232,11 +232,14 @@ namespace build2
// specialization below). Types that don't fit will have to be handled
// with an extra dynamic allocation.
//
- std::aligned_storage<sizeof (string) * 11>::type data_;
-
- // VC14 needs decltype.
+ // std::max() is not constexpr on GCC 4.9.
//
- static const size_t size_ = sizeof (decltype (data_));
+ static constexpr size_t size_ =
+ sizeof (names) > sizeof (target_triplet)
+ ? sizeof (names)
+ : sizeof (target_triplet);
+
+ std::aligned_storage<size_>::type data_;
// Make sure we have sufficient storage for untyped values.
//
diff --git a/unit-tests/test/script/parser/regex.test b/unit-tests/test/script/parser/regex.test
index f3fda75..f9101c9 100644
--- a/unit-tests/test/script/parser/regex.test
+++ b/unit-tests/test/script/parser/regex.test
@@ -46,9 +46,12 @@
: malformed
:
- $* <'cmd >~/*foo/' 2>>~/EOE/ != 0
+ : Note that old versions of libc++ (for example 1.1) do not detect some
+ : regex errors. For example '*' is parsed successfully.
+ :
+ $* <'cmd >~/foo[/' 2>>~/EOE/ != 0
/testscript:1:7: error: invalid stdout regex redirect.*/
- info: regex: /*foo/
+ info: regex: /foo[/
EOE
: without-flags
@@ -128,7 +131,7 @@
:
$* <<EOI 2>>~/EOE/ != 0
cmd >>~/EOO/
- /?foo/
+ /foo[/
EOO
EOI
/testscript:2:1: error: invalid regex.*/
@@ -138,10 +141,11 @@
:
$* <<EOI 2>>~/EOE/ != 0
cmd >>~/EOO/
- /*
+ a
+ /{
EOO
EOI
- /testscript:3:1: error: invalid here-document regex.*/
+ /testscript:4:1: error: invalid here-document regex.*/
EOE
: empty
diff --git a/unit-tests/test/script/regex/driver.cxx b/unit-tests/test/script/regex/driver.cxx
index 643756e..6ff4d75 100644
--- a/unit-tests/test/script/regex/driver.cxx
+++ b/unit-tests/test/script/regex/driver.cxx
@@ -35,12 +35,20 @@ main ()
enum meta {mn = 'n', mp = 'p'};
+ // Special roundtrip.
+ //
+ assert (lc ('0').special () == '0');
+ assert (lc (0).special () == 0);
+ assert (lc (-1).special () == -1);
+ assert (lc ('p').special () == 'p');
+ assert (lc (u'\u2028').special () == u'\u2028');
+
// Special comparison.
//
assert (lc ('0') == lc ('0'));
assert (lc ('0') == '0');
assert (lc ('n') == mn);
- assert (mn == static_cast<meta> (lc ('n')));
+ assert (mn == static_cast<meta> (lc ('n')));
assert (lc ('0') != lc ('1'));
assert (lc ('0') != '1');
@@ -58,11 +66,15 @@ main ()
assert (lc ('n') <= mn);
assert (lc ('1') <= lc ("0", p));
+ // Literal roundtrip.
+ //
+ assert (*lc ("abc", p).literal () == "abc");
+
// Literal comparison.
//
- assert (lc ("a", p) == lc ("a", p));
- assert (lc ("a", p).literal == lc ("a", p).literal);
- assert (char (lc ("a", p)) == '\a');
+ assert (lc ("a", p) == lc ("a", p));
+ assert (lc ("a", p).literal () == lc ("a", p).literal ());
+ assert (char (lc ("a", p)) == '\a');
assert (lc ("a", p) != lc ("b", p));
assert (!(lc ("a", p) != lc (regex ("a"), p))); // Matches.
@@ -75,6 +87,10 @@ main ()
assert (lc ("a", p) <= lc (regex ("a"), p));
assert (lc ("a", p) < lc (regex ("c"), p));
+ // Regex roundtrip.
+ //
+ assert (regex_match ("abc", *lc (regex ("abc"), p).regex ()));
+
// Regex comparison.
//
assert (lc ("a", p) == lc (regex ("a|b"), p));
@@ -129,27 +145,6 @@ main ()
assert (v1 == vc ({'1', '2', '2'}));
}
- // Test line_string.
- //
- // @@ Add more tests.
- //
- // Note that the following code crashes if compiled with libc++ (LLVM bug
- // #31454).
- //
- // @@ Probably we can overcome it by providing our own allocator for
- // basic_string instantiation. The function allocate() could allocate some
- // more elements that would be enough not to corrupt the memory (which
- // push_back() does).
- // @@ But maybe doesn't worth to bother as the bug seems to get assigned.
- // @@ Heavily affects MacOS where clang++/libc++ is the default setup.
- //
- {
- line_string s;
- s.push_back (line_char ('0'));
- s.push_back (line_char ('1'));
- s.push_back (line_char ('2'));
- }
-
// Test line_char_locale and ctype<line_char> (only non-trivial functions).
//
{