diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2024-02-06 05:22:12 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2024-02-06 15:29:55 +0200 |
commit | 2269a611da40bd7242dbd1a3204c212ac6091fd7 (patch) | |
tree | 762f9eba621026e9bb7d8fd69107a4447783a45a /libbuild2 | |
parent | a5acaba537dab8e06be1197916acff86699aa5a3 (diff) |
Add experimental support for JSON value types
Diffstat (limited to 'libbuild2')
-rw-r--r-- | libbuild2/function.cxx | 3 | ||||
-rw-r--r-- | libbuild2/functions-json.cxx | 240 | ||||
-rw-r--r-- | libbuild2/json.cxx | 856 | ||||
-rw-r--r-- | libbuild2/json.hxx | 316 | ||||
-rw-r--r-- | libbuild2/json.ixx | 222 | ||||
-rw-r--r-- | libbuild2/parser.cxx | 124 | ||||
-rw-r--r-- | libbuild2/variable.cxx | 926 | ||||
-rw-r--r-- | libbuild2/variable.hxx | 61 | ||||
-rw-r--r-- | libbuild2/variable.ixx | 107 |
9 files changed, 2831 insertions, 24 deletions
diff --git a/libbuild2/function.cxx b/libbuild2/function.cxx index f979d92..3110547 100644 --- a/libbuild2/function.cxx +++ b/libbuild2/function.cxx @@ -355,6 +355,7 @@ namespace build2 void builtin_functions (function_map&); // functions-builtin.cxx void filesystem_functions (function_map&); // functions-filesystem.cxx void integer_functions (function_map&); // functions-integer.cxx + void json_functions (function_map&); // functions-json.cxx void name_functions (function_map&); // functions-name.cxx void path_functions (function_map&); // functions-path.cxx void process_functions (function_map&); // functions-process.cxx @@ -365,6 +366,7 @@ namespace build2 void target_triplet_functions (function_map&); // functions-target-triplet.cxx void project_name_functions (function_map&); // functions-target-triplet.cxx + void insert_builtin_functions (function_map& m) { @@ -372,6 +374,7 @@ namespace build2 builtin_functions (m); filesystem_functions (m); integer_functions (m); + json_functions (m); name_functions (m); path_functions (m); process_functions (m); diff --git a/libbuild2/functions-json.cxx b/libbuild2/functions-json.cxx new file mode 100644 index 0000000..7551fa1 --- /dev/null +++ b/libbuild2/functions-json.cxx @@ -0,0 +1,240 @@ +// file : libbuild2/functions-json.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/function.hxx> +#include <libbuild2/variable.hxx> + +#ifndef BUILD2_BOOTSTRAP +# include <libbutl/json/parser.hxx> +# include <libbutl/json/serializer.hxx> +#endif + +using namespace std; + +namespace build2 +{ + static size_t + find_index (const json_value& a, value v) + { + if (a.type != json_type::array) + fail << "expected json array instead of " << to_string (a.type) + << " as first argument"; + + auto b (a.array.begin ()), e (a.array.end ()); + auto i (find (b, e, convert<json_value> (move (v)))); + return i != e ? 
i - b : a.array.size (); + }; + + void + json_functions (function_map& m) + { + function_family f (m, "json"); + + // $value_type(<json>[, <distinguish_numbers>]) + // + // Return the type of a JSON value: `null`, `boolean`, `number`, `string`, + // `array`, or `object`. If the <distinguish_numbers> argument is `true`, + // then instead of `number` return `signed number`, `unsigned number`, or + // `hexadecimal number`. + // + f["value_type"] += [] (json_value v, optional<value> distinguish_numbers) + { + bool dn (distinguish_numbers && + convert<bool> (move (*distinguish_numbers))); + + return to_string (v.type, dn); + }; + + // $member_name(<json>) + // + // Return the name of a JSON object member. + // + f["member_name"] += [] (json_value v) + { + // A member becomes an object with a single member (see json_reverse() + // for details). + // + if (v.type == json_type::object && v.object.size () == 1) + return move (v.object.front ().name); + + fail << "json object member expected instead of " << v.type << endf; + }; + + // $member_value(<json>) + // + // Return the value of a JSON object member. + // + f["member_value"] += [] (json_value v) + { + // A member becomes an object with a single member (see json_reverse() + // for details). + // + if (v.type == json_type::object && v.object.size () == 1) + return move (v.object.front ().value); + + fail << "json object member expected instead of " << v.type << endf; + }; + + // $size(<json>) + // + // Return the size of a JSON value. + // + // The size of a `null` value is `0`. The sizes of simple values + // (`boolean`, `number`, and `string`) is `1`. The size of `array` and + // `object` values is the number of elements and members, respectively. + // + // Note that the size of a `string` JSON value is not the length of the + // string. To get the length call `$string.size()` instead by casting the + // JSON value to the `string` value type. 
+ // + f["size"] += [] (json_value v) -> size_t + { + // Note: should be consistent with value_traits<json_value>::empty(), + // json_subscript(). + // + switch (v.type) + { + case json_type::null: return 0; + case json_type::boolean: + case json_type::signed_number: + case json_type::unsigned_number: + case json_type::hexadecimal_number: + case json_type::string: break; + case json_type::array: return v.array.size (); + case json_type::object: return v.object.size (); + } + + return 1; + }; + + // $find(<json-array>, <json>) + // + // Return true if the JSON array contains the specified JSON value. + // + f["find"] += [](json_value a, value v) + { + size_t i (find_index (a, move (v))); + return i != a.array.size (); // We now know it's an array. + }; + + // $find_index(<json-array>, <json>) + // + // Return the index of the first element in the JSON array that is equal + // to the specified JSON value or `$size(json-array)` if none is found. + // + f["find_index"] += [](json_value a, value v) + { + return find_index (a, move (v)); + }; + +#ifndef BUILD2_BOOTSTRAP + + // @@ Flag to support multi-value (returning it as JSON array)? Then + // probably also in $serialize(). + // + // @@ Flag to override duplicates instead of failing? + + // $json.load(<path>) + // + // Parse the contents of the specified file as JSON input text and return + // the result as a value of the `json` type. + // + // See also `$json.parse()`. + // + // Note that this function is not pure. 
+ // + f.insert (".load", false) += [] (names xf) + { + path f (convert<path> (move (xf))); + + try + { + ifdstream is (f); + json_parser p (is, f.string ()); + return json_value (p); + } + catch (const invalid_json_input& e) + { + fail (location (f, e.line, e.column)) << "invalid json input: " << e << + info << "byte offset " << e.position << endf; + } + catch (const io_error& e) + { + fail << "unable to read from " << f << ": " << e << endf; + } + }; + + // $json.parse(<text>) + // + // Parse the specified JSON input text and return the result as a value of + // the `json` type. + // + // See also `$json.load()` and `$json.serialize()`. + // + f[".parse"] += [] (names text) + { + string t (convert<string> (move (text))); + + try + { + json_parser p (t, nullptr /* name */); + return json_value (p); + } + catch (const invalid_json_input& e) + { + fail << "invalid json input: " << e << + info << "line " << e.line + << ", column " << e.column + << ", byte offset " << e.position << endf; + } + }; + + // $serialize(<json>[, <indentation>]) + // + // Serialize the specified JSON value and return the resulting JSON output + // text. + // + // The optional <indentation> argument specifies the number of indentation + // spaces that should be used for pretty-printing. If `0` is passed, then + // no pretty-printing is performed. The default is `2` spaces. + // + // See also `$json.parse()`. + // + f["serialize"] += [] (json_value v, optional<value> indentation) + { + uint64_t i (indentation ? convert<uint64_t> (*indentation) : 2); + + try + { + // For the diagnostics test. 
+ // +#if 0 + if (v.type == json_type::string && v.string == "deadbeef") + { + v.string[4] = 0xe0; + v.string[5] = 0xe0; + } +#endif + + string o; + json_buffer_serializer s (o, i); + v.serialize (s); + return o; + } + catch (const invalid_json_output& e) + { + diag_record dr; + dr << fail << "invalid json value: " << e; + + if (e.event) + dr << info << "while serializing " << to_string (*e.event); + + if (e.offset != string::npos) + dr << info << "offending byte offset " << e.offset; + + dr << endf; + } + }; +#endif + } +} diff --git a/libbuild2/json.cxx b/libbuild2/json.cxx new file mode 100644 index 0000000..f368679 --- /dev/null +++ b/libbuild2/json.cxx @@ -0,0 +1,856 @@ +// file : libbuild2/json.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbuild2/json.hxx> + +#include <limits> + +#ifndef BUILD2_BOOTSTRAP +# include <libbutl/json/parser.hxx> +# include <libbutl/json/serializer.hxx> +#endif + +namespace build2 +{ + // json_event + // +#ifndef BUILD2_BOOTSTRAP + const char* + to_string (json_event e) + { + switch (e) + { + case json_event::begin_object: return "beginning of object"; + case json_event::end_object: return "end of object"; + case json_event::begin_array: return "beginning of array"; + case json_event::end_array: return "end of array"; + case json_event::name: return "member name"; + case json_event::string: return "string value"; + case json_event::number: return "numeric value"; + case json_event::boolean: return "boolean value"; + case json_event::null: return "null value"; + } + + return ""; + } +#endif + + // json_type + // + const char* + to_string (json_type t, bool dn) noexcept + { + using type = json_type; + + switch (t) + { + case type::null: return "null"; + case type::boolean: return "boolean"; + case type::signed_number: return dn ? "signed number" : "number"; + case type::unsigned_number: return dn ? "unsigned number" : "number"; + case type::hexadecimal_number: return dn ? 
"hexadecimal number" : "number"; + case type::string: return "string"; + case type::array: return "array"; + case type::object: return "object"; + } + return ""; + } + + // json_value + // + const json_value null_json_value (json_type::null); + + [[noreturn]] void + at_throw (json_type t, json_type e, bool index) + { + string m; + + if (t != e && t != json_type::null) + { + m = "expected "; + m += to_string (e); + m += " instead of "; + m += to_string (t); + throw invalid_argument (move (m)); + } + else + { + m = index ? "index" : "name"; + m += " out of range in "; + m += to_string (e); + throw std::out_of_range (move (m)); + } + } + + const json_value& json_value:: + at (size_t index) const + { + if (type == json_type::array) + { + if (index < array.size ()) + return array[index]; + } + + at_throw (type, json_type::array, true); + } + + json_value& json_value:: + at (size_t index) + { + if (type == json_type::array) + { + if (index < array.size ()) + return array[index]; + } + + at_throw (type, json_type::array, true); + } + + const json_value& json_value:: + operator[] (size_t index) const + { + if (type == json_type::null) + return null_json_value; + + if (type == json_type::array) + return index < array.size () ? array[index] : null_json_value; + + at_throw (type, json_type::array, true); + } + + json_value& json_value:: + operator[] (size_t index) + { + if (type == json_type::null) + { + new (&array) array_type (); + type = json_type::array; + } + + if (type == json_type::array) + { + size_t n (array.size ()); + + if (index < n) + return array[index]; + + // If there are missing elements in between, fill them with nulls. 
+ // + if (index != n) + array.resize (index, json_value ()); + + array.push_back (json_value ()); + return array.back (); + } + + at_throw (type, json_type::array, true); + } + + const json_value& json_value:: + at (const char* name) const + { + if (type == json_type::object) + { + auto i (find_if (object.begin (), object.end (), + [name] (const json_member& m) + { + return m.name == name; + })); + + if (i != object.end ()) + return i->value; + } + + at_throw (type, json_type::object, false); + } + + json_value& json_value:: + at (const char* name) + { + if (type == json_type::object) + { + auto i (find_if (object.begin (), object.end (), + [name] (const json_member& m) + { + return m.name == name; + })); + + if (i != object.end ()) + return i->value; + } + + at_throw (type, json_type::object, false); + } + + const json_value& json_value:: + operator[] (const char* name) const + { + if (type == json_type::null) + return null_json_value; + + if (type == json_type::object) + { + auto i (find_if (object.begin (), object.end (), + [name] (const json_member& m) + { + return m.name == name; + })); + + + return i != object.end () ? i->value : null_json_value; + } + + at_throw (type, json_type::object, false); + } + + json_value& json_value:: + operator[] (const char* name) + { + if (type == json_type::null) + { + new (&object) object_type (); + type = json_type::object; + } + + if (type == json_type::object) + { + auto i (find_if (object.begin (), object.end (), + [name] (const json_member& m) + { + return m.name == name; + })); + + if (i != object.end ()) + return i->value; + + object.push_back (json_member {name, json_value ()}); + return object.back ().value; + } + + at_throw (type, json_type::object, false); + } + + int json_value:: + compare (const json_value& v) const noexcept + { + int r (0); + { + // Note: we need to treat unsigned and hexadecimal the same. + // + json_type t (type == json_type::hexadecimal_number + ? 
json_type::unsigned_number + : type); + + json_type vt (v.type == json_type::hexadecimal_number + ? json_type::unsigned_number + : v.type); + + if (t != vt) + { + // Handle the special signed/unsigned number case here. + // + if (t == json_type::signed_number && + vt == json_type::unsigned_number) + { + if (signed_number < 0) + r = -1; + else + { + uint64_t u (static_cast<uint64_t> (signed_number)); + r = u < v.unsigned_number ? -1 : (u > v.unsigned_number ? 1 : 0); + } + } + else if (t == json_type::unsigned_number && + vt == json_type::signed_number) + { + if (v.signed_number < 0) + r = 1; + else + { + uint64_t u (static_cast<uint64_t> (v.signed_number)); + r = unsigned_number < u ? -1 : (unsigned_number > u ? 1 : 0); + } + } + else + r = (static_cast<uint8_t> (t) < static_cast<uint8_t> (vt) ? -1 : 1); + } + } + + if (r == 0) + { + switch (type) + { + case json_type::null: + { + r = 0; + break; + } + case json_type::boolean: + { + r = boolean == v.boolean ? 0 : boolean ? 1 : -1; + break; + } + case json_type::signed_number: + { + r = (signed_number < v.signed_number + ? -1 + : (signed_number > v.signed_number ? 1 : 0)); + break; + } + case json_type::unsigned_number: + case json_type::hexadecimal_number: + { + r = (unsigned_number < v.unsigned_number + ? -1 + : (unsigned_number > v.unsigned_number ? 1 : 0)); + break; + } + case json_type::string: + { + r = string.compare (v.string); + break; + } + case json_type::array: + { + auto i (array.begin ()), ie (array.end ()); + auto j (v.array.begin ()), je (v.array.end ()); + + for (; i != ie && j != je; ++i, ++j) + { + if ((r = i->compare (*j)) != 0) + break; + } + + if (r == 0) + r = i == ie ? (j == je ? 0 : -1) : 1; // More elements than other? + + break; + } + case json_type::object: + { + // We don't expect there to be a large number of members so it makes + // sense to iterate in the lexicographical order without making any + // copies. 
+ // + auto next = [] (object_type::const_iterator p, // == e for first + object_type::const_iterator b, + object_type::const_iterator e) + { + // We need to find an element with the "smallest" name that is + // greater than the previous entry. + // + auto n (e); + + for (auto i (b); i != e; ++i) + { + if (p == e || i->name > p->name) + { + int r; + if (n == e || (r = n->name.compare (i->name)) > 0) + n = i; + else + assert (r != 0); // No duplicates. + } + } + + return n; + }; + + auto ib (object.begin ()), ie (object.end ()), i (ie); + auto jb (v.object.begin ()), je (v.object.end ()), j (je); + + for (;;) + { + // Note: we must call next() on both. + // + i = next (i, ib, ie); + j = next (j, jb, je); + + if (i == ie || j == je) + break; + + // Determine if both have this name and if not, which name comes + // first. + // + int n (i->name.compare (j->name)); + + r = (n < 0 // If i's first, then i is greater. + ? -1 + : (n > 0 // If j's first, then j is greater. + ? 1 + : i->value.compare (j->value))); // Both have this name. + + if (r != 0) + break; + } + + if (r == 0) + r = i == ie ? (j == je ? 0 : -1) : 1; // More members than other? + + break; + } + } + } + + return r; + } + + static void + append_numbers (json_value& l, const json_value& r) noexcept + { + auto append = [&l] (uint64_t u, int64_t s, bool hex = false) + { + if (s < 0) + { + // The absolute value of a minimum signed integer is not + // representable in the 2s complement integers. So handle this + // specially for completeness. + // + uint64_t a ( + s != std::numeric_limits<int64_t>::min () + ? static_cast<uint64_t> (-s) + : static_cast<uint64_t> (std::numeric_limits<int64_t>::max ()) + 1); + + if (u >= a) + { + l.unsigned_number = u - a; + l.type = (hex + ? 
json_type::hexadecimal_number + : json_type::unsigned_number); + } + else + { + l.signed_number = -static_cast<int64_t> (a - u); + l.type = json_type::signed_number; + } + } + else + { + l.unsigned_number = u + static_cast<uint64_t> (s); + l.type = (hex + ? json_type::hexadecimal_number + : json_type::unsigned_number); + } + }; + + // We try to keep LHS hex if possible. + // + if (l.type == json_type::signed_number) + { + if (r.type == json_type::signed_number) + { + // Deal with non-negative signed numbers for completeness. + // + if (l.signed_number >= 0) + append (static_cast <uint64_t> (l.signed_number), r.signed_number); + else if (r.signed_number >= 0) + append (static_cast <uint64_t> (r.signed_number), l.signed_number); + else + l.signed_number += r.signed_number; + } + else + append (r.unsigned_number, l.signed_number); + } + else + { + if (r.type == json_type::signed_number) + append (l.unsigned_number, + r.signed_number, + l.type == json_type::hexadecimal_number); + else + l.unsigned_number += r.unsigned_number; + } + } + + void json_value:: + append (json_value&& v) + { + if (type == json_type::null) + { + *this = move (v); + return; + } + else if (type == json_type::array) + { + if (v.type == json_type::array) + { + if (array.empty ()) + array = move (v.array); + else + array.insert (array.end (), + make_move_iterator (v.array.begin ()), + make_move_iterator (v.array.end ())); + } + else + array.push_back (move (v)); + + return; + } + else + { + switch (v.type) + { + case json_type::null: return; + case json_type::boolean: + { + if (type != json_type::boolean) + break; + + boolean = boolean || v.boolean; + return; + } + case json_type::signed_number: + case json_type::unsigned_number: + case json_type::hexadecimal_number: + { + if (type != json_type::signed_number && + type != json_type::unsigned_number && + type != json_type::hexadecimal_number) + break; + + append_numbers (*this, v); + return; + } + case json_type::string: + { + if (type != 
json_type::string) + break; + + string += v.string; + return; + } + case json_type::array: break; + case json_type::object: + { + if (type != json_type::object) + break; + + if (object.empty ()) + object = move (v.object); + else + { + for (json_member& m: v.object) + { + auto i (find_if (object.begin (), object.end (), + [&m] (const json_member& o) + { + return m.name == o.name; + })); + if (i != object.end ()) + i->value = move (m.value); + else + object.push_back (move (m)); + } + } + + return; + } + } + } + + throw invalid_argument ( + string_type ("unable to append ") + to_string (v.type) + " to " + + to_string (type)); + } + + void json_value:: + prepend (json_value&& v) + { + if (type == json_type::null) + { + *this = move (v); + return; + } + else if (type == json_type::array) + { + if (v.type == json_type::array) + { + if (array.empty ()) + array = move (v.array); + else + array.insert (array.begin (), + make_move_iterator (v.array.begin ()), + make_move_iterator (v.array.end ())); + } + else + array.insert (array.begin (), move (v)); + + return; + } + else + { + switch (v.type) + { + case json_type::null: return; + case json_type::boolean: + { + if (type != json_type::boolean) + break; + + boolean = boolean || v.boolean; + return; + } + case json_type::signed_number: + case json_type::unsigned_number: + case json_type::hexadecimal_number: + { + if (type != json_type::signed_number && + type != json_type::unsigned_number && + type != json_type::hexadecimal_number) + break; + + append_numbers (*this, v); + return; + } + case json_type::string: + { + if (type != json_type::string) + break; + + string.insert (0, v.string); + return; + } + case json_type::array: break; + case json_type::object: + { + if (type != json_type::object) + break; + + if (object.empty ()) + object = move (v.object); + else + { + for (json_member& m: v.object) + { + auto i (find_if (object.begin (), object.end (), + [&m] (const json_member& o) + { + return m.name == o.name; + })); + if 
(i != object.end ()) + i->value = move (m.value); + else + object.insert (object.begin (), move (m)); + } + } + + return; + } + } + } + + throw invalid_argument ( + string_type ("unable to prepend ") + to_string (v.type) + " to " + + to_string (type)); + } + +#ifndef BUILD2_BOOTSTRAP + json_value:: + json_value (json_parser& p, optional<json_type> et) + { + using namespace butl::json; + + // A JSON input text cannot be empty. + // + // Once we have JSON5 support we will be able to distinguish hexadecimal + // numbers. + // + json_type t (json_type::null); + switch (*p.next ()) + { + case event::begin_object: t = json_type::object; break; + case event::begin_array: t = json_type::array; break; + case event::string: t = json_type::string; break; + case event::number: t = (p.value ()[0] == '-' + ? json_type::signed_number + : json_type::unsigned_number); break; + case event::boolean: t = json_type::boolean; break; + case event::null: t = json_type::null; break; + case event::name: + case event::end_array: + case event::end_object: + { + assert (false); + type = json_type::null; + return; + } + } + + if (et && *et != t) + { + throw invalid_json_input ( + p.input_name != nullptr ? p.input_name : "", + p.line (), + p.column (), + p.position (), + string_type ("expected ") + to_string (*et) + " instead of " + + to_string (t)); + } + + switch (t) + { + case json_type::object: + { + object_type o; // For exception safety. + while (*p.next () != event::end_object) + { + string_type n (p.name ()); + + // Check for duplicates. For now we fail but in the future we may + // provide a mode (via a flag) to override instead. + // + if (find_if (o.begin (), o.end (), + [&n] (const json_member& m) + { + return m.name == n; + }) != o.end ()) + { + throw invalid_json_input ( + p.input_name != nullptr ? 
p.input_name : "", + p.line (), + p.column (), + p.position (), + "duplicate object member '" + n + '\''); + } + + o.push_back (json_member {move (n), json_value (p)}); + } + + new (&object) object_type (move (o)); + type = t; + break; + } + case json_type::array: + { + array_type c; // For exception safety. + while (*p.peek () != event::end_array) + c.push_back (json_value (p)); + p.next (); // Consume end_array. + + new (&array) array_type (move (c)); + type = t; + break; + } + case json_type::string: + { + string_type& s (p.value ()); + + // Don't move if small string optimized. + // + if (s.size () > 15) + new (&string) string_type (move (s)); + else + new (&string) string_type (s); + + type = t; + break; + } + case json_type::signed_number: + { + signed_number = p.value<int64_t> (); + type = t; + break; + } + case json_type::unsigned_number: + case json_type::hexadecimal_number: + { + unsigned_number = p.value<uint64_t> (); + type = t; + break; + } + case json_type::boolean: + { + boolean = p.value<bool> (); + type = t; + break; + } + case json_type::null: + { + type = t; + break; + } + } + } + + void json_value:: + serialize (json_buffer_serializer& s, optional<json_type> et) const + { + using namespace butl::json; + + if (et && *et != type) + { + throw invalid_json_output ( + nullopt, + invalid_json_output::error_code::invalid_value, + string_type ("expected ") + to_string (*et) + " instead of " + + to_string (type)); + } + + switch (type) + { + case json_type::null: + { + s.value (nullptr); + break; + } + case json_type::boolean: + { + s.value (boolean); + break; + } + case json_type::signed_number: + { + s.value (signed_number); + break; + } + case json_type::unsigned_number: + case json_type::hexadecimal_number: + { + // When we have JSON5 support, we will be able to serialize + // hexadecimal properly. 
+ // + s.value (unsigned_number); + break; + } + case json_type::string: + { + s.value (string); + break; + } + case json_type::array: + { + s.begin_array (); + for (const json_value& e: array) + e.serialize (s); + s.end_array (); + break; + } + case json_type::object: + { + s.begin_object (); + for (const json_member& m: object) + { + s.member_name (m.name); + m.value.serialize (s); + } + s.end_object (); + break; + } + } + } + +#else + json_value:: + json_value (json_parser&, optional<json_type>) + { + assert (false); + type = json_type::null; + } + + void json_value:: + serialize (json_buffer_serializer&, optional<json_type>) const + { + assert (false); + } +#endif +} diff --git a/libbuild2/json.hxx b/libbuild2/json.hxx new file mode 100644 index 0000000..1f2694d --- /dev/null +++ b/libbuild2/json.hxx @@ -0,0 +1,316 @@ +// file : libbuild2/json.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_JSON_HXX +#define LIBBUILD2_JSON_HXX + +#include <libbuild2/types.hxx> +#include <libbuild2/utility.hxx> + +#include <libbuild2/export.hxx> + +namespace butl +{ + namespace json + { + enum class event: uint8_t; + class parser; + class buffer_serializer; + class stream_serializer; + class invalid_json_input; + class invalid_json_output; + } +} + +namespace build2 +{ + using json_event = butl::json::event; + using json_parser = butl::json::parser; + using json_buffer_serializer = butl::json::buffer_serializer; + using json_stream_serializer = butl::json::stream_serializer; + using butl::json::invalid_json_input; + using butl::json::invalid_json_output; + +#ifndef BUILD2_BOOTSTRAP + LIBBUILD2_SYMEXPORT const char* + to_string (json_event); +#endif + + // @@ TODO: + // + // - provide swap(). + // - provide operator=(uint64_t), etc. + // - provide std::hash specialization + // - tighten at()/[] interface in json_array and json_object + // - tighten noexcept where possible + // - operator bool() - in a sense null is like nullopt. 
+ // + + // This JSON representation has one extension compared to the standard JSON + // model: it distinguishes between signed, unsigned, and hexadecimal + // numbers. + // + // Note also that we don't assume that object members are in a sorted order + // (but do assume there are no duplicates). However, we could add an + // argument to signal that this is the case to speed up some functions, for + // example, compare(). + // + enum class json_type: uint8_t + { + null, // Note: keep first for comparison. + boolean, + signed_number, + unsigned_number, + hexadecimal_number, + string, + array, + object, + }; + + // Return the JSON type as string. If distinguish_numbers is true, then + // distinguish between the signed, unsigned, and hexadecimal types. + // + LIBBUILD2_SYMEXPORT const char* + to_string (json_type, bool distinguish_numbers = false) noexcept; + + inline ostream& + operator<< (ostream& os, json_type t) {return os << to_string (t);} + + struct json_member; + + class LIBBUILD2_SYMEXPORT json_value + { + public: + using string_type = build2::string; + using array_type = vector<json_value>; + using object_type = vector<json_member>; + + json_type type; + + union + { + bool boolean; + int64_t signed_number; + uint64_t unsigned_number; // Also used for hexadecimal_number. + string_type string; + array_type array; + object_type object; + }; + + explicit + json_value (json_type = json_type::null) noexcept; + + explicit + json_value (std::nullptr_t) noexcept; + + explicit + json_value (bool) noexcept; + + explicit + json_value (int64_t) noexcept; + + explicit + json_value (uint64_t, bool hexadecimal = false) noexcept; + + explicit + json_value (string_type); + + // If the expected type is specified, then fail if it does not match + // parsed. Throws invalid_json_input. + // + explicit + json_value (json_parser&, optional<json_type> expected = {}); + + // If the expected type is specified, then fail if it does not match the + // value's. 
Throws invalid_json_output. + // + void + serialize (json_buffer_serializer&, + optional<json_type> expected = {}) const; + + // Note that values of different types are never equal, except for + // signed/unsigned/hexadecimal numbers. Null is equal to null and is less + // than any other value. Arrays are compared lexicographically. Object + // members are considered in the lexicographically-compared name-ascending + // order (see RFC8785). An absent member is less than a present member + // (even if it's null). + // + int + compare (const json_value&) const noexcept; + + // Append/prepend one JSON value to another. Throw invalid_argument if the + // values are incompatible. Note that for numbers this can also lead to + // the change of the value type. + // + // Append/prepend to an object overrides existing members. Append/prepend + // an array to an array splices in the array elements rather than adding + // an element of the array type. + // + void + append (json_value&&); + + void + prepend (json_value&&); + + + // Array element access. + // + // If the index is out of array bounds, the at() functions throw + // std::out_of_range, the const operator[] returns null_json_value, and + // the non-const operator[] inserts a new null value at the specified + // position (filling any missing elements in between with nulls) and + // returns that. All three functions throw std::invalid_argument if the + // value is not an array or null with null treated as (missing) array + // rather than wrong value type (and with at() functions throwing + // out_of_range in this case). + // + // Note that non-const operator[] will not only insert a new element but + // will also turn the value it is called upon into array if it is null. + // This semantics allows you to string several subscripts to build up a + // chain of values. 
+ // + // Note also that while the operator[] interface is convenient for + // accessing and modifying (or building up) values deep in the tree, it + // can lead to inefficiencies or even undesirable semantics during + // otherwise read-only access of a non-const object due to the potential + // insertion of null values for missing array elements. As a result, it's + // recommended to always use a const reference for read-only access (or use + // the at() interface if this is deemed too easy to forget). + // + const json_value& + at (size_t) const; + + json_value& + at (size_t); + + const json_value& + operator[] (size_t) const; + + json_value& + operator[] (size_t); + + + // Object member access. + // + // If a member with the specified name is not found in the object, the + // at() functions throw std::out_of_range, the const operator[] returns + // null_json_value, and the non-const operator[] adds a new member with + // the specified name and null value and returns that value. All three + // functions throw std::invalid_argument if the value is not an object or + // null with null treated as (missing) object rather than wrong value type + // (and with at() functions throwing out_of_range in this case). + // + // Note that non-const operator[] will not only insert a new member but + // will also turn the value it is called upon into object if it is null. + // This semantics allows you to string several subscripts to build up a + // chain of values. + // + // Note also that while the operator[] interface is convenient for + // accessing and modifying (or building up) values deep in the tree, it + // can lead to inefficiencies or even undesirable semantics during + // otherwise read-only access of a non-const object due to the potential + // insertion of null values for missing object members. As a result, it's + // recommended to always use a const reference for read-only access (or use + // the at() interface if this is deemed too easy to forget). 
+ // + const json_value& + at (const char*) const; + + json_value& + at (const char*); + + const json_value& + operator[] (const char*) const; + + json_value& + operator[] (const char*); + + const json_value& + at (const string_type&) const; + + json_value& + at (const string_type&); + + const json_value& + operator[] (const string_type&) const; + + json_value& + operator[] (const string_type&); + + // Note that the moved-from value becomes JSON null value. + // + json_value (json_value&&) noexcept; + json_value (const json_value&); + + json_value& operator= (json_value&&) noexcept; + json_value& operator= (const json_value&); + + ~json_value () noexcept; + }; + + LIBBUILD2_SYMEXPORT extern const json_value null_json_value; + + inline bool + operator== (const json_value& x, const json_value& y) {return x.compare (y) == 0;} + + inline bool + operator!= (const json_value& x, const json_value& y) {return !(x == y);} + + inline bool + operator< (const json_value& x, const json_value& y) {return x.compare (y) < 0;} + + inline bool + operator<= (const json_value& x, const json_value& y) {return x.compare (y) <= 0;} + + inline bool + operator> (const json_value& x, const json_value& y) {return !(x <= y);} + + inline bool + operator>= (const json_value& x, const json_value& y) {return !(x < y);} + + // A JSON object member. + // + struct json_member + { + // @@ TODO: add some convenience constructors? + + string name; + json_value value; + }; + + // A JSON value that can only be an array. + // + class /*LIBBUILD2_SYMEXPORT*/ json_array: public json_value + { + public: + // Create empty array. + // + json_array () noexcept; + + explicit + json_array (json_parser&); + + void + serialize (json_buffer_serializer& s) const; + }; + + // A JSON value that can only be an object. + // + class /*LIBBUILD2_SYMEXPORT*/ json_object: public json_value + { + public: + // Create empty object. 
+ // + json_object () noexcept; + + explicit + json_object (json_parser&); + + void + serialize (json_buffer_serializer& s) const; + }; +} + +#include <libbuild2/json.ixx> + +#endif // LIBBUILD2_JSON_HXX diff --git a/libbuild2/json.ixx b/libbuild2/json.ixx new file mode 100644 index 0000000..c2b8845 --- /dev/null +++ b/libbuild2/json.ixx @@ -0,0 +1,222 @@ +// file : libbuild2/json.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +namespace build2 +{ + inline json_value:: + ~json_value () noexcept + { + switch (type) + { + case json_type::null: + case json_type::boolean: + case json_type::signed_number: + case json_type::unsigned_number: + case json_type::hexadecimal_number: break; + case json_type::string: string.~string_type (); break; + case json_type::array: array.~array_type (); break; + case json_type::object: object.~object_type (); break; + } + } + + inline json_value:: + json_value (json_type t) noexcept + : type (t) + { + switch (type) + { + case json_type::null: break; + case json_type::boolean: boolean = false; break; + case json_type::signed_number: signed_number = 0; break; + case json_type::unsigned_number: + case json_type::hexadecimal_number: unsigned_number = 0; break; + case json_type::string: new (&string) string_type (); break; + case json_type::array: new (&array) array_type (); break; + case json_type::object: new (&object) object_type (); break; + } + } + + inline json_value:: + json_value (std::nullptr_t) noexcept + : type (json_type::null) + { + } + + inline json_value:: + json_value (bool v) noexcept + : type (json_type::boolean), boolean (v) + { + } + + inline json_value:: + json_value (int64_t v) noexcept + : type (json_type::signed_number), signed_number (v) + { + } + + inline json_value:: + json_value (uint64_t v, bool hex) noexcept + : type (hex + ? 
json_type::hexadecimal_number + : json_type::unsigned_number), + unsigned_number (v) + { + } + + inline json_value:: + json_value (string_type v) + : type (json_type::string), string (move (v)) + { + } + + inline const json_value& json_value:: + at (const string_type& n) const + { + return at (n.c_str ()); + } + + inline json_value& json_value:: + at (const string_type& n) + { + return at (n.c_str ()); + } + + inline const json_value& json_value:: + operator[] (const string_type& n) const + { + return operator[] (n.c_str ()); + } + + inline json_value& json_value:: + operator[] (const string_type& n) + { + return operator[] (n.c_str ()); + } + + inline json_value:: + json_value (json_value&& v) noexcept + : type (v.type) + { + switch (type) + { + case json_type::null: + break; + case json_type::boolean: + boolean = v.boolean; + break; + case json_type::signed_number: + signed_number = v.signed_number; + break; + case json_type::unsigned_number: + case json_type::hexadecimal_number: + unsigned_number = v.unsigned_number; + break; + case json_type::string: + new (&string) string_type (move (v.string)); + v.string.~string_type (); + break; + case json_type::array: + new (&array) array_type (move (v.array)); + v.array.~array_type (); + break; + case json_type::object: + new (&object) object_type (move (v.object)); + v.object.~object_type (); + break; + } + + v.type = json_type::null; + } + + inline json_value:: + json_value (const json_value& v) + : type (v.type) + { + switch (type) + { + case json_type::null: + break; + case json_type::boolean: + boolean = v.boolean; + break; + case json_type::signed_number: + signed_number = v.signed_number; + break; + case json_type::unsigned_number: + case json_type::hexadecimal_number: + unsigned_number = v.unsigned_number; + break; + case json_type::string: + new (&string) string_type (v.string); + break; + case json_type::array: + new (&array) array_type (v.array); + break; + case json_type::object: + new (&object) object_type 
(v.object); + break; + } + } + + inline json_value& json_value:: + operator= (json_value&& v) noexcept + { + if (this != &v) + { + this->~json_value (); + new (this) json_value (move (v)); + } + return *this; + } + + inline json_value& json_value:: + operator= (const json_value& v) + { + if (this != &v) + { + this->~json_value (); + new (this) json_value (v); + } + return *this; + } + + // json_array + // + inline json_array:: + json_array () noexcept + : json_value (json_type::array) + { + } + + inline json_array:: + json_array (json_parser& p) + : json_value (p, json_type::array) + { + } + + inline void json_array:: + serialize (json_buffer_serializer& s) const + { + json_value::serialize (s, json_type::array); + } + + // json_object + // + inline json_object:: + json_object () noexcept + : json_value (json_type::object) + { + } + + inline json_object:: + json_object (json_parser& p) + : json_value (p, json_type::object) + { + } + + inline void json_object:: + serialize (json_buffer_serializer& s) const + { + json_value::serialize (s, json_type::object); + } +} diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 043cd10..1ac159b 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -5761,30 +5761,106 @@ namespace build2 const value_type* parser:: find_value_type (const scope*, const string& n) { - auto ptr = [] (const value_type& vt) {return &vt;}; - - return - n == "bool" ? ptr (value_traits<bool>::value_type) : - n == "int64" ? ptr (value_traits<int64_t>::value_type) : - n == "uint64" ? ptr (value_traits<uint64_t>::value_type) : - n == "string" ? ptr (value_traits<string>::value_type) : - n == "path" ? ptr (value_traits<path>::value_type) : - n == "dir_path" ? ptr (value_traits<dir_path>::value_type) : - n == "abs_dir_path" ? ptr (value_traits<abs_dir_path>::value_type) : - n == "name" ? ptr (value_traits<name>::value_type) : - n == "name_pair" ? ptr (value_traits<name_pair>::value_type) : - n == "target_triplet" ? 
ptr (value_traits<target_triplet>::value_type) : - n == "project_name" ? ptr (value_traits<project_name>::value_type) : - - n == "int64s" ? ptr (value_traits<int64s>::value_type) : - n == "uint64s" ? ptr (value_traits<uint64s>::value_type) : - n == "strings" ? ptr (value_traits<strings>::value_type) : - n == "paths" ? ptr (value_traits<paths>::value_type) : - n == "dir_paths" ? ptr (value_traits<dir_paths>::value_type) : - n == "names" ? ptr (value_traits<vector<name>>::value_type) : - n == "cmdline" ? ptr (value_traits<cmdline>::value_type) : - - nullptr; + switch (n[0]) + { + case 'a': + { + if (n == "abs_dir_path") return &value_traits<abs_dir_path>::value_type; + break; + } + case 'b': + { + if (n == "bool") return &value_traits<bool>::value_type; + break; + } + case 'c': + { + if (n == "cmdline") return &value_traits<cmdline>::value_type; + break; + } + case 'd': + { + if (n.compare (0, 8, "dir_path") == 0) + { + if (n[8] == '\0') return &value_traits<dir_path>::value_type; + if (n[8] == 's' && + n[9] == '\0') return &value_traits<dir_paths>::value_type; + } + break; + } + case 'i': + { + if (n.compare (0, 5, "int64") == 0) + { + if (n[5] == '\0') return &value_traits<int64_t>::value_type; + if (n[5] == 's' && + n[6] == '\0') return &value_traits<int64s>::value_type; + } + break; + } + case 'j': + { + if (n.compare (0, 4, "json") == 0) + { + if (n[4] == '\0') return &value_traits<json_value>::value_type; + if (n == "json_array") return &value_traits<json_array>::value_type; + if (n == "json_object") return &value_traits<json_object>::value_type; + } + break; + } + case 'n': + { + if (n.compare (0, 4, "name") == 0) + { + if (n[4] == '\0') return &value_traits<name>::value_type; + if (n[4] == 's' && + n[5] == '\0') return &value_traits<vector<name>>::value_type; + if (n == "name_pair") return &value_traits<name_pair>::value_type; + } + break; + } + + case 'p': + { + if (n.compare (0, 4, "path") == 0) + { + if (n[4] == '\0') return 
&value_traits<path>::value_type; + if (n[4] == 's' && + n[5] == '\0') return &value_traits<paths>::value_type; + } + else if (n == "project_name") return &value_traits<project_name>::value_type; + break; + } + case 's': + { + if (n.compare (0, 6, "string") == 0) + { + if (n[6] == '\0') return &value_traits<string>::value_type; + if (n[6] == 's' && + n[7] == '\0') return &value_traits<strings>::value_type; + } + break; + } + case 't': + { + if (n == "target_triplet") return &value_traits<target_triplet>::value_type; + break; + } + case 'u': + { + if (n.compare (0, 6, "uint64") == 0) + { + if (n[6] == '\0') return &value_traits<uint64_t>::value_type; + if (n[6] == 's' && + n[7] == '\0') return &value_traits<uint64s>::value_type; + } + break; + } + default: + break; + } + + return nullptr; } void parser:: diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx index 0ad63b1..b0fe32a 100644 --- a/libbuild2/variable.cxx +++ b/libbuild2/variable.cxx @@ -3,10 +3,16 @@ #include <libbuild2/variable.hxx> +#include <cstdio> // snprintf() #include <cstring> // memcmp(), memcpy() #include <libbutl/path-pattern.hxx> +#ifndef BUILD2_BOOTSTRAP +# include <libbutl/json/parser.hxx> +# include <libbutl/json/serializer.hxx> +#endif + #include <libbuild2/target.hxx> #include <libbuild2/diagnostics.hxx> @@ -584,6 +590,8 @@ namespace build2 if (!wspace (v[0])) { + // Note: see also similar code in to_json_value(). + // int b (v[0] == '0' && (v[1] == 'x' || v[1] == 'X') ? 16 : 10); // May throw invalid_argument or out_of_range. @@ -1501,6 +1509,924 @@ namespace build2 nullptr // Iterate. }; + // json + // + static string + to_string_value (name& n, const char* what) + { + if (n.typed () || n.qualified () || n.pattern) + throw_invalid_argument (n, nullptr, what); + + string s; + + if (n.simple ()) + s.swap (n.value); + else + { + // Note that here we cannot assume what's in dir is really a path (think + // s/foo/bar/) so we have to reverse it exactly. 
+ // + s = move (n.dir).representation (); // Move out of path. + + if (!n.value.empty ()) + s += n.value; // Separator is already there. + } + + return s; + } + + static json_value + to_json_value (name& n, const char* what) + { + if (n.typed () || n.qualified () || n.pattern) + throw_invalid_argument (n, nullptr, what); + + string s; + + if (n.simple ()) + s.swap (n.value); + else + { + // Note that here we cannot assume what's in dir is really a path (think + // s/foo/bar/) so we have to reverse it exactly. + // + s = move (n.dir).representation (); // Move out of path. + + if (!n.value.empty ()) + s += n.value; // Separator is already there. + + // A path is always interpreted as a JSON string. + // + return json_value (move (s)); + } + + bool f; + if (s.empty ()) + return json_value (string ()); + if (s == "null") + return json_value (); + else if ((f = (s == "true")) || s == "false") + return json_value (f); + else if (s.find_first_not_of ( + "0123456789", (f = (s[0] == '-')) ? 1 : 0) == string::npos) + { + name n (move (s)); + return f + ? json_value (value_traits<int64_t>::convert (n, nullptr)) + : json_value (value_traits<uint64_t>::convert (n, nullptr)); + } + // + // Handle the hex notation similar to <uint64_t>::convert() (and JSON5). + // + else if (s[0] == '0' && + (s[1] == 'x' || s[1] == 'X') && + s.size () > 2 && + s.find_first_not_of ("0123456789aAbBcCdDeEfF", 2) == string::npos) + { + return json_value ( + value_traits<uint64_t>::convert (name (move (s)), nullptr), + true /* hex */); + } + else + { + // If this is not a JSON representation of string, array, or object, + // then treat it as a string. + // + // Note that the special `"`, `{`, and `[` characters could be preceded + // with whitespaces. Note: see similar test in json_object below. + // + size_t p (s.find_first_not_of (" \t\n\r")); + + if (p == string::npos || (s[p] != '"' && s[p] != '{' && s[p] != '[')) + return json_value (move (s)); + + // Parse as valid JSON input text. 
+ // +#ifndef BUILD2_BOOTSTRAP + try + { + json_parser p (s, nullptr /* name */); + return json_value (p); + } + catch (const invalid_json_input& e) + { + // Turned out printing line/column/offset can be misleading since we + // could be parsing a single name from a potential list of names. + // It feels like without also printing the value this is not of much use. + // +#if 0 + string m ("invalid json input at line "); + m += to_string (e.line); + m += ", column "; + m += to_string (e.column); + m += ", byte offset "; + m += to_string (e.position); + m += ": "; + m += e.what (); +#else + string m ("invalid json input: "); + m += e.what (); +#endif + throw invalid_argument (move (m)); + } +#else + throw invalid_argument ("json parsing requested during bootstrap"); +#endif + } + } + + json_value value_traits<json_value>:: + convert (names&& ns) + { + size_t n (ns.size ()); + + if (n == 0) + { + // Note: this is the ([json] ) case, not ([json] ""). See also the + // relevant note in json_reverse() below. + // + return json_value (); // null + } + else if (n == 1) + { + return to_json_value (ns.front (), "json"); + } + else + { + if (ns.front ().pair) // object + { + json_value r (json_type::object); + r.object.reserve (n / 2); + + for (auto i (ns.begin ()); i != ns.end (); ++i) + { + if (!i->pair) + throw invalid_argument ( + "expected pair in json member value '" + to_string (*i) + '\''); + + // Note that we could support JSON-quoted member names but it's + // unclear why would someone want that (and if they do, they can + // always specify JSON text instead). + // + // @@ The empty pair value ([json] one@ ) which is currently empty + // string is inconsistent with empty value ([json] ) above which + // is null. Maybe we could distinguish the one@ and one@"" cases + // via type hints? + // + string n (to_string_value (*i, "json member name")); + json_value v (to_json_value (*++i, "json member")); + + // Check for duplicates. One can use append/prepend to merge. 
+ // + if (find_if (r.object.begin (), r.object.end (), + [&n] (const json_member& m) + { + return m.name == n; + }) != r.object.end ()) + { + throw invalid_argument ( + "duplicate json object member '" + n + '\''); + } + + r.object.push_back (json_member {move (n), move (v)}); + } + + return r; + } + else // array + { + json_value r (json_type::array); + r.array.reserve (n); + + for (name& n: ns) + { + if (n.pair) + throw invalid_argument ( + "unexpected pair in json array element value '" + + to_string (n) + '\''); + + r.array.push_back (to_json_value (n, "json array element")); + } + + return r; + } + } + } + + static void + json_assign (value& v, names&& ns, const variable* var) + { + using traits = value_traits<json_value>; + + try + { + traits::assign (v, traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. + // + diag_record dr (fail); + dr << "invalid json value"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << ": " << e; + } + } + + static void + json_append (value& v, names&& ns, const variable* var) + { + using traits = value_traits<json_value>; + + try + { + traits::append (v, traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. + // + diag_record dr (fail); + dr << "invalid json value"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << ": " << e; + } + } + + static void + json_prepend (value& v, names&& ns, const variable* var) + { + using traits = value_traits<json_value>; + + try + { + traits::prepend (v, traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. 
+ // + diag_record dr (fail); + dr << "invalid json value"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << ": " << e; + } + } + + static names_view + json_reverse (const value& x, names& ns, bool) + { + const json_value& v (x.as<json_value> ()); + + switch (v.type) + { + case json_type::null: + { + // @@ Hm, it would be nice if this somehow got mapped to [null]/empty + // but still be round-trippable to JSON null. Perhaps via type + // hint? + // + // But won't `print ([json] null)` printing nothing be + // surprising. Also, it's not clear that mapping JSON null to our + // [null] is a good idea since our [null] means "no value" while + // JSON null means "null value". + // + // Maybe the current semantics is the best: we map our [null] and + // empty names to JSON null (naturally) but we always reverse JSON + // null to the JSON "null" literal. Or maybe we could reverse it to + // null but type-hint it that it's a spelling of [null]/empty. + // Quite fuzzy, admittedly. In our model null values decay to empty + // so JSON null decaying to "null" literal is strange. Let's try + // and see how it goes. See also json_subscript_impl() below. + // +#if 0 + ns.push_back (name ("null")); +#endif + break; + } + case json_type::boolean: + { + ns.push_back (name (v.boolean ? "true" : "false")); + break; + } + case json_type::signed_number: + { + ns.push_back (value_traits<int64_t>::reverse (v.signed_number)); + break; + } + case json_type::unsigned_number: + { + ns.push_back (value_traits<uint64_t>::reverse (v.unsigned_number)); + break; + } + case json_type::hexadecimal_number: + { + // Hexadecimal representation of 64-bit integers requires a maximum of + // 18 characters (plus '\0'): 0xffffffffffffffff. 
+ // + char buf[19]; + snprintf (buf, sizeof (buf), + "0x%llx", + static_cast<unsigned long long> (v.unsigned_number)); + + ns.push_back (name (string (buf))); + break; + } + case json_type::string: + // + // @@ Hm, it would be nice if this somehow got mapped to unquoted + // string but still be round-trippable to JSON value. Perhaps via + // the type hint idea? This is pretty bad. See also subscript where we + // hacked around this somewhat. + // + // Note that it may be tempting to fix this by only quoting strings + // that would otherwise be mis-interpreted (null, true, all digits, + // etc). But that would be worse: things would seem to work but + // fall apart in the perhaps unlikely event of encountering one of + // the problematic values. It is better to produce a consistent + // result. + // + case json_type::array: + case json_type::object: + { + // Serialize as JSON output text. + // + string o; + +#ifndef BUILD2_BOOTSTRAP + try + { + // Disable pretty-printing so that the output is all on the same + // line. While it's not going to be easy to read for larger JSON + // outputs, it will fit better into the existing model where none of + // the value representations use formatting newlines. If a pretty- + // printed representation is required, then the $json.serialize() + // function can be used to obtain it. + // + json_buffer_serializer s (o, 0 /* indentation */); + v.serialize (s); + } + catch (const invalid_json_output& e) + { + // Note: the same diagnostics as in $json.serialize(). 
+ // + diag_record dr; + dr << fail << "invalid json value: " << e; + + if (e.event) + dr << info << "while serializing " << to_string (*e.event); + + if (e.offset != string::npos) + dr << info << "offending byte offset " << e.offset; + } +#else + fail << "json serialization requested during bootstrap"; +#endif + ns.push_back (name (move (o))); + break; + } + } + + return ns; + } + + static int + json_compare (const value& l, const value& r) + { + return l.as<json_value> ().compare (r.as<json_value> ()); + } + + // Return null value if the index/name is out of range. + // + static value + json_subscript_impl (const value& val, value* val_data, + uint64_t i, const string& n, bool index) + { + const json_value& jv (val.as<json_value> ()); + + json_value jr; + + if (index) + { + if (i >= (jv.type == json_type::array ? jv.array.size () : + jv.type == json_type::object ? jv.object.size () : 1)) + return value (); + + switch (jv.type) + { + case json_type::null: + return value (); // JSON null has no elements. + case json_type::boolean: + case json_type::signed_number: + case json_type::unsigned_number: + case json_type::hexadecimal_number: + case json_type::string: + { + // Steal the value if possible. + // + jr = (&val == val_data + ? json_value (move (const_cast<json_value&> (jv))) + : json_value (jv)); + break; + } + case json_type::array: + { + // Steal the value if possible. + // + const json_value& r (jv.array[i]); + jr = (&val == val_data + ? json_value (move (const_cast<json_value&> (r))) + : json_value (r)); + break; + } + case json_type::object: + { + // Represent as an object with one member. + // + new (&jr.object) json_value::object_type (); + jr.type = json_type::object; + + // Steal the member if possible. + // + const json_member& m (jv.object[i]); + jr.object.push_back (&val == val_data + ? 
json_member (move (const_cast<json_member&> (m))) + : json_member (m)); + break; + } + } + } + else + { + auto i (find_if (jv.object.begin (), + jv.object.end (), + [&n] (const json_member& m) + { + return m.name == n; + })); + + if (i == jv.object.end ()) + return value (); + + // Steal the member value if possible. + // + jr = (&val == val_data + ? json_value (move (const_cast<json_value&> (i->value))) + : json_value (i->value)); + } + + // @@ As a temporary work around for the lack of type hints (see + // json_reverse() for background), reverse simple JSON values to the + // corresponding fundamental type values. The thinking here is that + // subscript (and iteration) is primarily meant for consumption (as + // opposed to reverse() where it is used to build up values and thus + // needs things to be fully reversible). Once we add type hints, then + // this should become unnecessary and we should be able to just always + // return json_value. + // + // @@ TODO: split this function into two (index/name) once we get rid of this. + // +#if 1 + switch (jr.type) + { + case json_type::null: return value (names {}); + case json_type::boolean: return value (jr.boolean); + case json_type::signed_number: return value (jr.signed_number); + case json_type::unsigned_number: + case json_type::hexadecimal_number: return value (jr.unsigned_number); + case json_type::string: return value (move (jr.string)); + case json_type::array: + case json_type::object: break; + } +#endif + + return value (move (jr)); + } + + static value + json_subscript (const value& val, value* val_data, + value&& sub, + const location& sloc, + const location& bloc) + { + const json_value* jv (val.null ? nullptr : &val.as<json_value> ()); + + // For consistency with other places treat JSON null value as maybe + // missing array/object. In particular, we don't want to fail trying to + // lookup by-name on a null value which could have been an object. 
+ // + if (jv != nullptr && jv->type == json_type::null) + jv = nullptr; + + // Process subscript even if the value is null to make sure it is valid. + // + bool index; + uint64_t i (0); + string n; + + // Always interpret uint64-typed subscript as index even for objects. + // This can be used, for example, to iterate with an index over object + // members. + // + if (!sub.null && sub.type == &value_traits<uint64_t>::value_type) + { + i = sub.as<uint64_t> (); + index = true; + } + else + { + // How we interpret the subscript depends on the JSON value type. For + // objects we treat it as a string (member name) and for everything else + // as an index. + // + // What if the value is null and we don't have a JSON type? In this case + // we treat it as a string since a valid number is also a valid string. + // + try + { + if (jv == nullptr || jv->type == json_type::object) + { + n = convert<string> (move (sub)); + index = false; + } + else + { + i = convert<uint64_t> (move (sub)); + index = true; + } + } + catch (const invalid_argument& e) + { + // We will likely be trying to interpret a member name as an integer + // due to the incorrect value type so issue appropriate diagnostics. + // + diag_record dr; + dr << fail (sloc) << "invalid json value subscript: " << e; + + if (jv != nullptr && jv->type != json_type::object) + dr << info << "json value type is " << jv->type; + + dr << info (bloc) << "use the '\\[' escape sequence if this is a " + << "wildcard pattern" << endf; + } + } + + return (jv != nullptr + ? json_subscript_impl (val, val_data, i, n, index) + : value ()); + } + + void json_iterate (const value& val, + const function<void (value&&, bool first)>& f) + { + // Implement in terms of subscript for consistency (in particular, + // iterating over simple values like number, string). 
+ // + for (uint64_t i (0);; ++i) + { + value e (json_subscript_impl (val, nullptr, i, {}, true)); + + if (e.null) + break; + + f (move (e), i == 0); + } + } + + const json_value value_traits<json_value>::empty_instance; + const char* const value_traits<json_value>::type_name = "json"; + + // Note that whether the json value is a container or not depends on its + // payload type. However, for our purposes it feels correct to assume it is + // a container rather than not with itself as the element type (see + // value_traits::{container, element_type} usage for details). + // + const value_type value_traits<json_value>::value_type + { + type_name, + sizeof (json_value), + nullptr, // No base. + true, // Container. + &value_traits<json_value>::value_type, // Element (itself). + &default_dtor<json_value>, + &default_copy_ctor<json_value>, + &default_copy_assign<json_value>, + &json_assign, + json_append, + json_prepend, + &json_reverse, + nullptr, // No cast (cast data_ directly). + &json_compare, + &default_empty<json_value>, + &json_subscript, + &json_iterate + }; + + // json_array + // + json_array value_traits<json_array>:: + convert (names&& ns) + { + json_array r; + + size_t n (ns.size ()); + if (n == 0) + ; // Empty. + else if (n == 1) + { + // Tricky: this can still be JSON input text that is an array. And if + // it's not, then make it an element of an array. 
+ // + json_value v (to_json_value (ns.front (), "json")); + + if (v.type == json_type::array) + r.array = move (v.array); + else + r.array.push_back (move (v)); + } + else + { + r.array.reserve (n); + + for (name& n: ns) + { + if (n.pair) + throw invalid_argument ( + "unexpected pair in json array element value '" + + to_string (n) + '\''); + + r.array.push_back (to_json_value (n, "json array element")); + } + } + + return r; + } + + static void + json_array_assign (value& v, names&& ns, const variable* var) + { + using traits = value_traits<json_array>; + + try + { + traits::assign (v, traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. + // + diag_record dr (fail); + dr << "invalid json array"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << ": " << e; + } + } + + static void + json_array_append (value& v, names&& ns, const variable* var) + { + using val_traits = value_traits<json_value>; + using arr_traits = value_traits<json_array>; + + try + { + arr_traits::append (v, val_traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. + // + diag_record dr (fail); + dr << "invalid json array"; + + if (var != nullptr) + dr << " in variable " << var->name; + + dr << ": " << e; + } + } + + static void + json_array_prepend (value& v, names&& ns, const variable* var) + { + using val_traits = value_traits<json_value>; + using arr_traits = value_traits<json_array>; + + try + { + arr_traits::prepend (v, val_traits::convert (move (ns))); + } + catch (const invalid_argument& e) + { + // Note: ns is not guaranteed to be valid. 
+      //
+      diag_record dr (fail);
+      dr << "invalid json array";
+
+      if (var != nullptr)
+        dr << " in variable " << var->name;
+
+      dr << ": " << e;
+    }
+  }
+
+  // Definitions of the json_array static data members.
+  //
+  const json_array value_traits<json_array>::empty_instance;
+  const char* const value_traits<json_array>::type_name = "json_array";
+
+  const value_type value_traits<json_array>::value_type
+  {
+    type_name,
+    sizeof (json_array),
+
+    // Base type (this assumes that a direct cast works for both).
+    //
+    &value_traits<json_value>::value_type,
+
+    true,                                  // Is a container.
+    &value_traits<json_value>::value_type, // Element type is json_value.
+    &default_dtor<json_array>,
+    &default_copy_ctor<json_array>,
+    &default_copy_assign<json_array>,
+    &json_array_assign,
+    &json_array_append,
+    &json_array_prepend,
+    &json_reverse,
+    nullptr,                               // No cast: data_ is cast directly.
+    &json_compare,
+    &default_empty<json_array>,
+    &json_subscript,
+    &json_iterate
+  };
+
+  // json_object
+  //
+
+  // Convert names to a JSON object:
+  //
+  // - No names result in an empty object.
+  //
+  // - A single name must be simple, not a pattern, and have `{` as its first
+  //   non-whitespace character, in which case it is passed to
+  //   to_json_value() as JSON input text.
+  //
+  // - Otherwise the names must be pairs of member name and member value with
+  //   no member name repeated.
+  //
+  // Anything else is reported by throwing invalid_argument (or by calling
+  // throw_invalid_argument()).
+  //
+  json_object value_traits<json_object>::
+  convert (names&& ns)
+  {
+    json_object result;
+
+    const size_t count (ns.size ());
+
+    if (count == 0)
+      return result; // Empty object.
+
+    if (count == 1)
+    {
+      // A single name can only be JSON input text that is an object (unlike
+      // for an array, we cannot turn an arbitrary value into a member). So
+      // before parsing make sure it at least starts with `{`.
+      //
+      name& first (ns.front ());
+
+      if (!first.simple () || first.pattern)
+        throw_invalid_argument (first, nullptr, "json object");
+
+      string& text (first.value);
+      size_t pos (text.find_first_not_of (" \t\n\r"));
+
+      if (pos == string::npos || text[pos] != '{')
+        throw invalid_argument (
+          "expected json object instead of '" + text + '\'');
+
+      json_value parsed (to_json_value (first, "json object"));
+      assert (parsed.type == json_type::object);
+
+      result.object = move (parsed.object);
+      return result;
+    }
+
+    // Two names per member.
+    //
+    result.object.reserve (count / 2);
+
+    for (auto i (ns.begin ()); i != ns.end (); ++i)
+    {
+      if (!i->pair)
+        throw invalid_argument (
+          "expected pair in json member value '" + to_string (*i) + '\'');
+
+      // The first half of the pair is the member name and the second half
+      // (thus the increment) is the member value.
+      //
+      string key (to_string_value (*i, "json member name"));
+      json_value val (to_json_value (*++i, "json member"));
+
+      for (const json_member& m: result.object)
+      {
+        if (m.name == key)
+          throw invalid_argument (
+            "duplicate json object member '" + key + '\'');
+      }
+
+      result.object.push_back (json_member {move (key), move (val)});
+    }
+
+    return result;
+  }
+
+  // Issue the "invalid json object" diagnostics for the specified error,
+  // mentioning the variable name if there is one.
+  //
+  static void
+  json_object_invalid (const variable* var, const invalid_argument& e)
+  {
+    diag_record dr (fail);
+    dr << "invalid json object";
+
+    if (var != nullptr)
+      dr << " in variable " << var->name;
+
+    dr << ": " << e;
+  }
+
+  // Assign: convert the names to a JSON object and assign it to the value.
+  //
+  static void
+  json_object_assign (value& v, names&& ns, const variable* var)
+  {
+    try
+    {
+      json_object o (value_traits<json_object>::convert (move (ns)));
+      value_traits<json_object>::assign (v, move (o));
+    }
+    catch (const invalid_argument& e)
+    {
+      // Note that ns may have been moved from so it is not used here.
+      //
+      json_object_invalid (var, e);
+    }
+  }
+
+  // Append: convert the names to a JSON value (note: value, not object) and
+  // append it to the object.
+  //
+  static void
+  json_object_append (value& v, names&& ns, const variable* var)
+  {
+    try
+    {
+      json_value x (value_traits<json_value>::convert (move (ns)));
+      value_traits<json_object>::append (v, move (x));
+    }
+    catch (const invalid_argument& e)
+    {
+      // Note that ns may have been moved from so it is not used here.
+      //
+      json_object_invalid (var, e);
+    }
+  }
+
+  // Prepend: same as append but the converted JSON value is prepended.
+  //
+  static void
+  json_object_prepend (value& v, names&& ns, const variable* var)
+  {
+    try
+    {
+      json_value x (value_traits<json_value>::convert (move (ns)));
+      value_traits<json_object>::prepend (v, move (x));
+    }
+    catch (const invalid_argument& e)
+    {
+      // Note that ns may have been moved from so it is not used here.
+      //
+      json_object_invalid (var, e);
+    }
+  }
+
+  // Definitions of the json_object static data members.
+  //
+  const json_object value_traits<json_object>::empty_instance;
+  const char* const value_traits<json_object>::type_name = "json_object";
+
+  const value_type value_traits<json_object>::value_type
+  {
+    type_name,
+    sizeof (json_object),
+
+    // Base type (this assumes that a direct cast works for both).
+    //
+    &value_traits<json_value>::value_type,
+
+    true,                                  // Is a container.
+    &value_traits<json_value>::value_type, // Element type is json_value.
+    &default_dtor<json_object>,
+    &default_copy_ctor<json_object>,
+    &default_copy_assign<json_object>,
+    &json_object_assign,
+    &json_object_append,
+    &json_object_prepend,
+    &json_reverse,
+    nullptr,                               // No cast: data_ is cast directly.
+    &json_compare,
+    &default_empty<json_object>,
+    &json_subscript,
+    &json_iterate
+  };
+
   // cmdline
   //
   cmdline value_traits<cmdline>::
diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx
index 9d7b001..6b0b30e 100644
--- a/libbuild2/variable.hxx
+++ b/libbuild2/variable.hxx
@@ -15,6 +15,8 @@
 #include <libbuild2/forward.hxx>
 #include <libbuild2/utility.hxx>
 
+#include <libbuild2/json.hxx>
+
 #include <libbuild2/context.hxx>
 #include <libbuild2/target-type.hxx>
 #include <libbuild2/diagnostics.hxx>
@@ -1201,6 +1203,65 @@ namespace build2
     static const map_value_type<K, V> value_type;
   };
 
+  // json
+  //
+  // Note that we do not expose json_member as a value type instead
+  // representing it as an object with one member. While we could expose
+  // member (and reverse it as a pair since there is no valid JSON
+  // representation for a standalone member), this doesn't seem to buy us much
+  // but will cause complications (for example, in supporting append/prepend).
+  // On the other hand, representing a member as an object only requires a bit
+  // of what looks like harmless looseness in a few contexts (such as the
+  // $json.member_*() functions).
+  //
+  template <>
+  struct LIBBUILD2_SYMEXPORT value_traits<json_value>
+  {
+    // The value is constructed in place in value::data_ so it must fit.
+    //
+    static_assert (sizeof (json_value) <= value::size_, "insufficient space");
+
+    static json_value convert (names&&);
+    static void assign (value&, json_value&&);
+    static void append (value&, json_value&&);
+    static void prepend (value&, json_value&&);
+
+    // True for null as well as for an empty array or object.
+    //
+    static bool empty (const json_value&);
+
+    // The empty instance is null.
+    //
+    static const json_value empty_instance;
+    static const char* const type_name;
+    static const build2::value_type value_type;
+  };
+
+  template <>
+  struct LIBBUILD2_SYMEXPORT value_traits<json_array>
+  {
+    // The value is constructed in place in value::data_ so it must fit.
+    //
+    static_assert (sizeof (json_array) <= value::size_, "insufficient space");
+
+    static json_array convert (names&&);
+    static void assign (value&, json_array&&);
+
+    // Note that what gets appended/prepended is a value, not an array.
+    //
+    static void append (value&, json_value&&);
+    static void prepend (value&, json_value&&);
+
+    static bool empty (const json_array& v) {return v.array.empty ();}
+
+    // The empty instance is an empty array.
+    //
+    static const json_array empty_instance;
+    static const char* const type_name;
+    static const build2::value_type value_type;
+  };
+
+  template <>
+  struct LIBBUILD2_SYMEXPORT value_traits<json_object>
+  {
+    // The value is constructed in place in value::data_ so it must fit.
+    //
+    static_assert (sizeof (json_object) <= value::size_, "insufficient space");
+
+    static json_object convert (names&&);
+    static void assign (value&, json_object&&);
+
+    // Note that what gets appended/prepended is a value, not an object.
+    //
+    static void append (value&, json_value&&);
+    static void prepend (value&, json_value&&);
+
+    static bool empty (const json_object& v) {return v.object.empty ();}
+
+    // The empty instance is an empty object.
+    //
+    static const json_object empty_instance;
+    static const char* const type_name;
+    static const build2::value_type value_type;
+  };
+
   // Canned command line to be re-lexed (used in {Build,Test}scripts).
//
// Note that because the executable can be specific as a target or as
diff --git a/libbuild2/variable.ixx b/libbuild2/variable.ixx
index 51c35fd..b8f80e3 100644
--- a/libbuild2/variable.ixx
+++ b/libbuild2/variable.ixx
@@ -906,6 +906,113 @@ namespace build2
       new (&v.data_) map<K, V> (move (x));
   }
 
+  // json
+  //
+
+  // Return true if the JSON value is null or is an empty array or object.
+  // All the other types are never considered empty.
+  //
+  inline bool value_traits<json_value>::
+  empty (const json_value& v)
+  {
+    // Keep this consistent with $json.size().
+    //
+    bool r (false);
+
+    switch (v.type)
+    {
+    case json_type::null:   r = true;              break;
+    case json_type::array:  r = v.array.empty ();  break;
+    case json_type::object: r = v.object.empty (); break;
+    case json_type::boolean:
+    case json_type::signed_number:
+    case json_type::unsigned_number:
+    case json_type::hexadecimal_number:
+    case json_type::string:
+      break;
+    }
+
+    return r;
+  }
+
+  // Assign x to the value: construct it in data_ if the value is null and
+  // move-assign to the existing json_value otherwise.
+  //
+  inline void value_traits<json_value>::
+  assign (value& v, json_value&& x)
+  {
+    if (!v)
+    {
+      new (&v.data_) json_value (move (x));
+      return;
+    }
+
+    v.as<json_value> () = move (x);
+  }
+
+  // Append x to the existing json_value or, if the value is null, make x the
+  // value.
+  //
+  inline void value_traits<json_value>::
+  append (value& v, json_value&& x)
+  {
+    if (!v)
+    {
+      new (&v.data_) json_value (move (x));
+      return;
+    }
+
+    v.as<json_value> ().append (move (x));
+  }
+
+  // Prepend x to the existing json_value or, if the value is null, make x
+  // the value.
+  //
+  inline void value_traits<json_value>::
+  prepend (value& v, json_value&& x)
+  {
+    if (!v)
+    {
+      new (&v.data_) json_value (move (x));
+      return;
+    }
+
+    v.as<json_value> ().prepend (move (x));
+  }
+
+  // json_array
+  //
+
+  // Assign x to the value: construct it in data_ if the value is null and
+  // move-assign to the existing json_array otherwise.
+  //
+  inline void value_traits<json_array>::
+  assign (value& v, json_array&& x)
+  {
+    if (!v)
+    {
+      new (&v.data_) json_array (move (x));
+      return;
+    }
+
+    v.as<json_array> () = move (x);
+  }
+
+  // Append x to the array, first constructing an empty array in data_ if the
+  // value is null.
+  //
+  inline void value_traits<json_array>::
+  append (value& v, json_value&& x)
+  {
+    json_array& a (v ? v.as<json_array> () : *new (&v.data_) json_array ());
+    a.append (move (x));
+  }
+
+  // Prepend x to the array, first constructing an empty array in data_ if
+  // the value is null.
+  //
+  inline void value_traits<json_array>::
+  prepend (value& v, json_value&& x)
+  {
+    json_array& a (v ? v.as<json_array> () : *new (&v.data_) json_array ());
+    a.prepend (move (x));
+  }
+
+  // json_object
+  //
+
+  // Assign x to the value: construct it in data_ if the value is null and
+  // move-assign to the existing json_object otherwise.
+  //
+  inline void value_traits<json_object>::
+  assign (value& v, json_object&& x)
+  {
+    if (!v)
+    {
+      new (&v.data_) json_object (move (x));
+      return;
+    }
+
+    v.as<json_object> () = move (x);
+  }
+
+  // Append x to the object, first constructing an empty object in data_ if
+  // the value is null.
+  //
+  inline void value_traits<json_object>::
+  append (value& v, json_value&& x)
+  {
+    json_object& o (v ? v.as<json_object> () : *new (&v.data_) json_object ());
+    o.append (move (x));
+  }
+
+  // Prepend x to the object, first constructing an empty object in data_ if
+  // the value is null.
+  //
+  inline void value_traits<json_object>::
+  prepend (value& v, json_value&& x)
+  {
+    json_object& o (v ? v.as<json_object> () : *new (&v.data_) json_object ());
+    o.prepend (move (x));
+  }
+
   // variable_pool
   //
   inline const variable* variable_pool:: |