diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2022-02-28 08:57:02 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2022-03-02 13:58:55 +0200 |
commit | 152ea943395822f55591eadaf8e0f5aac263db5e (patch) | |
tree | 268a9dae37704b2398876dad44a5095ba6d6f750 | |
parent | 0e5d575feceea4feac4b33e85626719e14f762a1 (diff) |
Add JSON serializer (copy of libstud-json)
-rw-r--r-- | libbutl/json/event.hxx | 27 | ||||
-rw-r--r-- | libbutl/json/serializer.cxx | 669 | ||||
-rw-r--r-- | libbutl/json/serializer.hxx | 379 | ||||
-rw-r--r-- | libbutl/json/serializer.ixx | 202 |
4 files changed, 1277 insertions, 0 deletions
diff --git a/libbutl/json/event.hxx b/libbutl/json/event.hxx new file mode 100644 index 0000000..77185cc --- /dev/null +++ b/libbutl/json/event.hxx @@ -0,0 +1,27 @@ +#pragma once + +#include <cstddef> +#include <cstdint> + +namespace butl +{ + namespace json + { + // Parsing/serialization event. + // + enum class event: std::uint8_t + { + begin_object = 1, + end_object, + begin_array, + end_array, + name, + string, + number, + boolean, + null + }; + + constexpr std::size_t event_count = 9; + } +} diff --git a/libbutl/json/serializer.cxx b/libbutl/json/serializer.cxx new file mode 100644 index 0000000..84941ed --- /dev/null +++ b/libbutl/json/serializer.cxx @@ -0,0 +1,669 @@ +#include <cstdio> // snprintf +#include <cstdarg> // va_list +#include <cstring> // memcpy +#include <ostream> + +#include <libbutl/json/serializer.hxx> + +using namespace std; + +namespace butl +{ + namespace json + { + using buffer = buffer_serializer::buffer; + using error_code = invalid_json_output::error_code; + + template <typename T> + static void + dynarray_overflow (void* d, event, buffer& b, size_t ex) + { + T& v (*static_cast<T*> (d)); + v.resize (b.capacity + ex); + v.resize (v.capacity ()); + // const_cast is required for std::string pre C++17. + // + b.data = const_cast<typename T::value_type*> (v.data ()); + b.capacity = v.size (); + } + + template <typename T> + static void + dynarray_flush (void* d, event, buffer& b) + { + T& v (*static_cast<T*> (d)); + v.resize (b.size); + b.data = const_cast<typename T::value_type*> (v.data ()); + b.capacity = b.size; + } + + buffer_serializer:: + buffer_serializer (string& s, size_t i) + : buffer_serializer (const_cast<char*> (s.data ()), size_, s.size (), + dynarray_overflow<string>, + dynarray_flush<string>, + &s, + i) + { + size_ = s.size (); + } + + buffer_serializer:: + buffer_serializer (vector<char>& v, size_t i) + : buffer_serializer (v.data (), size_, v.size (), + dynarray_overflow<vector<char>>, + dynarray_flush<vector<char>>, + &v, + i) + { + size_ = v.size (); + } + + static void + ostream_overflow (void* d, event e, buffer& b, size_t) + { + ostream& s (*static_cast<ostream*> (d)); + s.write (static_cast<char*> (b.data), b.size); + if (s.fail ()) + throw invalid_json_output ( + e, error_code::buffer_overflow, "unable to write JSON output text"); + b.size = 0; + } + + static void + ostream_flush (void* d, event e, buffer& b) + { + ostream_overflow (d, e, b, 0); + + ostream& s (*static_cast<ostream*> (d)); + s.flush (); + if (s.fail ()) + throw invalid_json_output ( + e, error_code::buffer_overflow, "unable to write JSON output text"); + } + + stream_serializer:: + stream_serializer (ostream& os, size_t i) + : buffer_serializer (tmp_, sizeof (tmp_), + ostream_overflow, + ostream_flush, + &os, + i) + { + } + + bool buffer_serializer:: + next (optional<event> e, pair<const char*, size_t> val, bool check) + { + if (absent_ == 2) + goto fail_complete; + + if (e == nullopt) + { + if (!state_.empty ()) + goto fail_incomplete; + + absent_++; + return false; + } + + absent_ = 0; // Clear inter-value absent event. + + { + state* st (state_.empty () ? nullptr : &state_.back ()); + + auto name_expected = [] (const state& s) + { + return s.type == event::begin_object && s.count % 2 == 0; + }; + + auto make_str = [] (const char* s, size_t n) + { + return make_pair (s, n); + }; + + // When it comes to pretty-printing, the common way to do it is along + // these lines: + // + // { + // "str": "value", + // "obj": { + // "arr": [ + // 1, + // 2, + // 3 + // ] + // }, + // "num": 123 + // } + // + // Empty objects and arrays are printed without a newline: + // + // { + // "obj": {}, + // "arr": [] + // } + // + // There are two types of separators: between name and value, which is + // always ": ", and before/after value inside an object or array which + // is either newline followed by indentation, or comma followed by + // newline followed by indentation (we also have separation between + // top-level values but that's orthogonal to pretty-printing). + // + // Based on this observation, we are going to handle the latter case by + // starting with the ",\n" string (in this->sep_) and pushing/popping + // indentation spaces as we enter/leave objects and arrays. We handle + // the cases where we don't need the comma by simply skipping it in the + // C-string pointer. + // + bool pp (indent_ != 0); + + pair<const char*, size_t> sep; + if (st != nullptr) + { + // The name-value separator. + // + if (st->type == event::begin_object && st->count % 2 == 1) + { + sep = !pp ? make_str (":", 1) : make_str (": ", 2); + } + // We don't need the comma if we are closing the object or array. + // + else if (e == event::end_array || e == event::end_object) + { + // But in this case we need to unindent one level prior to writing + // the brace. Also handle the empty object/array as a special case. + // + sep = !pp || st->count == 0 + ? make_str (nullptr, 0) + : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_); + } + // Or if this is the first value (note: must come after end_*). + // + else if (st->count == 0) + { + sep = !pp + ? make_str (nullptr, 0) + : make_str (sep_.c_str () + 1, sep_.size () - 1); + } + else + { + sep = !pp + ? make_str (",", 1) + : make_str (sep_.c_str (), sep_.size ()); + } + } + else if (values_ != 0) // Subsequent top-level value. + { + // Top-level value separation. For now we always separate them with + // newlines, which is the most common/sensible way. + // + sep = make_str ("\n", 1); + } + + switch (*e) + { + case event::begin_array: + case event::begin_object: + { + if (st != nullptr && name_expected (*st)) + goto fail_unexpected_event; + + write (*e, + sep, + make_str (e == event::begin_array ? "[" : "{", 1), + false); + + if (st != nullptr) + st->count++; + + if (pp) + sep_.append (indent_, ' '); + + state_.push_back (state {*e, 0}); + break; + } + case event::end_array: + case event::end_object: + { + if (st == nullptr || (e == event::end_array + ? st->type != event::begin_array + : !name_expected (*st))) + goto fail_unexpected_event; + + write (*e, + sep, + make_str (e == event::end_array ? "]" : "}", 1), + false); + + if (pp) + sep_.erase (sep_.size () - indent_); + + state_.pop_back (); + break; + } + case event::name: + case event::string: + { + if (e == event::name + ? (st == nullptr || !name_expected (*st)) + : (st != nullptr && name_expected (*st))) + goto fail_unexpected_event; + + write (*e, sep, val, check, '"'); + + if (st != nullptr) + st->count++; + break; + } + case event::null: + case event::boolean: + { + if (e == event::null && val.first == nullptr) + val = {"null", 4}; + else if (check) + { + auto eq = [&val] (const char* v, size_t n) + { + return val.second == n && memcmp (val.first, v, n) == 0; + }; + + if (e == event::null) + { + if (!eq ("null", 4)) + goto fail_null; + } + else + { + if (!eq ("true", 4) && !eq ("false", 5)) + goto fail_bool; + } + } + } + // Fall through. + case event::number: + { + if (st != nullptr && name_expected (*st)) + goto fail_unexpected_event; + + write (*e, sep, val, check); + + if (st != nullptr) + st->count++; + break; + } + } + } + + if (state_.empty ()) + { + values_++; + if (flush_ != nullptr) + flush_ (data_, *e, buf_); + + return false; + } + + return true; + + fail_complete: + throw invalid_json_output ( + e, error_code::invalid_value, "value sequence is complete"); + fail_incomplete: + throw invalid_json_output ( + e, error_code::invalid_value, "value is incomplete"); + fail_null: + throw invalid_json_output ( + e, error_code::invalid_value, "invalid null value"); + fail_bool: + throw invalid_json_output ( + e, error_code::invalid_value, "invalid boolean value"); + fail_unexpected_event: + throw invalid_json_output ( + e, error_code::unexpected_event, "unexpected event"); + } + + // JSON escape sequences for control characters <= 0x1F. + // + static const char* json_escapes[] = + {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", + "\\u0006", "\\u0007", "\\b", "\\t", "\\n", "\\u000B", + "\\f", "\\r", "\\u000E", "\\u000F", "\\u0010", "\\u0011", + "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017", + "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D", + "\\u001E", "\\u001F"}; + + void buffer_serializer:: + write (event e, + pair<const char*, size_t> sep, + pair<const char*, size_t> val, + bool check, + char q) + { + // Assumptions: + // + // 1. A call to overflow should be able to provide enough capacity to + // write the entire separator (in other words, we are not going to + // bother with chunking the separator). + // + // 2. Similarly, a call to overflow should be able to provide enough + // capacity to write an entire UTF-8 multi-byte sequence. + // + // 3. Performance-wise, we do not expect very long contiguous sequences + // of character that require escaping. + + // Total number of bytes remaining to be written and the capacity + // currently available. + // + size_t size (sep.second + val.second + (q != '\0' ? 2 : 0)); + size_t cap (buf_.capacity - buf_.size); + + auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0) + { + if (overflow_ == nullptr) + return false; + + extra += size; + extra -= cap; + overflow_ (data_, e, buf_, extra > min ? extra : min); + cap = buf_.capacity - buf_.size; + + return cap >= min; + }; + + auto append = [this, &cap, &size] (const char* d, size_t s) + { + memcpy (static_cast<char*> (buf_.data) + buf_.size, d, s); + buf_.size += s; + cap -= s; + size -= s; + }; + + // Return the longest chunk of input that fits into the buffer and does + // not end in the middle of a multi-byte UTF-8 sequence. Assume value + // size and capacity are not 0. Return NULL in first if no chunk could + // be found that fits into the remaining space. In this case, second is + // the additional (to size) required space (used to handle escapes in + // the checked version). + // + // The basic idea is to seek in the input buffer to the capacity of the + // output buffer (unless the input is shorter than the output). If we + // ended up in the middle of a multi-byte UTF-8 sequence, then seek back + // until we end up at the UTF-8 sequence boundary. Note that this + // implementation assumes valid UTF-8. + // + auto chunk = [&cap, &val] () -> pair<const char*, size_t> + { + pair<const char*, size_t> r (nullptr, 0); + + if (cap >= val.second) + r = val; + else + { + // Start from the character past capacity and search for a UTF-8 + // sequence boundary. + // + for (const char* p (val.first + cap); p != val.first; --p) + { + const auto u (static_cast<uint8_t> (*p)); + if (u < 0x80 || u > 0xBF) // Not a continuation byte + { + r = {val.first, p - val.first}; + break; + } + } + } + + val.first += r.second; + val.second -= r.second; + + return r; + }; + + // Escaping and UTF-8-validating version of chunk(). + // + // There are three classes of mandatory escapes in a JSON string: + // + // - \\ and \" + // + // - \b \f \n \r \t for popular control characters + // + // - \u00NN for other control characters <= 0x1F + // + // If the input begins with a character that must be escaped, return + // only its escape sequence. Otherwise validate and return everything up + // to the end of input or buffer capacity, but cutting it short before + // the next character that must be escaped or the first UTF-8 sequence + // that would not fit. + // + // Return string::npos in second in case of a stray continuation byte or + // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte + // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence + // that would encode a codepoint beyond the U+10FFFF Unicode limit). + // + auto chunk_checked = [&cap, &size, &val] () -> pair<const char*, size_t> + { + pair<const char*, size_t> r (nullptr, 0); + + // Check whether the first character needs to be escaped. + // + const uint8_t c (val.first[0]); + if (c == '"') + r = {"\\\"", 2}; + else if (c == '\\') + r = {"\\\\", 2}; + else if (c <= 0x1F) + { + auto s (json_escapes[c]); + r = {s, s[1] == 'u' ? 6 : 2}; + } + + if (r.first != nullptr) + { + // Return in second the additional (to size) space required. + // + if (r.second > cap) + return {nullptr, r.second - 1}; + + // If we had to escape the character then adjust size accordingly + // (see append() above). + // + size += r.second - 1; + + val.first += 1; + val.second -= 1; + return r; + } + + // First character doesn't need to be escaped. Return as much of the + // rest of the input as possible. + // + size_t i (0); + for (size_t n (min (cap, val.second)); i != n; i++) + { + const uint8_t c1 (val.first[i]); + + if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped. + break; + else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence. + { + size_t i1 (i); // Position of the first byte. + + // The control flow here is to continue if valid and to fall + // through to return on error. + // + if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence. + { + if (i + 2 <= val.second) // Sequence is complete in JSON value. + { + if (i + 2 > cap) // Sequence won't fit. + break; + + const uint8_t c2 (val.first[++i]); + + if (c2 >= 0x80 && c2 <= 0xBF) + continue; + } + } + else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence. + { + if (i + 3 <= val.second) + { + if (i + 3 > cap) + break; + + const uint8_t c2 (val.first[++i]), c3 (val.first[++i]); + + if (c3 >= 0x80 && c3 <= 0xBF) + { + switch (c1) + { + case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break; + case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break; + default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break; + } + } + } + } + else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence. + { + if (i + 4 <= val.second) + { + if (i + 4 > cap) + break; + + const uint8_t c2 (val.first[++i]), + c3 (val.first[++i]), + c4 (val.first[++i]); + + if (c3 >= 0x80 && c3 <= 0xBF && + c4 >= 0x80 && c4 <= 0xBF) + { + switch (c1) + { + case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break; + case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break; + default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break; + } + } + } + } + + r = {val.first, string::npos}; + + // Update val to point to the beginning of the invalid sequence. + // + val.first += i1; + val.second -= i1; + + return r; + } + } + + if (i != 0) // We have a chunk. + { + r = {val.first, i}; + + val.first += i; + val.second -= i; + } + + return r; + }; + + // Value's original size (used to calculate the offset of the errant + // character in case of a validation failure). + // + const size_t vn (val.second); + + // Write the separator, if any. + // + if (sep.second != 0) + { + if (cap < sep.second && !grow (sep.second)) + goto fail_nospace; + + append (sep.first, sep.second); + } + + // Write the value's opening quote, if requested. + // + if (q != '\0') + { + if (cap == 0 && !grow (1)) + goto fail_nospace; + + append ("\"", 1); + } + + // Write the value, unless empty. + // + while (val.second != 0) + { + pair<const char*, size_t> ch (nullptr, 0); + + if (cap != 0) + ch = check ? chunk_checked () : chunk (); + + if (ch.first == nullptr) + { + // The minimum extra bytes we need the overflow function to be able + // to provide is based on these sequences that we do not break: + // + // - 4 bytes for a UTF-8 sequence + // - 6 bytes for an escaped Unicode sequence (\uXXXX). + // + if (!grow (6, ch.second)) + goto fail_nospace; + } + else if (ch.second != string::npos) + append (ch.first, ch.second); + else + goto fail_utf8; + } + + // Write the value's closing quote, if requested. + // + if (q != '\0') + { + if (cap == 0 && !grow (1)) + goto fail_nospace; + + append ("\"", 1); + } + + return; + + // Note: keep descriptions consistent with the parser. + // + fail_utf8: + throw invalid_json_output (e, + e == event::name ? error_code::invalid_name + : error_code::invalid_value, + "invalid UTF-8 text", + vn - val.second); + + fail_nospace: + throw invalid_json_output ( + e, error_code::buffer_overflow, "insufficient space in buffer"); + } + + size_t buffer_serializer:: + to_chars_impl (char* b, size_t n, const char* f, ...) + { + va_list a; + va_start (a, f); + const int r (vsnprintf (b, n, f, a)); + va_end (a); + + if (r < 0 || r >= static_cast<int> (n)) + { + throw invalid_json_output (event::number, + error_code::invalid_value, + "unable to convert number to string"); + } + + return static_cast<size_t> (r); + } + } +} diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx new file mode 100644 index 0000000..fad91e2 --- /dev/null +++ b/libbutl/json/serializer.hxx @@ -0,0 +1,379 @@ +#pragma once + +#ifdef BUILD2_BOOTSTRAP +# error JSON serializer not available during bootstrap +#endif + +#include <array> +#include <iosfwd> +#include <string> +#include <vector> +#include <cstddef> // size_t, nullptr_t +#include <utility> // pair +#include <stdexcept> // invalid_argument +#include <type_traits> // enable_if, is_* + +#include <libbutl/optional.hxx> // butl::optional is std::optional or similar. + +#include <libbutl/json/event.hxx> + +#include <libbutl/export.hxx> + +namespace butl +{ + // Using the RFC8259 terminology: JSON (output) text, JSON value, object + // member. + // + namespace json + { + class invalid_json_output: public std::invalid_argument + { + public: + using event_type = json::event; + + enum class error_code + { + buffer_overflow, + unexpected_event, + invalid_name, + invalid_value + }; + + invalid_json_output (optional<event_type> event, + error_code code, + const char* description, + std::size_t offset = std::string::npos); + + invalid_json_output (optional<event_type> event, + error_code code, + const std::string& description, + std::size_t offset = std::string::npos); + + // Event that triggered the error. If the error is in the value, then + // offset points to the offending byte (for example, the beginning of an + // invalid UTF-8 byte sequence). Otherwise, offset is string::npos. + // + optional<event_type> event; + error_code code; + std::size_t offset; + }; + + // The serializer makes sure the resulting JSON is syntactically but not + // necessarily semantically correct. For example, it's possible to + // serialize a number event with non-numeric data. + // + // Note that unlike the parser, the serializer is always in the multi- + // value mode allowing the serialization of zero or more values. Note also + // that while values are separated with newlines, there is no trailing + // newline after the last (or only) value and the user is expected to add + // it manually if needed. + // + // Also note that while RFC8259 recommends object members to have unique + // names, the serializer does not enforce this. + // + class LIBBUTL_SYMEXPORT buffer_serializer + { + public: + // Serialize to string growing it as necessary. + // + // The indentation argument specifies the number of indentation spaces + // that should be used for pretty-printing. If 0 is passed, no + // pretty-printing is performed. + // + explicit + buffer_serializer (std::string&, std::size_t indentation = 2); + + // Serialize to vector of characters growing it as necessary. + // + explicit + buffer_serializer (std::vector<char>&, std::size_t indentation = 2); + + // Serialize to a fixed array. + // + // The length of the output text written is tracked in the size + // argument. + // + // If the array is not big enough to store the entire output text, the + // next() call that reaches the limit will throw invalid_json_output. + // + template <std::size_t N> + buffer_serializer (std::array<char, N>&, std::size_t& size, + std::size_t indentation = 2); + + // Serialize to a fixed buffer. + // + // The length of the output text written is tracked in the size + // argument. + // + // If the buffer is not big enough to store the entire output text, the + // next() call that reaches the limit will throw invalid_json_output. + // + buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, + std::size_t indentation = 2); + + // The overflow function is called when the output buffer is out of + // space. The extra argument is a hint indicating the extra space likely + // to be required. + // + // Possible strategies include re-allocating a larger buffer or flushing + // the contents of the original buffer to the output destination. In + // case of a reallocation, the implementation is responsible for copying + // the contents of the original buffer over. + // + // The flush function is called when the complete JSON value has been + // serialized to the buffer. It can be used to write the contents of the + // buffer to the output destination. Note that flush is not called after + // the second absent (nullopt) event (or the only absent event; see + // next() for details). + // + // Both functions are passed the original buffer, its size (the amount + // of output text), and its capacity. They return (by modifying the + // argument) the replacement buffer and its size and capacity (these may + // refer to the original buffer). If space cannot be made available, the + // implementation can throw an appropriate exception (for example, + // std::bad_alloc or std::ios_base::failure). Any exceptions thrown is + // propagated to the user. + // + struct buffer + { + void* data; + std::size_t& size; + std::size_t capacity; + }; + + using overflow_function = void (void* data, + event, + buffer&, + std::size_t extra); + using flush_function = void (void* data, event, buffer&); + + // Serialize using a custom buffer and overflow/flush functions (both + // are optional). + // + buffer_serializer (void* buf, std::size_t capacity, + overflow_function*, + flush_function*, + void* data, + std::size_t indentation = 2); + + // As above but the length of the output text written is tracked in the + // size argument. + // + buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, + overflow_function*, + flush_function*, + void* data, + std::size_t indentation = 2); + + // Begin/end an object. + // + void + begin_object (); + + void + end_object (); + + // Serialize an object member (name and value). + // + // If check is false, then don't check whether the name (or value, if + // it's a string) is valid UTF-8 and don't escape any characters. + // + template <typename T> + void + member (const char* name, const T& value, bool check = true); + + template <typename T> + void + member (const std::string& name, const T& value, bool check = true); + + // Serialize an object member name. + // + // If check is false, then don't check whether the name is valid UTF-8 + // and don't escape any characters. + // + void + member_name (const char*, bool check = true); + + void + member_name (const std::string&, bool check = true); + + // Begin/end an array. + // + void + begin_array (); + + void + end_array (); + + // Serialize a string. + // + // If check is false, then don't check whether the value is valid UTF-8 + // and don't escape any characters. + // + // Note that a NULL C-string pointer is serialized as a null value. + // + void + value (const char*, bool check = true); + + void + value (const std::string&, bool check = true); + + // Serialize a number. + // + template <typename T> + typename std::enable_if<std::is_integral<T>::value || + std::is_floating_point<T>::value>::type + value (T); + + // Serialize a boolean value. + // + void + value (bool); + + // Serialize a null value. + // + void + value (std::nullptr_t); + + // Serialize next JSON event. + // + // If check is false, then don't check whether the value is valid UTF-8 + // and don't escape any characters. + // + // Return true if more events are required to complete the (top-level) + // value (that is, it is currently incomplete) and false otherwise. + // Throw invalid_json_output exception in case of an invalid event or + // value. + // + // At the end of the value an optional absent (nullopt) event can be + // serialized to verify the value is complete. If it is incomplete an + // invalid_json_output exception is thrown. An optional followup absent + // event can be serialized to indicate the completion of a multi-value + // sequence (one and only absent event indicates a zero value sequence). + // If anything is serialized to a complete value sequence an + // invalid_json_output exception is thrown. + // + // Note that this function was designed to be easily invoked with the + // output from parser::next() and parser::data(). For example, for a + // single-value mode: + // + // optional<event> e; + // do + // { + // e = p.next (); + // s.next (e, p.data ()); + // } + // while (e); + // + // For a multi-value mode: + // + // while (p.peek ()) + // { + // optional<event> e; + // do + // { + // e = p.next (); + // s.next (e, p.data ()); + // } + // while (e); + // } + // s.next (nullopt); // End of value sequence. + // + bool + next (optional<event> event, + std::pair<const char*, std::size_t> value = {}, + bool check = true); + + private: + void + write (event, + std::pair<const char*, std::size_t> sep, + std::pair<const char*, std::size_t> val, + bool check, char quote = '\0'); + + // Forward a value(v, check) call to value(v) ignoring the check + // argument. Used in the member() implementation. + // + template <typename T> + void + value (const T& v, bool /*check*/) + { + value (v); + } + + // Convert numbers to string. + // + static std::size_t to_chars (char*, std::size_t, int); + static std::size_t to_chars (char*, std::size_t, long); + static std::size_t to_chars (char*, std::size_t, long long); + static std::size_t to_chars (char*, std::size_t, unsigned int); + static std::size_t to_chars (char*, std::size_t, unsigned long); + static std::size_t to_chars (char*, std::size_t, unsigned long long); + static std::size_t to_chars (char*, std::size_t, double); + static std::size_t to_chars (char*, std::size_t, long double); + + static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...); + + buffer buf_; + std::size_t size_; + overflow_function* overflow_; + flush_function* flush_; + void* data_; + + // State of a "structured type" (array or object; as per the RFC + // terminology). + // + struct state + { + const event type; // Type kind (begin_array or begin_object). + std::size_t count; // Number of events serialized inside this type. + }; + + // Stack of nested structured type states. + // + // @@ TODO: would have been nice to use small_vector. + // + std::vector<state> state_; + + // The number of consecutive absent events (nullopt) serialized thus + // far. + // + // Note: initialized to 1 to naturally handle a single absent event + // (declares an empty value sequence complete). + // + std::size_t absent_ = 1; + + // The number of spaces with which to indent (once for each level of + // nesting). If zero, pretty-printing is disabled. + // + std::size_t indent_; + + // Separator and indentation before/after value inside an object or + // array (see pretty-printing implementation for details). + // + std::string sep_; + + // The number of complete top-level values serialized thus far. + // + std::size_t values_ = 0; + }; + + class LIBBUTL_SYMEXPORT stream_serializer: public buffer_serializer + { + public: + // Serialize to std::ostream. + // + // If stream exceptions are enabled then the std::ios_base::failure + // exception is used to report input/output errors (badbit and failbit). + // Otherwise, those are reported as the invalid_json_output exception. + // + explicit + stream_serializer (std::ostream&, std::size_t indentation = 2); + + protected: + char tmp_[4096]; + }; + } +} + +#include <libbutl/json/serializer.ixx> diff --git a/libbutl/json/serializer.ixx b/libbutl/json/serializer.ixx new file mode 100644 index 0000000..5b2c173 --- /dev/null +++ b/libbutl/json/serializer.ixx @@ -0,0 +1,202 @@ +namespace butl +{ + namespace json + { + inline invalid_json_output:: + invalid_json_output (optional<event_type> e, + error_code c, + const char* d, + std::size_t o) + : std::invalid_argument (d), event (e), code (c), offset (o) + { + } + + inline invalid_json_output:: + invalid_json_output (optional<event_type> e, + error_code c, + const std::string& d, + std::size_t o) + : invalid_json_output (e, c, d.c_str (), o) + { + } + + inline buffer_serializer:: + buffer_serializer (void* b, std::size_t& s, std::size_t c, + overflow_function* o, flush_function* f, void* d, + std::size_t i) + : buf_ {b, s, c}, + overflow_ (o), + flush_ (f), + data_ (d), + indent_ (i), + sep_ (indent_ != 0 ? ",\n" : "") + { + } + + template <std::size_t N> + inline buffer_serializer:: + buffer_serializer (std::array<char, N>& a, std::size_t& s, std::size_t i) + : buffer_serializer (a.data (), s, a.size (), + nullptr, nullptr, nullptr, + i) + { + } + + inline buffer_serializer:: + buffer_serializer (void* b, std::size_t& s, std::size_t c, std::size_t i) + : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i) + { + } + + inline buffer_serializer:: + buffer_serializer (void* b, std::size_t c, + overflow_function* o, flush_function* f, void* d, + std::size_t i) + : buffer_serializer (b, size_, c, o, f, d, i) + { + size_ = 0; + } + + inline void buffer_serializer:: + begin_object () + { + next (event::begin_object); + } + + inline void buffer_serializer:: + end_object () + { + next (event::end_object); + } + + inline void buffer_serializer:: + member_name (const char* n, bool c) + { + next (event::name, {n, n != nullptr ? strlen (n) : 0}, c); + } + + inline void buffer_serializer:: + member_name (const std::string& n, bool c) + { + next (event::name, {n.c_str (), n.size ()}, c); + } + + template <typename T> + inline void buffer_serializer:: + member (const char* n, const T& v, bool c) + { + member_name (n, c); + value (v, c); + } + + template <typename T> + inline void buffer_serializer:: + member (const std::string& n, const T& v, bool c) + { + member_name (n, c); + value (v, c); + } + + inline void buffer_serializer:: + begin_array () + { + next (event::begin_array); + } + + inline void buffer_serializer:: + end_array () + { + next (event::end_array); + } + + inline void buffer_serializer:: + value (const char* v, bool c) + { + if (v != nullptr) + next (event::string, {v, strlen (v)}, c); + else + next (event::null); + } + + inline void buffer_serializer:: + value (const std::string& v, bool c) + { + next (event::string, {v.c_str (), v.size ()}, c); + } + + template <typename T> + typename std::enable_if<std::is_integral<T>::value || + std::is_floating_point<T>::value>::type + buffer_serializer:: + value (T v) + { + // The largest 128-bit integer has 39 digits, and long floating point + // numbers will fit because they are output in scientific notation. + // + char b[40]; + const std::size_t n (to_chars (b, sizeof (b), v)); + next (event::number, {b, n}); + } + + inline void buffer_serializer:: + value (bool b) + { + next (event::boolean, + b ? std::make_pair ("true", 4) : std::make_pair ("false", 5)); + } + + inline void buffer_serializer:: + value (std::nullptr_t) + { + next (event::null); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, int v) + { + return to_chars_impl (b, s, "%d", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, long v) + { + return to_chars_impl (b, s, "%ld", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, long long v) + { + return to_chars_impl (b, s, "%lld", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, unsigned v) + { + return to_chars_impl (b, s, "%u", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, unsigned long v) + { + return to_chars_impl (b, s, "%lu", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, unsigned long long v) + { + return to_chars_impl (b, s, "%llu", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, double v) + { + return to_chars_impl (b, s, "%.10g", v); + } + + inline size_t buffer_serializer:: + to_chars (char* b, size_t s, long double v) + { + return to_chars_impl (b, s, "%.10Lg", v); + } + } +} |