aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2022-02-28 08:57:02 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2022-03-02 13:58:55 +0200
commit152ea943395822f55591eadaf8e0f5aac263db5e (patch)
tree268a9dae37704b2398876dad44a5095ba6d6f750
parent0e5d575feceea4feac4b33e85626719e14f762a1 (diff)
Add JSON serializer (copy of libstud-json)
-rw-r--r--libbutl/json/event.hxx27
-rw-r--r--libbutl/json/serializer.cxx669
-rw-r--r--libbutl/json/serializer.hxx379
-rw-r--r--libbutl/json/serializer.ixx202
4 files changed, 1277 insertions, 0 deletions
diff --git a/libbutl/json/event.hxx b/libbutl/json/event.hxx
new file mode 100644
index 0000000..77185cc
--- /dev/null
+++ b/libbutl/json/event.hxx
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace butl
+{
+ namespace json
+ {
+ // Parsing/serialization event.
+ //
+ enum class event: std::uint8_t
+ {
+ begin_object = 1,
+ end_object,
+ begin_array,
+ end_array,
+ name,
+ string,
+ number,
+ boolean,
+ null
+ };
+
+ constexpr std::size_t event_count = 9;
+ }
+}
diff --git a/libbutl/json/serializer.cxx b/libbutl/json/serializer.cxx
new file mode 100644
index 0000000..84941ed
--- /dev/null
+++ b/libbutl/json/serializer.cxx
@@ -0,0 +1,669 @@
+#include <cstdio> // snprintf
+#include <cstdarg> // va_list
+#include <cstring> // memcpy
+#include <ostream>
+
+#include <libbutl/json/serializer.hxx>
+
+using namespace std;
+
+namespace butl
+{
+ namespace json
+ {
+ using buffer = buffer_serializer::buffer;
+ using error_code = invalid_json_output::error_code;
+
+ template <typename T>
+ static void
+ dynarray_overflow (void* d, event, buffer& b, size_t ex)
+ {
+ T& v (*static_cast<T*> (d));
+ v.resize (b.capacity + ex);
+ v.resize (v.capacity ());
+ // const_cast is required for std::string pre C++17.
+ //
+ b.data = const_cast<typename T::value_type*> (v.data ());
+ b.capacity = v.size ();
+ }
+
+ template <typename T>
+ static void
+ dynarray_flush (void* d, event, buffer& b)
+ {
+ T& v (*static_cast<T*> (d));
+ v.resize (b.size);
+ b.data = const_cast<typename T::value_type*> (v.data ());
+ b.capacity = b.size;
+ }
+
+ buffer_serializer::
+ buffer_serializer (string& s, size_t i)
+ : buffer_serializer (const_cast<char*> (s.data ()), size_, s.size (),
+ dynarray_overflow<string>,
+ dynarray_flush<string>,
+ &s,
+ i)
+ {
+ size_ = s.size ();
+ }
+
+ buffer_serializer::
+ buffer_serializer (vector<char>& v, size_t i)
+ : buffer_serializer (v.data (), size_, v.size (),
+ dynarray_overflow<vector<char>>,
+ dynarray_flush<vector<char>>,
+ &v,
+ i)
+ {
+ size_ = v.size ();
+ }
+
+ static void
+ ostream_overflow (void* d, event e, buffer& b, size_t)
+ {
+ ostream& s (*static_cast<ostream*> (d));
+ s.write (static_cast<char*> (b.data), b.size);
+ if (s.fail ())
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "unable to write JSON output text");
+ b.size = 0;
+ }
+
+ static void
+ ostream_flush (void* d, event e, buffer& b)
+ {
+ ostream_overflow (d, e, b, 0);
+
+ ostream& s (*static_cast<ostream*> (d));
+ s.flush ();
+ if (s.fail ())
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "unable to write JSON output text");
+ }
+
+ stream_serializer::
+ stream_serializer (ostream& os, size_t i)
+ : buffer_serializer (tmp_, sizeof (tmp_),
+ ostream_overflow,
+ ostream_flush,
+ &os,
+ i)
+ {
+ }
+
+ bool buffer_serializer::
+ next (optional<event> e, pair<const char*, size_t> val, bool check)
+ {
+ if (absent_ == 2)
+ goto fail_complete;
+
+ if (e == nullopt)
+ {
+ if (!state_.empty ())
+ goto fail_incomplete;
+
+ absent_++;
+ return false;
+ }
+
+ absent_ = 0; // Clear inter-value absent event.
+
+ {
+ state* st (state_.empty () ? nullptr : &state_.back ());
+
+ auto name_expected = [] (const state& s)
+ {
+ return s.type == event::begin_object && s.count % 2 == 0;
+ };
+
+ auto make_str = [] (const char* s, size_t n)
+ {
+ return make_pair (s, n);
+ };
+
+ // When it comes to pretty-printing, the common way to do it is along
+ // these lines:
+ //
+ // {
+ // "str": "value",
+ // "obj": {
+ // "arr": [
+ // 1,
+ // 2,
+ // 3
+ // ]
+ // },
+ // "num": 123
+ // }
+ //
+ // Empty objects and arrays are printed without a newline:
+ //
+ // {
+ // "obj": {},
+ // "arr": []
+ // }
+ //
+ // There are two types of separators: between name and value, which is
+ // always ": ", and before/after value inside an object or array which
+ // is either newline followed by indentation, or comma followed by
+ // newline followed by indentation (we also have separation between
+ // top-level values but that's orthogonal to pretty-printing).
+ //
+ // Based on this observation, we are going to handle the latter case by
+ // starting with the ",\n" string (in this->sep_) and pushing/popping
+ // indentation spaces as we enter/leave objects and arrays. We handle
+ // the cases where we don't need the comma by simply skipping it in the
+ // C-string pointer.
+ //
+ bool pp (indent_ != 0);
+
+ pair<const char*, size_t> sep;
+ if (st != nullptr)
+ {
+ // The name-value separator.
+ //
+ if (st->type == event::begin_object && st->count % 2 == 1)
+ {
+ sep = !pp ? make_str (":", 1) : make_str (": ", 2);
+ }
+ // We don't need the comma if we are closing the object or array.
+ //
+ else if (e == event::end_array || e == event::end_object)
+ {
+ // But in this case we need to unindent one level prior to writing
+ // the brace. Also handle the empty object/array as a special case.
+ //
+ sep = !pp || st->count == 0
+ ? make_str (nullptr, 0)
+ : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_);
+ }
+ // Or if this is the first value (note: must come after end_*).
+ //
+ else if (st->count == 0)
+ {
+ sep = !pp
+ ? make_str (nullptr, 0)
+ : make_str (sep_.c_str () + 1, sep_.size () - 1);
+ }
+ else
+ {
+ sep = !pp
+ ? make_str (",", 1)
+ : make_str (sep_.c_str (), sep_.size ());
+ }
+ }
+ else if (values_ != 0) // Subsequent top-level value.
+ {
+ // Top-level value separation. For now we always separate them with
+ // newlines, which is the most common/sensible way.
+ //
+ sep = make_str ("\n", 1);
+ }
+
+ switch (*e)
+ {
+ case event::begin_array:
+ case event::begin_object:
+ {
+ if (st != nullptr && name_expected (*st))
+ goto fail_unexpected_event;
+
+ write (*e,
+ sep,
+ make_str (e == event::begin_array ? "[" : "{", 1),
+ false);
+
+ if (st != nullptr)
+ st->count++;
+
+ if (pp)
+ sep_.append (indent_, ' ');
+
+ state_.push_back (state {*e, 0});
+ break;
+ }
+ case event::end_array:
+ case event::end_object:
+ {
+ if (st == nullptr || (e == event::end_array
+ ? st->type != event::begin_array
+ : !name_expected (*st)))
+ goto fail_unexpected_event;
+
+ write (*e,
+ sep,
+ make_str (e == event::end_array ? "]" : "}", 1),
+ false);
+
+ if (pp)
+ sep_.erase (sep_.size () - indent_);
+
+ state_.pop_back ();
+ break;
+ }
+ case event::name:
+ case event::string:
+ {
+ if (e == event::name
+ ? (st == nullptr || !name_expected (*st))
+ : (st != nullptr && name_expected (*st)))
+ goto fail_unexpected_event;
+
+ write (*e, sep, val, check, '"');
+
+ if (st != nullptr)
+ st->count++;
+ break;
+ }
+ case event::null:
+ case event::boolean:
+ {
+ if (e == event::null && val.first == nullptr)
+ val = {"null", 4};
+ else if (check)
+ {
+ auto eq = [&val] (const char* v, size_t n)
+ {
+ return val.second == n && memcmp (val.first, v, n) == 0;
+ };
+
+ if (e == event::null)
+ {
+ if (!eq ("null", 4))
+ goto fail_null;
+ }
+ else
+ {
+ if (!eq ("true", 4) && !eq ("false", 5))
+ goto fail_bool;
+ }
+ }
+ }
+ // Fall through.
+ case event::number:
+ {
+ if (st != nullptr && name_expected (*st))
+ goto fail_unexpected_event;
+
+ write (*e, sep, val, check);
+
+ if (st != nullptr)
+ st->count++;
+ break;
+ }
+ }
+ }
+
+ if (state_.empty ())
+ {
+ values_++;
+ if (flush_ != nullptr)
+ flush_ (data_, *e, buf_);
+
+ return false;
+ }
+
+ return true;
+
+ fail_complete:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "value sequence is complete");
+ fail_incomplete:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "value is incomplete");
+ fail_null:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "invalid null value");
+ fail_bool:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "invalid boolean value");
+ fail_unexpected_event:
+ throw invalid_json_output (
+ e, error_code::unexpected_event, "unexpected event");
+ }
+
+ // JSON escape sequences for control characters <= 0x1F.
+ //
+ static const char* json_escapes[] =
+ {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
+ "\\u0006", "\\u0007", "\\b", "\\t", "\\n", "\\u000B",
+ "\\f", "\\r", "\\u000E", "\\u000F", "\\u0010", "\\u0011",
+ "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
+ "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D",
+ "\\u001E", "\\u001F"};
+
+ void buffer_serializer::
+ write (event e,
+ pair<const char*, size_t> sep,
+ pair<const char*, size_t> val,
+ bool check,
+ char q)
+ {
+ // Assumptions:
+ //
+ // 1. A call to overflow should be able to provide enough capacity to
+ // write the entire separator (in other words, we are not going to
+ // bother with chunking the separator).
+ //
+ // 2. Similarly, a call to overflow should be able to provide enough
+ // capacity to write an entire UTF-8 multi-byte sequence.
+ //
+ // 3. Performance-wise, we do not expect very long contiguous sequences
+ // of character that require escaping.
+
+ // Total number of bytes remaining to be written and the capacity
+ // currently available.
+ //
+ size_t size (sep.second + val.second + (q != '\0' ? 2 : 0));
+ size_t cap (buf_.capacity - buf_.size);
+
+ auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0)
+ {
+ if (overflow_ == nullptr)
+ return false;
+
+ extra += size;
+ extra -= cap;
+ overflow_ (data_, e, buf_, extra > min ? extra : min);
+ cap = buf_.capacity - buf_.size;
+
+ return cap >= min;
+ };
+
+ auto append = [this, &cap, &size] (const char* d, size_t s)
+ {
+ memcpy (static_cast<char*> (buf_.data) + buf_.size, d, s);
+ buf_.size += s;
+ cap -= s;
+ size -= s;
+ };
+
+ // Return the longest chunk of input that fits into the buffer and does
+ // not end in the middle of a multi-byte UTF-8 sequence. Assume value
+ // size and capacity are not 0. Return NULL in first if no chunk could
+ // be found that fits into the remaining space. In this case, second is
+ // the additional (to size) required space (used to handle escapes in
+ // the checked version).
+ //
+ // The basic idea is to seek in the input buffer to the capacity of the
+ // output buffer (unless the input is shorter than the output). If we
+ // ended up in the middle of a multi-byte UTF-8 sequence, then seek back
+ // until we end up at the UTF-8 sequence boundary. Note that this
+ // implementation assumes valid UTF-8.
+ //
+ auto chunk = [&cap, &val] () -> pair<const char*, size_t>
+ {
+ pair<const char*, size_t> r (nullptr, 0);
+
+ if (cap >= val.second)
+ r = val;
+ else
+ {
+ // Start from the character past capacity and search for a UTF-8
+ // sequence boundary.
+ //
+ for (const char* p (val.first + cap); p != val.first; --p)
+ {
+ const auto u (static_cast<uint8_t> (*p));
+ if (u < 0x80 || u > 0xBF) // Not a continuation byte
+ {
+ r = {val.first, p - val.first};
+ break;
+ }
+ }
+ }
+
+ val.first += r.second;
+ val.second -= r.second;
+
+ return r;
+ };
+
+ // Escaping and UTF-8-validating version of chunk().
+ //
+ // There are three classes of mandatory escapes in a JSON string:
+ //
+ // - \\ and \"
+ //
+ // - \b \f \n \r \t for popular control characters
+ //
+ // - \u00NN for other control characters <= 0x1F
+ //
+ // If the input begins with a character that must be escaped, return
+ // only its escape sequence. Otherwise validate and return everything up
+ // to the end of input or buffer capacity, but cutting it short before
+ // the next character that must be escaped or the first UTF-8 sequence
+ // that would not fit.
+ //
+ // Return string::npos in second in case of a stray continuation byte or
+ // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte
+ // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence
+ // that would encode a codepoint beyond the U+10FFFF Unicode limit).
+ //
+ auto chunk_checked = [&cap, &size, &val] () -> pair<const char*, size_t>
+ {
+ pair<const char*, size_t> r (nullptr, 0);
+
+ // Check whether the first character needs to be escaped.
+ //
+ const uint8_t c (val.first[0]);
+ if (c == '"')
+ r = {"\\\"", 2};
+ else if (c == '\\')
+ r = {"\\\\", 2};
+ else if (c <= 0x1F)
+ {
+ auto s (json_escapes[c]);
+ r = {s, s[1] == 'u' ? 6 : 2};
+ }
+
+ if (r.first != nullptr)
+ {
+ // Return in second the additional (to size) space required.
+ //
+ if (r.second > cap)
+ return {nullptr, r.second - 1};
+
+ // If we had to escape the character then adjust size accordingly
+ // (see append() above).
+ //
+ size += r.second - 1;
+
+ val.first += 1;
+ val.second -= 1;
+ return r;
+ }
+
+ // First character doesn't need to be escaped. Return as much of the
+ // rest of the input as possible.
+ //
+ size_t i (0);
+ for (size_t n (min (cap, val.second)); i != n; i++)
+ {
+ const uint8_t c1 (val.first[i]);
+
+ if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped.
+ break;
+ else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence.
+ {
+ size_t i1 (i); // Position of the first byte.
+
+ // The control flow here is to continue if valid and to fall
+ // through to return on error.
+ //
+ if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence.
+ {
+ if (i + 2 <= val.second) // Sequence is complete in JSON value.
+ {
+ if (i + 2 > cap) // Sequence won't fit.
+ break;
+
+ const uint8_t c2 (val.first[++i]);
+
+ if (c2 >= 0x80 && c2 <= 0xBF)
+ continue;
+ }
+ }
+ else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence.
+ {
+ if (i + 3 <= val.second)
+ {
+ if (i + 3 > cap)
+ break;
+
+ const uint8_t c2 (val.first[++i]), c3 (val.first[++i]);
+
+ if (c3 >= 0x80 && c3 <= 0xBF)
+ {
+ switch (c1)
+ {
+ case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break;
+ case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break;
+ default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+ }
+ }
+ }
+ }
+ else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence.
+ {
+ if (i + 4 <= val.second)
+ {
+ if (i + 4 > cap)
+ break;
+
+ const uint8_t c2 (val.first[++i]),
+ c3 (val.first[++i]),
+ c4 (val.first[++i]);
+
+ if (c3 >= 0x80 && c3 <= 0xBF &&
+ c4 >= 0x80 && c4 <= 0xBF)
+ {
+ switch (c1)
+ {
+ case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break;
+ case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break;
+ default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+ }
+ }
+ }
+ }
+
+ r = {val.first, string::npos};
+
+ // Update val to point to the beginning of the invalid sequence.
+ //
+ val.first += i1;
+ val.second -= i1;
+
+ return r;
+ }
+ }
+
+ if (i != 0) // We have a chunk.
+ {
+ r = {val.first, i};
+
+ val.first += i;
+ val.second -= i;
+ }
+
+ return r;
+ };
+
+ // Value's original size (used to calculate the offset of the errant
+ // character in case of a validation failure).
+ //
+ const size_t vn (val.second);
+
+ // Write the separator, if any.
+ //
+ if (sep.second != 0)
+ {
+ if (cap < sep.second && !grow (sep.second))
+ goto fail_nospace;
+
+ append (sep.first, sep.second);
+ }
+
+ // Write the value's opening quote, if requested.
+ //
+ if (q != '\0')
+ {
+ if (cap == 0 && !grow (1))
+ goto fail_nospace;
+
+ append ("\"", 1);
+ }
+
+ // Write the value, unless empty.
+ //
+ while (val.second != 0)
+ {
+ pair<const char*, size_t> ch (nullptr, 0);
+
+ if (cap != 0)
+ ch = check ? chunk_checked () : chunk ();
+
+ if (ch.first == nullptr)
+ {
+ // The minimum extra bytes we need the overflow function to be able
+ // to provide is based on these sequences that we do not break:
+ //
+ // - 4 bytes for a UTF-8 sequence
+ // - 6 bytes for an escaped Unicode sequence (\uXXXX).
+ //
+ if (!grow (6, ch.second))
+ goto fail_nospace;
+ }
+ else if (ch.second != string::npos)
+ append (ch.first, ch.second);
+ else
+ goto fail_utf8;
+ }
+
+ // Write the value's closing quote, if requested.
+ //
+ if (q != '\0')
+ {
+ if (cap == 0 && !grow (1))
+ goto fail_nospace;
+
+ append ("\"", 1);
+ }
+
+ return;
+
+ // Note: keep descriptions consistent with the parser.
+ //
+ fail_utf8:
+ throw invalid_json_output (e,
+ e == event::name ? error_code::invalid_name
+ : error_code::invalid_value,
+ "invalid UTF-8 text",
+ vn - val.second);
+
+ fail_nospace:
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "insufficient space in buffer");
+ }
+
+ size_t buffer_serializer::
+ to_chars_impl (char* b, size_t n, const char* f, ...)
+ {
+ va_list a;
+ va_start (a, f);
+ const int r (vsnprintf (b, n, f, a));
+ va_end (a);
+
+ if (r < 0 || r >= static_cast<int> (n))
+ {
+ throw invalid_json_output (event::number,
+ error_code::invalid_value,
+ "unable to convert number to string");
+ }
+
+ return static_cast<size_t> (r);
+ }
+ }
+}
diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx
new file mode 100644
index 0000000..fad91e2
--- /dev/null
+++ b/libbutl/json/serializer.hxx
@@ -0,0 +1,379 @@
+#pragma once
+
+#ifdef BUILD2_BOOTSTRAP
+# error JSON serializer not available during bootstrap
+#endif
+
+#include <array>
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <cstddef> // size_t, nullptr_t
+#include <utility> // pair
+#include <stdexcept> // invalid_argument
+#include <type_traits> // enable_if, is_*
+
+#include <libbutl/optional.hxx> // butl::optional is std::optional or similar.
+
+#include <libbutl/json/event.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ // Using the RFC8259 terminology: JSON (output) text, JSON value, object
+ // member.
+ //
+ namespace json
+ {
+ class invalid_json_output: public std::invalid_argument
+ {
+ public:
+ using event_type = json::event;
+
+ enum class error_code
+ {
+ buffer_overflow,
+ unexpected_event,
+ invalid_name,
+ invalid_value
+ };
+
+ invalid_json_output (optional<event_type> event,
+ error_code code,
+ const char* description,
+ std::size_t offset = std::string::npos);
+
+ invalid_json_output (optional<event_type> event,
+ error_code code,
+ const std::string& description,
+ std::size_t offset = std::string::npos);
+
+ // Event that triggered the error. If the error is in the value, then
+ // offset points to the offending byte (for example, the beginning of an
+ // invalid UTF-8 byte sequence). Otherwise, offset is string::npos.
+ //
+ optional<event_type> event;
+ error_code code;
+ std::size_t offset;
+ };
+
+ // The serializer makes sure the resulting JSON is syntactically but not
+ // necessarily semantically correct. For example, it's possible to
+ // serialize a number event with non-numeric data.
+ //
+ // Note that unlike the parser, the serializer is always in the multi-
+ // value mode allowing the serialization of zero or more values. Note also
+ // that while values are separated with newlines, there is no trailing
+ // newline after the last (or only) value and the user is expected to add
+ // it manually if needed.
+ //
+ // Also note that while RFC8259 recommends object members to have unique
+ // names, the serializer does not enforce this.
+ //
+ class LIBBUTL_SYMEXPORT buffer_serializer
+ {
+ public:
+ // Serialize to string growing it as necessary.
+ //
+ // The indentation argument specifies the number of indentation spaces
+ // that should be used for pretty-printing. If 0 is passed, no
+ // pretty-printing is performed.
+ //
+ explicit
+ buffer_serializer (std::string&, std::size_t indentation = 2);
+
+ // Serialize to vector of characters growing it as necessary.
+ //
+ explicit
+ buffer_serializer (std::vector<char>&, std::size_t indentation = 2);
+
+ // Serialize to a fixed array.
+ //
+ // The length of the output text written is tracked in the size
+ // argument.
+ //
+ // If the array is not big enough to store the entire output text, the
+ // next() call that reaches the limit will throw invalid_json_output.
+ //
+ template <std::size_t N>
+ buffer_serializer (std::array<char, N>&, std::size_t& size,
+ std::size_t indentation = 2);
+
+ // Serialize to a fixed buffer.
+ //
+ // The length of the output text written is tracked in the size
+ // argument.
+ //
+ // If the buffer is not big enough to store the entire output text, the
+ // next() call that reaches the limit will throw invalid_json_output.
+ //
+ buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+ std::size_t indentation = 2);
+
+ // The overflow function is called when the output buffer is out of
+ // space. The extra argument is a hint indicating the extra space likely
+ // to be required.
+ //
+ // Possible strategies include re-allocating a larger buffer or flushing
+ // the contents of the original buffer to the output destination. In
+ // case of a reallocation, the implementation is responsible for copying
+ // the contents of the original buffer over.
+ //
+ // The flush function is called when the complete JSON value has been
+ // serialized to the buffer. It can be used to write the contents of the
+ // buffer to the output destination. Note that flush is not called after
+ // the second absent (nullopt) event (or the only absent event; see
+ // next() for details).
+ //
+ // Both functions are passed the original buffer, its size (the amount
+ // of output text), and its capacity. They return (by modifying the
+ // argument) the replacement buffer and its size and capacity (these may
+ // refer to the original buffer). If space cannot be made available, the
+ // implementation can throw an appropriate exception (for example,
+ // std::bad_alloc or std::ios_base::failure). Any exceptions thrown is
+ // propagated to the user.
+ //
+ struct buffer
+ {
+ void* data;
+ std::size_t& size;
+ std::size_t capacity;
+ };
+
+ using overflow_function = void (void* data,
+ event,
+ buffer&,
+ std::size_t extra);
+ using flush_function = void (void* data, event, buffer&);
+
+ // Serialize using a custom buffer and overflow/flush functions (both
+ // are optional).
+ //
+ buffer_serializer (void* buf, std::size_t capacity,
+ overflow_function*,
+ flush_function*,
+ void* data,
+ std::size_t indentation = 2);
+
+ // As above but the length of the output text written is tracked in the
+ // size argument.
+ //
+ buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+ overflow_function*,
+ flush_function*,
+ void* data,
+ std::size_t indentation = 2);
+
+ // Begin/end an object.
+ //
+ void
+ begin_object ();
+
+ void
+ end_object ();
+
+ // Serialize an object member (name and value).
+ //
+ // If check is false, then don't check whether the name (or value, if
+ // it's a string) is valid UTF-8 and don't escape any characters.
+ //
+ template <typename T>
+ void
+ member (const char* name, const T& value, bool check = true);
+
+ template <typename T>
+ void
+ member (const std::string& name, const T& value, bool check = true);
+
+ // Serialize an object member name.
+ //
+ // If check is false, then don't check whether the name is valid UTF-8
+ // and don't escape any characters.
+ //
+ void
+ member_name (const char*, bool check = true);
+
+ void
+ member_name (const std::string&, bool check = true);
+
+ // Begin/end an array.
+ //
+ void
+ begin_array ();
+
+ void
+ end_array ();
+
+ // Serialize a string.
+ //
+ // If check is false, then don't check whether the value is valid UTF-8
+ // and don't escape any characters.
+ //
+ // Note that a NULL C-string pointer is serialized as a null value.
+ //
+ void
+ value (const char*, bool check = true);
+
+ void
+ value (const std::string&, bool check = true);
+
+ // Serialize a number.
+ //
+ template <typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_floating_point<T>::value>::type
+ value (T);
+
+ // Serialize a boolean value.
+ //
+ void
+ value (bool);
+
+ // Serialize a null value.
+ //
+ void
+ value (std::nullptr_t);
+
+ // Serialize next JSON event.
+ //
+ // If check is false, then don't check whether the value is valid UTF-8
+ // and don't escape any characters.
+ //
+ // Return true if more events are required to complete the (top-level)
+ // value (that is, it is currently incomplete) and false otherwise.
+ // Throw invalid_json_output exception in case of an invalid event or
+ // value.
+ //
+ // At the end of the value an optional absent (nullopt) event can be
+ // serialized to verify the value is complete. If it is incomplete an
+ // invalid_json_output exception is thrown. An optional followup absent
+ // event can be serialized to indicate the completion of a multi-value
+ // sequence (one and only absent event indicates a zero value sequence).
+ // If anything is serialized to a complete value sequence an
+ // invalid_json_output exception is thrown.
+ //
+ // Note that this function was designed to be easily invoked with the
+ // output from parser::next() and parser::data(). For example, for a
+ // single-value mode:
+ //
+ // optional<event> e;
+ // do
+ // {
+ // e = p.next ();
+ // s.next (e, p.data ());
+ // }
+ // while (e);
+ //
+ // For a multi-value mode:
+ //
+ // while (p.peek ())
+ // {
+ // optional<event> e;
+ // do
+ // {
+ // e = p.next ();
+ // s.next (e, p.data ());
+ // }
+ // while (e);
+ // }
+ // s.next (nullopt); // End of value sequence.
+ //
+ bool
+ next (optional<event> event,
+ std::pair<const char*, std::size_t> value = {},
+ bool check = true);
+
+ private:
+ void
+ write (event,
+ std::pair<const char*, std::size_t> sep,
+ std::pair<const char*, std::size_t> val,
+ bool check, char quote = '\0');
+
+ // Forward a value(v, check) call to value(v) ignoring the check
+ // argument. Used in the member() implementation.
+ //
+ template <typename T>
+ void
+ value (const T& v, bool /*check*/)
+ {
+ value (v);
+ }
+
+ // Convert numbers to string.
+ //
+ static std::size_t to_chars (char*, std::size_t, int);
+ static std::size_t to_chars (char*, std::size_t, long);
+ static std::size_t to_chars (char*, std::size_t, long long);
+ static std::size_t to_chars (char*, std::size_t, unsigned int);
+ static std::size_t to_chars (char*, std::size_t, unsigned long);
+ static std::size_t to_chars (char*, std::size_t, unsigned long long);
+ static std::size_t to_chars (char*, std::size_t, double);
+ static std::size_t to_chars (char*, std::size_t, long double);
+
+ static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...);
+
+ buffer buf_;
+ std::size_t size_;
+ overflow_function* overflow_;
+ flush_function* flush_;
+ void* data_;
+
+ // State of a "structured type" (array or object; as per the RFC
+ // terminology).
+ //
+ struct state
+ {
+ const event type; // Type kind (begin_array or begin_object).
+ std::size_t count; // Number of events serialized inside this type.
+ };
+
+ // Stack of nested structured type states.
+ //
+ // @@ TODO: would have been nice to use small_vector.
+ //
+ std::vector<state> state_;
+
+ // The number of consecutive absent events (nullopt) serialized thus
+ // far.
+ //
+ // Note: initialized to 1 to naturally handle a single absent event
+ // (declares an empty value sequence complete).
+ //
+ std::size_t absent_ = 1;
+
+ // The number of spaces with which to indent (once for each level of
+ // nesting). If zero, pretty-printing is disabled.
+ //
+ std::size_t indent_;
+
+ // Separator and indentation before/after value inside an object or
+ // array (see pretty-printing implementation for details).
+ //
+ std::string sep_;
+
+ // The number of complete top-level values serialized thus far.
+ //
+ std::size_t values_ = 0;
+ };
+
+ class LIBBUTL_SYMEXPORT stream_serializer: public buffer_serializer
+ {
+ public:
+ // Serialize to std::ostream.
+ //
+ // If stream exceptions are enabled then the std::ios_base::failure
+ // exception is used to report input/output errors (badbit and failbit).
+ // Otherwise, those are reported as the invalid_json_output exception.
+ //
+ explicit
+ stream_serializer (std::ostream&, std::size_t indentation = 2);
+
+ protected:
+ char tmp_[4096];
+ };
+ }
+}
+
+#include <libbutl/json/serializer.ixx>
diff --git a/libbutl/json/serializer.ixx b/libbutl/json/serializer.ixx
new file mode 100644
index 0000000..5b2c173
--- /dev/null
+++ b/libbutl/json/serializer.ixx
@@ -0,0 +1,202 @@
+namespace butl
+{
+ namespace json
+ {
+ inline invalid_json_output::
+ invalid_json_output (optional<event_type> e,
+ error_code c,
+ const char* d,
+ std::size_t o)
+ : std::invalid_argument (d), event (e), code (c), offset (o)
+ {
+ }
+
+ inline invalid_json_output::
+ invalid_json_output (optional<event_type> e,
+ error_code c,
+ const std::string& d,
+ std::size_t o)
+ : invalid_json_output (e, c, d.c_str (), o)
+ {
+ }
+
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t& s, std::size_t c,
+ overflow_function* o, flush_function* f, void* d,
+ std::size_t i)
+ : buf_ {b, s, c},
+ overflow_ (o),
+ flush_ (f),
+ data_ (d),
+ indent_ (i),
+ sep_ (indent_ != 0 ? ",\n" : "")
+ {
+ }
+
+ template <std::size_t N>
+ inline buffer_serializer::
+ buffer_serializer (std::array<char, N>& a, std::size_t& s, std::size_t i)
+ : buffer_serializer (a.data (), s, a.size (),
+ nullptr, nullptr, nullptr,
+ i)
+ {
+ }
+
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t& s, std::size_t c, std::size_t i)
+ : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i)
+ {
+ }
+
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t c,
+ overflow_function* o, flush_function* f, void* d,
+ std::size_t i)
+ : buffer_serializer (b, size_, c, o, f, d, i)
+ {
+ size_ = 0;
+ }
+
+ inline void buffer_serializer::
+ begin_object ()
+ {
+ next (event::begin_object);
+ }
+
+ inline void buffer_serializer::
+ end_object ()
+ {
+ next (event::end_object);
+ }
+
+ inline void buffer_serializer::
+ member_name (const char* n, bool c)
+ {
+ next (event::name, {n, n != nullptr ? strlen (n) : 0}, c);
+ }
+
+ inline void buffer_serializer::
+ member_name (const std::string& n, bool c)
+ {
+ next (event::name, {n.c_str (), n.size ()}, c);
+ }
+
+ template <typename T>
+ inline void buffer_serializer::
+ member (const char* n, const T& v, bool c)
+ {
+ member_name (n, c);
+ value (v, c);
+ }
+
+ template <typename T>
+ inline void buffer_serializer::
+ member (const std::string& n, const T& v, bool c)
+ {
+ member_name (n, c);
+ value (v, c);
+ }
+
+ inline void buffer_serializer::
+ begin_array ()
+ {
+ next (event::begin_array);
+ }
+
+ inline void buffer_serializer::
+ end_array ()
+ {
+ next (event::end_array);
+ }
+
+ inline void buffer_serializer::
+ value (const char* v, bool c)
+ {
+ if (v != nullptr)
+ next (event::string, {v, strlen (v)}, c);
+ else
+ next (event::null);
+ }
+
+ inline void buffer_serializer::
+ value (const std::string& v, bool c)
+ {
+ next (event::string, {v.c_str (), v.size ()}, c);
+ }
+
+ template <typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_floating_point<T>::value>::type
+ buffer_serializer::
+ value (T v)
+ {
+ // The largest 128-bit integer has 39 digits, and long floating point
+ // numbers will fit because they are output in scientific notation.
+ //
+ char b[40];
+ const std::size_t n (to_chars (b, sizeof (b), v));
+ next (event::number, {b, n});
+ }
+
+ inline void buffer_serializer::
+ value (bool b)
+ {
+ next (event::boolean,
+ b ? std::make_pair ("true", 4) : std::make_pair ("false", 5));
+ }
+
+ inline void buffer_serializer::
+ value (std::nullptr_t)
+ {
+ next (event::null);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, int v)
+ {
+ return to_chars_impl (b, s, "%d", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, long v)
+ {
+ return to_chars_impl (b, s, "%ld", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, long long v)
+ {
+ return to_chars_impl (b, s, "%lld", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, unsigned v)
+ {
+ return to_chars_impl (b, s, "%u", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, unsigned long v)
+ {
+ return to_chars_impl (b, s, "%lu", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, unsigned long long v)
+ {
+ return to_chars_impl (b, s, "%llu", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, double v)
+ {
+ return to_chars_impl (b, s, "%.10g", v);
+ }
+
+ inline size_t buffer_serializer::
+ to_chars (char* b, size_t s, long double v)
+ {
+ return to_chars_impl (b, s, "%.10Lg", v);
+ }
+ }
+}