From 152ea943395822f55591eadaf8e0f5aac263db5e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 28 Feb 2022 08:57:02 +0200 Subject: Add JSON serializer (copy of libstud-json) --- libbutl/json/serializer.hxx | 379 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 379 insertions(+) create mode 100644 libbutl/json/serializer.hxx (limited to 'libbutl/json/serializer.hxx') diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx new file mode 100644 index 0000000..fad91e2 --- /dev/null +++ b/libbutl/json/serializer.hxx @@ -0,0 +1,379 @@ +#pragma once + +#ifdef BUILD2_BOOTSTRAP +# error JSON serializer not available during bootstrap +#endif + +#include +#include +#include +#include +#include // size_t, nullptr_t +#include // pair +#include // invalid_argument +#include // enable_if, is_* + +#include // butl::optional is std::optional or similar. + +#include + +#include + +namespace butl +{ + // Using the RFC8259 terminology: JSON (output) text, JSON value, object + // member. + // + namespace json + { + class invalid_json_output: public std::invalid_argument + { + public: + using event_type = json::event; + + enum class error_code + { + buffer_overflow, + unexpected_event, + invalid_name, + invalid_value + }; + + invalid_json_output (optional event, + error_code code, + const char* description, + std::size_t offset = std::string::npos); + + invalid_json_output (optional event, + error_code code, + const std::string& description, + std::size_t offset = std::string::npos); + + // Event that triggered the error. If the error is in the value, then + // offset points to the offending byte (for example, the beginning of an + // invalid UTF-8 byte sequence). Otherwise, offset is string::npos. + // + optional event; + error_code code; + std::size_t offset; + }; + + // The serializer makes sure the resulting JSON is syntactically but not + // necessarily semantically correct. For example, it's possible to + // serialize a number event with non-numeric data. + // + // Note that unlike the parser, the serializer is always in the multi- + // value mode allowing the serialization of zero or more values. Note also + // that while values are separated with newlines, there is no trailing + // newline after the last (or only) value and the user is expected to add + // it manually if needed. + // + // Also note that while RFC8259 recommends object members to have unique + // names, the serializer does not enforce this. + // + class LIBBUTL_SYMEXPORT buffer_serializer + { + public: + // Serialize to string growing it as necessary. + // + // The indentation argument specifies the number of indentation spaces + // that should be used for pretty-printing. If 0 is passed, no + // pretty-printing is performed. + // + explicit + buffer_serializer (std::string&, std::size_t indentation = 2); + + // Serialize to vector of characters growing it as necessary. + // + explicit + buffer_serializer (std::vector&, std::size_t indentation = 2); + + // Serialize to a fixed array. + // + // The length of the output text written is tracked in the size + // argument. + // + // If the array is not big enough to store the entire output text, the + // next() call that reaches the limit will throw invalid_json_output. + // + template + buffer_serializer (std::array&, std::size_t& size, + std::size_t indentation = 2); + + // Serialize to a fixed buffer. + // + // The length of the output text written is tracked in the size + // argument. + // + // If the buffer is not big enough to store the entire output text, the + // next() call that reaches the limit will throw invalid_json_output. + // + buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, + std::size_t indentation = 2); + + // The overflow function is called when the output buffer is out of + // space. The extra argument is a hint indicating the extra space likely + // to be required. + // + // Possible strategies include re-allocating a larger buffer or flushing + // the contents of the original buffer to the output destination. In + // case of a reallocation, the implementation is responsible for copying + // the contents of the original buffer over. + // + // The flush function is called when the complete JSON value has been + // serialized to the buffer. It can be used to write the contents of the + // buffer to the output destination. Note that flush is not called after + // the second absent (nullopt) event (or the only absent event; see + // next() for details). + // + // Both functions are passed the original buffer, its size (the amount + // of output text), and its capacity. They return (by modifying the + // argument) the replacement buffer and its size and capacity (these may + // refer to the original buffer). If space cannot be made available, the + // implementation can throw an appropriate exception (for example, + // std::bad_alloc or std::ios_base::failure). Any exceptions thrown is + // propagated to the user. + // + struct buffer + { + void* data; + std::size_t& size; + std::size_t capacity; + }; + + using overflow_function = void (void* data, + event, + buffer&, + std::size_t extra); + using flush_function = void (void* data, event, buffer&); + + // Serialize using a custom buffer and overflow/flush functions (both + // are optional). + // + buffer_serializer (void* buf, std::size_t capacity, + overflow_function*, + flush_function*, + void* data, + std::size_t indentation = 2); + + // As above but the length of the output text written is tracked in the + // size argument. + // + buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, + overflow_function*, + flush_function*, + void* data, + std::size_t indentation = 2); + + // Begin/end an object. + // + void + begin_object (); + + void + end_object (); + + // Serialize an object member (name and value). + // + // If check is false, then don't check whether the name (or value, if + // it's a string) is valid UTF-8 and don't escape any characters. + // + template + void + member (const char* name, const T& value, bool check = true); + + template + void + member (const std::string& name, const T& value, bool check = true); + + // Serialize an object member name. + // + // If check is false, then don't check whether the name is valid UTF-8 + // and don't escape any characters. + // + void + member_name (const char*, bool check = true); + + void + member_name (const std::string&, bool check = true); + + // Begin/end an array. + // + void + begin_array (); + + void + end_array (); + + // Serialize a string. + // + // If check is false, then don't check whether the value is valid UTF-8 + // and don't escape any characters. + // + // Note that a NULL C-string pointer is serialized as a null value. + // + void + value (const char*, bool check = true); + + void + value (const std::string&, bool check = true); + + // Serialize a number. + // + template + typename std::enable_if::value || + std::is_floating_point::value>::type + value (T); + + // Serialize a boolean value. + // + void + value (bool); + + // Serialize a null value. + // + void + value (std::nullptr_t); + + // Serialize next JSON event. + // + // If check is false, then don't check whether the value is valid UTF-8 + // and don't escape any characters. + // + // Return true if more events are required to complete the (top-level) + // value (that is, it is currently incomplete) and false otherwise. + // Throw invalid_json_output exception in case of an invalid event or + // value. + // + // At the end of the value an optional absent (nullopt) event can be + // serialized to verify the value is complete. If it is incomplete an + // invalid_json_output exception is thrown. An optional followup absent + // event can be serialized to indicate the completion of a multi-value + // sequence (one and only absent event indicates a zero value sequence). + // If anything is serialized to a complete value sequence an + // invalid_json_output exception is thrown. + // + // Note that this function was designed to be easily invoked with the + // output from parser::next() and parser::data(). For example, for a + // single-value mode: + // + // optional e; + // do + // { + // e = p.next (); + // s.next (e, p.data ()); + // } + // while (e); + // + // For a multi-value mode: + // + // while (p.peek ()) + // { + // optional e; + // do + // { + // e = p.next (); + // s.next (e, p.data ()); + // } + // while (e); + // } + // s.next (nullopt); // End of value sequence. + // + bool + next (optional event, + std::pair value = {}, + bool check = true); + + private: + void + write (event, + std::pair sep, + std::pair val, + bool check, char quote = '\0'); + + // Forward a value(v, check) call to value(v) ignoring the check + // argument. Used in the member() implementation. + // + template + void + value (const T& v, bool /*check*/) + { + value (v); + } + + // Convert numbers to string. + // + static std::size_t to_chars (char*, std::size_t, int); + static std::size_t to_chars (char*, std::size_t, long); + static std::size_t to_chars (char*, std::size_t, long long); + static std::size_t to_chars (char*, std::size_t, unsigned int); + static std::size_t to_chars (char*, std::size_t, unsigned long); + static std::size_t to_chars (char*, std::size_t, unsigned long long); + static std::size_t to_chars (char*, std::size_t, double); + static std::size_t to_chars (char*, std::size_t, long double); + + static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...); + + buffer buf_; + std::size_t size_; + overflow_function* overflow_; + flush_function* flush_; + void* data_; + + // State of a "structured type" (array or object; as per the RFC + // terminology). + // + struct state + { + const event type; // Type kind (begin_array or begin_object). + std::size_t count; // Number of events serialized inside this type. + }; + + // Stack of nested structured type states. + // + // @@ TODO: would have been nice to use small_vector. + // + std::vector state_; + + // The number of consecutive absent events (nullopt) serialized thus + // far. + // + // Note: initialized to 1 to naturally handle a single absent event + // (declares an empty value sequence complete). + // + std::size_t absent_ = 1; + + // The number of spaces with which to indent (once for each level of + // nesting). If zero, pretty-printing is disabled. + // + std::size_t indent_; + + // Separator and indentation before/after value inside an object or + // array (see pretty-printing implementation for details). + // + std::string sep_; + + // The number of complete top-level values serialized thus far. + // + std::size_t values_ = 0; + }; + + class LIBBUTL_SYMEXPORT stream_serializer: public buffer_serializer + { + public: + // Serialize to std::ostream. + // + // If stream exceptions are enabled then the std::ios_base::failure + // exception is used to report input/output errors (badbit and failbit). + // Otherwise, those are reported as the invalid_json_output exception. + // + explicit + stream_serializer (std::ostream&, std::size_t indentation = 2); + + protected: + char tmp_[4096]; + }; + } +} + +#include -- cgit v1.1