From d7b7fd602b981dd6816b0e5c7c341e2cef272c17 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Tue, 7 Sep 2021 15:31:34 +0200
Subject: Expose low-level, iterative LZ4 compression/decompression API

---
 libbutl/lz4.hxx | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 130 insertions(+), 8 deletions(-)

(limited to 'libbutl/lz4.hxx')

diff --git a/libbutl/lz4.hxx b/libbutl/lz4.hxx
index bf679c5..98175c1 100644
--- a/libbutl/lz4.hxx
+++ b/libbutl/lz4.hxx
@@ -4,6 +4,7 @@
 #pragma once
 
 #include
+#include
 
 #include
 #include
@@ -14,16 +15,14 @@ namespace butl
 {
   namespace lz4
   {
-    //@@ TODO: allow (re-)using external buffers, contexts?
-
     // Read the content from the input stream, compress it using the specified
     // compression level and block size, and write the compressed content to
     // the output stream. If content size is specified, then include it into
     // the compressed content header. Return the compressed content size.
     //
     // This function may throw std::bad_alloc as well as exceptions thrown by
-    // fdstream read/write functions. It may also throw invalid_argument in
-    // case of argument inconsistencies (e.g., content size does not match
+    // fdstream read/write functions. It may also throw std::invalid_argument
+    // in case of argument inconsistencies (e.g., content size does not match
     // actual) with what() returning the error description. The input stream
     // is expected to throw on badbit (but not failbit). The output stream is
     // expected to throw on badbit or failbit.
@@ -52,14 +51,75 @@ namespace butl
               int block_size_id,
               optional content_size);
 
+    // Low-level iterative compression API.
+    //
+    // This API may throw std::bad_alloc in case of memory allocation errors
+    // and std::invalid_argument in case of argument inconsistencies (e.g.,
+    // content size does not match actual) with what() returning the error
+    // description.
+    //
+    // See the implementation of the compress() function above for usage
+    // example.
+    //
+    // @@ TODO: reset support.
+    //
+    struct LIBBUTL_SYMEXPORT compressor
+    {
+      // Buffer pointer, current size (part filled with data), and capacity.
+      //
+      char* ib; std::size_t in, ic; // Input.
+      char* ob; std::size_t on, oc; // Output.
+
+      // As a first step call begin(). This function sets the required input
+      // and output buffer capacities (ic, oc).
+      //
+      // The caller normally allocates the input and output buffers and fills
+      // the input buffer.
+      //
+      void
+      begin (int compression_level,
+             int block_size_id,
+             optional<std::uint64_t> content_size);
+
+      // Then call next() to compress the next chunk of input, passing true on
+      // reaching EOF. Note that the input buffer should be filled to capacity
+      // unless end is true, and the output buffer must be flushed before each
+      // subsequent call to next().
+      //
+      void
+      next (bool end);
+
+      // Not copyable or movable.
+      //
+      compressor (const compressor&) = delete;
+      compressor (compressor&&) = delete;
+      compressor& operator= (const compressor&) = delete;
+      compressor& operator= (compressor&&) = delete;
+
+      // Implementation details.
+      //
+      compressor (): ctx_ (nullptr) {} // Only ctx_ is initialized here.
+      ~compressor ();
+
+    public: // Redundant in a struct; marks the start of internal members.
+      void
+      init_preferences (void*) const; // Presumably LZ4F_preferences_t*; confirm.
+
+      void* ctx_;      // Opaque context pointer; null on construction.
+      int level_;      // Presumably begin()'s compression_level; confirm.
+      int block_id_;   // Presumably begin()'s block_size_id; confirm.
+      optional<std::uint64_t> content_size_; // Presumably begin()'s argument.
+      bool begin_;     // NOTE(review): meaning not visible in this header.
+    };
+
     // Read the compressed content from the input stream, decompress it, and
     // write the decompressed content to the output stream. Return the
     // decompressed content size.
     //
     // This function may throw std::bad_alloc as well as exceptions thrown by
-    // fdstream read/write functions. It may also throw invalid_argument if
-    // the compressed content is invalid with what() returning the error
+    // fdstream read/write functions. It may also throw std::invalid_argument
+    // if the compressed content is invalid with what() returning the error
     // description. The input stream is expected to throw on badbit (but not
     // failbit). The output stream is expected to throw on badbit or failbit.
     //
@@ -67,10 +127,72 @@ namespace butl
     // mode.
     //
     // Note that this function does not require the input stream to reach EOF
-    // at the end of compressed content. So if you have this a requirement,
-    // you will need to enforce it yourself.
+    // at the end of compressed content. So if you have this requirement, you
+    // will need to enforce it yourself.
     //
     LIBBUTL_SYMEXPORT std::uint64_t
     decompress (ofdstream&, ifdstream&);
+
+    // Low-level iterative decompression API.
+    //
+    // This API may throw std::bad_alloc in case of memory allocation errors
+    // and std::invalid_argument if the compressed content is invalid, with
+    // what() returning the error description.
+    //
+    // See the implementation of the decompress() function above for a usage
+    // example.
+    //
+    // The LZ4F_*() decompression functions return a hint of how much data
+    // they want on the next call. So the plan is to allocate the input
+    // buffer large enough to hold anything that can be asked for and then
+    // fill it in chunks of the requested size. This way we avoid having to
+    // shift the unread data around.
+    //
+    // @@ TODO: reset support.
+    //
+    struct LIBBUTL_SYMEXPORT decompressor
+    {
+      // Buffer, current size (part filled with data), and capacity.
+      //
+      char  hb[19]; std::size_t hn    ; // Header (19: LZ4F max header? confirm).
+      char* ib;     std::size_t in, ic; // Input.
+      char* ob;     std::size_t on, oc; // Output.
+
+      // As a first step, fill in the header buffer and call begin(). This
+      // function sets the required input and output buffer capacities (ic,
+      // oc) and the number of bytes left in the header buffer (hn), and
+      // returns the number of bytes expected by the following call to next().
+      //
+      // The caller normally allocates the input and output buffers, copies
+      // remaining header buffer data over to the input buffer, and then fills
+      // in the remainder of the input buffer up to what's expected by the
+      // call to next().
+      //
+      std::size_t
+      begin ();
+
+      // Then call next() to decompress the next chunk of input. This function
+      // returns the number of bytes expected by the following call to next(),
+      // or 0 if no further input is expected. Note that the output buffer
+      // must be flushed before each subsequent call to next().
+      //
+      std::size_t
+      next ();
+
+      // Not copyable or movable.
+      //
+      decompressor (const decompressor&) = delete;
+      decompressor (decompressor&&) = delete;
+      decompressor& operator= (const decompressor&) = delete;
+      decompressor& operator= (decompressor&&) = delete;
+
+      // Implementation details (both defined outside this header).
+      //
+      decompressor ();
+      ~decompressor ();
+
+    public: // Redundant in a struct; marks the start of internal members.
+      void* ctx_; // Opaque context pointer; initialization not visible here.
+    };
   }
 }
--
cgit v1.1