aboutsummaryrefslogtreecommitdiff
path: root/libbutl/lz4.hxx
blob: 98175c116f051b438691fd1e55e787bcdee624cf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
// file      : libbutl/lz4.hxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#pragma once

#include <cstdint>
#include <cstddef>

#include <libbutl/optional.mxx>
#include <libbutl/fdstream.mxx>

#include <libbutl/export.hxx>

namespace butl
{
  namespace lz4
  {
    // Read the content from the input stream, compress it using the specified
    // compression level and block size, and write the compressed content to
    // the output stream. If content size is specified, then include it in
    // the compressed content header. Return the compressed content size.
    //
    // Note the argument order: the output stream (ofdstream) comes first and
    // the input stream (ifdstream) second.
    //
    // This function may throw std::bad_alloc as well as exceptions thrown by
    // fdstream read/write functions. It may also throw std::invalid_argument
    // in case of argument inconsistencies (e.g., content size does not match
    // actual) with what() returning the error description. The input stream
    // is expected to throw on badbit (but not failbit). The output stream is
    // expected to throw on badbit or failbit.
    //
    // The output and most likely the input streams must be in the binary
    // mode.
    //
    // Valid values for the compression level are between 1 (fastest) and
    // 12 (best compression level).
    //
    // Valid block sizes and their IDs:
    //
    // 4:   64KB
    // 5:  256KB
    // 6:    1MB
    // 7:    4MB
    //
    // This function produces compressed content identical to:
    //
    // lz4 -z -<compression_level> -B<block_size_id> -BD [--content-size]
    //
    LIBBUTL_SYMEXPORT std::uint64_t
    compress (ofdstream&,
              ifdstream&,
              int compression_level,
              int block_size_id,
              optional<std::uint64_t> content_size);

    // Low-level iterative compression API.
    //
    // This API may throw std::bad_alloc in case of memory allocation errors
    // and std::invalid_argument in case of argument inconsistencies (e.g.,
    // content size does not match actual) with what() returning the error
    // description.
    //
    // See the implementation of the compress() function above for a usage
    // example.
    //
    // @@ TODO: reset support.
    //
    struct LIBBUTL_SYMEXPORT compressor
    {
      // Buffer, current size (part filled with data), and capacity.
      //
      // A default-constructed instance starts with null buffers and zero
      // sizes/capacities (see the constructor below).
      //
      char* ib; std::size_t in, ic; // Input.
      char* ob; std::size_t on, oc; // Output.

      // As a first step call begin(). This function sets the required input
      // and output buffer capacities (ic, oc).
      //
      // The caller normally allocates the input and output buffers and fills
      // the input buffer.
      //
      void
      begin (int compression_level,
             int block_size_id,
             optional<std::uint64_t> content_size);

      // Then call next() to compress the next chunk of input passing true on
      // reaching EOF. Note that the input buffer should be filled to capacity
      // unless end is true and the output buffer must be flushed before each
      // subsequent call to next().
      //
      void
      next (bool end);

      // Not copyable or movable.
      //
      compressor (const compressor&) = delete;
      compressor (compressor&&) = delete;
      compressor& operator= (const compressor&) = delete;
      compressor& operator= (compressor&&) = delete;

      // Implementation details.
      //
      // Initialize every member so that a freshly constructed instance has a
      // well-defined state (null buffers, zero sizes and capacities, no
      // context) rather than indeterminate values that would be undefined
      // behavior to read before begin() is called. The initializers are
      // listed in the member declaration order; content_size_ is
      // default-constructed.
      //
      compressor ()
          : ib (nullptr), in (0), ic (0),
            ob (nullptr), on (0), oc (0),
            ctx_ (nullptr),
            level_ (0),
            block_id_ (0),
            begin_ (false) {}

      // Only declared here; the definition lives outside of this header.
      //
      ~compressor ();

      // Note: this access specifier is a no-op in a struct (members are
      // already public); presumably it just sets the implementation details
      // apart.
      //
    public:
      // NOTE(review): presumably fills in the LZ4F preferences object passed
      // as an opaque pointer -- confirm against the implementation.
      //
      void
      init_preferences (void*) const;

      // Opaque implementation context (null until set up by the
      // implementation). NOTE(review): presumably the LZ4F compression
      // context -- confirm against the implementation.
      //
      void* ctx_;

      // NOTE(review): presumably the begin() arguments saved for use by
      // next() -- confirm against the implementation.
      //
      int level_;
      int block_id_;
      optional<std::uint64_t> content_size_;

      // NOTE(review): presumably set by begin() to indicate that the next
      // call to next() is the first one -- confirm against the implementation.
      //
      bool begin_;
    };


    // Read the compressed content from the input stream, decompress it, and
    // write the decompressed content to the output stream. Return the
    // decompressed content size.
    //
    // Note the argument order: the output stream (ofdstream) comes first and
    // the input stream (ifdstream) second.
    //
    // This function may throw std::bad_alloc as well as exceptions thrown by
    // fdstream read/write functions. It may also throw std::invalid_argument
    // if the compressed content is invalid with what() returning the error
    // description. The input stream is expected to throw on badbit (but not
    // failbit). The output stream is expected to throw on badbit or failbit.
    //
    // The input and most likely the output streams must be in the binary
    // mode.
    //
    // Note that this function does not require the input stream to reach EOF
    // at the end of compressed content. So if you have this requirement, you
    // will need to enforce it yourself.
    //
    LIBBUTL_SYMEXPORT std::uint64_t
    decompress (ofdstream&, ifdstream&);

    // Low-level iterative decompression API.
    //
    // This API may throw std::bad_alloc in case of memory allocation errors
    // and std::invalid_argument if the compressed content is invalid with
    // what() returning the error description.
    //
    // See the implementation of the decompress() function above for a usage
    // example.
    //
    // The LZ4F_*() decompression functions return a hint of how much data
    // they want on the next call. So the plan is to allocate the input
    // buffer large enough to hold anything that can be asked for and then
    // fill it in in the asked chunks. This way we avoid having to shift the
    // unread data around.
    //
    // @@ TODO: reset support.
    //
    struct LIBBUTL_SYMEXPORT decompressor
    {
      // Buffer, current size (part filled with data), and capacity.
      //
      // The header buffer has a fixed capacity and therefore no capacity
      // member. NOTE(review): 19 is presumably the maximum LZ4 frame header
      // size (LZ4F_HEADER_SIZE_MAX in lz4frame.h) -- confirm against the
      // implementation.
      //
      char  hb[19]; std::size_t hn    ; // Header.
      char* ib;     std::size_t in, ic; // Input.
      char* ob;     std::size_t on, oc; // Output.

      // As a first step, fill in the header buffer and call begin(). This
      // function sets the required input and output buffer capacities (ic,
      // oc) and the number of bytes left in the header buffer (hn) and
      // returns the number of bytes expected by the following call to next().
      //
      // The caller normally allocates the input and output buffers, copies
      // remaining header buffer data over to the input buffer, and then fills
      // in the remainder of the input buffer up to what's expected by the
      // call to next().
      //
      std::size_t
      begin ();

      // Then call next() to decompress the next chunk of input. This function
      // returns the number of bytes expected by the following call to next()
      // or 0 if no further input is expected. Note that the output buffer
      // must be flushed before each subsequent call to next().
      //
      std::size_t
      next ();

      // Not copyable or movable.
      //
      decompressor (const decompressor&) = delete;
      decompressor (decompressor&&) = delete;
      decompressor& operator= (const decompressor&) = delete;
      decompressor& operator= (decompressor&&) = delete;

      // Implementation details.
      //
      // Both the constructor and the destructor are only declared here; their
      // definitions (and thus which members the constructor initializes) live
      // outside of this header.
      //
      decompressor ();
      ~decompressor ();

      // Note: this access specifier is a no-op in a struct (members are
      // already public); presumably it just sets the implementation details
      // apart.
      //
    public:
      // Opaque implementation context. NOTE(review): presumably the LZ4F
      // decompression context -- confirm against the implementation.
      //
      void* ctx_;
    };
  }
}