aboutsummaryrefslogtreecommitdiff
path: root/libbutl/lz4.cxx
blob: a627b06877db249e29700a3c1750da32e8622272 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
// file      : libbutl/lz4.cxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#include <libbutl/lz4.hxx>

// This careful macro dance makes sure that all the LZ4 C API functions are
// made static while making sure we include the headers in the same way as the
// implementation files that we include below.
//
#define LZ4LIB_VISIBILITY static
#define LZ4_STATIC_LINKING_ONLY
#define LZ4_PUBLISH_STATIC_FUNCTIONS
#define LZ4_DISABLE_DEPRECATE_WARNINGS
#include "lz4.h"
#include "lz4hc.h"

#define LZ4FLIB_VISIBILITY static
#define LZ4F_STATIC_LINKING_ONLY
#define LZ4F_PUBLISH_STATIC_FUNCTIONS
#define LZ4F_DISABLE_DEPRECATE_WARNINGS
#include "lz4frame.h"

#include <new>       // bad_alloc
#include <memory>    // unique_ptr
#include <cstring>   // memcpy()
#include <cassert>
#include <stdexcept> // invalid_argument, logic_error

#include <libbutl/utility.mxx> // eos()

#if 0
#include <libbutl/lz4-stream.hxx>
#endif

using namespace std;

namespace butl
{
  namespace lz4
  {
    // Translate the LZ4 frame block size id into the block size in bytes.
    // Return 0 for any id that is not one of the four explicit sizes.
    //
    static inline size_t
    block_size (LZ4F_blockSizeID_t id)
    {
      switch (id)
      {
      case LZ4F_max4MB:   return 4 * 1024 * 1024;
      case LZ4F_max1MB:   return 1 * 1024 * 1024;
      case LZ4F_max256KB: return      256 * 1024;
      case LZ4F_max64KB:  return       64 * 1024;
      default:            return                0;
      }
    }

    // Translate the LZ4F error code into a C++ exception and throw it.
    //
    // Invalid arguments and invalid content are reported as invalid_argument,
    // allocation failure as bad_alloc, and anything else (codes that indicate
    // a programming error or that we should never see) as logic_error,
    // preceded by an assertion failure.
    //
    [[noreturn]] static void
    throw_exception (LZ4F_errorCodes c)
    {
      // Description for the invalid_argument cases, NULL for the rest.
      //
      const char* d (nullptr);

      switch (c)
      {
      case LZ4F_ERROR_GENERIC:                     d = "generic LZ4 error";                 break;
      case LZ4F_ERROR_maxBlockSize_invalid:        d = "invalid LZ4 block size";            break;
      case LZ4F_ERROR_blockMode_invalid:           d = "invalid LZ4 block mode";            break;
      case LZ4F_ERROR_contentChecksumFlag_invalid: d = "invalid LZ4 content checksum flag"; break;
      case LZ4F_ERROR_compressionLevel_invalid:    d = "invalid LZ4 compression level";     break;
      case LZ4F_ERROR_headerVersion_wrong:         d = "wrong LZ4 header version";          break;
      case LZ4F_ERROR_blockChecksum_invalid:       d = "invalid LZ4 block checksum";        break;
      case LZ4F_ERROR_reservedFlag_set:            d = "reserved LZ4 flag set";             break;
      case LZ4F_ERROR_srcSize_tooLarge:            d = "LZ4 input too large";               break;
      case LZ4F_ERROR_dstMaxSize_tooSmall:         d = "LZ4 output too small";              break;
      case LZ4F_ERROR_frameHeader_incomplete:      d = "incomplete LZ4 frame header";       break;
      case LZ4F_ERROR_frameType_unknown:           d = "unknown LZ4 frame type";            break;
      case LZ4F_ERROR_frameSize_wrong:             d = "wrong LZ4 frame size";              break;
      case LZ4F_ERROR_decompressionFailed:         d = "invalid LZ4 compressed content";    break;
      case LZ4F_ERROR_headerChecksum_invalid:      d = "invalid LZ4 header checksum";       break;
      case LZ4F_ERROR_contentChecksum_invalid:     d = "invalid LZ4 content checksum";      break;

      case LZ4F_ERROR_allocation_failed:           throw bad_alloc ();

      // These seem to be programming errors.
      //
      case LZ4F_ERROR_srcPtr_wrong:                 // NULL pointer.
      case LZ4F_ERROR_frameDecoding_alreadyStarted: // Incorrect call seq.

      // We should never get these.
      //
      case LZ4F_OK_NoError:
      case LZ4F_ERROR_maxCode:
      case _LZ4F_dummy_error_enum_for_c89_never_used:
        break;
      }

      if (d != nullptr)
        throw invalid_argument (d);

      // Note: no default case in the above switch so that the compiler can
      // warn about unhandled enumerators.
      //
      assert (false);
      throw logic_error (LZ4F_getErrorName ((LZ4F_errorCode_t)(-c)));
    }

    // As above but for erroneous LZ4F_*() function result.
    //
    [[noreturn]] static inline void
    throw_exception (size_t r)
    {
      throw_exception (LZ4F_getErrorCode (r));
    }

    // compression
    //

    // Free the LZ4 compression context, if one has been created.
    //
    compressor::
    ~compressor ()
    {
      LZ4F_cctx* c (static_cast<LZ4F_cctx*> (ctx_));

      if (c == nullptr)
        return;

      // Failing to free the context is only diagnosed via the assertion.
      //
      LZ4F_errorCode_t r (LZ4F_freeCompressionContext (c));
      assert (!LZ4F_isError (r));
    }

    inline void compressor::
    init_preferences (void* vp) const
    {
      LZ4F_preferences_t* p (static_cast<LZ4F_preferences_t*> (vp));

      p->autoFlush = 1;
      p->favorDecSpeed = 0;
      p->compressionLevel = level_;
      p->frameInfo.blockMode = LZ4F_blockLinked;
      p->frameInfo.blockSizeID = static_cast<LZ4F_blockSizeID_t> (block_id_);
      p->frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum;
      p->frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
      p->frameInfo.contentSize = content_size_
        ? static_cast<unsigned long long> (*content_size_)
        : 0;
    }

    // Begin compression: save the parameters, calculate the required input
    // (ic) and output (oc) buffer capacities, and arrange for the next call
    // to next() to start a new stream.
    //
    void compressor::
    begin (int level,
           int block_id,
           optional<uint64_t> content_size)
    {
      assert (block_id >= 4 && block_id <= 7);

      content_size_ = content_size;
      block_id_ = block_id;
      level_ = level;

      LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
      init_preferences (&prefs);

      // Input/output buffer capacities.
      //
      // In order to be binary compatible with the lz4 utility, content that
      // fits into a single block has to be compressed with one
      // *_compressFrame() call rather than with the *_compressBegin()/
      // *_compressUpdate() sequence. And the output buffer capacity for the
      // former must be determined with *_compressFrameBound() rather than
      // *_compressBound(). But at this point (the input buffer hasn't been
      // filled yet) we don't know which of the two cases it is going to be.
      //
      // However, in our case (autoFlush=1), *Bound() < *FrameBound() which
      // means we can always use the latter at the cost of a slight overhead.
      // Using *FrameBound() also allows us to call *Begin() and *Update()
      // without flushing the buffer in between (this insight is based on
      // studying the implementation of the *Bound() functions).
      //
      // We can also take advantage of content_size and get away with much
      // smaller buffers for small inputs. We just need to verify the caller
      // is not lying to us (failing that, we may end up with a strange error
      // like insufficient output buffer space).
      //
      const size_t bs (block_size (prefs.frameInfo.blockSizeID));

      // This is nuanced: for the small input case we need an extra byte in
      // order to detect EOF.
      //
      ic = (content_size_ && *content_size_ < bs
            ? static_cast<size_t> (*content_size_) + 1
            : bs);

      oc = LZ4F_compressFrameBound (ic, &prefs);

      begin_ = true;
    }

    // Compress the next chunk of content: in bytes from the input buffer
    // (ib) are compressed into the output buffer (ob) with the number of
    // bytes written stored in on (which can be 0). On return in is reset to
    // 0 (all consumed). If end is true, then this is the last chunk and the
    // end of frame is written as well.
    //
    // The first call after begin() also writes the frame header (or, if the
    // entire content fits into a single block, the complete frame).
    //
    // Throw invalid_argument in case of argument inconsistencies (for
    // example, content size not matching the actual input) and bad_alloc if
    // unable to allocate memory.
    //
    void compressor::
    next (bool end)
    {
      LZ4F_cctx* ctx;

      // Unlike the decompression case below, compression cannot fail due to
      // invalid content. So any LZ4F_*() function failure is either due to a
      // programming bug or argument inconsistencies (e.g., content size does
      // not match actual).

      if (begin_)
      {
        begin_ = false;

        LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
        init_preferences (&prefs);

        // If we've allocated smaller buffers based on content_size_, then
        // verify the input size matches what's promised.
        //
        // Note also that LZ4F_compressFrame() does not fail if it doesn't
        // match instead replacing it with the actual value.
        //
        size_t bs (block_size (prefs.frameInfo.blockSizeID));
        if (content_size_ && *content_size_ < bs)
        {
          if (!end || in != *content_size_)
            throw_exception (LZ4F_ERROR_frameSize_wrong);
        }

        // Must be < for lz4 compatibility (see EOF nuance above for the
        // likely reason).
        //
        if (end && in < bs)
        {
          on = LZ4F_compressFrame (ob, oc, ib, in, &prefs);
          if (LZ4F_isError (on))
            throw_exception (on);

          in = 0; // All consumed.
          return;
        }
        else
        {
          // If this compressor has already been used for a previous stream,
          // then reuse its context rather than creating a new one (which
          // would overwrite ctx_ and leak the old context). The
          // LZ4F_compressBegin() call below initializes the context for a
          // new frame.
          //
          ctx = static_cast<LZ4F_cctx*> (ctx_);

          if (ctx == nullptr)
          {
            if (LZ4F_isError (
                  LZ4F_createCompressionContext (&ctx, LZ4F_VERSION)))
              throw bad_alloc ();

            ctx_ = ctx;
          }

          // Write the header.
          //
          on = LZ4F_compressBegin (ctx, ob, oc, &prefs);
          if (LZ4F_isError (on))
            throw_exception (on);

          // Fall through.
        }
      }
      else
      {
        ctx = static_cast<LZ4F_cctx*> (ctx_);
        on = 0;
      }

      size_t n;

      // Note that the output is appended after whatever is already in the
      // output buffer (the header, if this is the first chunk).
      //
      if (in != 0)
      {
        n = LZ4F_compressUpdate (ctx, ob + on, oc - on, ib, in, nullptr);
        if (LZ4F_isError (n))
          throw_exception (n);

        in = 0; // All consumed.
        on += n;
      }

      // Write the end marker.
      //
      if (end)
      {
        // Note that this call also verifies specified and actual content
        // sizes match.
        //
        n = LZ4F_compressEnd (ctx, ob + on, oc - on, nullptr);
        if (LZ4F_isError (n))
          throw_exception (n);

        on += n;
      }
    }

    // Compress the content of the input stream into the output stream and
    // return the total number of (compressed) bytes written.
    //
    // See compressor::begin() for the semantics of the level, block_id, and
    // content_size arguments.
    //
    uint64_t
    compress (ofdstream& os, ifdstream& is,
              int level,
              int block_id,
              optional<uint64_t> content_size)
    {
#if 0
      char buf[1024 * 3 + 7];
      ostream cos (os, level, block_id, content_size);

      for (bool e (false); !e; )
      {
        e = eof (is.read (buf, sizeof (buf)));
        cos.write (buf, is.gcount ());
        //for (streamsize i (0), n (is.gcount ()); i != n; ++i)
        //  cos.put (buf[i]);
      }

      cos.close ();
      return content_size ? *content_size : 0;
#else
      compressor cmp;

      // Determine the required buffer capacities and allocate the input and
      // output buffers (owned by the guards).
      //
      cmp.begin (level, block_id, content_size);

      unique_ptr<char[]> in_guard (new char[cmp.ic]);
      cmp.ib = in_guard.get ();

      unique_ptr<char[]> out_guard (new char[cmp.oc]);
      cmp.ob = out_guard.get ();

      // Keep reading, compressing, and writing chunks of content until we
      // have seen the end of the input.
      //
      // Note that we could try to do direct fd read/write but that would
      // complicate things quite a bit (error handling, stream state, etc).
      //
      uint64_t total (0); // Total written.

      for (bool last (false); !last; )
      {
        // Fill the input buffer detecting EOF.
        //
        last = butl::eof (is.read (cmp.ib, cmp.ic));
        cmp.in = static_cast<size_t> (is.gcount ());

        cmp.next (last);

        // Write out the output buffer unless next() has only buffered the
        // data.
        //
        if (cmp.on != 0)
        {
          os.write (cmp.ob, static_cast<streamsize> (cmp.on));
          total += cmp.on;
        }
      }

      return total;
#endif
    }

    // decompression
    //

    // Make sure the header buffer (which is passed to LZ4F_getFrameInfo() in
    // begin() below) has exactly the maximum LZ4 frame header size.
    // Presumably its size is hard-coded in the header since the LZ4 headers
    // are only included in this file (verify against the header).
    //
    static_assert (sizeof (decompressor::hb) == LZ4F_HEADER_SIZE_MAX,
                   "LZ4 header size mismatch");

    // Free the LZ4 decompression context, if one has been created.
    //
    decompressor::
    ~decompressor ()
    {
      LZ4F_dctx* c (static_cast<LZ4F_dctx*> (ctx_));

      if (c == nullptr)
        return;

      // Failing to free the context is only diagnosed via the assertion.
      //
      LZ4F_errorCode_t r (LZ4F_freeDecompressionContext (c));
      assert (!LZ4F_isError (r));
    }

    // Begin decompression: parse the frame header in the header buffer (hb,
    // hn bytes), calculate the required input (ic) and output (oc) buffer
    // capacities, and return the hint for the size of the next input chunk.
    //
    // If content_size is not NULL, then set it to the content size stored in
    // the header or to nullopt if there is none (LZ4 reports it as 0).
    //
    // On return hb/hn contain whatever was left in the header buffer after
    // the header itself (which is the beginning of the compressed content).
    //
    // Throw invalid_argument if the header is invalid or incomplete and
    // bad_alloc if unable to allocate memory.
    //
    size_t decompressor::
    begin (optional<uint64_t>* content_size)
    {
      // If this decompressor has already been used for a previous stream,
      // then reuse (after resetting) its context rather than creating a new
      // one (which would overwrite ctx_ and leak the old context).
      //
      LZ4F_dctx* ctx (static_cast<LZ4F_dctx*> (ctx_));

      if (ctx == nullptr)
      {
        if (LZ4F_isError (
              LZ4F_createDecompressionContext (&ctx, LZ4F_VERSION)))
          throw bad_alloc ();

        ctx_ = ctx;
      }
      else
        LZ4F_resetDecompressionContext (ctx);

      LZ4F_frameInfo_t info = LZ4F_INIT_FRAMEINFO;

      // Input hint and end as signalled by the LZ4F_*() functions.
      //
      // Note that e is initialized with the number of bytes available and is
      // updated by the call with the number of bytes consumed.
      //
      size_t h, e;

      h = LZ4F_getFrameInfo (ctx, &info, hb, &(e = hn));
      if (LZ4F_isError (h))
        throw_exception (h);

      if (content_size != nullptr)
      {
        if (info.contentSize != 0)
          *content_size = static_cast<uint64_t> (info.contentSize);
        else
          *content_size = nullopt;
      }

      // Use the block size for the output buffer capacity and compressed
      // bound plus the header size for the input. The expectation is that
      // LZ4F_decompress() should never hint for more than that.
      //
      oc = block_size (info.blockSizeID);
      ic = LZ4F_compressBound (oc, nullptr) + LZ4F_BLOCK_HEADER_SIZE;

      assert (h <= ic);

      // Move over whatever is left in the header buffer to the beginning.
      //
      hn -= e;
      memmove (hb, hb + e, hn);

      return h;
    }

    size_t decompressor::
    next ()
    {
      LZ4F_dctx* ctx (static_cast<LZ4F_dctx*> (ctx_));

      size_t h, e;

      // Note that LZ4F_decompress() verifies specified and actual content
      // sizes match (similar to compression).
      //
      h = LZ4F_decompress (ctx, ob, &(on = oc), ib, &(e = in), nullptr);
      if (LZ4F_isError (h))
        throw_exception (h);

      // We expect LZ4F_decompress() to consume what it asked for.
      //
      assert (e == in && h <= ic);
      in = 0; // All consumed.

      return h;
    }

    // Decompress the content of the input stream into the output stream and
    // return the total number of (decompressed) bytes written.
    //
    // Throw invalid_argument if the compressed content is invalid or
    // incomplete.
    //
    uint64_t
    decompress (ofdstream& os, ifdstream& is)
    {
      // Write the specified number of bytes from the output buffer updating
      // the total written.
      //
      uint64_t ot (0);
      auto write = [&os, &ot] (char* b, size_t n)
      {
        os.write (b, static_cast<streamsize> (n));
        ot += n;
      };

#if 0
      char buf[1024 * 3 + 7];
      istream dis (is, true, istream::badbit);

      for (bool e (false); !e; )
      {
        e = eof (dis.read (buf, sizeof (buf)));
        write (buf, static_cast<size_t> (dis.gcount ()));
      }
#else
      // Read into the specified buffer returning the number of bytes read and
      // updating the eof flag.
      //
      // Note that we keep reading until we either get the requested number
      // of bytes or hit EOF.
      //
      bool eof (false);
      auto read = [&is, &eof] (char* b, size_t c) -> size_t
      {
        size_t n (0);
        do
        {
          eof = butl::eof (is.read (b + n, c - n));
          n += static_cast<size_t> (is.gcount ());
        }
        while (!eof && n != c);

        return n;
      };

      decompressor d;

      // Input/output buffer guards.
      //
      unique_ptr<char[]> ibg;
      unique_ptr<char[]> obg;

      size_t h; // Input hint.

      // First read in the header and allocate the buffers.
      //
      // What if we hit EOF here? And could begin() return 0? Turns out the
      // answer to both questions is yes: 0-byte content compresses to 15
      // bytes (with or without content size; 1-byte -- to 20/28 bytes). We
      // can ignore EOF here since an attempt to read more will result in
      // another EOF. And code below is prepared to handle 0 initial hint.
      //
      // @@ We could end up leaving some of the input content from the
      //    header in the input buffer which the caller will have no way
      //    of using/detecting.
      //
      d.hn = read (d.hb, sizeof (d.hb));
      h = d.begin ();

      ibg.reset ((d.ib = new char[d.ic]));
      obg.reset ((d.ob = new char[d.oc]));

      // Copy over whatever is left in the header buffer and read up to
      // the hinted size.
      //
      memcpy (d.ib, d.hb, (d.in = d.hn));

      if (h > d.in)
        d.in += read (d.ib + d.in, h - d.in);

      // Keep decompressing, writing, and reading chunks of compressed
      // content.
      //
      // The 0 hint means the end of the compressed content.
      //
      while (h != 0)
      {
        h = d.next ();

        if (d.on != 0) // next() may just buffer the data.
          write (d.ob, d.on);

        if (h != 0)
        {
          // More input is expected but we have already seen EOF.
          //
          if (eof)
            throw invalid_argument ("incomplete LZ4 compressed content");

          d.in = read (d.ib, h);
        }
      }
#endif

      return ot;
    }
  }
}

// Include the implementation into our translation unit. Let's keep it last
// since the implementation defines a bunch of macros.
//
#if defined(__clang__) || defined(__GNUC__)
#  pragma GCC diagnostic ignored "-Wunused-function"
#endif

// This header is only included in the implementation so we can include it
// here instead of the above.
//
#define XXH_PRIVATE_API // Makes API static and includes xxhash.c.
#include "xxhash.h"

// Clang targeting MSVC prior to version 10 has difficulty with _tzcnt_u64()
// (see Clang bug 47099 for a potentially related issue). Including relevant
// headers (<immintrin.h>, <intrin.h>) does not appear to help. So for now we
// just disable the use of _tzcnt_u64().
//
#if defined(_MSC_VER) && defined(__clang__) && __clang_major__ < 10
#  define LZ4_FORCE_SW_BITCOUNT
#endif

// Note that the order of inclusion is important (see *_SRC_INCLUDED macros).
//
extern "C"
{
#include "lz4.c"
#include "lz4hc.c"
#include "lz4frame.c"
}