aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes1
-rw-r--r--.gitignore6
-rw-r--r--COPYRIGHT2
-rw-r--r--LICENSE8
-rw-r--r--build/root.build15
-rw-r--r--libbutl/b.cxx184
-rw-r--r--libbutl/b.hxx150
-rw-r--r--libbutl/b.ixx31
-rw-r--r--libbutl/b.mxx109
-rw-r--r--libbutl/backtrace.cxx30
-rw-r--r--libbutl/backtrace.hxx (renamed from libbutl/backtrace.mxx)19
-rw-r--r--libbutl/base64.cxx102
-rw-r--r--libbutl/base64.hxx (renamed from libbutl/base64.mxx)39
-rw-r--r--libbutl/bufstreambuf.cxx13
-rw-r--r--libbutl/bufstreambuf.hxx67
-rw-r--r--libbutl/buildfile63
-rw-r--r--libbutl/builtin-options.cxx467
-rw-r--r--libbutl/builtin-options.hxx118
-rw-r--r--libbutl/builtin-options.ixx35
-rw-r--r--libbutl/builtin.cli5
-rw-r--r--libbutl/builtin.cxx613
-rw-r--r--libbutl/builtin.hxx (renamed from libbutl/builtin.mxx)128
-rw-r--r--libbutl/builtin.ixx80
-rw-r--r--libbutl/char-scanner.hxx (renamed from libbutl/char-scanner.mxx)45
-rw-r--r--libbutl/char-scanner.ixx6
-rw-r--r--libbutl/char-scanner.txx7
-rw-r--r--libbutl/command.cxx50
-rw-r--r--libbutl/command.hxx (renamed from libbutl/command.mxx)23
-rw-r--r--libbutl/const-ptr.hxx (renamed from libbutl/const-ptr.mxx)21
-rw-r--r--libbutl/curl.cxx173
-rw-r--r--libbutl/curl.hxx (renamed from libbutl/curl.mxx)121
-rw-r--r--libbutl/curl.ixx79
-rw-r--r--libbutl/curl.txx10
-rw-r--r--libbutl/default-options.cxx73
-rw-r--r--libbutl/default-options.hxx (renamed from libbutl/default-options.mxx)81
-rw-r--r--libbutl/default-options.ixx2
-rw-r--r--libbutl/default-options.txx104
-rw-r--r--libbutl/diagnostics.cxx89
-rw-r--r--libbutl/diagnostics.hxx (renamed from libbutl/diagnostics.mxx)138
-rw-r--r--libbutl/export.hxx8
-rw-r--r--libbutl/fdstream.cxx452
-rw-r--r--libbutl/fdstream.hxx (renamed from libbutl/fdstream.mxx)202
-rw-r--r--libbutl/fdstream.ixx31
-rw-r--r--libbutl/filesystem.cxx940
-rw-r--r--libbutl/filesystem.hxx (renamed from libbutl/filesystem.mxx)213
-rw-r--r--libbutl/filesystem.ixx62
-rw-r--r--libbutl/ft/lang.hxx9
-rw-r--r--libbutl/git.cxx44
-rw-r--r--libbutl/git.hxx (renamed from libbutl/git.mxx)28
-rw-r--r--libbutl/host-os-release.cxx323
-rw-r--r--libbutl/host-os-release.hxx86
-rw-r--r--libbutl/json/event.hxx27
-rw-r--r--libbutl/json/parser.cxx645
-rw-r--r--libbutl/json/parser.hxx705
-rw-r--r--libbutl/json/parser.ixx552
-rw-r--r--libbutl/json/pdjson.c1044
-rw-r--r--libbutl/json/pdjson.h147
-rw-r--r--libbutl/json/serializer.cxx671
-rw-r--r--libbutl/json/serializer.hxx413
-rw-r--r--libbutl/json/serializer.ixx247
-rw-r--r--libbutl/lz4-stream.cxx281
-rw-r--r--libbutl/lz4-stream.hxx280
-rw-r--r--libbutl/lz4.c2495
-rw-r--r--libbutl/lz4.cxx555
-rw-r--r--libbutl/lz4.h774
-rw-r--r--libbutl/lz4.hxx205
-rw-r--r--libbutl/lz4frame.c1899
-rw-r--r--libbutl/lz4frame.h623
-rw-r--r--libbutl/lz4hc.c1615
-rw-r--r--libbutl/lz4hc.h413
-rw-r--r--libbutl/manifest-parser.cxx238
-rw-r--r--libbutl/manifest-parser.hxx (renamed from libbutl/manifest-parser.mxx)38
-rw-r--r--libbutl/manifest-rewriter.cxx46
-rw-r--r--libbutl/manifest-rewriter.hxx (renamed from libbutl/manifest-rewriter.mxx)28
-rw-r--r--libbutl/manifest-serializer.cxx153
-rw-r--r--libbutl/manifest-serializer.hxx (renamed from libbutl/manifest-serializer.mxx)43
-rw-r--r--libbutl/manifest-types.hxx (renamed from libbutl/manifest-types.mxx)22
-rw-r--r--libbutl/mingw-condition_variable.hxx275
-rw-r--r--libbutl/mingw-invoke.hxx109
-rw-r--r--libbutl/mingw-mutex.hxx210
-rw-r--r--libbutl/mingw-shared_mutex.hxx124
-rw-r--r--libbutl/mingw-thread.hxx330
-rw-r--r--libbutl/move-only-function.hxx177
-rw-r--r--libbutl/multi-index.hxx (renamed from libbutl/multi-index.mxx)21
-rw-r--r--libbutl/openssl.cxx27
-rw-r--r--libbutl/openssl.hxx (renamed from libbutl/openssl.mxx)69
-rw-r--r--libbutl/openssl.ixx14
-rw-r--r--libbutl/openssl.txx68
-rw-r--r--libbutl/optional.hxx (renamed from libbutl/optional.mxx)185
-rw-r--r--libbutl/optional.ixx5
-rw-r--r--libbutl/pager.cxx40
-rw-r--r--libbutl/pager.hxx (renamed from libbutl/pager.mxx)26
-rw-r--r--libbutl/path-io.hxx (renamed from libbutl/path-io.mxx)24
-rw-r--r--libbutl/path-map.hxx (renamed from libbutl/path-map.mxx)35
-rw-r--r--libbutl/path-pattern.cxx33
-rw-r--r--libbutl/path-pattern.hxx (renamed from libbutl/path-pattern.mxx)27
-rw-r--r--libbutl/path-pattern.ixx26
-rw-r--r--libbutl/path.cxx33
-rw-r--r--libbutl/path.hxx (renamed from libbutl/path.mxx)77
-rw-r--r--libbutl/path.ixx93
-rw-r--r--libbutl/path.txx18
-rw-r--r--libbutl/prefix-map.hxx (renamed from libbutl/prefix-map.mxx)62
-rw-r--r--libbutl/prefix-map.txx126
-rw-r--r--libbutl/process-details.hxx32
-rw-r--r--libbutl/process-io.cxx29
-rw-r--r--libbutl/process-io.hxx (renamed from libbutl/process-io.mxx)23
-rw-r--r--libbutl/process-run.cxx29
-rw-r--r--libbutl/process-run.txx83
-rw-r--r--libbutl/process.cxx410
-rw-r--r--libbutl/process.hxx (renamed from libbutl/process.mxx)259
-rw-r--r--libbutl/process.ixx240
-rw-r--r--libbutl/project-name.cxx30
-rw-r--r--libbutl/project-name.hxx (renamed from libbutl/project-name.mxx)23
-rw-r--r--libbutl/prompt.cxx30
-rw-r--r--libbutl/prompt.hxx (renamed from libbutl/prompt.mxx)23
-rw-r--r--libbutl/regex.cxx31
-rw-r--r--libbutl/regex.hxx (renamed from libbutl/regex.mxx)71
-rw-r--r--libbutl/regex.ixx30
-rw-r--r--libbutl/regex.txx77
-rw-r--r--libbutl/semantic-version.cxx98
-rw-r--r--libbutl/semantic-version.hxx (renamed from libbutl/semantic-version.mxx)93
-rw-r--r--libbutl/semantic-version.ixx64
-rw-r--r--libbutl/sendmail.cxx27
-rw-r--r--libbutl/sendmail.hxx (renamed from libbutl/sendmail.mxx)31
-rw-r--r--libbutl/sendmail.ixx5
-rw-r--r--libbutl/sha1.c10
-rw-r--r--libbutl/sha1.cxx34
-rw-r--r--libbutl/sha1.hxx (renamed from libbutl/sha1.mxx)33
-rw-r--r--libbutl/sha256.cxx44
-rw-r--r--libbutl/sha256.hxx (renamed from libbutl/sha256.mxx)29
-rw-r--r--libbutl/small-allocator.hxx (renamed from libbutl/small-allocator.mxx)18
-rw-r--r--libbutl/small-forward-list.hxx (renamed from libbutl/small-forward-list.mxx)29
-rw-r--r--libbutl/small-list.hxx (renamed from libbutl/small-list.mxx)31
-rw-r--r--libbutl/small-vector-odb.hxx2
-rw-r--r--libbutl/small-vector.hxx (renamed from libbutl/small-vector.mxx)55
-rw-r--r--libbutl/standard-version.cxx32
-rw-r--r--libbutl/standard-version.hxx (renamed from libbutl/standard-version.mxx)25
-rw-r--r--libbutl/string-parser.cxx30
-rw-r--r--libbutl/string-parser.hxx (renamed from libbutl/string-parser.mxx)19
-rw-r--r--libbutl/string-table.hxx (renamed from libbutl/string-table.mxx)26
-rw-r--r--libbutl/string-table.txx3
-rw-r--r--libbutl/tab-parser.cxx31
-rw-r--r--libbutl/tab-parser.hxx (renamed from libbutl/tab-parser.mxx)20
-rw-r--r--libbutl/target-triplet.cxx60
-rw-r--r--libbutl/target-triplet.hxx (renamed from libbutl/target-triplet.mxx)33
-rw-r--r--libbutl/timestamp.cxx127
-rw-r--r--libbutl/timestamp.hxx (renamed from libbutl/timestamp.mxx)32
-rw-r--r--libbutl/unicode.cxx23
-rw-r--r--libbutl/unicode.hxx (renamed from libbutl/unicode.mxx)20
-rw-r--r--libbutl/url.hxx (renamed from libbutl/url.mxx)41
-rw-r--r--libbutl/url.ixx2
-rw-r--r--libbutl/url.txx7
-rw-r--r--libbutl/utf8.hxx (renamed from libbutl/utf8.mxx)22
-rw-r--r--libbutl/utf8.ixx2
-rw-r--r--libbutl/utility.cxx119
-rw-r--r--libbutl/utility.hxx (renamed from libbutl/utility.mxx)166
-rw-r--r--libbutl/utility.ixx160
-rw-r--r--libbutl/uuid-linux.cxx2
-rw-r--r--libbutl/uuid-openbsd.cxx80
-rw-r--r--libbutl/uuid.cxx23
-rw-r--r--libbutl/uuid.hxx20
-rw-r--r--libbutl/uuid.ixx4
-rw-r--r--libbutl/vector-view.hxx (renamed from libbutl/vector-view.mxx)19
-rw-r--r--libbutl/win32-utility.cxx9
-rw-r--r--libbutl/win32-utility.hxx4
-rw-r--r--libbutl/xxhash.c1030
-rw-r--r--libbutl/xxhash.h328
-rw-r--r--manifest10
-rw-r--r--tests/b-info/driver.cxx53
-rw-r--r--tests/b-info/testscript4
-rw-r--r--tests/backtrace/driver.cxx24
-rw-r--r--tests/base64/driver.cxx86
-rw-r--r--tests/build/root.build10
-rw-r--r--tests/builtin/buildfile3
-rw-r--r--tests/builtin/driver.cxx184
-rw-r--r--tests/builtin/find.testscript276
-rw-r--r--tests/builtin/sed.testscript55
-rw-r--r--tests/builtin/timeout.testscript30
-rw-r--r--tests/command/driver.cxx32
-rw-r--r--tests/cpfile/driver.cxx23
-rw-r--r--tests/curl/driver.cxx56
-rw-r--r--tests/curl/testscript20
-rw-r--r--tests/default-options/driver.cxx174
-rw-r--r--tests/default-options/testscript57
-rw-r--r--tests/dir-iterator/driver.cxx69
-rw-r--r--tests/dir-iterator/testscript30
-rw-r--r--tests/entry-time/driver.cxx26
-rw-r--r--tests/fdstream/driver.cxx172
-rw-r--r--tests/host-os-release/buildfile6
-rw-r--r--tests/host-os-release/driver.cxx58
-rw-r--r--tests/host-os-release/testscript223
-rw-r--r--tests/link/driver.cxx43
-rw-r--r--tests/lz4/buildfile6
-rw-r--r--tests/lz4/driver.cxx46
-rw-r--r--tests/lz4/testscript85
-rw-r--r--tests/lz4/truncated-content.lz4bin0 -> 108 bytes
-rw-r--r--tests/lz4/truncated-header12.lz4bin0 -> 12 bytes
-rw-r--r--tests/lz4/truncated-header6.lz41
-rw-r--r--tests/manifest-parser/driver.cxx89
-rw-r--r--tests/manifest-rewriter/driver.cxx48
-rw-r--r--tests/manifest-roundtrip/buildfile3
-rw-r--r--tests/manifest-roundtrip/driver.cxx69
-rw-r--r--tests/manifest-roundtrip/manifest32
-rw-r--r--tests/manifest-roundtrip/testscript118
-rw-r--r--tests/manifest-serializer/driver.cxx84
-rw-r--r--tests/move-only-function/buildfile6
-rw-r--r--tests/move-only-function/driver.cxx149
-rw-r--r--tests/mventry/driver.cxx23
-rw-r--r--tests/mventry/testscript10
-rw-r--r--tests/next-word/buildfile6
-rw-r--r--tests/next-word/driver.cxx46
-rw-r--r--tests/openssl/driver.cxx58
-rw-r--r--tests/optional/driver.cxx20
-rw-r--r--tests/pager/driver.cxx21
-rw-r--r--tests/path-entry/driver.cxx36
-rw-r--r--tests/path-entry/testscript10
-rw-r--r--tests/path/driver.cxx20
-rw-r--r--tests/prefix-map/driver.cxx21
-rw-r--r--tests/process-run/driver.cxx31
-rw-r--r--tests/process-term/buildfile6
-rw-r--r--tests/process-term/driver.cxx403
-rw-r--r--tests/process-term/testscript4
-rw-r--r--tests/process/driver.cxx105
-rw-r--r--tests/progress/driver.cxx29
-rw-r--r--tests/project-name/driver.cxx25
-rw-r--r--tests/regex/driver.cxx43
-rw-r--r--tests/regex/testscript67
-rw-r--r--tests/semantic-version/driver.cxx50
-rw-r--r--tests/sendmail/driver.cxx34
-rw-r--r--tests/sha1/driver.cxx29
-rw-r--r--tests/sha256/driver.cxx29
-rw-r--r--tests/small-forward-list/driver.cxx21
-rw-r--r--tests/small-list/driver.cxx17
-rw-r--r--tests/small-vector/driver.cxx18
-rw-r--r--tests/standard-version/driver.cxx31
-rw-r--r--tests/strcase/driver.cxx16
-rw-r--r--tests/string-parser/driver.cxx24
-rw-r--r--tests/tab-parser/driver.cxx20
-rw-r--r--tests/target-triplet/driver.cxx53
-rw-r--r--tests/timestamp/driver.cxx23
-rw-r--r--tests/url/driver.cxx24
-rw-r--r--tests/utf8/driver.cxx23
-rw-r--r--tests/uuid/driver.cxx4
-rw-r--r--tests/wildcard/driver.cxx86
-rw-r--r--tests/wildcard/testscript6
245 files changed, 25872 insertions, 4720 deletions
diff --git a/.gitattributes b/.gitattributes
index 1631641..d01efdc 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -17,3 +17,4 @@
# Use `binary` to make sure certain files are never auto-detected as text.
#
#*.png binary
+*.lz4 binary
diff --git a/.gitignore b/.gitignore
index c3de2e7..5046596 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,10 +5,16 @@
*.d
*.t
*.i
+*.i.*
*.ii
+*.ii.*
*.o
*.obj
+*.gcm
+*.pcm
+*.ifc
*.so
+*.dylib
*.dll
*.a
*.lib
diff --git a/COPYRIGHT b/COPYRIGHT
index af15f5b..6bd48c9 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1 +1 @@
-Copyright (c) 2014-2020 the build2 authors (see the AUTHORS file).
+Copyright (c) 2014-2024 the build2 authors (see the AUTHORS file).
diff --git a/LICENSE b/LICENSE
index 45c77ff..f22e52f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -2,10 +2,16 @@ libbutl/sha1.c:
3-clause BSD License; see the file header for details.
-libbutl/{sha256c.c, strptime.c, timelocal.h, timelocal.c}:
+libbutl/{sha256c.c, strptime.c, timelocal.[hc]}:
+libbutl/{xxhash.[hc], lz4*.[hc]}:
+libbutl/mingw-*.hxx:
2-clause BSD License; see the file headers for details.
+libbutl/json/pdjson.[hc]:
+
+UNLICENSE (dedicated to the public domain).
+
The rest:
MIT License
diff --git a/build/root.build b/build/root.build
index 2151480..17e42b1 100644
--- a/build/root.build
+++ b/build/root.build
@@ -5,7 +5,7 @@ cxx.std = latest
using cxx
-hxx{*}: extension = hxx # We also have .mxx; see libbutl/buildfile.
+hxx{*}: extension = hxx
ixx{*}: extension = ixx
txx{*}: extension = txx
cxx{*}: extension = cxx
@@ -15,6 +15,19 @@ if ($cxx.target.system == 'win32-msvc')
if ($cxx.class == 'msvc')
cxx.coptions += /wd4251 /wd4275 /wd4800
+elif ($cxx.id == 'gcc')
+{
+ # See GCC bugs 100115, 98753 (attachment 50081/comment 15), and 101361. Note
+ # that these must also be disabled in projects that use libbutl.
+ #
+ cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \
+-Wno-stringop-overread
+
+ if ($cxx.version.major >= 13)
+ cxx.coptions += -Wno-dangling-reference
+}
+elif ($cxx.id.type == 'clang' && $cxx.version.major >= 15)
+ cxx.coptions += -Wno-unqualified-std-cast-call
# Load the cli module but only if it's available. This way a distribution
# that includes pre-generated files can be built without installing cli.
diff --git a/libbutl/b.cxx b/libbutl/b.cxx
index a071bba..0b4472f 100644
--- a/libbutl/b.cxx
+++ b/libbutl/b.cxx
@@ -1,58 +1,19 @@
// file : libbutl/b.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/b.mxx>
-#endif
-
-// C includes.
+#include <libbutl/b.hxx>
+#include <ios> // ios::failure
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <cstdint>
-#include <stdexcept>
-#include <functional>
-
-#include <ios> // ios::failure
-#include <utility> // move()
+#include <utility> // move()
#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.b;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.url;
-import butl.path;
-import butl.process;
-import butl.optional;
-import butl.project_name;
-import butl.standard_version;
-#endif
-
-import butl.utility; // next_word(), eof(), etc
-import butl.path_io;
-import butl.fdstream;
-import butl.process_io; // operator<<(ostream, process_path)
-import butl.small_vector;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/process-io.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <algorithm>
+
+#include <libbutl/utility.hxx> // next_word(), eof(), etc
+#include <libbutl/path-io.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/process-io.hxx> // operator<<(ostream, process_path)
+#include <libbutl/small-vector.hxx>
using namespace std;
@@ -71,14 +32,30 @@ namespace butl
throw runtime_error ("invalid " + d);
}
- b_project_info
- b_info (const dir_path& project,
+ void
+ b_info (std::vector<b_project_info>& r,
+ const vector<dir_path>& projects,
+ b_info_flags fl,
uint16_t verb,
const function<b_callback>& cmd_callback,
const path& program,
const dir_path& search_fallback,
const vector<string>& ops)
{
+ // Bail out if the project list is empty.
+ //
+ if (projects.empty ())
+ return;
+
+ // Reserve enough space in the result and save its original size.
+ //
+ size_t rn (r.size ());
+ {
+ size_t n (rn + projects.size ());
+ if (r.capacity () < n)
+ r.reserve (n);
+ }
+
try
{
process_path pp (
@@ -104,6 +81,23 @@ namespace butl
else
vops.push_back ("-q");
+ string spec ("info(");
+
+ // Note that quoting is essential here.
+ //
+ for (size_t i (0); i != projects.size(); ++i)
+ {
+ if (i != 0)
+ spec += ' ';
+
+ spec += '\'' + projects[i].representation () + '\'';
+ }
+
+ if ((fl & b_info_flags::subprojects) == b_info_flags::none)
+ spec += ",no_subprojects";
+
+ spec += ')';
+
pr = process_start_callback (
cmd_callback ? cmd_callback : [] (const char* const*, size_t) {},
0 /* stdin */,
@@ -111,9 +105,12 @@ namespace butl
2 /* stderr */,
pp,
vops,
+ ((fl & b_info_flags::ext_mods) == b_info_flags::none
+ ? "--no-external-modules"
+ : nullptr),
"-s",
ops,
- "info:", "'" + project.representation () + "'");
+ spec);
pipe.out.close ();
ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit);
@@ -143,31 +140,52 @@ namespace butl
}
};
- b_project_info r;
- for (string l; !eof (getline (is, l)); )
+ b_project_info pi;
+ auto add_project = [&r, &pi] ()
{
- if (l.compare (0, 9, "project: ") == 0)
- {
- string v (l, 9);
- if (!v.empty ())
- r.project = parse_name (move (v), "project");
- }
- else if (l.compare (0, 9, "version: ") == 0)
+ // Parse version string to standard version if the project loaded
+ // the version module.
+ //
+ const auto& ms (pi.modules);
+ if (find (ms.begin (), ms.end (), "version") != ms.end ())
{
- string v (l, 9);
- if (!v.empty ())
try
{
- r.version = standard_version (v, standard_version::allow_stub);
+ pi.version = standard_version (pi.version_string,
+ standard_version::allow_stub);
}
catch (const invalid_argument& e)
{
- bad_value ("version '" + v + "': " + e.what ());
+ bad_value ("version '" + pi.version_string + "': " + e.what ());
}
}
+
+ // Add the project info and prepare for the next project info
+ // parsing.
+ //
+ r.push_back (move (pi));
+ pi = b_project_info ();
+ };
+
+ for (string l; !eof (getline (is, l)); )
+ {
+ if (l.empty ())
+ {
+ add_project ();
+ }
+ else if (l.compare (0, 9, "project: ") == 0)
+ {
+ string v (l, 9);
+ if (!v.empty ())
+ pi.project = parse_name (move (v), "project");
+ }
+ else if (l.compare (0, 9, "version: ") == 0)
+ {
+ pi.version_string = string (l, 9);
+ }
else if (l.compare (0, 9, "summary: ") == 0)
{
- r.summary = string (l, 9);
+ pi.summary = string (l, 9);
}
else if (l.compare (0, 5, "url: ") == 0)
{
@@ -175,7 +193,7 @@ namespace butl
if (!v.empty ())
try
{
- r.url = url (v);
+ pi.url = url (v);
}
catch (const invalid_argument& e)
{
@@ -184,17 +202,17 @@ namespace butl
}
else if (l.compare (0, 10, "src_root: ") == 0)
{
- r.src_root = parse_dir (string (l, 10), "src_root");
+ pi.src_root = parse_dir (string (l, 10), "src_root");
}
else if (l.compare (0, 10, "out_root: ") == 0)
{
- r.out_root = parse_dir (string (l, 10), "out_root");
+ pi.out_root = parse_dir (string (l, 10), "out_root");
}
else if (l.compare (0, 14, "amalgamation: ") == 0)
{
string v (l, 14);
if (!v.empty ())
- r.amalgamation = parse_dir (move (v), "amalgamation");
+ pi.amalgamation = parse_dir (move (v), "amalgamation");
}
else if (l.compare (0, 13, "subprojects: ") == 0)
{
@@ -212,7 +230,7 @@ namespace butl
if (p != 0)
sn = parse_name (string (s, 0, p), "subproject");
- r.subprojects.push_back (
+ pi.subprojects.push_back (
b_project_info::subproject {move (sn),
parse_dir (string (s, p + 1),
"subproject")});
@@ -222,20 +240,36 @@ namespace butl
{
string v (l, 12);
for (size_t b (0), e (0); next_word (v, b, e); )
- r.operations.push_back (string (v, b, e - b));
+ pi.operations.push_back (string (v, b, e - b));
}
else if (l.compare (0, 17, "meta-operations: ") == 0)
{
string v (l, 17);
for (size_t b (0), e (0); next_word (v, b, e); )
- r.meta_operations.push_back (string (v, b, e - b));
+ pi.meta_operations.push_back (string (v, b, e - b));
+ }
+ else if (l.compare (0, 9, "modules: ") == 0)
+ {
+ string v (l, 9);
+ for (size_t b (0), e (0); next_word (v, b, e); )
+ pi.modules.push_back (string (v, b, e - b));
}
}
is.close (); // Detect errors.
if (pr.wait ())
- return r;
+ {
+ add_project (); // Add the remaining project info.
+
+ if (r.size () - rn == projects.size ())
+ return;
+
+ ostringstream os;
+ os << "invalid " << pp << " output: expected information for "
+ << projects.size () << " projects instead of " << r.size () - rn;
+ throw b_error (os.str (), move (pr.exit));
+ }
}
// Note that ios::failure inherits from std::runtime_error, so this
// catch-clause must go last.
@@ -274,7 +308,7 @@ namespace butl
assert (!pr.wait ());
throw b_error (
- string ("process ") + pp.recall_string () + " " + to_string (*pr.exit),
+ string ("process ") + pp.recall_string () + ' ' + to_string (*pr.exit),
move (pr.exit));
}
catch (const process_error& e)
diff --git a/libbutl/b.hxx b/libbutl/b.hxx
new file mode 100644
index 0000000..d3fd2bf
--- /dev/null
+++ b/libbutl/b.hxx
@@ -0,0 +1,150 @@
+// file : libbutl/b.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <utility> // move()
+#include <cstddef> // size_t
+#include <cstdint> // uint16_t
+#include <stdexcept> // runtime_error
+#include <functional>
+
+#include <libbutl/url.hxx>
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/project-name.hxx>
+#include <libbutl/standard-version.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ class LIBBUTL_SYMEXPORT b_error: public std::runtime_error
+ {
+ public:
+ // Build system program exit information. May be absent if the error
+ // occurred before the process has been started.
+ //
+ // Can be used by the caller to decide if to print the error message to
+ // stderr. Normally, it is not required if the process exited normally
+ // with non-zero code, since presumably it has issued diagnostics. Note
+ // that the normal() function can be used to check for this.
+ //
+ optional<process_exit> exit;
+
+ // Return true if the build2 process exited normally with non-zero code.
+ //
+ bool
+ normal () const {return exit && exit->normal () && !*exit;}
+
+ explicit
+ b_error (const std::string& description, optional<process_exit> = nullopt);
+ };
+
+ // Run `b info: <project-dir>...` command and parse and return (via argument
+ // to allow appending and for error position; see below) the build2 projects
+ // information it prints to stdout. Return the empty list if the specified
+ // project list is empty. Throw b_error on error. Note that the size of the
+ // result vector can be used to determine which project information caused
+ // the error.
+ //
+ // You can also specify the build2 verbosity level, command line callback
+ // (see process_run_callback() for details), build program search details,
+ // and additional options.
+ //
+ // Note that version_string is only parsed to standard_version if a project
+ // uses the version module. Otherwise, standard_version is empty.
+ //
+ struct b_project_info
+ {
+ using url_type = butl::url;
+
+ struct subproject
+ {
+ project_name name; // Empty if anonymous.
+ dir_path path; // Relative to the project root.
+ };
+
+ project_name project;
+ std::string version_string;
+ standard_version version;
+ std::string summary;
+ url_type url;
+
+ dir_path src_root;
+ dir_path out_root;
+
+ dir_path amalgamation; // Relative to project root and
+ // empty if not amalgamated.
+ std::vector<subproject> subprojects;
+
+ std::vector<std::string> operations;
+ std::vector<std::string> meta_operations;
+
+ std::vector<std::string> modules;
+ };
+
+ enum class b_info_flags: std::uint16_t
+ {
+ // Retrieve information that may come from external modules (operations,
+ // meta-operations, etc). Omitting this flag results in passing
+ // --no-external-modules to the build2 program and speeds up its
+ // execution.
+ //
+ ext_mods = 0x1,
+
+ // Discover subprojects. Omitting this flag results in passing
+ // no_subprojects info meta-operation parameter to the build2 program and
+ // speeds up its execution.
+ //
+ subprojects = 0x2,
+
+ none = 0
+ };
+
+ inline b_info_flags operator& (b_info_flags, b_info_flags);
+ inline b_info_flags operator| (b_info_flags, b_info_flags);
+ inline b_info_flags operator&= (b_info_flags&, b_info_flags);
+ inline b_info_flags operator|= (b_info_flags&, b_info_flags);
+
+ using b_callback = void (const char* const args[], std::size_t n);
+
+ LIBBUTL_SYMEXPORT void
+ b_info (std::vector<b_project_info>& result,
+ const std::vector<dir_path>& projects,
+ b_info_flags,
+ std::uint16_t verb = 1,
+ const std::function<b_callback>& cmd_callback = {},
+ const path& program = path ("b"),
+ const dir_path& search_fallback = {},
+ const std::vector<std::string>& options = {});
+
+ // As above but retrieve information for a single project.
+ //
+ inline b_project_info
+ b_info (const dir_path& project,
+ b_info_flags fl,
+ std::uint16_t verb = 1,
+ const std::function<b_callback>& cmd_callback = {},
+ const path& program = path ("b"),
+ const dir_path& search_fallback = {},
+ const std::vector<std::string>& options = {})
+ {
+ std::vector<b_project_info> r;
+ b_info (r,
+ std::vector<dir_path> ({project}),
+ fl,
+ verb,
+ cmd_callback,
+ program,
+ search_fallback,
+ options);
+
+ return std::move (r[0]);
+ }
+}
+
+#include <libbutl/b.ixx>
diff --git a/libbutl/b.ixx b/libbutl/b.ixx
new file mode 100644
index 0000000..1667101
--- /dev/null
+++ b/libbutl/b.ixx
@@ -0,0 +1,31 @@
+// file : libbutl/b.ixx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+namespace butl
+{
+ // b_info_flags
+ //
+ inline b_info_flags operator& (b_info_flags x, b_info_flags y)
+ {
+ return x &= y;
+ }
+
+ inline b_info_flags operator| (b_info_flags x, b_info_flags y)
+ {
+ return x |= y;
+ }
+
+ inline b_info_flags operator&= (b_info_flags& x, b_info_flags y)
+ {
+ return x = static_cast<b_info_flags> (
+ static_cast<std::uint16_t> (x) &
+ static_cast<std::uint16_t> (y));
+ }
+
+ inline b_info_flags operator|= (b_info_flags& x, b_info_flags y)
+ {
+ return x = static_cast<b_info_flags> (
+ static_cast<std::uint16_t> (x) |
+ static_cast<std::uint16_t> (y));
+ }
+}
diff --git a/libbutl/b.mxx b/libbutl/b.mxx
deleted file mode 100644
index 6eaf473..0000000
--- a/libbutl/b.mxx
+++ /dev/null
@@ -1,109 +0,0 @@
-// file : libbutl/b.mxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#pragma once
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef> // size_tu
-#include <cstdint> // uint16_t
-#include <stdexcept> // runtime_error
-#include <functional>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.b;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.url;
-import butl.path;
-import butl.process;
-import butl.optional;
-import butl.project_name;
-import butl.standard_version;
-#else
-#include <libbutl/url.mxx>
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/project-name.mxx>
-#include <libbutl/standard-version.mxx>
-#endif
-
-#include <libbutl/export.hxx>
-
-LIBBUTL_MODEXPORT namespace butl
-{
- class LIBBUTL_SYMEXPORT b_error: public std::runtime_error
- {
- public:
- // Build system program exit information. May be absent if the error
- // occured before the process has been started.
- //
- // Can be used by the caller to decide if to print the error message to
- // stderr. Normally, it is not required if the process exited normally
- // with non-zero code, since presumably it has issued diagnostics. Note
- // that the normal() function can be used to check for this.
- //
- optional<process_exit> exit;
-
- // Return true if the build2 process exited normally with non-zero code.
- //
- bool
- normal () const {return exit && exit->normal () && !*exit;}
-
- explicit
- b_error (const std::string& description, optional<process_exit> = nullopt);
- };
-
- // Run `b info: <project-dir>` command and parse and return the build2
- // project information it prints to stdout. Throw b_error on error.
- //
- // You can also specify the build2 verbosity level, command line callback
- // (see process_run_callback() for details), build program search details
- // and additional options.
- //
- struct b_project_info
- {
- using url_type = butl::url;
-
- struct subproject
- {
- project_name name; // Empty if anonymous.
- dir_path path; // Relative to the project root.
- };
-
- project_name project;
- standard_version version;
- std::string summary;
- url_type url;
-
- dir_path src_root;
- dir_path out_root;
-
- dir_path amalgamation; // Relative to project root and
- // empty if not amalgmated.
- std::vector<subproject> subprojects;
-
- std::vector<std::string> operations;
- std::vector<std::string> meta_operations;
- };
-
- using b_callback = void (const char* const args[], std::size_t n);
-
- LIBBUTL_SYMEXPORT b_project_info
- b_info (const dir_path& project,
- std::uint16_t verb = 1,
- const std::function<b_callback>& cmd_callback = {},
- const path& program = path ("b"),
- const dir_path& search_fallback = {},
- const std::vector<std::string>& options = {});
-}
diff --git a/libbutl/backtrace.cxx b/libbutl/backtrace.cxx
index 8c9c6ae..347e231 100644
--- a/libbutl/backtrace.cxx
+++ b/libbutl/backtrace.cxx
@@ -1,15 +1,14 @@
// file : libbutl/backtrace.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/backtrace.mxx>
-#endif
+#include <libbutl/backtrace.hxx>
// We only enable backtrace during bootstrap if we can do it without any
// complications of the build scripts/makefiles.
//
// With glibc linking with -rdynamic gives (non-static) function names.
-// FreeBSD/NetBSD requires explicitly linking -lexecinfo.
+// FreeBSD/NetBSD requires explicitly linking -lexecinfo. OpenBSD only has
+// this functionality built-in from 7.0 and requires -lexecinfo.
//
// Note that some libc implementation on Linux (most notably, musl), don't
// support this, at least not out of the box.
@@ -20,6 +19,11 @@
defined(__FreeBSD__) || \
defined(__NetBSD__)
# define LIBBUTL_BACKTRACE
+# elif defined (__OpenBSD__)
+# include <sys/param.h> // OpenBSD (yyyymm)
+# if OpenBSD >= 202110 // 7.0 was released in October 2021.
+# define LIBBUTL_BACKTRACE
+# endif
# endif
#else
# if defined(__GLIBC__) || \
@@ -35,30 +39,12 @@
#include <cassert>
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
#ifdef LIBBUTL_BACKTRACE
# include <memory> // unique_ptr
# include <cstddef> // size_t
#endif
#include <exception>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.backtrace;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-#endif
using namespace std;
diff --git a/libbutl/backtrace.mxx b/libbutl/backtrace.hxx
index f5a63d5..6afb6ea 100644
--- a/libbutl/backtrace.mxx
+++ b/libbutl/backtrace.hxx
@@ -1,28 +1,13 @@
-// file : libbutl/backtrace.mxx -*- C++ -*-
+// file : libbutl/backtrace.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.backtrace;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Return the calling thread's backtrace or empty string if this
// functionality is not supported or an error has occurred. The exact
diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx
index 527c6af..282f7c2 100644
--- a/libbutl/base64.cxx
+++ b/libbutl/base64.cxx
@@ -1,37 +1,13 @@
// file : libbutl/base64.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/base64.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
+#include <libbutl/base64.hxx>
#include <cstddef> // size_t
#include <istream>
#include <ostream>
#include <iterator> // {istreambuf, ostreambuf, back_insert}_iterator
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.base64;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
using namespace std;
@@ -40,19 +16,20 @@ namespace butl
static const char codes[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ static const char codes_url[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
// base64-encode the data in the iterator range [i, e). Write the encoded
- // data starting at the iterator position o.
+ // data starting at the iterator position o. If url is true, encode using
+ // base64url.
//
template <typename I, typename O>
static void
- base64_encode (I& i, const I& e, O& o)
+ base64_encode (I& i, const I& e, O& o, bool url = false)
{
const size_t un (65); // Non-existing index of the codes string.
for (size_t n (0); i != e; ++n)
{
- if (n && n % 19 == 0)
- *o++ = '\n'; // Split into lines, like the base64 utility does.
-
auto next = [&i] () {return static_cast<unsigned char> (*i++);};
unsigned char c (next ());
@@ -75,10 +52,26 @@ namespace butl
i4 = c & 0x3F;
}
- *o++ = codes[i1];
- *o++ = codes[i2];
- *o++ = i3 == un ? '=' : codes[i3];
- *o++ = i4 == un ? '=' : codes[i4];
+ if (!url)
+ {
+ if (n && n % 19 == 0)
+ *o++ = '\n'; // Split into lines, like the base64 utility does.
+
+ *o++ = codes[i1];
+ *o++ = codes[i2];
+ *o++ = i3 == un ? '=' : codes[i3];
+ *o++ = i4 == un ? '=' : codes[i4];
+ }
+ // base64url: different 63rd and 64th characters and no padding or
+ // newlines.
+ //
+ else
+ {
+ *o++ = codes_url[i1];
+ *o++ = codes_url[i2];
+ if (i3 != un) *o++ = codes_url[i3];
+ if (i4 != un) *o++ = codes_url[i4];
+ }
}
}
@@ -194,6 +187,47 @@ namespace butl
return r;
}
+ string
+ base64url_encode (istream& is)
+ {
+ if (!is.good ())
+ throw invalid_argument ("bad stream");
+
+ string r;
+ istreambuf_iterator<char> i (is);
+ back_insert_iterator<string> o (r);
+
+ base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+ is.setstate (istream::eofbit);
+ return r;
+ }
+
+ void
+ base64url_encode (ostream& os, istream& is)
+ {
+ if (!os.good () || !is.good ())
+ throw invalid_argument ("bad stream");
+
+ istreambuf_iterator<char> i (is);
+ ostreambuf_iterator<char> o (os);
+ base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+
+ if (o.failed ())
+ os.setstate (istream::badbit);
+
+ is.setstate (istream::eofbit);
+ }
+
+ string
+ base64url_encode (const std::vector<char>& v)
+ {
+ string r;
+ back_insert_iterator<string> o (r);
+ auto i (v.begin ());
+ base64_encode (i, v.end (), o, true /* url */);
+ return r;
+ }
+
void
base64_decode (ostream& os, istream& is)
{
diff --git a/libbutl/base64.mxx b/libbutl/base64.hxx
index 698b7e2..a0d1450 100644
--- a/libbutl/base64.mxx
+++ b/libbutl/base64.hxx
@@ -1,31 +1,15 @@
-// file : libbutl/base64.mxx -*- C++ -*-
+// file : libbutl/base64.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <iosfwd>
#include <string>
#include <vector>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.base64;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Base64-encode a stream or a buffer. Split the output into 76 char-long
// lines (new line is the 77th). If reading from a stream, check if it has
@@ -43,6 +27,25 @@ LIBBUTL_MODEXPORT namespace butl
LIBBUTL_SYMEXPORT std::string
base64_encode (const std::vector<char>&);
+ // Encode a stream or a buffer using base64url (RFC4648), a base64 variant
+ // with different 63rd and 64th alphabet characters (- and _ instead of +
+ // and /; to make it URL and filesystem safe) and optional padding because
+ // the padding character `=` would have to be percent-encoded to be safe in
+ // URLs. This implementation does not output any padding, newlines or any
+ // other whitespace (which is required, for example, by RFC7519: JSON Web
+ // Token (JWT) and RFC7515: JSON Web Signature (JWS)).
+ //
+ // Note that base64url decoding has not yet been implemented.
+ //
+ LIBBUTL_SYMEXPORT void
+ base64url_encode (std::ostream&, std::istream&);
+
+ LIBBUTL_SYMEXPORT std::string
+ base64url_encode (std::istream&);
+
+ LIBBUTL_SYMEXPORT std::string
+ base64url_encode (const std::vector<char>&);
+
// Base64-decode a stream or a string. Throw invalid_argument if the input
// is not a valid base64 representation. If reading from a stream, check if
// it has badbit, failbit, or eofbit set and throw invalid_argument if
diff --git a/libbutl/bufstreambuf.cxx b/libbutl/bufstreambuf.cxx
new file mode 100644
index 0000000..d152166
--- /dev/null
+++ b/libbutl/bufstreambuf.cxx
@@ -0,0 +1,13 @@
+// file : libbutl/bufstreambuf.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/bufstreambuf.hxx>
+
+namespace butl
+{
+ bufstreambuf::
+ ~bufstreambuf ()
+ {
+ // Vtable.
+ }
+}
diff --git a/libbutl/bufstreambuf.hxx b/libbutl/bufstreambuf.hxx
new file mode 100644
index 0000000..a49b2d0
--- /dev/null
+++ b/libbutl/bufstreambuf.hxx
@@ -0,0 +1,67 @@
+// file : libbutl/bufstreambuf.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <cstdint> // uint64_t
+#include <streambuf>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ // A buffered streambuf interface that exposes its buffer for direct scan
+ // and provides a notion of logical position. See fdstreambuf for background
+ // and motivation.
+ //
+ class LIBBUTL_SYMEXPORT bufstreambuf: public std::basic_streambuf<char>
+ {
+ public:
+ using base = std::basic_streambuf<char>;
+
+ using int_type = base::int_type;
+ using traits_type = base::traits_type;
+
+ using pos_type = base::pos_type; // std::streampos
+ using off_type = base::off_type; // std::streamoff
+
+ public:
+ explicit
+ bufstreambuf (std::uint64_t pos = 0): off_ (pos) {}
+
+ virtual
+ ~bufstreambuf ();
+
+ // basic_streambuf input interface.
+ //
+ public:
+
+ // Direct access to the get area. Use with caution.
+ //
+ using base::gptr;
+ using base::egptr;
+ using base::gbump;
+
+ // Return the (logical) position of the next byte to be read.
+ //
+ // Note that on Windows when reading in the text mode the logical position
+ // may differ from the physical file descriptor position due to the CRLF
+ // character sequence translation. See the fdstreambuf::seekoff()
+ // implementation for more background on this issue.
+ //
+ std::uint64_t
+ tellg () const {return off_ - (egptr () - gptr ());}
+
+ // basic_streambuf output interface.
+ //
+ public:
+
+ // Return the (logical) position of the next byte to be written.
+ //
+ std::uint64_t
+ tellp () const {return off_ + (pptr () - pbase ());}
+
+ protected:
+ std::uint64_t off_;
+ };
+}
diff --git a/libbutl/buildfile b/libbutl/buildfile
index 6526900..ba4ad96 100644
--- a/libbutl/buildfile
+++ b/libbutl/buildfile
@@ -1,37 +1,37 @@
# file : libbutl/buildfile
# license : MIT; see accompanying LICENSE file
-# This library was modularized using the Modules TS semantics (with support
-# for dual, module/header consumption) which was subsequently partially
-# dismantled. We, however, kept some of the changes in anticipation that they
-# would be useful when attempting to modularize using the merged modules
-# semantics. Specifically, there are currently headers with both .mxx and .hxx
-# extensions and the code is littered with the `#if __cpp_[lib_]modules_ts`
-# blocks. Note that it's important for the auto-generated header support
-# that the default extension for hxx{} is .hxx.
-#
-# @@ If/when going back to using mxx{}, make sure to cleanup explicit .mxx.
-#
-lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io \
- -win32-utility \
- -version \
- -builtin-options} \
- hxx{**.mxx} {hxx}{version} {hxx ixx cxx}{builtin-options}
+lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io \
+ -win32-utility \
+ -mingw-* \
+ -version \
+ -builtin-options} \
+ {hxx}{version} {hxx ixx cxx}{builtin-options}
tclass = $cxx.target.class
tsys = $cxx.target.system
windows = ($tclass == 'windows')
-# Exclude these from compilation on non-Windows targets.
+# Whether to use our own implementation of C++14 threads on MinGW (note:
+# requires Windows 7 or later).
+#
+# Note that for now we use built-in POSIX thread support during bootstrap
+# (which, as a side effect, verifies we still use MinGW GCC configured with
+# POSIX support, which we still need for TLS, exceptions, and thread-safe
+# static locals).
+#
+mingw_stdthread = ($tsys == 'mingw32')
+
+# Exclude these from compilation on targets where they do not apply.
#
lib{butl}: {hxx ixx cxx}{win32-utility}: include = $windows
+lib{butl}: hxx{mingw-*}: include = $mingw_stdthread
-# Our C-files are included into sha256.cxx (sha256c.c) and timestamp.cxx
-# (strptime.c timelocal.h timelocal.c), so treat them as files exclude from
-# the compilation.
+# Our C-files are always included into C++-files that wrap the corresponding
+# API so treat them as files to exclude from the compilation.
#
-lib{butl}: file{*.c *.h}
+lib{butl}: file{**.c **.h}
# Platform-specific UUID implementations.
#
@@ -39,6 +39,13 @@ lib{butl}: cxx{uuid-linux}: include = ($tclass == 'linux')
lib{butl}: cxx{uuid-macos}: include = ($tclass == 'macos')
lib{butl}: cxx{uuid-windows}: include = $windows
lib{butl}: cxx{uuid-freebsd}: include = ($tsys == 'freebsd' || $tsys == 'netbsd')
+lib{butl}: cxx{uuid-openbsd}: include = ($tsys == 'openbsd')
+
+# GCC prior to version 6 has flaky `#pragma GCC diagnostic` so we have to
+# disable certain warnings outright.
+#
+if ($cxx.id == 'gcc' && $cxx.version.major < 6)
+ cc.coptions += -Wno-unused-function
# Additional system libraries.
#
@@ -58,6 +65,14 @@ switch $tclass, $tsys
case 'bsd', 'freebsd' | 'netbsd'
cxx.libs += -lexecinfo
+
+ case 'bsd', 'openbsd'
+ {
+ # Built-in libexecinfo is only available since OpenBSD 7.0.
+ #
+ if (([uint64] $regex.replace($cxx.target.version, '(\d+)\..+', '\1')) >= 7)
+ cxx.libs += -lexecinfo
+ }
}
if! $windows
@@ -78,6 +93,9 @@ hxx{version}:
#
cxx.poptions =+ "-I$out_root" "-I$src_root"
+if $mingw_stdthread
+ cxx.poptions += -D_WIN32_WINNT=0x0601 -DLIBBUTL_MINGW_STDTHREAD
+
obja{*} bmia{*}: cxx.poptions += -DLIBBUTL_STATIC_BUILD
objs{*} bmis{*}: cxx.poptions += -DLIBBUTL_SHARED_BUILD
@@ -85,6 +103,9 @@ objs{*} bmis{*}: cxx.poptions += -DLIBBUTL_SHARED_BUILD
#
lib{butl}: cxx.export.poptions = "-I$out_root" "-I$src_root"
+if $mingw_stdthread
+ lib{butl}: cxx.export.poptions += -D_WIN32_WINNT=0x0601 -DLIBBUTL_MINGW_STDTHREAD
+
liba{butl}: cxx.export.poptions += -DLIBBUTL_STATIC
libs{butl}: cxx.export.poptions += -DLIBBUTL_SHARED
diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx
index 2848eea..98a47cf 100644
--- a/libbutl/builtin-options.cxx
+++ b/libbutl/builtin-options.cxx
@@ -15,8 +15,10 @@
#include <set>
#include <string>
#include <vector>
+#include <utility>
#include <ostream>
#include <sstream>
+#include <cstring>
namespace butl
{
@@ -25,7 +27,7 @@ namespace butl
// unknown_option
//
unknown_option::
- ~unknown_option () throw ()
+ ~unknown_option () noexcept
{
}
@@ -36,7 +38,7 @@ namespace butl
}
const char* unknown_option::
- what () const throw ()
+ what () const noexcept
{
return "unknown option";
}
@@ -44,7 +46,7 @@ namespace butl
// unknown_argument
//
unknown_argument::
- ~unknown_argument () throw ()
+ ~unknown_argument () noexcept
{
}
@@ -55,7 +57,7 @@ namespace butl
}
const char* unknown_argument::
- what () const throw ()
+ what () const noexcept
{
return "unknown argument";
}
@@ -63,7 +65,7 @@ namespace butl
// missing_value
//
missing_value::
- ~missing_value () throw ()
+ ~missing_value () noexcept
{
}
@@ -74,7 +76,7 @@ namespace butl
}
const char* missing_value::
- what () const throw ()
+ what () const noexcept
{
return "missing option value";
}
@@ -82,7 +84,7 @@ namespace butl
// invalid_value
//
invalid_value::
- ~invalid_value () throw ()
+ ~invalid_value () noexcept
{
}
@@ -97,7 +99,7 @@ namespace butl
}
const char* invalid_value::
- what () const throw ()
+ what () const noexcept
{
return "invalid option value";
}
@@ -111,7 +113,7 @@ namespace butl
}
const char* eos_reached::
- what () const throw ()
+ what () const noexcept
{
return "end of argument stream reached";
}
@@ -158,6 +160,7 @@ namespace butl
else
++i_;
+ ++start_position_;
return r;
}
else
@@ -168,11 +171,20 @@ namespace butl
skip ()
{
if (i_ < argc_)
+ {
++i_;
+ ++start_position_;
+ }
else
throw eos_reached ();
}
+ std::size_t argv_scanner::
+ position ()
+ {
+ return start_position_;
+ }
+
// vector_scanner
//
bool vector_scanner::
@@ -208,6 +220,12 @@ namespace butl
throw eos_reached ();
}
+ std::size_t vector_scanner::
+ position ()
+ {
+ return start_position_ + i_;
+ }
+
template <typename X>
struct parser
{
@@ -235,10 +253,31 @@ namespace butl
struct parser<bool>
{
static void
- parse (bool& x, scanner& s)
+ parse (bool& x, bool& xs, scanner& s)
{
- s.next ();
- x = true;
+ const char* o (s.next ());
+
+ if (s.more ())
+ {
+ const char* v (s.next ());
+
+ if (std::strcmp (v, "1") == 0 ||
+ std::strcmp (v, "true") == 0 ||
+ std::strcmp (v, "TRUE") == 0 ||
+ std::strcmp (v, "True") == 0)
+ x = true;
+ else if (std::strcmp (v, "0") == 0 ||
+ std::strcmp (v, "false") == 0 ||
+ std::strcmp (v, "FALSE") == 0 ||
+ std::strcmp (v, "False") == 0)
+ x = false;
+ else
+ throw invalid_value (o, v);
+ }
+ else
+ throw missing_value (o);
+
+ xs = true;
}
};
@@ -260,6 +299,17 @@ namespace butl
};
template <typename X>
+ struct parser<std::pair<X, std::size_t> >
+ {
+ static void
+ parse (std::pair<X, std::size_t>& x, bool& xs, scanner& s)
+ {
+ x.second = s.position ();
+ parser<X>::parse (x.first, xs, s);
+ }
+ };
+
+ template <typename X>
struct parser<std::vector<X> >
{
static void
@@ -273,11 +323,11 @@ namespace butl
}
};
- template <typename X>
- struct parser<std::set<X> >
+ template <typename X, typename C>
+ struct parser<std::set<X, C> >
{
static void
- parse (std::set<X>& c, bool& xs, scanner& s)
+ parse (std::set<X, C>& c, bool& xs, scanner& s)
{
X x;
bool dummy;
@@ -287,16 +337,17 @@ namespace butl
}
};
- template <typename K, typename V>
- struct parser<std::map<K, V> >
+ template <typename K, typename V, typename C>
+ struct parser<std::map<K, V, C> >
{
static void
- parse (std::map<K, V>& m, bool& xs, scanner& s)
+ parse (std::map<K, V, C>& m, bool& xs, scanner& s)
{
const char* o (s.next ());
if (s.more ())
{
+ std::size_t pos (s.position ());
std::string ov (s.next ());
std::string::size_type p = ov.find ('=');
@@ -316,14 +367,14 @@ namespace butl
if (!kstr.empty ())
{
av[1] = const_cast<char*> (kstr.c_str ());
- argv_scanner s (0, ac, av);
+ argv_scanner s (0, ac, av, false, pos);
parser<K>::parse (k, dummy, s);
}
if (!vstr.empty ())
{
av[1] = const_cast<char*> (vstr.c_str ());
- argv_scanner s (0, ac, av);
+ argv_scanner s (0, ac, av, false, pos);
parser<V>::parse (v, dummy, s);
}
@@ -336,6 +387,56 @@ namespace butl
}
};
+ template <typename K, typename V, typename C>
+ struct parser<std::multimap<K, V, C> >
+ {
+ static void
+ parse (std::multimap<K, V, C>& m, bool& xs, scanner& s)
+ {
+ const char* o (s.next ());
+
+ if (s.more ())
+ {
+ std::size_t pos (s.position ());
+ std::string ov (s.next ());
+ std::string::size_type p = ov.find ('=');
+
+ K k = K ();
+ V v = V ();
+ std::string kstr (ov, 0, p);
+ std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ()));
+
+ int ac (2);
+ char* av[] =
+ {
+ const_cast<char*> (o),
+ 0
+ };
+
+ bool dummy;
+ if (!kstr.empty ())
+ {
+ av[1] = const_cast<char*> (kstr.c_str ());
+ argv_scanner s (0, ac, av, false, pos);
+ parser<K>::parse (k, dummy, s);
+ }
+
+ if (!vstr.empty ())
+ {
+ av[1] = const_cast<char*> (vstr.c_str ());
+ argv_scanner s (0, ac, av, false, pos);
+ parser<V>::parse (v, dummy, s);
+ }
+
+ m.insert (typename std::multimap<K, V, C>::value_type (k, v));
+ }
+ else
+ throw missing_value (o);
+
+ xs = true;
+ }
+ };
+
template <typename X, typename T, T X::*M>
void
thunk (X& x, scanner& s)
@@ -343,6 +444,14 @@ namespace butl
parser<T>::parse (x.*M, s);
}
+ template <typename X, bool X::*M>
+ void
+ thunk (X& x, scanner& s)
+ {
+ s.next ();
+ x.*M = true;
+ }
+
template <typename X, typename T, T X::*M, bool X::*S>
void
thunk (X& x, scanner& s)
@@ -353,7 +462,6 @@ namespace butl
}
#include <map>
-#include <cstring>
namespace butl
{
@@ -704,15 +812,15 @@ namespace butl
_cli_cp_options_map_init ()
{
_cli_cp_options_map_["--recursive"] =
- &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+ &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
_cli_cp_options_map_["-R"] =
- &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+ &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
_cli_cp_options_map_["-r"] =
- &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+ &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
_cli_cp_options_map_["--preserve"] =
- &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >;
+ &::butl::cli::thunk< cp_options, &cp_options::preserve_ >;
_cli_cp_options_map_["-p"] =
- &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >;
+ &::butl::cli::thunk< cp_options, &cp_options::preserve_ >;
}
};
@@ -978,9 +1086,9 @@ namespace butl
_cli_date_options_map_init ()
{
_cli_date_options_map_["--utc"] =
- &::butl::cli::thunk< date_options, bool, &date_options::utc_ >;
+ &::butl::cli::thunk< date_options, &date_options::utc_ >;
_cli_date_options_map_["-u"] =
- &::butl::cli::thunk< date_options, bool, &date_options::utc_ >;
+ &::butl::cli::thunk< date_options, &date_options::utc_ >;
}
};
@@ -1163,6 +1271,269 @@ namespace butl
return r;
}
+ // find_options
+ //
+
+ find_options::
+ find_options ()
+ {
+ }
+
+ bool find_options::
+ parse (int& argc,
+ char** argv,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner s (argc, argv, erase);
+ bool r = _parse (s, opt, arg);
+ return r;
+ }
+
+ bool find_options::
+ parse (int start,
+ int& argc,
+ char** argv,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner s (start, argc, argv, erase);
+ bool r = _parse (s, opt, arg);
+ return r;
+ }
+
+ bool find_options::
+ parse (int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner s (argc, argv, erase);
+ bool r = _parse (s, opt, arg);
+ end = s.end ();
+ return r;
+ }
+
+ bool find_options::
+ parse (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner s (start, argc, argv, erase);
+ bool r = _parse (s, opt, arg);
+ end = s.end ();
+ return r;
+ }
+
+ bool find_options::
+ parse (::butl::cli::scanner& s,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ bool r = _parse (s, opt, arg);
+ return r;
+ }
+
+ typedef
+ std::map<std::string, void (*) (find_options&, ::butl::cli::scanner&)>
+ _cli_find_options_map;
+
+ static _cli_find_options_map _cli_find_options_map_;
+
+ struct _cli_find_options_map_init
+ {
+ _cli_find_options_map_init ()
+ {
+ }
+ };
+
+ static _cli_find_options_map_init _cli_find_options_map_init_;
+
+ bool find_options::
+ _parse (const char* o, ::butl::cli::scanner& s)
+ {
+ _cli_find_options_map::const_iterator i (_cli_find_options_map_.find (o));
+
+ if (i != _cli_find_options_map_.end ())
+ {
+ (*(i->second)) (*this, s);
+ return true;
+ }
+
+ return false;
+ }
+
+ bool find_options::
+ _parse (::butl::cli::scanner& s,
+ ::butl::cli::unknown_mode opt_mode,
+ ::butl::cli::unknown_mode arg_mode)
+ {
+ // Can't skip combined flags (--no-combined-flags).
+ //
+ assert (opt_mode != ::butl::cli::unknown_mode::skip);
+
+ bool r = false;
+ bool opt = true;
+
+ while (s.more ())
+ {
+ const char* o = s.peek ();
+
+ if (std::strcmp (o, "--") == 0)
+ {
+ opt = false;
+ }
+
+ if (opt)
+ {
+ if (_parse (o, s))
+ {
+ r = true;
+ continue;
+ }
+
+ if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
+ {
+ // Handle combined option values.
+ //
+ std::string co;
+ if (const char* v = std::strchr (o, '='))
+ {
+ co.assign (o, 0, v - o);
+ ++v;
+
+ int ac (2);
+ char* av[] =
+ {
+ const_cast<char*> (co.c_str ()),
+ const_cast<char*> (v)
+ };
+
+ ::butl::cli::argv_scanner ns (0, ac, av);
+
+ if (_parse (co.c_str (), ns))
+ {
+ // Parsed the option but not its value?
+ //
+ if (ns.end () != 2)
+ throw ::butl::cli::invalid_value (co, v);
+
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = co.c_str ();
+ }
+ }
+
+ // Handle combined flags.
+ //
+ char cf[3];
+ {
+ const char* p = o + 1;
+ for (; *p != '\0'; ++p)
+ {
+ if (!((*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= '0' && *p <= '9')))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ for (p = o + 1; *p != '\0'; ++p)
+ {
+ std::strcpy (cf, "-");
+ cf[1] = *p;
+ cf[2] = '\0';
+
+ int ac (1);
+ char* av[] =
+ {
+ cf
+ };
+
+ ::butl::cli::argv_scanner ns (0, ac, av);
+
+ if (!_parse (cf, ns))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ // All handled.
+ //
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = cf;
+ }
+ }
+ }
+
+ switch (opt_mode)
+ {
+ case ::butl::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::butl::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::butl::cli::unknown_mode::fail:
+ {
+ throw ::butl::cli::unknown_option (o);
+ }
+ }
+
+ break;
+ }
+ }
+
+ switch (arg_mode)
+ {
+ case ::butl::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::butl::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::butl::cli::unknown_mode::fail:
+ {
+ throw ::butl::cli::unknown_argument (o);
+ }
+ }
+
+ break;
+ }
+
+ return r;
+ }
+
// ln_options
//
@@ -1246,9 +1617,9 @@ namespace butl
_cli_ln_options_map_init ()
{
_cli_ln_options_map_["--symbolic"] =
- &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >;
+ &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >;
_cli_ln_options_map_["-s"] =
- &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >;
+ &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >;
}
};
@@ -1514,9 +1885,9 @@ namespace butl
_cli_mkdir_options_map_init ()
{
_cli_mkdir_options_map_["--parents"] =
- &::butl::cli::thunk< mkdir_options, bool, &mkdir_options::parents_ >;
+ &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >;
_cli_mkdir_options_map_["-p"] =
- &::butl::cli::thunk< mkdir_options, bool, &mkdir_options::parents_ >;
+ &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >;
}
};
@@ -1782,9 +2153,9 @@ namespace butl
_cli_mv_options_map_init ()
{
_cli_mv_options_map_["--force"] =
- &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >;
+ &::butl::cli::thunk< mv_options, &mv_options::force_ >;
_cli_mv_options_map_["-f"] =
- &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >;
+ &::butl::cli::thunk< mv_options, &mv_options::force_ >;
}
};
@@ -2051,13 +2422,13 @@ namespace butl
_cli_rm_options_map_init ()
{
_cli_rm_options_map_["--recursive"] =
- &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >;
+ &::butl::cli::thunk< rm_options, &rm_options::recursive_ >;
_cli_rm_options_map_["-r"] =
- &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >;
+ &::butl::cli::thunk< rm_options, &rm_options::recursive_ >;
_cli_rm_options_map_["--force"] =
- &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >;
+ &::butl::cli::thunk< rm_options, &rm_options::force_ >;
_cli_rm_options_map_["-f"] =
- &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >;
+ &::butl::cli::thunk< rm_options, &rm_options::force_ >;
}
};
@@ -2323,9 +2694,9 @@ namespace butl
_cli_rmdir_options_map_init ()
{
_cli_rmdir_options_map_["--force"] =
- &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >;
+ &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >;
_cli_rmdir_options_map_["-f"] =
- &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >;
+ &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >;
}
};
@@ -2594,13 +2965,13 @@ namespace butl
_cli_sed_options_map_init ()
{
_cli_sed_options_map_["--quiet"] =
- &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >;
+ &::butl::cli::thunk< sed_options, &sed_options::quiet_ >;
_cli_sed_options_map_["-n"] =
- &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >;
+ &::butl::cli::thunk< sed_options, &sed_options::quiet_ >;
_cli_sed_options_map_["--in-place"] =
- &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >;
+ &::butl::cli::thunk< sed_options, &sed_options::in_place_ >;
_cli_sed_options_map_["-i"] =
- &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >;
+ &::butl::cli::thunk< sed_options, &sed_options::in_place_ >;
_cli_sed_options_map_["--expression"] =
&::butl::cli::thunk< sed_options, std::vector<std::string>, &sed_options::expression_,
&sed_options::expression_specified_ >;
@@ -3136,13 +3507,13 @@ namespace butl
_cli_test_options_map_init ()
{
_cli_test_options_map_["--file"] =
- &::butl::cli::thunk< test_options, bool, &test_options::file_ >;
+ &::butl::cli::thunk< test_options, &test_options::file_ >;
_cli_test_options_map_["-f"] =
- &::butl::cli::thunk< test_options, bool, &test_options::file_ >;
+ &::butl::cli::thunk< test_options, &test_options::file_ >;
_cli_test_options_map_["--directory"] =
- &::butl::cli::thunk< test_options, bool, &test_options::directory_ >;
+ &::butl::cli::thunk< test_options, &test_options::directory_ >;
_cli_test_options_map_["-d"] =
- &::butl::cli::thunk< test_options, bool, &test_options::directory_ >;
+ &::butl::cli::thunk< test_options, &test_options::directory_ >;
}
};
diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx
index b389298..70179dd 100644
--- a/libbutl/builtin-options.hxx
+++ b/libbutl/builtin-options.hxx
@@ -68,7 +68,7 @@ namespace butl
{
public:
virtual
- ~unknown_option () throw ();
+ ~unknown_option () noexcept;
unknown_option (const std::string& option);
@@ -79,7 +79,7 @@ namespace butl
print (::std::ostream&) const;
virtual const char*
- what () const throw ();
+ what () const noexcept;
private:
std::string option_;
@@ -89,7 +89,7 @@ namespace butl
{
public:
virtual
- ~unknown_argument () throw ();
+ ~unknown_argument () noexcept;
unknown_argument (const std::string& argument);
@@ -100,7 +100,7 @@ namespace butl
print (::std::ostream&) const;
virtual const char*
- what () const throw ();
+ what () const noexcept;
private:
std::string argument_;
@@ -110,7 +110,7 @@ namespace butl
{
public:
virtual
- ~missing_value () throw ();
+ ~missing_value () noexcept;
missing_value (const std::string& option);
@@ -121,7 +121,7 @@ namespace butl
print (::std::ostream&) const;
virtual const char*
- what () const throw ();
+ what () const noexcept;
private:
std::string option_;
@@ -131,7 +131,7 @@ namespace butl
{
public:
virtual
- ~invalid_value () throw ();
+ ~invalid_value () noexcept;
invalid_value (const std::string& option,
const std::string& value,
@@ -150,7 +150,7 @@ namespace butl
print (::std::ostream&) const;
virtual const char*
- what () const throw ();
+ what () const noexcept;
private:
std::string option_;
@@ -165,7 +165,7 @@ namespace butl
print (::std::ostream&) const;
virtual const char*
- what () const throw ();
+ what () const noexcept;
};
// Command line argument scanner interface.
@@ -174,6 +174,14 @@ namespace butl
// for the two previous arguments up until a call to a third
// peek() or next().
//
+ // The position() function returns a monotonically-increasing
+ // number which, if stored, can later be used to determine the
+ // relative position of the argument returned by the following
+ // call to next(). Note that if multiple scanners are used to
+ // extract arguments from multiple sources, then the end
+ // position of the previous scanner should be used as the
+ // start position of the next.
+ //
class scanner
{
public:
@@ -191,13 +199,24 @@ namespace butl
virtual void
skip () = 0;
+
+ virtual std::size_t
+ position () = 0;
};
class argv_scanner: public scanner
{
public:
- argv_scanner (int& argc, char** argv, bool erase = false);
- argv_scanner (int start, int& argc, char** argv, bool erase = false);
+ argv_scanner (int& argc,
+ char** argv,
+ bool erase = false,
+ std::size_t start_position = 0);
+
+ argv_scanner (int start,
+ int& argc,
+ char** argv,
+ bool erase = false,
+ std::size_t start_position = 0);
int
end () const;
@@ -214,7 +233,11 @@ namespace butl
virtual void
skip ();
- private:
+ virtual std::size_t
+ position ();
+
+ protected:
+ std::size_t start_position_;
int i_;
int& argc_;
char** argv_;
@@ -224,13 +247,15 @@ namespace butl
class vector_scanner: public scanner
{
public:
- vector_scanner (const std::vector<std::string>&, std::size_t start = 0);
+ vector_scanner (const std::vector<std::string>&,
+ std::size_t start = 0,
+ std::size_t start_position = 0);
std::size_t
end () const;
void
- reset (std::size_t start = 0);
+ reset (std::size_t start = 0, std::size_t start_position = 0);
virtual bool
more ();
@@ -244,7 +269,11 @@ namespace butl
virtual void
skip ();
+ virtual std::size_t
+ position ();
+
private:
+ std::size_t start_position_;
const std::vector<std::string>& v_;
std::size_t i_;
};
@@ -455,6 +484,67 @@ namespace butl
bool utc_;
};
+ class find_options
+ {
+ public:
+ find_options ();
+
+ // Return true if anything has been parsed.
+ //
+ bool
+ parse (int& argc,
+ char** argv,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int start,
+ int& argc,
+ char** argv,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (::butl::cli::scanner&,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ // Option accessors.
+ //
+ // Implementation details.
+ //
+ protected:
+ bool
+ _parse (const char*, ::butl::cli::scanner&);
+
+ private:
+ bool
+ _parse (::butl::cli::scanner&,
+ ::butl::cli::unknown_mode option,
+ ::butl::cli::unknown_mode argument);
+
+ public:
+ };
+
class ln_options
{
public:
diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx
index f10f82d..e118156 100644
--- a/libbutl/builtin-options.ixx
+++ b/libbutl/builtin-options.ixx
@@ -107,14 +107,29 @@ namespace butl
// argv_scanner
//
inline argv_scanner::
- argv_scanner (int& argc, char** argv, bool erase)
- : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase)
+ argv_scanner (int& argc,
+ char** argv,
+ bool erase,
+ std::size_t sp)
+ : start_position_ (sp + 1),
+ i_ (1),
+ argc_ (argc),
+ argv_ (argv),
+ erase_ (erase)
{
}
inline argv_scanner::
- argv_scanner (int start, int& argc, char** argv, bool erase)
- : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase)
+ argv_scanner (int start,
+ int& argc,
+ char** argv,
+ bool erase,
+ std::size_t sp)
+ : start_position_ (sp + static_cast<std::size_t> (start)),
+ i_ (start),
+ argc_ (argc),
+ argv_ (argv),
+ erase_ (erase)
{
}
@@ -127,8 +142,10 @@ namespace butl
// vector_scanner
//
inline vector_scanner::
- vector_scanner (const std::vector<std::string>& v, std::size_t i)
- : v_ (v), i_ (i)
+ vector_scanner (const std::vector<std::string>& v,
+ std::size_t i,
+ std::size_t sp)
+ : start_position_ (sp), v_ (v), i_ (i)
{
}
@@ -139,9 +156,10 @@ namespace butl
}
inline void vector_scanner::
- reset (std::size_t i)
+ reset (std::size_t i, std::size_t sp)
{
i_ = i;
+ start_position_ = sp;
}
}
}
@@ -175,6 +193,9 @@ namespace butl
return this->utc_;
}
+ // find_options
+ //
+
// ln_options
//
diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli
index adc47fa..23a5708 100644
--- a/libbutl/builtin.cli
+++ b/libbutl/builtin.cli
@@ -34,6 +34,11 @@ namespace butl
bool --utc|-u;
};
+ class find_options
+ {
+ // No options so far (expression/primaries handled as arguments).
+ };
+
class ln_options
{
bool --symbolic|-s;
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index c6083b6..2755bf1 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -1,28 +1,16 @@
// file : libbutl/builtin.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/builtin.mxx>
-#endif
+#include <libbutl/builtin.hxx>
#ifdef _WIN32
# include <libbutl/win32-utility.hxx>
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <map>
-#include <string>
-#include <vector>
-#include <thread>
-#include <utility> // move(), forward()
-#include <cstdint> // uint*_t
-#include <functional>
-
#include <ios>
#include <chrono>
#include <cerrno>
+#include <cassert>
#include <ostream>
#include <sstream>
#include <cstdlib> // strtoull()
@@ -30,41 +18,16 @@
#include <exception>
#include <system_error>
-#endif
+#include <libbutl/regex.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream,exception),
+ // throw_generic_error()
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/builtin-options.hxx>
-#ifdef __cpp_modules_ts
-module butl.builtin;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.timestamp;
-#endif
-
-import butl.regex;
-import butl.path_io;
-import butl.utility; // operator<<(ostream,exception),
- // throw_generic_error()
-import butl.optional;
-import butl.filesystem;
-import butl.small_vector;
-#else
-#include <libbutl/regex.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
-
// Strictly speaking a builtin which reads/writes from/to standard streams
// must be asynchronous so that the caller can communicate with it through
// pipes without being blocked on I/O operations. However, as an optimization,
@@ -507,7 +470,7 @@ namespace butl
if (cbs.create)
call (fail, cbs.create, to, false /* pre */);
- for (const auto& de: dir_iterator (from, false /* ignore_dangling */))
+ for (const auto& de: dir_iterator (from, dir_iterator::no_follow))
{
path f (from / de.path ());
path t (to / de.path ());
@@ -853,6 +816,314 @@ namespace butl
return builtin (r = 0);
}
+ // find <start-path>... [-name <pattern>]
+ // [-type <type>]
+ // [-mindepth <depth>]
+ // [-maxdepth <depth>]
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ find (const strings& args,
+ auto_fd in, auto_fd out, auto_fd err,
+ const dir_path& cwd,
+ const builtin_callbacks& cbs) noexcept
+ try
+ {
+ uint8_t r (1);
+ ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ()));
+
+ // Note that on some errors we will issue diagnostics but continue the
+ // search and return with non-zero code at the end. This is consistent
+ // with how major implementations behave (see below).
+ //
+ bool error_occured (false);
+ auto error = [&cerr, &error_occured] (bool fail = false)
+ {
+ error_occured = true;
+ return error_record (cerr, fail, "find");
+ };
+
+ auto fail = [&error] () {return error (true /* fail */);};
+
+ try
+ {
+ in.close ();
+ ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+
+ // Parse arguments.
+ //
+ cli::vector_scanner scan (args);
+
+ // Currently, we don't expect any options.
+ //
+ parse<find_options> (scan, args, cbs.parse_option, fail);
+
+ // Parse path arguments until the first primary (starts with '-') is
+ // encountered.
+ //
+ small_vector<path, 1> paths;
+
+ while (scan.more ())
+ {
+ if (*scan.peek () == '-')
+ break;
+
+ try
+ {
+ paths.emplace_back (scan.next ());
+ }
+ catch (const invalid_path& e)
+ {
+ fail () << "invalid path '" << e.path << "'";
+ }
+ }
+
+ // Note that POSIX doesn't explicitly describe the behavior if no paths
+ // are specified on the command line. On Linux the current directory is
+ // assumed in this case. We, however, will follow the FreeBSD behavior
+ // and fail since this seems to be less error-prone.
+ //
+ if (paths.empty ())
+ fail () << "missing start path";
+
+ // Parse primaries.
+ //
+ optional<string> name;
+ optional<entry_type> type;
+ optional<uint64_t> min_depth;
+ optional<uint64_t> max_depth;
+
+ while (scan.more ())
+ {
+ const char* p (scan.next ());
+
+ // Fetch the value that follows the current primary as a string. A
+ // missing value is always an error while an empty one is only accepted
+ // if the caller explicitly allows it.
+ //
+ auto str = [p, &scan, &fail] (bool allow_empty = false)
+ {
+   const bool present (scan.more ());
+
+   if (!present)
+     fail () << "missing value for primary '" << p << "'";
+
+   string primary (p); // Save for diagnostics.
+   string value (scan.next ());
+
+   const bool acceptable (allow_empty || !value.empty ());
+
+   if (!acceptable)
+     fail () << "empty value for primary '" << primary << "'";
+
+   return value;
+ };
+
+ // Return the unsigned numeric value of the current primary. Fail if
+ // absent or is not a valid number.
+ //
+ // Note that strtoull() skips leading whitespace and accepts an optional
+ // sign (negating the converted value for '-'), so on its own it would
+ // let values like "-1" or " 1" through. To prevent that we additionally
+ // require the value to start with a decimal digit.
+ //
+ auto num = [p, &str, &fail] ()
+ {
+   string n (p); // Save for diagnostics.
+   string s (str ());
+
+   const char* b (s.c_str ());
+   char* e (nullptr);
+   errno = 0; // We must clear it according to POSIX.
+   uint64_t r (strtoull (b, &e, 10)); // Can't throw.
+
+   // Note that for an empty string s[0] is '\0' and so is also rejected.
+   //
+   bool digit (s[0] >= '0' && s[0] <= '9');
+
+   if (!digit || errno == ERANGE || e != b + s.size ())
+     fail () << "invalid value '" << s << "' for primary '" << n << "'";
+
+   return r;
+ };
+
+ if (strcmp (p, "-name") == 0)
+ {
+ // Note that the empty never-matching pattern is allowed.
+ //
+ name = str (true /* allow_empty */);
+ }
+ else if (strcmp (p, "-type") == 0)
+ {
+ string s (str ());
+ char t (s.size () == 1 ? s[0] : '\0');
+
+ switch (t)
+ {
+ case 'f': type = entry_type::regular; break;
+ case 'd': type = entry_type::directory; break;
+ case 'l': type = entry_type::symlink; break;
+ default: fail () << "invalid value '" << s << "' for primary '-type'";
+ }
+ }
+ else if (strcmp (p, "-mindepth") == 0)
+ {
+ min_depth = num ();
+ }
+ else if (strcmp (p, "-maxdepth") == 0)
+ {
+ max_depth = num ();
+ }
+ else
+ fail () << "unknown primary '" << p << "'";
+ }
+
+ // Print the path if the expression evaluates to true for it. Traverse
+ // further down if the path refers to a directory and the maximum depth
+ // is not specified or is not reached.
+ //
+ // Note that paths for evaluating/printing (pp) and for
+ // stating/traversing (ap) are passed separately. The former is
+ // potentially relative and the latter is absolute. Also note that
+ // for optimization we separately pass the base name simple path.
+ //
+ // The arguments are:
+ //
+ // pp    - path that is printed on match
+ // ap    - path that is iterated over if the entry is a directory
+ // bp    - base name that is matched against the -name pattern
+ // t     - entry type that is compared with the -type value
+ // level - depth of the entry, compared with the -mindepth/-maxdepth
+ //         values (0 for the start paths)
+ // find  - this lambda itself, passed explicitly so that it can recurse
+ //
+ auto find = [&cout,
+ &name,
+ &type,
+ &min_depth,
+ &max_depth,
+ &fail] (const path& pp,
+ const path& ap,
+ const path& bp,
+ entry_type t,
+ uint64_t level,
+ const auto& find) -> void
+ {
+ // Print the path if no primary evaluates to false.
+ //
+ // Note that an absent primary doesn't restrict the match and that the
+ // maximum depth is not checked here (it only limits the traversal
+ // below).
+ //
+ if ((!type || *type == t) &&
+ (!min_depth || level >= *min_depth) &&
+ (!name || path_match (bp.string (), *name)))
+ {
+ // Print the trailing directory separator, if present.
+ //
+ if (pp.to_directory ())
+ {
+ // The trailing directory separator can only be present for
+ // paths specified on the command line.
+ //
+ assert (level == 0);
+
+ cout << pp.representation () << '\n';
+ }
+ else
+ cout << pp << '\n';
+ }
+
+ // Traverse the directory, unless the max depth is specified and
+ // reached.
+ //
+ if (t == entry_type::directory && (!max_depth || level < *max_depth))
+ try
+ {
+ // NOTE(review): no_follow/ltype() presumably mean that symlinks are
+ // not followed while iterating and typing the entries -- confirm
+ // against dir_iterator/dir_entry.
+ //
+ for (const auto& de: dir_iterator (path_cast<dir_path> (ap),
+ dir_iterator::no_follow))
+ {
+ // Extend both the printed and the traversed paths with the entry
+ // name (which also serves as the base name) and descend one level.
+ //
+ find (pp / de.path (),
+ ap / de.path (),
+ de.path (),
+ de.ltype (),
+ level + 1,
+ find);
+ }
+ }
+ catch (const system_error& e)
+ {
+ // Note that this is reported with fail() rather than error().
+ //
+ fail () << "unable to scan directory '" << pp << "': " << e;
+ }
+ };
+
+ dir_path wd;
+
+ for (const path& p: paths)
+ {
+ // Complete the path if it is relative, so that we can properly stat
+ // it and, potentially, traverse. Note that we don't normalize it
+ // since POSIX requires that the paths should be evaluated (by
+ // primaries) and printed unaltered.
+ //
+ path ap;
+
+ if (p.relative ())
+ {
+ if (wd.empty () && cwd.relative ())
+ wd = current_directory (cwd, fail);
+
+ ap = (!wd.empty () ? wd : cwd) / p;
+ }
+
+ // Issue an error if the path is empty, doesn't exist, or has the
+ // trailing directory separator but refers to a non-directory.
+ //
+ // Note that POSIX doesn't explicitly describe the behavior if any of
+ // the above happens. We will follow the behavior which is common for
+ // both Linux and FreeBSD by issuing the diagnostics, proceeding to
+ // the subsequent paths, and returning with non-zero code at the end.
+ //
+ if (p.empty ())
+ {
+ error () << "empty path";
+ continue;
+ }
+
+ const path& fp (!ap.empty () ? ap : p);
+ pair<bool, entry_stat> pe;
+
+ try
+ {
+ pe = path_entry (fp);
+ }
+ catch (const system_error& e)
+ {
+ fail () << "unable to stat '" << p << "': " << e;
+ }
+
+ if (!pe.first)
+ {
+ error () << "'" << p << "' doesn't exists";
+ continue;
+ }
+
+ entry_type t (pe.second.type);
+
+ if (p.to_directory () && t != entry_type::directory)
+ {
+ error () << "'" << p << "' is not a directory";
+ continue;
+ }
+
+ find (p, fp, p.leaf (), t, 0 /* level */, find);
+ }
+
+ cout.close ();
+ r = !error_occured ? 0 : 1;
+ }
+ // Can be thrown while closing cin or creating, writing to, or closing
+ // cout or writing to cerr.
+ //
+ catch (const io_error& e)
+ {
+ error () << e;
+ }
+ catch (const failed&)
+ {
+ // Diagnostics has already been issued.
+ }
+ catch (const cli::exception& e)
+ {
+ error () << e;
+ }
+
+ cerr.close ();
+ return r;
+ }
+ // In particular, handles io_error exception potentially thrown while
+ // creating, writing to, or closing cerr.
+ //
+ catch (const std::exception&)
+ {
+ return 1;
+ }
+
// Create a symlink to a file or directory at the specified path and calling
// the hook for the created filesystem entries. The paths must be absolute
// and normalized. Fall back to creating a hardlink, if symlink creation is
@@ -1569,7 +1840,7 @@ namespace butl
return 1;
}
- // sed [-n|--quiet] [-i|--in-place] -e|--expression <script> [<file>]
+ // sed [-n|--quiet] [-i|--in-place] (-e|--expression <script>)... [<file>]
//
// Note: must be executed asynchronously.
//
@@ -1597,13 +1868,24 @@ namespace butl
//
auto_rmfile rm;
+ if (in == nullfd)
+ in = fddup (stdin_fd ());
+
+ if (out == nullfd)
+ out = fddup (stdout_fd ());
+
+ // Turn the streams into the binary mode to preserve the original line
+ // endings.
+ //
+ fdmode (in.get (), fdstream_mode::binary);
+ fdmode (out.get (), fdstream_mode::binary);
+
// Do not throw when failbit is set (getline() failed to extract any
// character).
//
- ifdstream cin (in != nullfd ? move (in) : fddup (stdin_fd ()),
- ifdstream::badbit);
+ ifdstream cin (move (in), ifdstream::badbit);
- ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+ ofdstream cout (move (out));
// Parse arguments.
//
@@ -1615,71 +1897,88 @@ namespace butl
if (ops.expression ().empty ())
fail () << "missing script";
- // Only a single script is supported.
- //
- if (ops.expression ().size () != 1)
- fail () << "multiple scripts";
-
- struct
+ struct subst
{
- string regex;
+ std::regex regex;
string replacement;
- bool icase = false;
- bool global = false;
- bool print = false;
- } subst;
+ bool global;
+ bool print;
+ };
+
+ small_vector<subst, 1> substs;
+ for (const string& v: ops.expression ())
{
- const string& v (ops.expression ()[0]);
if (v.empty ())
fail () << "empty script";
if (v[0] != 's')
- fail () << "only 's' command supported";
+ fail () << "unknown command in '" << v << "': only 's' command "
+ << "supported";
// Parse the substitute command.
//
if (v.size () < 2)
- fail () << "no delimiter for 's' command";
+ fail () << "no delimiter for 's' command in '" << v << "'";
char delim (v[1]);
if (delim == '\\' || delim == '\n')
- fail () << "invalid delimiter for 's' command";
-
- size_t p (v.find (delim, 2));
- if (p == string::npos)
- fail () << "unterminated 's' command regex";
-
- subst.regex.assign (v, 2, p - 2);
+ fail () << "invalid delimiter for 's' command in '" << v << "'";
- // Empty regex matches nothing, so not of much use.
+ // Parse the substitute command regex (as string), replacement, and
+ // flags.
//
- if (subst.regex.empty ())
- fail () << "empty regex in 's' command";
+ pair<string, string> rf;
+ bool icase (false);
+ bool global (false);
+ bool print (false);
- size_t b (p + 1);
- p = v.find (delim, b);
- if (p == string::npos)
- fail () << "unterminated 's' command replacement";
-
- subst.replacement.assign (v, b, p - b);
-
- // Parse the substitute command flags.
- //
- char c;
- for (++p; (c = v[p]) != '\0'; ++p)
+ try
{
- switch (c)
+ size_t e;
+ rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e);
+
+ char c;
+ for (size_t i (e + 1); (c = v[i]) != '\0'; ++i)
{
- case 'i': subst.icase = true; break;
- case 'g': subst.global = true; break;
- case 'p': subst.print = true; break;
- default:
+ switch (c)
{
- fail () << "invalid 's' command flag '" << c << "'";
+ case 'i': icase = true; break;
+ case 'g': global = true; break;
+ case 'p': print = true; break;
+ default:
+ {
+ fail () << "invalid 's' command flag '" << c << "' in '" << v
+ << "'";
+ }
}
}
}
+ catch (const invalid_argument& e)
+ {
+ fail () << "invalid 's' command '" << v << "': " << e;
+ }
+
+ // Parse the regex and add the substitution to the list.
+ //
+ try
+ {
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ regex re (rf.first, icase ? regex::icase : regex::ECMAScript);
+
+ substs.push_back ({move (re),
+ move (rf.second),
+ global,
+ print});
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful (no space).
+ //
+ fail () << "invalid regex '" << rf.first << "' in '" << v << "'"
+ << e;
+ }
}
// Path of a file to edit. An empty path represents stdin.
@@ -1723,7 +2022,8 @@ namespace butl
cout.open (fdopen (tp,
fdopen_mode::out |
fdopen_mode::truncate |
- fdopen_mode::create,
+ fdopen_mode::create |
+ fdopen_mode::binary,
path_permissions (p)));
}
catch (const io_error& e)
@@ -1738,10 +2038,6 @@ namespace butl
rm = auto_rmfile (tp);
}
- // Note that ECMAScript is implied if no grammar flag is specified.
- //
- regex re (subst.regex, subst.icase ? regex::icase : regex::ECMAScript);
-
// Edit a file or STDIN.
//
try
@@ -1751,27 +2047,55 @@ namespace butl
if (!p.empty ())
{
cin.close (); // Flush and close.
- cin.open (p);
+ cin.open (p, fdopen_mode::binary);
}
// Read until failbit is set (throw on badbit).
//
- string s;
- while (getline (cin, s))
+ string ps;
+ while (getline (cin, ps))
{
- auto r (regex_replace_search (
- s,
- re,
- subst.replacement,
- subst.global
- ? regex_constants::format_default
- : regex_constants::format_first_only));
+ // Remember the line ending type and, if it is CRLF, strip the
+ // trailing '\r'.
+ //
+ bool crlf (!ps.empty () && ps.back() == '\r');
+ if (crlf)
+ ps.pop_back();
+
+ bool prn (!ops.quiet ());
+
+ for (const subst& s: substs)
+ {
+ auto r (regex_replace_search (
+ ps,
+ s.regex,
+ s.replacement,
+ s.global
+ ? regex_constants::format_default
+ : regex_constants::format_first_only));
+
+ // If the regex matches, then override the pattern space with the
+ // replacement result and print it and proceed to the next line,
+ // if requested.
+ //
+ if (r.second)
+ {
+ ps = move (r.first);
+
+ if (s.print)
+ {
+ prn = true;
+ break;
+ }
+ }
+ }
// Add newline regardless whether the source line is newline-
- // terminated or not (in accordance with POSIX).
+ // terminated or not (in accordance with POSIX), preserving the
+ // original line ending.
//
- if (!ops.quiet () || (r.second && subst.print))
- cout << r.first << '\n';
+ if (prn)
+ cout << ps << (crlf ? "\r\n" : "\n");
}
cin.close ();
@@ -1801,12 +2125,6 @@ namespace butl
d << ": " << e;
}
}
- catch (const regex_error& e)
- {
- // Print regex_error description if meaningful (no space).
- //
- error () << "invalid regex" << e;
- }
// Can be thrown while creating cin, cout or writing to cerr.
//
catch (const io_error& e)
@@ -1882,6 +2200,7 @@ namespace butl
if (!a.empty () && a[0] != '-' && a[0] != '+')
{
char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
n = strtoull (a.c_str (), &e, 10); // Can't throw.
if (errno != ERANGE && e == a.c_str () + a.size ())
@@ -2164,17 +2483,22 @@ namespace butl
const dir_path& cwd,
const builtin_callbacks& cbs)
{
- return builtin (
- r,
- thread ([fn, &r, &args,
- in = move (in),
- out = move (out),
- err = move (err),
- &cwd,
- &cbs] () mutable noexcept
- {
- r = fn (args, move (in), move (out), move (err), cwd, cbs);
- }));
+ unique_ptr<builtin::async_state> s (
+ new builtin::async_state (
+ r,
+ [fn,
+ &args,
+ in = move (in), out = move (out), err = move (err),
+ &cwd,
+ &cbs] () mutable noexcept -> uint8_t
+ {
+ return fn (args,
+ move (in), move (out), move (err),
+ cwd,
+ cbs);
+ }));
+
+ return builtin (r, move (s));
}
template <builtin_impl fn>
@@ -2200,7 +2524,7 @@ namespace butl
const builtin_callbacks& cbs)
{
r = fn (args, move (in), move (out), move (err), cwd, cbs);
- return builtin (r, thread ());
+ return builtin (r);
}
const builtin_map builtins
@@ -2211,6 +2535,7 @@ namespace butl
{"diff", {nullptr, 2}},
{"echo", {&async_impl<&echo>, 2}},
{"false", {&false_, 0}},
+ {"find", {&async_impl<&find>, 2}},
{"ln", {&sync_impl<&ln>, 2}},
{"mkdir", {&sync_impl<&mkdir>, 2}},
{"mv", {&sync_impl<&mv>, 2}},
@@ -2222,4 +2547,36 @@ namespace butl
{"touch", {&sync_impl<&touch>, 2}},
{"true", {&true_, 0}}
};
+
+ // builtin
+ //
+ // Block until the asynchronous state, if any, is marked as finished and
+ // then return the result.
+ //
+ uint8_t builtin::
+ wait ()
+ {
+   if (async_state* s = state_.get ())
+   {
+     unique_lock lock (s->mutex);
+
+     // The predicate is checked before blocking, so this returns
+     // immediately if the builtin has already finished.
+     //
+     s->condv.wait (lock, [s] {return s->finished;});
+   }
+
+   return result_;
+ }
+
+ // Wait for the asynchronous state, if any, to be marked as finished for
+ // up to the specified number of milliseconds. Return nullopt if that
+ // hasn't happened in this timeframe and the result otherwise.
+ //
+ template <>
+ optional<uint8_t> builtin::
+ timed_wait (const chrono::milliseconds& tm)
+ {
+   if (async_state* s = state_.get ())
+   {
+     unique_lock lock (s->mutex);
+
+     bool done (s->finished);
+
+     if (!done)
+       done = s->condv.wait_for (lock, tm, [s] {return s->finished;});
+
+     if (!done)
+       return nullopt;
+   }
+
+   return result_;
+ }
}
diff --git a/libbutl/builtin.mxx b/libbutl/builtin.hxx
index e4dd4f8..b301f8a 100644
--- a/libbutl/builtin.mxx
+++ b/libbutl/builtin.hxx
@@ -1,66 +1,106 @@
-// file : libbutl/builtin.mxx -*- C++ -*-
+// file : libbutl/builtin.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
-#ifndef __cpp_lib_modules_ts
#include <map>
#include <string>
#include <vector>
-#include <thread>
-#include <cstddef> // size_t
-#include <utility> // move()
-#include <cstdint> // uint8_t
+#include <chrono>
+#include <memory> // unique_ptr
+#include <cstddef> // size_t
+#include <utility> // move()
+#include <cstdint> // uint8_t
#include <functional>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.builtin;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.threading;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.timestamp;
+#ifndef LIBBUTL_MINGW_STDTHREAD
+# include <mutex>
+# include <thread>
+# include <condition_variable>
#else
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/timestamp.mxx>
+# include <libbutl/mingw-mutex.hxx>
+# include <libbutl/mingw-thread.hxx>
+# include <libbutl/mingw-condition_variable.hxx>
#endif
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/timestamp.hxx>
+
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// A process/thread-like object representing a running builtin.
//
- // For now, instead of allocating the result storage dynamically, we
- // expect it to be provided by the caller.
+ // For now, instead of allocating the result storage dynamically, we expect
+ // it to be provided by the caller (allocating it dynamically would be
+ // wasteful for synchronous builtins).
//
- class builtin
+ class LIBBUTL_SYMEXPORT builtin
{
public:
+ // Wait for the builtin to complete and return its exit code. This
+ // function can be called multiple times.
+ //
std::uint8_t
- wait () {if (t_.joinable ()) t_.join (); return r_;}
+ wait ();
+
+ // Return the same result as wait() if the builtin has already completed
+ // and nullopt otherwise.
+ //
+ optional<std::uint8_t>
+ try_wait ();
+
+ // Wait for the builtin to complete for up to the specified time duration.
+ // Return the same result as wait() if the builtin has completed in this
+ // timeframe and nullopt otherwise.
+ //
+ template <typename R, typename P>
+ optional<std::uint8_t>
+ timed_wait (const std::chrono::duration<R, P>&);
- ~builtin () {wait ();}
+ ~builtin () {if (state_ != nullptr) state_->thread.join ();}
public:
- builtin (std::uint8_t& r, std::thread&& t = std::thread ())
- : r_ (r), t_ (move (t)) {}
+#ifndef LIBBUTL_MINGW_STDTHREAD
+ using mutex_type = std::mutex;
+ using condition_variable_type = std::condition_variable;
+ using thread_type = std::thread;
+
+ using unique_lock = std::unique_lock<mutex_type>;
+#else
+ using mutex_type = mingw_stdthread::mutex;
+ using condition_variable_type = mingw_stdthread::condition_variable;
+ using thread_type = mingw_stdthread::thread;
+
+ using unique_lock = mingw_stdthread::unique_lock<mutex_type>;
+#endif
+
+ struct async_state
+ {
+ bool finished = false;
+ mutex_type mutex;
+ condition_variable_type condv;
+ thread_type thread;
+
+ // Note that we can't use std::function as an argument type to get rid
+ // of the template since std::function can only be instantiated with a
+ // copy-constructible function and that's too restrictive for us (won't
+ // be able to capture auto_fd by value in a lambda, etc).
+ //
+ template <typename F>
+ async_state (uint8_t&, F);
+ };
+
+ builtin (std::uint8_t& r, std::unique_ptr<async_state>&& s = nullptr)
+ : result_ (r), state_ (move (s)) {}
builtin (builtin&&) = default;
private:
- std::uint8_t& r_;
- std::thread t_;
+ std::uint8_t& result_;
+ std::unique_ptr<async_state> state_;
};
// Builtin execution callbacks that can be used for checking/handling the
@@ -181,12 +221,20 @@ LIBBUTL_MODEXPORT namespace butl
// Return NULL if not a builtin.
//
const builtin_info*
- find (const std::string& n) const
- {
- auto i (base::find (n));
- return i != end () ? &i->second : nullptr;
- }
+ find (const std::string&) const;
};
+ // Asynchronously run a function as if it was a builtin. The function must
+ // have the std::uint8_t() signature and not throw exceptions.
+ //
+ // Note that using std::function as an argument type would be too
+ // restrictive (see above).
+ //
+ template <typename F>
+ builtin
+ pseudo_builtin (std::uint8_t&, F);
+
LIBBUTL_SYMEXPORT extern const builtin_map builtins;
}
+
+#include <libbutl/builtin.ixx>
diff --git a/libbutl/builtin.ixx b/libbutl/builtin.ixx
new file mode 100644
index 0000000..d77590b
--- /dev/null
+++ b/libbutl/builtin.ixx
@@ -0,0 +1,80 @@
+// file : libbutl/builtin.ixx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+namespace butl
+{
+ // builtin
+ //
+ // Implement timed_wait() function templates in terms of their milliseconds
+ // specialization.
+ //
+ template <>
+ LIBBUTL_SYMEXPORT optional<std::uint8_t> builtin::
+ timed_wait (const std::chrono::milliseconds&);
+
+ // Convert the duration to milliseconds and delegate to the milliseconds
+ // specialization.
+ //
+ template <typename R, typename P>
+ inline optional<std::uint8_t> builtin::
+ timed_wait (const std::chrono::duration<R, P>& d)
+ {
+   const std::chrono::milliseconds ms (
+     std::chrono::duration_cast<std::chrono::milliseconds> (d));
+
+   return timed_wait (ms);
+ }
+
+ // Return nullopt if there is an asynchronous state that is not yet marked
+ // as finished and the result otherwise. Never blocks waiting for the
+ // builtin.
+ //
+ inline optional<std::uint8_t> builtin::
+ try_wait ()
+ {
+   if (async_state* s = state_.get ())
+   {
+     unique_lock lock (s->mutex);
+
+     const bool pending (!s->finished);
+
+     if (pending)
+       return nullopt;
+   }
+
+   return result_;
+ }
+
+ // builtin_map
+ //
+ // Look up the builtin by name. Return NULL if there is no such entry.
+ //
+ inline const builtin_info* builtin_map::
+ find (const std::string& n) const
+ {
+   const auto pos (base::find (n));
+
+   if (pos == end ())
+     return nullptr;
+
+   return &pos->second;
+ }
+
+ // builtin::async_state
+ //
+ // Start the thread that calls the function, stores its result in r, marks
+ // the state as finished, and notifies all the waiters.
+ //
+ // Note that the thread lambda captures this and r by reference, so both
+ // the state and the result storage are accessed from the started thread.
+ //
+ template <typename F>
+ inline builtin::async_state::
+ async_state (std::uint8_t& r, F f)
+ : thread ([this, &r, f = std::move (f)] () mutable noexcept
+ {
+   std::uint8_t t (f ());
+
+   // Store the result and set the flag under the mutex that the
+   // waiting functions lock while checking the flag.
+   //
+   {
+     unique_lock l (this->mutex);
+     r = t;
+     finished = true;
+   }
+
+   // Notify after the lock is released.
+   //
+   condv.notify_all ();
+ })
+ {
+ }
+
+ // Wrap the function into an asynchronous state (which starts running it
+ // on a new thread) and return the builtin object that refers to r as the
+ // result storage.
+ //
+ template <typename F>
+ inline builtin
+ pseudo_builtin (std::uint8_t& r, F f)
+ {
+   // The wrapper is noexcept and converts the function's result to
+   // std::uint8_t.
+   //
+   std::unique_ptr<builtin::async_state> s (
+     new builtin::async_state (
+       r,
+       [f = std::move (f)] () mutable noexcept -> std::uint8_t
+       {
+         return f ();
+       }));
+
+   return builtin (r, std::move (s));
+ }
+}
diff --git a/libbutl/char-scanner.mxx b/libbutl/char-scanner.hxx
index 60994cf..24865b7 100644
--- a/libbutl/char-scanner.mxx
+++ b/libbutl/char-scanner.hxx
@@ -1,37 +1,21 @@
-// file : libbutl/char-scanner.mxx -*- C++ -*-
+// file : libbutl/char-scanner.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string> // char_traits
+#include <cassert>
#include <cstddef> // size_t
#include <cstdint> // uint64_t
#include <climits> // INT_*
#include <utility> // pair, make_pair()
#include <istream>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.char_scanner;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.fdstream;
-#else
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/bufstreambuf.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Refer to utf8_validator for details.
//
@@ -59,23 +43,25 @@ LIBBUTL_MODEXPORT namespace butl
// 0x0D is treated "as if" it was followed by 0x0A and multiple 0x0D
// are treated as one.
//
- // Note also that if the stream happens to be ifdstream, then it includes
- // a number of optimizations that assume nobody else is messing with the
- // stream.
+ // Note also that if the stream happens to be bufstreambuf-based, then it
+ // includes a number of optimizations that assume nobody else is messing
+ // with the stream.
//
- // The line and position arguments can be used to override the start line
- // and position in the stream (useful when re-scanning data saved with the
- // save_* facility).
+ // The line, column, and position arguments can be used to override the
+ // start line, column, and position in the stream (useful when re-scanning
+ // data saved with the save_* facility).
//
char_scanner (std::istream&,
bool crlf = true,
std::uint64_t line = 1,
+ std::uint64_t column = 1,
std::uint64_t position = 0);
char_scanner (std::istream&,
validator_type,
bool crlf = true,
std::uint64_t line = 1,
+ std::uint64_t column = 1,
std::uint64_t position = 0);
char_scanner (const char_scanner&) = delete;
@@ -106,8 +92,9 @@ LIBBUTL_MODEXPORT namespace butl
std::uint64_t line;
std::uint64_t column;
- // Logical character position (see ifdstream for details on the logical
- // part) if the scanned stream is ifdstream and always zero otherwise.
+ // Logical character position (see bufstreambuf for details on the
+ // logical part) if the scanned stream is bufstreambuf-based and always
+ // zero otherwise.
//
std::uint64_t position;
@@ -240,7 +227,7 @@ LIBBUTL_MODEXPORT namespace butl
// the hairy details; realistically, you would probably only direct-scan
// ASCII fragments).
//
- fdbuf* buf_; // NULL if not ifdstream.
+ bufstreambuf* buf_; // NULL if not bufstreambuf-based.
const char_type* gptr_;
const char_type* egptr_;
diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx
index 57aefc2..2dc41de 100644
--- a/libbutl/char-scanner.ixx
+++ b/libbutl/char-scanner.ixx
@@ -5,8 +5,10 @@ namespace butl
{
template <typename V, std::size_t N>
inline char_scanner<V, N>::
- char_scanner (std::istream& is, bool crlf, std::uint64_t l, std::uint64_t p)
- : char_scanner (is, validator_type (), crlf, l, p)
+ char_scanner (std::istream& is,
+ bool crlf,
+ std::uint64_t l, std::uint64_t c, std::uint64_t p)
+ : char_scanner (is, validator_type (), crlf, l, c, p)
{
}
diff --git a/libbutl/char-scanner.txx b/libbutl/char-scanner.txx
index 35edf42..75ea189 100644
--- a/libbutl/char-scanner.txx
+++ b/libbutl/char-scanner.txx
@@ -1,9 +1,7 @@
// file : libbutl/char-scanner.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_lib_modules_ts
#include <utility> // move
-#endif
namespace butl
{
@@ -13,13 +11,14 @@ namespace butl
validator_type v,
bool crlf,
std::uint64_t l,
+ std::uint64_t c,
std::uint64_t p)
: line (l),
- column (1),
+ column (c),
position (p),
is_ (is),
val_ (std::move (v)),
- buf_ (dynamic_cast<fdbuf*> (is.rdbuf ())),
+ buf_ (dynamic_cast<bufstreambuf*> (is.rdbuf ())),
gptr_ (nullptr),
egptr_ (nullptr),
crlf_ (crlf)
diff --git a/libbutl/command.cxx b/libbutl/command.cxx
index c23dfd5..2df52dd 100644
--- a/libbutl/command.cxx
+++ b/libbutl/command.cxx
@@ -1,48 +1,18 @@
// file : libbutl/command.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/command.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <map>
-#include <string>
-#include <cstddef>
-#include <functional>
+#include <libbutl/command.hxx>
#include <ios> // ios::failure
#include <vector>
+#include <cassert>
#include <utility> // move()
#include <stdexcept> // invalid_argument
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.command;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.optional;
-#endif
-
-import butl.builtin;
-import butl.fdstream;
-import butl.string_parser;
-#else
-#include <libbutl/builtin.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/string-parser.mxx>
-#endif
+
+#include <libbutl/builtin.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/string-parser.hxx>
using namespace std;
@@ -81,7 +51,7 @@ namespace butl
//
if (p == string::npos)
throw invalid_argument (string ("unmatched substitution character '") +
- open + "'");
+ open + '\'');
if (p == sp)
throw invalid_argument ("empty substitution variable");
@@ -90,12 +60,12 @@ namespace butl
if (vn.find_first_of (" \t") != string::npos)
throw invalid_argument ("whitespace in substitution variable '" +
- vn + "'");
+ vn + '\'');
// Find the variable and append its value or fail if it's unknown.
//
if (!sc (vn, r))
- throw invalid_argument ("unknown substitution variable '" + vn + "'");
+ throw invalid_argument ("unknown substitution variable '" + vn + '\'');
}
// Append the source string tail following the last substitution.
@@ -198,7 +168,7 @@ namespace butl
catch (const invalid_path& e)
{
throw invalid_argument ("invalid stdout redirect file path '" +
- e.path + "'");
+ e.path + '\'');
}
if (redir->empty ())
diff --git a/libbutl/command.mxx b/libbutl/command.hxx
index 143d406..fb7258f 100644
--- a/libbutl/command.mxx
+++ b/libbutl/command.hxx
@@ -1,34 +1,19 @@
-// file : libbutl/command.mxx -*- C++ -*-
+// file : libbutl/command.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#ifndef __cpp_lib_modules_ts
#include <map>
#include <string>
#include <cstddef> // size_t
#include <functional>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.command;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.optional;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Run a process or a builtin, interpreting the command line as
// whitespace-separated, potentially quoted program path/builtin name,
diff --git a/libbutl/const-ptr.mxx b/libbutl/const-ptr.hxx
index 343ecf6..1474e17 100644
--- a/libbutl/const-ptr.mxx
+++ b/libbutl/const-ptr.hxx
@@ -1,28 +1,11 @@
-// file : libbutl/const-ptr.mxx -*- C++ -*-
+// file : libbutl/const-ptr.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <cstddef> // nullptr_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.const_ptr;
-#ifdef __cpp_lib_modules_ts
-import std.core; // @@ MOD std.fundamental.
-#endif
-#endif
-
-#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Const-propagating pointer.
//
diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx
index ac3d0cb..5649965 100644
--- a/libbutl/curl.cxx
+++ b/libbutl/curl.cxx
@@ -1,41 +1,14 @@
// file : libbutl/curl.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/curl.mxx>
-#endif
-
-// C includes.
+#include <libbutl/curl.hxx>
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
#include <utility> // move()
+#include <cstdlib> // strtoul(), size_t
#include <exception> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.curl;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
-import butl.utility; // icasecmp()
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx>
using namespace std;
@@ -49,7 +22,17 @@ namespace butl
case ftp_put:
throw invalid_argument ("no input specified for PUT method");
case http_post:
- throw invalid_argument ("no input specified for POST method");
+ {
+ // Post the empty data.
+ //
+ // Note that while it's tempting to specify the --request POST option
+ // instead, that can potentially overwrite the request methods for the
+ // HTTP 30X response code redirects.
+ //
+ d.options.push_back ("--data-raw");
+ d.options.push_back ("");
+ }
+ // Fall through.
case ftp_get:
case http_get:
{
@@ -170,7 +153,7 @@ namespace butl
}
curl::method_proto curl::
- translate (method_type m, const string& u, method_proto_options& o)
+ translate (method_type m, const string& u, method_proto_options& o, flags fs)
{
size_t n (u.find ("://"));
@@ -189,8 +172,11 @@ namespace butl
}
else if (icasecmp (u, "http", n) == 0 || icasecmp (u, "https", n) == 0)
{
- o.push_back ("--fail"); // Fail on HTTP errors (e.g., 404).
- o.push_back ("--location"); // Follow redirects.
+ if ((fs & flags::no_fail) == flags::none)
+ o.push_back ("--fail"); // Fail on HTTP errors (e.g., 404).
+
+ if ((fs & flags::no_location) == flags::none)
+ o.push_back ("--location"); // Follow redirects.
switch (m)
{
@@ -203,4 +189,123 @@ namespace butl
throw invalid_argument ("unsupported protocol");
}
+
+ uint16_t curl:: // Parse a decimal HTTP status code; 0 means invalid.
+ parse_http_status_code (const string& s)
+ {
+ char* e (nullptr); // Set by strtoul() to the first unparsed character.
+ unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. NOTE(review): strtoul() also accepts leading whitespace and a sign.
+ assert (e != nullptr);
+
+ return *e == '\0' && c >= 100 && c < 600 // Fully parsed and in [100, 600).
+ ? static_cast<uint16_t> (c)
+ : 0;
+ }
+
+ string curl:: // Read one response line, without its LF/CRLF terminator.
+ read_http_response_line (ifdstream& is)
+ {
+ string r;
+ getline (is, r); // Strips the trailing LF (0xA).
+
+ // Note that on POSIX CRLF is not automatically translated into LF, so we
+ // need to strip CR (0xD) manually.
+ //
+ if (!r.empty () && r.back () == '\r')
+ r.pop_back (); // Drops at most one trailing CR.
+
+ return r;
+ }
+
+ curl::http_status curl::
+ read_http_status (ifdstream& is, bool skip_headers)
+ {
+ // After getting the status line, if requested, we will read until the
+ // empty line (containing just CRLF). Not being able to reach such a line
+ // is an error, which is the reason for the exception mask choice. When
+ // done, we will restore the original exception mask.
+ //
+ ifdstream::iostate es (is.exceptions ());
+ is.exceptions (ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+ auto read_status = [&is, es] () // Reads and parses a single status line.
+ {
+ string l (read_http_response_line (is));
+
+ for (;;) // Breakout loop.
+ {
+ if (l.compare (0, 5, "HTTP/") != 0) // Must start with the protocol.
+ break;
+
+ size_t p (l.find (' ', 5)); // The protocol end.
+ if (p == string::npos)
+ break;
+
+ p = l.find_first_not_of (' ', p + 1); // The code start.
+ if (p == string::npos)
+ break;
+
+ size_t e (l.find (' ', p + 1)); // The code end.
+ if (e == string::npos) // NOTE(review): a line with no space after the code is rejected.
+ break;
+
+ uint16_t c (parse_http_status_code (string (l, p, e - p)));
+ if (c == 0) // Not a valid status code.
+ break;
+
+ string r; // Stays empty if there is no reason phrase.
+ p = l.find_first_not_of (' ', e + 1); // The reason start.
+ if (p != string::npos)
+ {
+ e = l.find_last_not_of (' '); // The reason end.
+ assert (e != string::npos && e >= p);
+
+ r = string (l, p, e - p + 1);
+ }
+
+ return http_status {c, move (r)};
+ }
+
+ is.exceptions (es); // Restore the exception mask.
+
+ throw invalid_argument ("invalid status line '" + l + "'");
+ };
+
+ // The curl output for a successful request looks like this:
+ //
+ // HTTP/1.1 100 Continue
+ //
+ // HTTP/1.1 200 OK
+ // Content-Length: 83
+ // Content-Type: text/manifest;charset=utf-8
+ //
+ // <response-body>
+ //
+ // curl normally sends the 'Expect: 100-continue' header for uploads, so
+ // we need to handle the interim HTTP server response with the continue
+ // (100) status code.
+ //
+ // Interestingly, Apache can respond with the continue (100) code and with
+ // the not found (404) code afterwards.
+ //
+ http_status rs (read_status ());
+
+ if (rs.code == 100)
+ {
+ // Skips the interim response.
+ //
+ while (!read_http_response_line (is).empty ()) ;
+
+ rs = read_status (); // Reads the final status code.
+ }
+
+ if (skip_headers)
+ {
+ while (!read_http_response_line (is).empty ()) ; // Skips headers.
+ }
+
+ is.exceptions (es); // Restore the original exception mask.
+
+ return rs;
+ }
}
diff --git a/libbutl/curl.mxx b/libbutl/curl.hxx
index 03aac99..ea91807 100644
--- a/libbutl/curl.mxx
+++ b/libbutl/curl.hxx
@@ -1,42 +1,20 @@
-// file : libbutl/curl.mxx -*- C++ -*-
+// file : libbutl/curl.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
+#include <cstdint> // uint16_t
#include <type_traits>
-#include <cstddef> // size_t
-#include <utility> // forward()
-#include <exception> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.curl;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process; //@@ MOD TODO: should we re-export?
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Perform a method (GET, POST, PUT) on a URL using the curl(1) program.
// Throw process_error and io_error (both derive from system_error) in case
@@ -113,6 +91,19 @@ LIBBUTL_MODEXPORT namespace butl
public:
enum method_type {get, put, post};
+ // By default the -sS and, for the HTTP protocol, --fail and --location
+ // options are passed to curl on the command line. Optionally, these
+ // options can be suppressed.
+ //
+ enum class flags: std::uint16_t
+ {
+ no_fail = 0x01, // Don't pass --fail (fail on HTTP errors).
+ no_location = 0x02, // Don't pass --location (follow redirects).
+ no_sS = 0x04, // Don't pass -sS (silent but show diagnostics).
+
+ none = 0 // Default options set (nothing suppressed).
+ };
+
ifdstream in;
ofdstream out;
@@ -143,12 +134,77 @@ LIBBUTL_MODEXPORT namespace butl
const std::string& url,
A&&... options);
+ // Similar to the above, but allows adjusting curl's default command
+ // line.
+ //
+ template <typename I,
+ typename O,
+ typename E,
+ typename... A>
+ curl (I&& in,
+ O&& out,
+ E&& err,
+ method_type,
+ flags,
+ const std::string& url,
+ A&&... options);
+
+ template <typename C,
+ typename I,
+ typename O,
+ typename E,
+ typename... A>
+ curl (const C&,
+ I&& in,
+ O&& out,
+ E&& err,
+ method_type,
+ flags,
+ const std::string& url,
+ A&&... options);
+
+ // Read the HTTP response status from an input stream.
+ //
+ // Specifically, read and parse the HTTP status line, by default skip over
+ // the remaining headers (leaving the stream at the beginning of the
+ // response body), and return the status code and the reason phrase. Throw
+ // std::invalid_argument if the status line could not be parsed. Pass
+ // through the ios::failure exception on the stream error.
+ //
+ // Note that if ios::failure is thrown the stream's exception mask may not
+ // be preserved.
+ //
+ struct http_status
+ {
+ std::uint16_t code; // Status code (e.g., 200).
+ std::string reason; // Reason phrase (e.g., OK); empty if absent.
+ };
+
+ static http_status
+ read_http_status (ifdstream&, bool skip_headers = true);
+
+ // Parse and return the HTTP status code. Return 0 if the argument is
+ // invalid.
+ //
+ static std::uint16_t
+ parse_http_status_code (const std::string&);
+
+ // Read the CRLF-terminated line from an input stream, stripping the
+ // trailing CRLF. Pass through the ios::failure exception on the stream
+ // error.
+ //
+ static std::string
+ read_http_response_line (ifdstream&);
+
private:
enum method_proto {ftp_get, ftp_put, http_get, http_post};
using method_proto_options = small_vector<const char*, 2>;
method_proto
- translate (method_type, const std::string& url, method_proto_options&);
+ translate (method_type,
+ const std::string& url,
+ method_proto_options&,
+ flags);
private:
template <typename T>
@@ -188,6 +244,11 @@ LIBBUTL_MODEXPORT namespace butl
typename std::enable_if<is_other<O>::value, O>::type
map_out (O&&, method_proto, io_data&);
};
+
+ curl::flags operator& (curl::flags, curl::flags);
+ curl::flags operator| (curl::flags, curl::flags);
+ curl::flags operator&= (curl::flags&, curl::flags);
+ curl::flags operator|= (curl::flags&, curl::flags);
}
#include <libbutl/curl.ixx>
diff --git a/libbutl/curl.ixx b/libbutl/curl.ixx
index 61a4ff5..6dcfe13 100644
--- a/libbutl/curl.ixx
+++ b/libbutl/curl.ixx
@@ -1,7 +1,11 @@
// file : libbutl/curl.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // forward()
+#include <exception> // invalid_argument
+
+namespace butl
{
template <typename I,
typename O,
@@ -12,6 +16,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
O&& out,
E&& err,
method_type m,
+ flags fs,
const std::string& url,
A&&... options)
: curl ([] (const char* [], std::size_t) {},
@@ -19,8 +24,80 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
std::forward<O> (out),
std::forward<E> (err),
m,
+ fs,
+ url,
+ std::forward<A> (options)...)
+ {
+ }
+
+ template <typename C,
+ typename I,
+ typename O,
+ typename E,
+ typename... A>
+ inline curl:: // Flag-less overload: delegates to the flags-taking one.
+ curl (const C& cmdc,
+ I&& in,
+ O&& out,
+ E&& err,
+ method_type m,
+ const std::string& url,
+ A&&... options)
+ : curl (cmdc,
+ std::forward<I> (in),
+ std::forward<O> (out),
+ std::forward<E> (err),
+ m,
+ flags::none, // Suppress nothing: keep the default options.
+ url,
+ std::forward<A> (options)...)
+ {
+ }
+
+ template <typename I,
+ typename O,
+ typename E,
+ typename... A>
+ inline curl:: // Flag-less overload: delegates to the flags-taking one.
+ curl (I&& in,
+ O&& out,
+ E&& err,
+ method_type m,
+ const std::string& url,
+ A&&... options)
+ : curl (std::forward<I> (in),
+ std::forward<O> (out),
+ std::forward<E> (err),
+ m,
+ flags::none, // Suppress nothing: keep the default options.
url,
std::forward<A> (options)...)
{
}
+
+ inline curl::flags
+ operator&= (curl::flags& x, curl::flags y) // Updates x; returns its new value by copy.
+ {
+ return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) &
+ static_cast<std::uint16_t> (y));
+ }
+
+ inline curl::flags
+ operator|= (curl::flags& x, curl::flags y) // Updates x; returns its new value by copy.
+ {
+ return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) |
+ static_cast<std::uint16_t> (y));
+ }
+
+ inline curl::flags
+ operator& (curl::flags x, curl::flags y)
+ {
+ return x &= y; // x is a by-value parameter, so only the local copy changes.
+ }
+
+ inline curl::flags
+ operator| (curl::flags x, curl::flags y)
+ {
+ return x |= y; // x is a by-value parameter, so only the local copy changes.
+ }
}
diff --git a/libbutl/curl.txx b/libbutl/curl.txx
index 0c07d35..fc74470 100644
--- a/libbutl/curl.txx
+++ b/libbutl/curl.txx
@@ -1,7 +1,7 @@
// file : libbutl/curl.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
template <typename I>
typename std::enable_if<curl::is_other<I>::value, I>::type curl::
@@ -65,11 +65,12 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
O&& out,
E&& err,
method_type m,
+ flags fs,
const std::string& url,
A&&... options)
{
method_proto_options mpo;
- method_proto mp (translate (m, url, mpo));
+ method_proto mp (translate (m, url, mpo, fs));
io_data in_data;
io_data out_data;
@@ -81,8 +82,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
map_out (std::forward<O> (out), mp, out_data),
std::forward<E> (err),
"curl",
- "-s", // Silent.
- "-S", // But do show diagnostics.
+ ((fs & flags::no_sS) == flags::none
+ ? "-sS" // Silent but do show diagnostics.
+ : nullptr),
mpo,
in_data.options,
out_data.options,
diff --git a/libbutl/default-options.cxx b/libbutl/default-options.cxx
deleted file mode 100644
index 28f6fb7..0000000
--- a/libbutl/default-options.cxx
+++ /dev/null
@@ -1,73 +0,0 @@
-// file : libbutl/default-options.cxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#include <libbutl/default-options.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <vector>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.default_options;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.small_vector;
-#endif
-
-#endif
-
-using namespace std;
-
-namespace butl
-{
- optional<dir_path>
- default_options_start (const optional<dir_path>& home,
- const vector<dir_path>& dirs)
- {
- if (home)
- assert (home->absolute () && home->normalized ());
-
- if (dirs.empty ())
- return nullopt;
-
- // Use the first directory as a start.
- //
- auto i (dirs.begin ());
- dir_path d (*i);
-
- // Try to find a common prefix for each subsequent directory.
- //
- for (++i; i != dirs.end (); ++i)
- {
- bool p (false);
-
- for (;
- !(d.root () || (home && d == *home));
- d = d.directory ())
- {
- if (i->sub (d))
- {
- p = true;
- break;
- }
- }
-
- if (!p)
- return nullopt;
- }
-
- return d;
- }
-}
diff --git a/libbutl/default-options.mxx b/libbutl/default-options.hxx
index aeb246d..1d363b6 100644
--- a/libbutl/default-options.mxx
+++ b/libbutl/default-options.hxx
@@ -1,44 +1,18 @@
-// file : libbutl/default-options.mxx -*- C++ -*-
+// file : libbutl/default-options.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
-#include <utility> // move(), forward(), make_pair()
-#include <algorithm> // reverse()
-#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.default_options;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.small_vector;
-
-import butl.git;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/small-vector.mxx>
-
-#include <libbutl/git.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Default options files helper implementation.
//
@@ -61,10 +35,12 @@ LIBBUTL_MODEXPORT namespace butl
using default_options = small_vector<default_options_entry<O>, 4>;
// Search for and load the specified list of options files in the specified
- // directories returning a vector of option class instances (O). If args is
- // false, only options are allowed and are parsed using scanner S in the
- // U::fail mode. If args is true, then both options and arguments are
- // allowed in any order with options parsed in the U::stop mode.
+ // directories returning a vector of option class instances (O). Read
+ // additional options from files referenced by the specified option
+ // (normally --options-file). If args is false, only options are allowed and
+ // are parsed using scanner S in the U::fail mode. If args is true, then
+ // both options and arguments are allowed in any order with options parsed
+ // in the U::stop mode.
//
// Pass each default options file path to the specified function prior to
// load (can be used for tracing, etc). The function signature is:
@@ -105,6 +81,15 @@ LIBBUTL_MODEXPORT namespace butl
//
// Note that the extra directory options files are never considered remote.
//
+ // For the convenience of implementation, the function parses the option
+ // files in the reverse order. Thus, to make sure that positions in the
+ // options list monotonically increase, it needs the maximum number of
+ // arguments, globally and per file, to be specified. This way the starting
+ // options position for each file will be less than for the previously
+ // parsed file by arg_max_file and equal to arg_max - arg_max_file for the
+ // first file. If the actual number of arguments exceeds the specified, then
+ // invalid_argument is thrown.
+ //
template <typename O, typename S, typename U, typename F>
default_options<O>
load_default_options (const optional<dir_path>& sys_dir,
@@ -112,6 +97,9 @@ LIBBUTL_MODEXPORT namespace butl
const optional<dir_path>& extra_dir,
const default_options_files&,
F&&,
+ const std::string& option,
+ std::size_t arg_max,
+ std::size_t arg_max_file,
bool args = false);
// Merge the default options/arguments and the command line
@@ -149,12 +137,25 @@ LIBBUTL_MODEXPORT namespace butl
AS
merge_default_arguments (const default_options<O>&, const AS&, F&&);
- // Find a common start (parent) directory stopping at home or root
- // (excluding).
+ // Find a common start (parent) directory for directories specified as an
+ // iterator range, stopping at home or root (excluding). Optionally pass a
+ // function resolving an iterator into a directory in a way other than just
+ // dereferencing it. The function signature is:
+ //
+ // const dir_path& (I)
//
- LIBBUTL_SYMEXPORT optional<dir_path>
- default_options_start (const optional<dir_path>& home_dir,
- const std::vector<dir_path>&);
+ template <typename I, typename F>
+ optional<dir_path>
+ default_options_start (const optional<dir_path>& home, I, I, F&&);
+
+ template <typename I>
+ inline optional<dir_path>
+ default_options_start (const optional<dir_path>& home, I b, I e)
+ {
+ return default_options_start (home,
+ b, e,
+ [] (I i) -> const dir_path& {return *i;}); // Default resolver: just dereference.
+ }
}
#include <libbutl/default-options.ixx>
diff --git a/libbutl/default-options.ixx b/libbutl/default-options.ixx
index 4a551ac..7248d7d 100644
--- a/libbutl/default-options.ixx
+++ b/libbutl/default-options.ixx
@@ -1,7 +1,7 @@
// file : libbutl/default-options.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
template <typename O>
inline O
diff --git a/libbutl/default-options.txx b/libbutl/default-options.txx
index 5245bd6..aa254b2 100644
--- a/libbutl/default-options.txx
+++ b/libbutl/default-options.txx
@@ -1,7 +1,15 @@
// file : libbutl/default-options.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility> // move(), forward(), make_pair()
+#include <algorithm> // reverse()
+#include <stdexcept> // invalid_argument
+#include <system_error>
+
+#include <libbutl/git.hxx>
+#include <libbutl/filesystem.hxx>
+
+namespace butl
{
inline bool
options_dir_exists (const dir_path& d)
@@ -14,10 +22,11 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
throw std::make_pair (path_cast<path> (d), std::move (e));
}
- // Search for and parse the options files in the specified directory and
- // its local/ subdirectory, if exists, in the reverse order and append the
- // options to the resulting list. Return false if --no-default-options is
- // encountered.
+ // Search for and parse the options files in the specified directory and its
+ // local/ subdirectory, if exists, in the reverse order and append the
+ // options to the resulting list. Verify that the number of arguments
+ // doesn't exceed the limits and decrement arg_max by arg_max_file after
+ // parsing each file. Return false if --no-default-options is encountered.
//
// Note that by default we check for the local/ subdirectory even if we
// don't think it belongs to the remote directory; the user may move things
@@ -31,10 +40,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
template <typename O, typename S, typename U, typename F>
bool
load_default_options_files (const dir_path& d,
+ const std::string& opt,
bool args,
bool remote,
const small_vector<path, 2>& fs,
F&& fn,
+ std::size_t& arg_max,
+ std::size_t arg_max_file,
default_options<O>& def_ops,
bool load_sub = true,
bool load_dir = true)
@@ -43,7 +55,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
bool r (true);
- auto load = [args, &fs, &fn, &def_ops, &r] (const dir_path& d, bool rem)
+ auto load = [&opt, args, &fs, &fn, &def_ops, &arg_max, arg_max_file, &r]
+ (const dir_path& d, bool rem)
{
using namespace std;
@@ -55,9 +68,14 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
{
if (file_exists (p)) // Follows symlinks.
{
+ if (arg_max < arg_max_file)
+ throw invalid_argument ("too many options files");
+
+ size_t start_pos (arg_max - arg_max_file);
+
fn (p, rem, false /* overwrite */);
- S s (p.string ());
+ S s (p.string (), opt, start_pos);
// @@ Note that the potentially thrown exceptions (unknown option,
// unexpected argument, etc) will not contain any location
@@ -66,7 +84,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// this in CLI.
//
O o;
- small_vector<std::string, 1> as;
+ small_vector<string, 1> as;
if (args)
{
@@ -79,6 +97,15 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
else
o.parse (s, U::fail, U::fail);
+ if (s.position () > arg_max)
+ throw invalid_argument ("too many options in file " +
+ p.string ());
+
+ // Don't decrement arg_max for the empty option files.
+ //
+ if (s.position () != start_pos)
+ arg_max = start_pos;
+
if (o.no_default_options ())
r = false;
@@ -88,9 +115,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
rem});
}
}
- catch (std::system_error& e)
+ catch (system_error& e)
{
- throw std::make_pair (move (p), std::move (e));
+ throw make_pair (move (p), move (e));
}
}
};
@@ -116,6 +143,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
const optional<dir_path>& extra_dir,
const default_options_files& ofs,
F&& fn,
+ const std::string& opt,
+ std::size_t arg_max,
+ std::size_t arg_max_file,
bool args)
{
if (sys_dir)
@@ -206,10 +236,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
if (load_extra && extra_dir->sub (d))
{
load = load_default_options_files<O, S, U> (*extra_dir,
+ opt,
args,
false /* remote */,
ofs.files,
std::forward<F> (fn),
+ arg_max,
+ arg_max_file,
r);
load_extra = false;
@@ -219,10 +252,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
if (load && options_dir_exists (od))
load = load_default_options_files<O, S, U> (od,
+ opt,
args,
remote,
ofs.files,
std::forward<F> (fn),
+ arg_max,
+ arg_max_file,
r,
load_build2_local,
load_build2);
@@ -235,10 +271,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
if (load && load_extra)
load = load_default_options_files<O, S, U> (*extra_dir,
+ opt,
args,
false /* remote */,
ofs.files,
std::forward<F> (fn),
+ arg_max,
+ arg_max_file,
r);
if (load && home_dir)
@@ -247,19 +286,25 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
if (options_dir_exists (d))
load = load_default_options_files<O, S, U> (d,
+ opt,
args,
false /* remote */,
ofs.files,
std::forward<F> (fn),
+ arg_max,
+ arg_max_file,
r);
}
if (load && sys_dir && options_dir_exists (*sys_dir))
load_default_options_files<O, S, U> (*sys_dir,
+ opt,
args,
false /* remote */,
ofs.files,
std::forward<F> (fn),
+ arg_max,
+ arg_max_file,
r);
std::reverse (r.begin (), r.end ());
@@ -310,4 +355,43 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
r.insert (r.end (), cmd_args.begin (), cmd_args.end ());
return r;
}
+
+ template <typename I, typename F>
+ optional<dir_path>
+ default_options_start (const optional<dir_path>& home, I b, I e, F&& f)
+ {
+ if (home)
+ assert (home->absolute () && home->normalized ());
+
+ if (b == e) // Empty range: there is no start directory.
+ return nullopt;
+
+ // Use the first directory as a start.
+ //
+ I i (b);
+ dir_path d (f (i)); // A copy, since d is shortened in the loop below.
+
+ // Try to find a common prefix for each subsequent directory.
+ //
+ for (++i; i != e; ++i)
+ {
+ bool p (false); // True if d (possibly shortened) is a prefix of f (i).
+
+ for (;
+ !(d.root () || (home && d == *home)); // Stop at root or home (excluding).
+ d = d.directory ())
+ {
+ if (f (i).sub (d))
+ {
+ p = true;
+ break;
+ }
+ }
+
+ if (!p) // Reached root or home without finding a common prefix.
+ return nullopt;
+ }
+
+ return d;
+ }
}
diff --git a/libbutl/diagnostics.cxx b/libbutl/diagnostics.cxx
index b038e5d..6ac8192 100644
--- a/libbutl/diagnostics.cxx
+++ b/libbutl/diagnostics.cxx
@@ -1,9 +1,7 @@
// file : libbutl/diagnostics.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/diagnostics.mxx>
-#endif
+#include <libbutl/diagnostics.hxx>
#ifndef _WIN32
# include <unistd.h> // write()
@@ -12,49 +10,36 @@
# include <io.h> //_write()
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <utility>
-#include <exception>
-
#include <ios> // ios::failure
#include <mutex>
#include <string>
+#include <cassert>
#include <cstddef> // size_t
#include <iostream> // cerr
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-module butl.diagnostics;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import std.threading;
-import butl.utility;
-import butl.optional;
-import butl.fdstream; // stderr_fd(), fdterm()
+#ifndef LIBBUTL_MINGW_STDTHREAD
+# include <mutex>
#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
+# include <libbutl/mingw-mutex.hxx>
#endif
+#include <libbutl/ft/lang.hxx> // thread_local
+
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+
using namespace std;
namespace butl
{
ostream* diag_stream = &cerr;
- static mutex diag_mutex;
+#ifndef LIBBUTL_MINGW_STDTHREAD
+ static std::mutex diag_mutex;
+#else
+ static mingw_stdthread::mutex diag_mutex;
+#endif
string diag_progress;
static string diag_progress_blank; // Being printed blanks out the line.
@@ -158,28 +143,28 @@ namespace butl
default_writer (const diag_record& r)
{
r.os.put ('\n');
- diag_stream_lock () << r.os.str ();
+
+ diag_stream_lock l;
+ (*diag_stream) << r.os.str ();
// We can end up flushing the result of several writes. The last one may
// possibly be incomplete, but that's not a problem as it will also be
// followed by the flush() call.
//
- // @@ Strange: why not just hold the lock for both write and flush?
- //
diag_stream->flush ();
}
- void (*diag_record::writer) (const diag_record&) = &default_writer;
+ diag_writer* diag_record::writer = &default_writer;
void diag_record::
- flush () const
+ flush (void (*w) (const diag_record&)) const
{
if (!empty_)
{
if (epilogue_ == nullptr)
{
- if (writer != nullptr)
- writer (*this);
+ if (w != nullptr || (w = writer) != nullptr)
+ w (*this);
empty_ = true;
}
@@ -189,8 +174,8 @@ namespace butl
//
auto e (epilogue_);
epilogue_ = nullptr;
- e (*this); // Can throw.
- flush (); // Call ourselves to write the data in case it returns.
+ e (*this, w); // Can throw.
+ flush (w); // Call ourselves to write the data in case it returns.
}
}
}
@@ -213,4 +198,28 @@ namespace butl
flush ();
#endif
}
+
+ // Diagnostics stack.
+ //
+ static
+#ifdef __cpp_thread_local
+ thread_local
+#else
+ __thread
+#endif
+ const diag_frame* diag_frame_stack = nullptr;
+
+ const diag_frame* diag_frame::
+ stack () noexcept
+ {
+ return diag_frame_stack; // Current tip of the per-thread frame stack.
+ }
+
+ const diag_frame* diag_frame::
+ stack (const diag_frame* f) noexcept
+ {
+ const diag_frame* r (diag_frame_stack); // Remember the previous tip to return it.
+ diag_frame_stack = f;
+ return r;
+ }
}
diff --git a/libbutl/diagnostics.mxx b/libbutl/diagnostics.hxx
index d41ba74..c6db34b 100644
--- a/libbutl/diagnostics.mxx
+++ b/libbutl/diagnostics.hxx
@@ -1,32 +1,19 @@
-// file : libbutl/diagnostics.mxx -*- C++ -*-
+// file : libbutl/diagnostics.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ostream>
#include <sstream>
#include <utility> // move(), forward()
#include <exception> // uncaught_exception[s]()
-#endif
#include <libbutl/ft/exception.hxx> // uncaught_exceptions
-#ifdef __cpp_modules_ts
-export module butl.diagnostics;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Diagnostic facility base infrastructure.
//
@@ -40,8 +27,11 @@ LIBBUTL_MODEXPORT namespace butl
LIBBUTL_SYMEXPORT extern std::ostream* diag_stream;
// Acquire the diagnostics exclusive access mutex in ctor, release in dtor.
- // An object of the type must be created prior to writing to diag_stream (see
- // above).
+ // An object of the type must be created prior to writing to diag_stream
+ // (see above).
+ //
+ // Note that this class also manages the interaction with the progress
+ // printing (see below).
//
struct LIBBUTL_SYMEXPORT diag_stream_lock
{
@@ -87,13 +77,26 @@ LIBBUTL_MODEXPORT namespace butl
~diag_progress_lock ();
};
+ // Diagnostic record and marks (error, warn, etc).
//
+ // There are two ways to use this facility in a project: simple, where we
+ // just alias the types in our namespace, and complex, where instead we
+ // derive from them and "override" (hide, really) operator<< (and a few
+ // other functions) in order to make ADL look in our namespace rather than
+ // butl. In the simple case we may have to resort to defining some
+ // operator<< overloads in namespace std in order to satisfy ADL. This is
+ // usually not an acceptable approach for libraries, which is where the
+ // complex case comes in (see libbuild2 for a "canonical" example of the
+ // complex case). Note also that it doesn't seem worth templatizing epilogue
+ // so the complex case may also need to do a few casts but those should be
+ // limited to the diagnostics infrastructure.
//
struct diag_record;
template <typename> struct diag_prologue;
template <typename> struct diag_mark;
- using diag_epilogue = void (const diag_record&);
+ using diag_writer = void (const diag_record&);
+ using diag_epilogue = void (const diag_record&, diag_writer*);
struct LIBBUTL_SYMEXPORT diag_record
{
@@ -130,7 +133,7 @@ LIBBUTL_MODEXPORT namespace butl
full () const {return !empty_;}
void
- flush () const;
+ flush (diag_writer* = nullptr) const;
void
append (const char* indent, diag_epilogue* e) const
@@ -163,7 +166,7 @@ LIBBUTL_MODEXPORT namespace butl
#endif
empty_ (r.empty_),
epilogue_ (r.epilogue_),
- os (std::move (r.os))
+ os (std::move (r.os)) // Note: can throw.
{
if (!empty_)
{
@@ -181,7 +184,7 @@ LIBBUTL_MODEXPORT namespace butl
// Diagnostics writer. The default implementation writes the record text
// to diag_stream. If it is NULL, then the record text is ignored.
//
- static void (*writer) (const diag_record&);
+ static diag_writer* writer;
protected:
#ifdef __cpp_lib_uncaught_exceptions
@@ -276,4 +279,97 @@ LIBBUTL_MODEXPORT namespace butl
e.B::operator() (r);
}
};
+
+ // Diagnostics stack. Each frame is "applied" to the diag record.
+ //
+ // Unfortunately most of our use-cases don't fit into the 2-pointer small
+ // object optimization of std::function. So we have to complicate things
+ // a bit here.
+ //
+ struct LIBBUTL_SYMEXPORT diag_frame
+ {
+ // If the function is not NULL, make this frame the new tip of the stack,
+ // remembering the previous tip in prev_. A NULL function leaves the stack
+ // untouched (and prev_ unset).
+ //
+ explicit
+ diag_frame (void (*f) (const diag_frame&, const diag_record&))
+ : func_ (f)
+ {
+ if (func_ != nullptr)
+ prev_ = stack (this);
+ }
+
+ // Take over the function and the previous-tip link from x and make this
+ // frame the tip of the stack. Then clear x's function so that its
+ // destructor does not touch the stack.
+ //
+ // NOTE(review): this appears to assume that x is currently the tip of the
+ // stack (the value returned by stack() is discarded) -- confirm.
+ //
+ diag_frame (diag_frame&& x)
+ : func_ (x.func_)
+ {
+ if (func_ != nullptr)
+ {
+ prev_ = x.prev_;
+ stack (this);
+
+ x.func_ = nullptr;
+ }
+ }
+
+ diag_frame& operator= (diag_frame&&) = delete;
+
+ diag_frame (const diag_frame&) = delete;
+ diag_frame& operator= (const diag_frame&) = delete;
+
+ // If this frame is active (function is not NULL), restore the tip of the
+ // stack that was saved in prev_.
+ //
+ ~diag_frame ()
+ {
+ if (func_ != nullptr )
+ stack (prev_);
+ }
+
+ // Normally passed as an epilogue. Writer is not used.
+ //
+ // Walk the stack from the tip following the prev_ links and call each
+ // frame's function with the frame itself and the record.
+ //
+ static void
+ apply (const diag_record& r, diag_writer* = nullptr)
+ {
+ for (const diag_frame* f (stack ()); f != nullptr; f = f->prev_)
+ f->func_ (*f, r);
+ }
+
+ // Tip of the stack.
+ //
+ static const diag_frame*
+ stack () noexcept;
+
+ // Set the new and return the previous tip of the stack.
+ //
+ static const diag_frame*
+ stack (const diag_frame*) noexcept;
+
+ // Set the tip of the stack to the specified frame for the lifetime of the
+ // guard, restoring the previous tip on destruction.
+ //
+ struct stack_guard
+ {
+ explicit stack_guard (const diag_frame* s): s_ (stack (s)) {}
+ ~stack_guard () {stack (s_);}
+ const diag_frame* s_;
+ };
+
+ private:
+ // Frame function (NULL means the frame is not on the stack) and the frame
+ // that was the tip before this one (only set if func_ is not NULL).
+ //
+ void (*func_) (const diag_frame&, const diag_record&);
+ const diag_frame* prev_;
+ };
+
+ // Diagnostics frame that stores a callable of type F and calls it with the
+ // diag record when the frame is applied.
+ //
+ template <typename F>
+ struct diag_frame_impl: diag_frame
+ {
+ // Note: move() is qualified since for an arbitrary F (for example, a
+ // lambda defined outside of namespace std) an unqualified call is not
+ // guaranteed to find std::move().
+ //
+ explicit
+ diag_frame_impl (F f): diag_frame (&thunk), func_ (std::move (f)) {}
+
+ private:
+ // Function registered with the base frame: recover the derived type and
+ // call the stored callable with the record. The downcast is valid since
+ // thunk is private and only ever passed to the base by our constructor.
+ //
+ static void
+ thunk (const diag_frame& f, const diag_record& r)
+ {
+ static_cast<const diag_frame_impl&> (f).func_ (r);
+ }
+
+ const F func_;
+ };
+
+ // Create a diagnostics frame for the specified callable, deducing its
+ // type. The callable is taken by value and moved into the frame.
+ //
+ // Note: move() is qualified since an unqualified call is not guaranteed to
+ // find std::move() for an arbitrary F (for example, a lambda).
+ //
+ template <typename F>
+ inline diag_frame_impl<F>
+ make_diag_frame (F f)
+ {
+ return diag_frame_impl<F> (std::move (f));
+ }
}
diff --git a/libbutl/export.hxx b/libbutl/export.hxx
index 3353ca8..dc04f85 100644
--- a/libbutl/export.hxx
+++ b/libbutl/export.hxx
@@ -3,14 +3,6 @@
#pragma once
-// If modules are available, setup the module export.
-//
-#ifdef __cpp_modules_ts
-# define LIBBUTL_MODEXPORT export
-#else
-# define LIBBUTL_MODEXPORT
-#endif
-
// Normally we don't export class templates (but do complete specializations),
// inline functions, and classes with only inline member functions. Exporting
// classes that inherit from non-exported/imported bases (e.g., std::string)
diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx
index 377681e..07cb9f2 100644
--- a/libbutl/fdstream.cxx
+++ b/libbutl/fdstream.cxx
@@ -1,9 +1,7 @@
// file : libbutl/fdstream.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/fdstream.hxx>
#include <errno.h> // errno, E*
@@ -12,69 +10,54 @@
# include <unistd.h> // close(), read(), write(), lseek(), dup(), pipe(),
// ftruncate(), isatty(), ssize_t, STD*_FILENO
# include <sys/uio.h> // writev(), iovec
-# include <sys/stat.h> // stat(), S_I*
+# include <sys/stat.h> // stat(), fstat(), S_I*
# include <sys/time.h> // timeval
# include <sys/types.h> // stat, off_t
# include <sys/select.h>
#else
# include <libbutl/win32-utility.hxx>
-# include <io.h> // _close(), _read(), _write(), _setmode(), _sopen(),
- // _lseek(), _dup(), _pipe(), _chsize_s,
- // _get_osfhandle()
-# include <share.h> // _SH_DENYNO
-# include <stdio.h> // _fileno(), stdin, stdout, stderr, SEEK_*
-# include <fcntl.h> // _O_*
-# include <sys/stat.h> // S_I*
+# ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+# define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x04
+# endif
+
+# include <io.h> // _close(), _read(), _write(), _setmode(), _sopen(),
+ // _lseek(), _dup(), _pipe(), _chsize_s,
+ // _get_osfhandle()
+# include <share.h> // _SH_DENYNO
+# include <stdio.h> // _fileno(), stdin, stdout, stderr, SEEK_*
+# include <fcntl.h> // _O_*
+# include <sys/types.h> // _stat
+# include <sys/stat.h> // fstat(), S_I*
+
+# ifdef _MSC_VER // Unlikely to be fixed in newer versions.
+# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+# define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+# endif
# include <wchar.h> // wcsncmp(), wcsstr()
+# include <thread> // this_thread::yield()
# include <algorithm> // count()
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <vector>
-#include <string>
-#include <istream>
-#include <ostream>
-#include <utility>
-#include <cstdint>
-#include <cstddef>
-
#include <ios> // ios_base::openmode, ios_base::failure
#include <new> // bad_alloc
#include <limits> // numeric_limits
-#include <cstring> // memcpy(), memmove()
+#include <cassert>
+#include <cstring> // memcpy(), memmove(), memchr(), strcmp()
+#include <cstdlib> // getenv()
#include <iostream> // cin, cout
#include <exception> // uncaught_exception[s]()
#include <stdexcept> // invalid_argument
#include <system_error>
-#endif
-#include <libbutl/ft/exception.hxx> // uncaught_exceptions
+#include <libbutl/ft/exception.hxx> // uncaught_exceptions
#include <libbutl/process-details.hxx>
-#ifdef __cpp_modules_ts
-module butl.fdstream;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading; // Clang wants it in purview (see process-details.hxx).
-#endif
-import butl.path;
-import butl.filesystem;
-import butl.small_vector;
-#endif
-
-import butl.utility; // throw_*_ios_failure(), function_cast()
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // throw_*_ios_failure(), function_cast()
+#include <libbutl/timestamp.hxx>
using namespace std;
@@ -164,7 +147,7 @@ namespace butl
}
#endif
- // fdbuf
+ // fdstreambuf
//
// Return true if the file descriptor is in the non-blocking mode. Throw
// ios::failure on the underlying OS error.
@@ -185,7 +168,7 @@ namespace butl
#endif
}
- void fdbuf::
+ void fdstreambuf::
open (auto_fd&& fd, uint64_t pos)
{
close ();
@@ -198,7 +181,7 @@ namespace butl
fd_ = move (fd);
}
- bool fdbuf::
+ bool fdstreambuf::
blocking (bool m)
{
// Verify that the file descriptor is open.
@@ -222,7 +205,7 @@ namespace butl
return !m;
}
- streamsize fdbuf::
+ streamsize fdstreambuf::
showmanyc ()
{
if (!is_open ())
@@ -257,7 +240,7 @@ namespace butl
return 0;
}
- fdbuf::int_type fdbuf::
+ fdstreambuf::int_type fdstreambuf::
underflow ()
{
int_type r (traits_type::eof ());
@@ -279,7 +262,7 @@ namespace butl
return r;
}
- bool fdbuf::
+ bool fdstreambuf::
load ()
{
// Doesn't handle blocking mode and so should not be called.
@@ -296,7 +279,7 @@ namespace butl
return n != 0;
}
- void fdbuf::
+ void fdstreambuf::
seekg (uint64_t off)
{
// In the future we may implement the blocking behavior for a non-blocking
@@ -331,7 +314,7 @@ namespace butl
setg (buf_, buf_, buf_);
}
- fdbuf::int_type fdbuf::
+ fdstreambuf::int_type fdstreambuf::
overflow (int_type c)
{
int_type r (traits_type::eof ());
@@ -359,7 +342,7 @@ namespace butl
return r;
}
- int fdbuf::
+ int fdstreambuf::
sync ()
{
if (!is_open ())
@@ -376,15 +359,7 @@ namespace butl
return save () ? 0 : -1;
}
-#ifdef _WIN32
- static inline int
- write (int fd, const void* buf, size_t n)
- {
- return _write (fd, buf, static_cast<unsigned int> (n));
- }
-#endif
-
- bool fdbuf::
+ bool fdstreambuf::
save ()
{
size_t n (pptr () - pbase ());
@@ -395,7 +370,7 @@ namespace butl
// descriptor opened for read-only access (while -1 with errno EBADF is
// expected). This is in contrast with VC's _write() and POSIX's write().
//
- auto m (write (fd_.get (), buf_, n));
+ auto m (fdwrite (fd_.get (), buf_, n));
if (m == -1)
throw_generic_ios_failure (errno);
@@ -411,7 +386,7 @@ namespace butl
return true;
}
- streamsize fdbuf::
+ streamsize fdstreambuf::
xsputn (const char_type* s, streamsize sn)
{
// The xsputn() function interface doesn't support the non-blocking
@@ -510,7 +485,7 @@ namespace butl
// Flush the buffer.
//
size_t wn (bn + an);
- int r (wn > 0 ? write (fd_.get (), buf_, wn) : 0);
+ streamsize r (wn > 0 ? fdwrite (fd_.get (), buf_, wn) : 0);
if (r == -1)
throw_generic_ios_failure (errno);
@@ -553,7 +528,7 @@ namespace butl
// The data tail doesn't fit the buffer so write it to the file.
//
- r = write (fd_.get (), s, n);
+ r = fdwrite (fd_.get (), s, n);
if (r == -1)
throw_generic_ios_failure (errno);
@@ -568,13 +543,13 @@ namespace butl
//
// - basic_ostream::seekp(pos) ->
// basic_streambuf::pubseekpos(pos, ios::out) ->
- // fdbuf::seekpos(pos, ios::out)
+ // fdstreambuf::seekpos(pos, ios::out)
//
// - basic_istream::seekg(pos) ->
// basic_streambuf::pubseekpos(pos, ios::in) ->
- // fdbuf::seekpos(pos, ios::in)
+ // fdstreambuf::seekpos(pos, ios::in)
//
- fdbuf::pos_type fdbuf::
+ fdstreambuf::pos_type fdstreambuf::
seekpos (pos_type pos, ios_base::openmode which)
{
// Note that the position type provides an explicit conversion to the
@@ -589,21 +564,21 @@ namespace butl
//
// - basic_ostream::seekp(off, dir) ->
// basic_streambuf::pubseekoff(off, dir, ios::out) ->
- // fdbuf::seekoff(off, dir, ios::out)
+ // fdstreambuf::seekoff(off, dir, ios::out)
//
// - basic_ostream::tellp() ->
// basic_streambuf::pubseekoff(0, ios::cur, ios::out) ->
- // fdbuf::seekoff(0, ios::cur, ios::out)
+ // fdstreambuf::seekoff(0, ios::cur, ios::out)
//
// - basic_istream::seekg(off, dir) ->
// basic_streambuf::pubseekoff(off, dir, ios::in) ->
- // fdbuf::seekoff(off, dir, ios::in)
+ // fdstreambuf::seekoff(off, dir, ios::in)
//
// - basic_istream::tellg() ->
// basic_streambuf::pubseekoff(0, ios::cur, ios::in) ->
- // fdbuf::seekoff(0, ios::cur, ios::in)
+ // fdstreambuf::seekoff(0, ios::cur, ios::in)
//
- fdbuf::pos_type fdbuf::
+ fdstreambuf::pos_type fdstreambuf::
seekoff (off_type off, ios_base::seekdir dir, ios_base::openmode which)
{
// The seekoff() function interface doesn't support the non-blocking
@@ -827,9 +802,8 @@ namespace butl
catch (const ios_base::failure&) {}
}
- // Underlying file descriptor is closed by fdbuf dtor with errors (if any)
- // being ignored.
- //
+ // Underlying file descriptor is closed by fdstreambuf dtor with errors
+ // (if any) being ignored.
}
void ifdstream::
@@ -848,6 +822,13 @@ namespace butl
}
void ifdstream::
+ open (auto_fd&& fd, fdstream_mode m, std::uint64_t pos)
+ {
+ // Pass the descriptor through mode() together with the requested mode
+ // and open the stream on the result at the specified position.
+ //
+ // NOTE(review): presumably mode() applies the translation/blocking flags
+ // to the descriptor and returns it -- confirm against its definition.
+ //
+ open (mode (std::move (fd), m), pos);
+
+ // Remember whether the skip flag was requested (consulted by close()).
+ //
+ skip_ = (m & fdstream_mode::skip) == fdstream_mode::skip;
+ }
+
+ void ifdstream::
close ()
{
if (skip_ && is_open () && good ())
@@ -863,7 +844,7 @@ namespace butl
}
ifdstream&
- getline (ifdstream& is, string& s, char delim)
+ getline (ifdstream& is, string& l, char delim)
{
ifdstream::iostate eb (is.exceptions ());
assert (eb & ifdstream::badbit);
@@ -871,16 +852,16 @@ namespace butl
// Amend the exception mask to prevent exceptions being thrown by the C++
// IO runtime to avoid incompatibility issues due to ios_base::failure ABI
// fiasco (#66145). We will not restore the mask when ios_base::failure is
- // thrown by fdbuf since there is no way to "silently" restore it if the
- // corresponding bits are in the error state without the exceptions() call
- // throwing ios_base::failure. Not restoring exception mask on throwing
- // because of badbit should probably be ok since the stream is no longer
- // usable.
+ // thrown by fdstreambuf since there is no way to "silently" restore it if
+ // the corresponding bits are in the error state without the exceptions()
+ // call throwing ios_base::failure. Not restoring exception mask on
+ // throwing because of badbit should probably be ok since the stream is no
+ // longer usable.
//
if (eb != ifdstream::badbit)
is.exceptions (ifdstream::badbit);
- std::getline (is, s, delim);
+ std::getline (is, l, delim);
// Throw if any of the newly set bits are present in the exception mask.
//
@@ -893,6 +874,58 @@ namespace butl
return is;
}
+ // Append to the line whatever is currently available in the stream without
+ // blocking, up to and consuming (but not storing) the delimiter. Return
+ // false if we got blocked before seeing the delimiter or EOF (call again
+ // once there is more data) and true otherwise.
+ //
+ bool
+ getline_non_blocking (ifdstream& is, string& l, char delim)
+ {
+ // The stream must be non-blocking and must throw on badbit.
+ //
+ assert (!is.blocking () && (is.exceptions () & ifdstream::badbit) != 0);
+
+ fdstreambuf& sb (*static_cast<fdstreambuf*> (is.rdbuf ()));
+
+ // Read until blocked (0), EOF (-1) or encounter the delimiter.
+ //
+ // Note that here we reasonably assume that any failure in in_avail()
+ // will lead to badbit and thus an exception (see showmanyc()).
+ //
+ streamsize s;
+ while ((s = sb.in_avail ()) > 0)
+ {
+ // Search for the delimiter in the get area [gptr, egptr).
+ //
+ const char* p (sb.gptr ());
+ size_t n (sb.egptr () - p);
+
+ // If the delimiter is found, only take the characters before it.
+ //
+ const char* e (static_cast<const char*> (memchr (p, delim, n)));
+ if (e != nullptr)
+ n = e - p;
+
+ l.append (p, n);
+
+ // Note: consume the delimiter if found.
+ //
+ sb.gbump (static_cast<int> (n + (e != nullptr ? 1 : 0)));
+
+ if (e != nullptr)
+ break;
+ }
+
+ // Here s can be:
+ //
+ // -1 -- EOF.
+ // 0 -- blocked before encountering delimiter/EOF.
+ // >0 -- encountered the delimiter.
+ //
+ if (s == -1)
+ {
+ is.setstate (ifdstream::eofbit);
+
+ // If we couldn't extract anything, not even the delimiter, then this is
+ // a failure per the getline() interface.
+ //
+ if (l.empty ())
+ is.setstate (ifdstream::failbit);
+ }
+
+ // Only the blocked case requires another call.
+ //
+ return s != 0;
+ }
+
// ofdstream
//
ofdstream::
@@ -1042,10 +1075,11 @@ namespace butl
#endif
// Unlike other platforms, *BSD allows opening a directory as a file which
- // will cause all kinds of problems upstream (e.g., cpfile()). So we detect
- // and diagnose this.
+ // will cause all kinds of problems upstream (e.g., cpfile()). So we
+ // detect and diagnose this. Note: not certain this is the case for NetBSD
+ // and OpenBSD.
//
-#if defined(__FreeBSD__) || defined(__NetBSD__)
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
{
struct stat s;
if (stat (f, &s) == 0 && S_ISDIR (s.st_mode))
@@ -1131,12 +1165,17 @@ namespace butl
// underlying CreateFile() function call (see mventry() for details). If
// that's the case, we will keep trying to open the file for two seconds.
//
- for (size_t i (0); i < 21; ++i)
+ // Also, it turns out, if someone memory-maps a file, it takes Windows
+ // some time to realize it's been unmapped and until then any attempt to
+ // open it results in EINVAL POSIX error, ERROR_USER_MAPPED_FILE system
+ // error. So we retry those as well.
+ //
+ for (size_t i (0); i < 41; ++i)
{
- // Sleep 100 milliseconds before the open retry.
+ // Sleep 50 milliseconds before the open retry.
//
if (i != 0)
- Sleep (100);
+ Sleep (50);
fd = pass_perm
? _sopen (f, of, _SH_DENYNO, pf)
@@ -1150,10 +1189,11 @@ namespace butl
// Note that MinGW's _sopen() is just a stub forwarding the call to the
// (publicly available) MSVCRT's implementation.
//
- if (!(fd == -1 &&
- out &&
- errno == EACCES &&
- GetLastError () == ERROR_SHARING_VIOLATION))
+ if (!(fd == -1 &&
+ out &&
+ (errno == EACCES || errno == EINVAL) &&
+ (GetLastError () == ERROR_SHARING_VIOLATION ||
+ GetLastError () == ERROR_USER_MAPPED_FILE)))
break;
}
@@ -1362,6 +1402,28 @@ namespace butl
throw_generic_ios_failure (errno);
}
+ // Return the type and size of the filesystem entry that the file
+ // descriptor refers to. The type is left as unknown if none of the checks
+ // below match.
+ //
+ entry_stat
+ fdstat (int fd)
+ {
+ struct stat s;
+
+ // Report the failure via the ios failure helper, consistent with the
+ // documented interface of this function ("Throw ios::failure on the
+ // underlying OS error") and with the other fd*() functions in this file.
+ //
+ if (fstat (fd, &s) != 0)
+ throw_generic_ios_failure (errno);
+
+ auto m (s.st_mode);
+ entry_type t (entry_type::unknown);
+
+ // Note: cannot be a symlink.
+ //
+ if (S_ISREG (m))
+ t = entry_type::regular;
+ else if (S_ISDIR (m))
+ t = entry_type::directory;
+ else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m))
+ t = entry_type::other;
+
+ return entry_stat {t, static_cast<uint64_t> (s.st_size)};
+ }
+
bool
fdterm (int fd)
{
@@ -1382,9 +1444,23 @@ namespace butl
throw_generic_ios_failure (errno);
}
- pair<size_t, size_t>
- fdselect (fdselect_set& read, fdselect_set& write)
+ // Decide whether color output is supported based solely on the TERM
+ // environment variable. Both arguments (the file descriptor and the enable
+ // flag) are unused in this implementation.
+ //
+ bool
+ fdterm_color (int, bool)
{
+ const char* t (std::getenv ("TERM"));
+
+ // This test was lifted from GCC (Emacs shell sets TERM=dumb).
+ //
+ // That is, TERM must be set and must not be "dumb".
+ //
+ return t != nullptr && strcmp (t, "dumb") != 0;
+ }
+
+ static pair<size_t, size_t>
+ fdselect (fdselect_set& read,
+ fdselect_set& write,
+ const chrono::milliseconds* timeout)
+ {
+ using namespace chrono;
+
// Copy fdselect_set into the native fd_set, updating max_fd. Also clear
// the ready flag in the source set.
//
@@ -1396,6 +1472,8 @@ namespace butl
for (fdselect_state& s: from)
{
+ s.ready = false;
+
if (s.fd == nullfd)
continue;
@@ -1403,7 +1481,6 @@ namespace butl
throw invalid_argument ("invalid file descriptor");
FD_SET (s.fd, &to);
- s.ready = false;
if (max_fd < s.fd)
max_fd = s.fd;
@@ -1420,29 +1497,64 @@ namespace butl
++max_fd;
+ // Note that if the timeout is not NULL, then the select timeout needs to
+ // be recalculated for each select() call (of which we can potentially
+ // have multiple due to EINTR). So the timeout can be used as bool.
+ //
+ timestamp now;
+ timestamp deadline;
+
+ if (timeout)
+ {
+ now = system_clock::now ();
+ deadline = now + *timeout;
+ }
+
// Repeat the select() call while getting the EINTR error and throw on
// any other error.
//
// Note that select() doesn't modify the sets on failure (according to
// POSIX standard as well as to the Linux, FreeBSD and MacOS man pages).
//
- for (;;)
+ for (timeval tv;;)
{
+ if (timeout)
+ {
+ if (now < deadline)
+ {
+ microseconds t (duration_cast<microseconds> (deadline - now));
+ tv.tv_sec = t.count () / 1000000;
+ tv.tv_usec = t.count () % 1000000;
+ }
+ else
+ {
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ }
+ }
+
int r (select (max_fd,
&rds,
&wds,
nullptr /* exceptfds */,
- nullptr /* timeout */));
+ timeout ? &tv : nullptr));
if (r == -1)
{
if (errno == EINTR)
+ {
+ if (timeout)
+ now = system_clock::now ();
+
continue;
+ }
throw_system_ios_failure (errno);
}
- assert (r != 0); // We don't expect the timeout to occur.
+ if (!timeout)
+ assert (r != 0); // We don't expect the timeout to occur.
+
break;
}
@@ -1475,6 +1587,12 @@ namespace butl
return read (fd, buf, n);
}
+ // Thin wrapper over write(): the result (including -1 on error) is
+ // returned as is and errno is left for the caller to examine.
+ //
+ streamsize
+ fdwrite (int fd, const void* buf, size_t n)
+ {
+ return write (fd, buf, n);
+ }
+
#else
auto_fd
@@ -1730,9 +1848,34 @@ namespace butl
throw_generic_ios_failure (e);
}
+ // Return the type and size of the filesystem entry that the file
+ // descriptor refers to. The type is left as unknown if none of the checks
+ // below match.
+ //
+ entry_stat
+ fdstat (int fd)
+ {
+ // Since symlinks have been taken care of, we can just _fstat().
+ //
+ struct __stat64 s;
+
+ // Report the failure via the ios failure helper, consistent with the
+ // documented interface of this function ("Throw ios::failure on the
+ // underlying OS error") and with the other fd*() functions in this file.
+ //
+ if (_fstat64 (fd, &s) != 0)
+ throw_generic_ios_failure (errno);
+
+ auto m (s.st_mode);
+ entry_type t (entry_type::unknown);
+
+ if (S_ISREG (m))
+ t = entry_type::regular;
+ else if (S_ISDIR (m))
+ t = entry_type::directory;
+ else if (S_ISCHR (m))
+ t = entry_type::other;
+
+ return entry_stat {t, static_cast<uint64_t> (s.st_size)};
+ }
+
bool
fdterm (int fd)
{
+ // @@ Both GCC and Clang simply call GetConsoleMode() for this check. I
+ // wonder why we don't do the same? See also fdterm_color() below.
+
// We don't need to close it (see fd_to_handle()).
//
HANDLE h (fd_to_handle (fd));
@@ -1818,9 +1961,49 @@ namespace butl
return false;
}
- pair<size_t, size_t>
- fdselect (fdselect_set& read, fdselect_set& write)
+ // Return true if virtual terminal (VT) processing is already enabled in
+ // the console mode of the descriptor's handle or, if requested, has been
+ // successfully enabled. Throw (via throw_system_ios_failure()) if the
+ // console mode cannot be queried.
+ //
+ bool
+ fdterm_color (int fd, bool enable)
+ {
+ // We don't need to close it (see fd_to_handle()).
+ //
+ HANDLE h (fd_to_handle (fd));
+
+ // See GH issue #312 for background on this logic.
+ //
+ DWORD m;
+ if (!GetConsoleMode (h, &m))
+ throw_system_ios_failure (GetLastError ());
+
+ // Some terminals (e.g. Windows Terminal) enable VT processing by default.
+ //
+ if ((m & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0)
+ return true;
+
+ if (enable)
+ {
+ // If SetConsoleMode() fails, assume VT processing is unsupported (it
+ // is only supported from a certain build of Windows 10).
+ //
+ // Note that Wine pretends to support this but doesn't handle the escape
+ // sequences. See https://bugs.winehq.org/show_bug.cgi?id=49780.
+ //
+ // Note also that we keep the existing mode bits and add the processed
+ // output flag together with the VT processing flag.
+ //
+ if (SetConsoleMode (h,
+ (m |
+ ENABLE_PROCESSED_OUTPUT |
+ ENABLE_VIRTUAL_TERMINAL_PROCESSING)))
+ return true;
+ }
+
+ // VT processing is not enabled and we were either not asked to enable it
+ // or failed to do so.
+ //
+ return false;
+ }
+
+ static pair<size_t, size_t>
+ fdselect (fdselect_set& read,
+ fdselect_set& write,
+ const chrono::milliseconds* timeout)
{
+ using namespace chrono;
+
if (!write.empty ())
throw invalid_argument ("write file descriptor set is not supported");
@@ -1830,25 +2013,35 @@ namespace butl
for (fdselect_state& s: read)
{
+ s.ready = false;
+
if (s.fd == nullfd)
continue;
if (s.fd < 0)
throw invalid_argument ("invalid file descriptor");
- s.ready = false;
++n;
}
if (n == 0)
throw invalid_argument ("empty file descriptor set");
+ // Note that if the timeout is not NULL, then the deadline needs to be
+ // checked prior to re-probing the pipe for data presence. So the timeout
+ // can be used as bool.
+ //
+ timestamp deadline;
+
+ if (timeout)
+ deadline = system_clock::now () + *timeout;
+
// Keep iterating through the set until at least one byte can be read from
// any of the pipes or any of them get closed (so can read eof).
//
size_t r (0);
- while (true)
+ for (size_t i (0);; ++i)
{
for (fdselect_state& s: read)
{
@@ -1915,13 +2108,37 @@ namespace butl
throw_system_ios_failure (e);
}
- // Bail out if some descriptors are ready for reading and sleep a bit
- // and repeat otherwise.
+ // Bail out if some descriptors are ready for reading or the deadline
+ // has been reached, if specified, and sleep a bit and repeat otherwise.
//
if (r != 0)
break;
- Sleep (50);
+ // Back off gradually: just yield for about the first 1000 iterations, then
+ // ramp the sleep up linearly from 1ms to the 25ms max.
+ //
+ DWORD t (
+ static_cast<DWORD> (i <= 1000 ? 0 :
+ i >= 1000 + 100 ? 25 : 1 + ((i - 1000) / 4)));
+
+ if (timeout)
+ {
+ timestamp now (system_clock::now ());
+
+ if (now < deadline)
+ {
+ milliseconds tm (duration_cast<milliseconds> (deadline - now));
+
+ if (t > tm.count ())
+ t = static_cast<DWORD> (tm.count ());
+ }
+ else
+ break;
+ }
+
+ if (t == 0)
+ this_thread::yield ();
+ else
+ Sleep (t);
}
return make_pair (r, 0);
@@ -1964,5 +2181,26 @@ namespace butl
return r;
}
+ // Thin wrapper over _write(): the result (including -1 on error) is
+ // returned as is. The byte count is cast to unsigned int, which is how it
+ // is passed to _write() here.
+ //
+ streamsize
+ fdwrite (int fd, const void* buf, size_t n)
+ {
+ return _write (fd, buf, static_cast<unsigned int> (n));
+ }
+
#endif
+
+ // Wait without a timeout: forward to the platform-specific implementation
+ // above, passing NULL as the timeout.
+ //
+ pair<size_t, size_t>
+ fdselect (fdselect_set& read, fdselect_set& write)
+ {
+ return fdselect (read, write, nullptr /* timeout */);
+ }
+
+ // Explicit specialization of the timeout version for milliseconds: forward
+ // to the platform-specific implementation above, passing the timeout by
+ // pointer.
+ //
+ // NOTE(review): this is the only duration type specialized here; whether
+ // other duration types are meant to be supported cannot be told from this
+ // file -- confirm.
+ //
+ template <>
+ pair<size_t, size_t>
+ fdselect (fdselect_set& read,
+ fdselect_set& write,
+ const chrono::milliseconds& timeout)
+ {
+ return fdselect (read, write, &timeout);
+ }
}
diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.hxx
index 0d56131..9c8f786 100644
--- a/libbutl/fdstream.mxx
+++ b/libbutl/fdstream.hxx
@@ -1,45 +1,26 @@
-// file : libbutl/fdstream.mxx -*- C++ -*-
+// file : libbutl/fdstream.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-#include <cassert>
-#ifndef __cpp_lib_modules_ts
#include <ios> // streamsize
#include <vector>
#include <string>
+#include <chrono>
#include <istream>
#include <ostream>
#include <utility> // move(), pair
#include <cstdint> // uint16_t, uint64_t
#include <cstddef> // size_t
-#include <iterator>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.fdstream;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.filesystem; // permissions
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/filesystem.hxx> // permissions, entry_stat
+#include <libbutl/small-vector.hxx>
+#include <libbutl/bufstreambuf.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// RAII type for file descriptors. Note that failure to close the descriptor
// is silently ignored by both the destructor and reset().
@@ -53,9 +34,6 @@ LIBBUTL_MODEXPORT namespace butl
constexpr operator int () const {return -1;}
};
-#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym.
- inline
-#endif
constexpr nullfd_t nullfd (-1);
class LIBBUTL_SYMEXPORT auto_fd
@@ -141,9 +119,9 @@ LIBBUTL_MODEXPORT namespace butl
// - input or output but not both (can use a union of two streams for that)
// - no support for put back
// - use of tell[gp]() and seek[gp]() is discouraged on Windows for
- // fdstreams opened in the text mode (see fdbuf::seekoff() implementation
- // for reasoning and consider using non-standard tellg() and seekg() in
- // fdbuf, instead)
+ // fdstreams opened in the text mode (see fdstreambuf::seekoff()
+ // implementation for reasoning and consider using non-standard tellg()
+ // and seekg() in fdstreambuf, instead)
// - non-blocking file descriptor is supported only by showmanyc() function
// and only for pipes on Windows, in contrast to POSIX systems
// - throws ios::failure in case of open(), read(), write(), close(),
@@ -156,20 +134,26 @@ LIBBUTL_MODEXPORT namespace butl
// - passing to constructor auto_fd with a negative file descriptor is valid
// and results in the creation of an unopened object
//
- class LIBBUTL_SYMEXPORT fdbuf: public std::basic_streambuf<char>
+ class LIBBUTL_SYMEXPORT fdstreambuf: public bufstreambuf
{
public:
- fdbuf () = default;
+ // Reasonable (for stack allocation) buffer size that provides decent
+ // performance.
+ //
+ static const std::size_t buffer_size = 8192;
+
+ fdstreambuf () = default;
// Unless specified, the current read/write position is assumed to
// be 0 (note: not queried).
//
- fdbuf (auto_fd&&, std::uint64_t pos = 0);
+ fdstreambuf (auto_fd&&, std::uint64_t pos = 0);
- // Before we invented auto_fd into fdstreams we keept fdbuf opened on
- // faulty close attempt. Now fdbuf is always closed by close() function.
- // This semantics change seems to be the right one as there is no reason to
- // expect fdclose() to succeed after it has already failed once.
+ // Before we introduced auto_fd into fdstreams we kept fdstreambuf open
+ // on a failed close attempt. Now fdstreambuf is always closed by the
+ // close() function. This change of semantics seems to be the right one as
+ // there is no reason to expect fdclose() to succeed after it has already
+ // failed once.
//
void
close () {fd_.close ();}
@@ -195,14 +179,11 @@ LIBBUTL_MODEXPORT namespace butl
bool
blocking (bool);
- public:
- using base = std::basic_streambuf<char>;
-
- using int_type = base::int_type;
- using traits_type = base::traits_type;
+ bool
+ blocking () const {return !non_blocking_;}
- using pos_type = base::pos_type; // std::streampos
- using off_type = base::off_type; // std::streamoff
+ public:
+ using base = bufstreambuf;
// basic_streambuf input interface.
//
@@ -221,13 +202,7 @@ LIBBUTL_MODEXPORT namespace butl
// Return the (logical) position of the next byte to be read.
//
- // Note that on Windows when reading in the text mode the logical position
- // may differ from the physical file descriptor position due to the CRLF
- // character sequence translation. See the seekoff() implementation for
- // more background on this issue.
- //
- std::uint64_t
- tellg () const {return off_ - (egptr () - gptr ());}
+ using base::tellg;
// Seek to the (logical) position as if by reading the specified number of
// bytes from the beginning of the stream. Throw ios::failure on the
@@ -254,8 +229,7 @@ LIBBUTL_MODEXPORT namespace butl
// Return the (logical) position of the next byte to be written.
//
- std::uint64_t
- tellp () const {return off_ + (pptr () - buf_);}
+ using base::tellp;
// basic_streambuf positioning interface (both input/output).
//
@@ -272,8 +246,7 @@ LIBBUTL_MODEXPORT namespace butl
private:
auto_fd fd_;
- std::uint64_t off_;
- char buf_[8192];
+ char buf_[buffer_size];
bool non_blocking_ = false;
};
@@ -306,7 +279,9 @@ LIBBUTL_MODEXPORT namespace butl
binary = 0x02,
skip = 0x04,
blocking = 0x08,
- non_blocking = 0x10
+ non_blocking = 0x10,
+
+ none = 0
};
inline fdstream_mode operator& (fdstream_mode, fdstream_mode);
@@ -346,8 +321,11 @@ LIBBUTL_MODEXPORT namespace butl
int
fd () const {return buf_.fd ();}
+ bool
+ blocking () const {return buf_.blocking ();}
+
protected:
- fdbuf buf_;
+ fdstreambuf buf_;
};
// iofdstream constructors and open() functions that take openmode as an
@@ -521,11 +499,7 @@ LIBBUTL_MODEXPORT namespace butl
}
void
- open (auto_fd&& fd, fdstream_mode m, std::uint64_t pos = 0)
- {
- open (std::move (fd), pos);
- skip_ = (m & fdstream_mode::skip) == fdstream_mode::skip;
- }
+ open (auto_fd&& fd, fdstream_mode m, std::uint64_t pos = 0);
void close ();
auto_fd release (); // Note: no skipping.
@@ -680,6 +654,54 @@ LIBBUTL_MODEXPORT namespace butl
LIBBUTL_SYMEXPORT ifdstream&
getline (ifdstream&, std::string&, char delim = '\n');
+ // The non-blocking getline() version that reads the line in potentially
+ // multiple calls. Key differences compared to getline():
+ //
+ // - Stream must be in the non-blocking mode and exception mask must have
+ // at least badbit.
+ //
+ // - Return type is bool instead of stream. Return true if the line has been
+ // read or false if it should be called again once the stream has more
+ // data to read. Also return true on failure.
+ //
+ // - The string must be empty on the first call.
+ //
+ // - There could still be data to read in the stream's buffer (as opposed to
+ // file descriptor) after this function returns true and you should be
+ // careful not to block on fdselect() in this case. In fact, the
+ // recommended pattern is to call this function first and only call
+ // fdselect() if it returns false.
+ //
+ // The typical usage in combination with the eof() helper:
+ //
+ // fdselect_set fds {is.fd (), ...};
+ // fdselect_state& ist (fds[0]);
+ // fdselect_state& ...;
+ //
+ // for (string l; ist.fd != nullfd || ...; )
+ // {
+ // if (ist.fd != nullfd && getline_non_blocking (is, l))
+ // {
+ // if (eof (is))
+ // ist.fd = nullfd;
+ // else
+ // {
+ // // Consume line.
+ //
+ // l.clear ();
+ // }
+ //
+ // continue;
+ // }
+ //
+ // ifdselect (fds);
+ //
+ // // Handle other ready fds.
+ // }
+ //
+ LIBBUTL_SYMEXPORT bool
+ getline_non_blocking (ifdstream&, std::string&, char delim = '\n');
+
// Open a file returning an auto_fd that holds its file descriptor on
// success and throwing ios::failure otherwise.
//
@@ -865,12 +887,28 @@ LIBBUTL_MODEXPORT namespace butl
LIBBUTL_SYMEXPORT void
fdtruncate (int, std::uint64_t);
- // Test whether a file descriptor refers to a terminal. Throw ios::failure on
+ // Return filesystem entry stat from file descriptor. Throw ios::failure on
// the underlying OS error.
//
+ // See also path_entry() in filesystem.
+ //
+ LIBBUTL_SYMEXPORT entry_stat
+ fdstat (int);
+
+ // Test whether a file descriptor refers to a terminal. Throw ios::failure
+ // on the underlying OS error.
+ //
LIBBUTL_SYMEXPORT bool
fdterm (int);
+ // Test whether a terminal file descriptor supports ANSI color output. If
+ // the enable argument is true, then also try to enable color output (only
+ // applicable on some platforms, such as Windows). Throw ios::failure on the
+ // underlying OS error.
+ //
+ LIBBUTL_SYMEXPORT bool
+ fdterm_color (int, bool enable);
+
// Wait until one or more file descriptors becomes ready for input (reading)
// or output (writing). Return the pair of numbers of descriptors that are
// ready. Throw std::invalid_argument if anything is wrong with arguments
@@ -878,7 +916,7 @@ LIBBUTL_MODEXPORT namespace butl
// underlying OS error.
//
// Note that the function clears all the previously-ready entries on each
- // call. Entries with nullfd are ignored.
+ // call. Entries with nullfd are ignored (but cleared).
//
// On Windows only pipes and only their input (read) ends are supported.
//
@@ -886,11 +924,13 @@ LIBBUTL_MODEXPORT namespace butl
{
int fd;
bool ready;
+ void* data; // Arbitrary data which can be associated with the descriptor.
// Note: intentionally non-explicit to allow implicit initialization when
// pushing to fdselect_set.
//
- fdselect_state (int fd): fd (fd), ready (false) {}
+ fdselect_state (int fd, void* d = nullptr)
+ : fd (fd), ready (false), data (d) {}
};
using fdselect_set = small_vector<fdselect_state, 4>;
@@ -915,10 +955,25 @@ LIBBUTL_MODEXPORT namespace butl
// As above but wait up to the specified timeout returning a pair of zeroes
// if none of the descriptors became ready.
//
- // @@ Maybe merge it with the above via a default/optional value?
- //
- // LIBBUTL_SYMEXPORT std::pair<std::size_t, std::size_t>
- // fdselect (fdselect_set&, fdselect_set&, const duration& timeout);
+ template <typename R, typename P>
+ std::pair<std::size_t, std::size_t>
+ fdselect (fdselect_set&, fdselect_set&, const std::chrono::duration<R, P>&);
+
+ template <typename R, typename P>
+ inline std::size_t
+ ifdselect (fdselect_set& ifds, const std::chrono::duration<R, P>& timeout)
+ {
+ fdselect_set ofds;
+ return fdselect (ifds, ofds, timeout).first;
+ }
+
+ template <typename R, typename P>
+ inline std::size_t
+ ofdselect (fdselect_set& ofds, const std::chrono::duration<R, P>& timeout)
+ {
+ fdselect_set ifds;
+ return fdselect (ifds, ofds, timeout).second;
+ }
// POSIX read() function wrapper. In particular, it supports the semantics
// of non-blocking read for pipes on Windows.
@@ -928,6 +983,11 @@ LIBBUTL_MODEXPORT namespace butl
//
LIBBUTL_SYMEXPORT std::streamsize
fdread (int, void*, std::size_t);
+
+ // POSIX write() function wrapper, for uniformity.
+ //
+ LIBBUTL_SYMEXPORT std::streamsize
+ fdwrite (int, const void*, std::size_t);
}
#include <libbutl/fdstream.ixx>
diff --git a/libbutl/fdstream.ixx b/libbutl/fdstream.ixx
index fb57b99..e024af9 100644
--- a/libbutl/fdstream.ixx
+++ b/libbutl/fdstream.ixx
@@ -1,6 +1,8 @@
// file : libbutl/fdstream.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
+#include <cassert>
+
namespace butl
{
// auto_fd
@@ -27,16 +29,16 @@ namespace butl
reset ();
}
- // fdbuf
+ // fdstreambuf
//
- inline fdbuf::
- fdbuf (auto_fd&& fd, std::uint64_t pos)
+ inline fdstreambuf::
+ fdstreambuf (auto_fd&& fd, std::uint64_t pos)
{
if (fd.get () >= 0)
open (std::move (fd), pos);
}
- inline auto_fd fdbuf::
+ inline auto_fd fdstreambuf::
release ()
{
return std::move (fd_);
@@ -165,6 +167,8 @@ namespace butl
inline std::vector<char> ifdstream::
read_binary ()
{
+ // @@ TODO: surely there is a more efficient way! See sha256!
+
std::vector<char> v (std::istreambuf_iterator<char> (*this),
std::istreambuf_iterator<char> ());
return v;
@@ -353,4 +357,23 @@ namespace butl
{
return fdmode (stderr_fd (), m);
}
+
+ // fdselect
+ //
+ // Implement fdselect() function templates in terms of their milliseconds
+ // specialization.
+ //
+ template <>
+ LIBBUTL_SYMEXPORT std::pair<std::size_t, std::size_t>
+ fdselect (fdselect_set&, fdselect_set&, const std::chrono::milliseconds&);
+
+ template <typename R, typename P>
+ inline std::pair<std::size_t, std::size_t>
+ fdselect (fdselect_set& ifds,
+ fdselect_set& ofds,
+ const std::chrono::duration<R, P>& timeout)
+ {
+ using namespace std::chrono;
+ return fdselect (ifds, ofds, duration_cast<milliseconds> (timeout));
+ }
}
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
index 9e8a232..28a0de8 100644
--- a/libbutl/filesystem.cxx
+++ b/libbutl/filesystem.cxx
@@ -1,9 +1,7 @@
// file : libbutl/filesystem.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/filesystem.hxx>
#include <errno.h> // errno, E*
@@ -18,61 +16,34 @@
#else
# include <libbutl/win32-utility.hxx>
-# include <io.h> // _find*(), _unlink(), _chmod()
+# include <io.h> // _unlink(), _chmod()
# include <direct.h> // _mkdir(), _rmdir()
# include <winioctl.h> // FSCTL_SET_REPARSE_POINT
# include <sys/types.h> // _stat
# include <sys/stat.h> // _stat(), S_I*
-# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t
-# include <cstring> // strncmp()
-
# ifdef _MSC_VER // Unlikely to be fixed in newer versions.
# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+# define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
# endif
-#endif
-
-#include <cassert>
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-#include <iterator>
-#include <functional>
+# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t
+# include <cstring> // strncmp()
+# include <type_traits> // is_same
+#endif
+#include <chrono>
#include <vector>
#include <memory> // unique_ptr
+#include <cassert>
#include <algorithm> // find(), copy()
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.filesystem;
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.timestamp;
-import butl.path_pattern;
-#endif
-
-import butl.utility; // throw_generic_error()
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // throw_generic_error()
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
#ifndef _WIN32
# ifndef PATH_MAX
@@ -213,6 +184,19 @@ namespace butl
// static inline constexpr int
// ansec (...) {return 0;}
+ static inline entry_time
+ entry_tm (const struct stat& s) noexcept
+ {
+ auto tm = [] (time_t sec, auto nsec) -> timestamp
+ {
+ return system_clock::from_time_t (sec) +
+ chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
+ };
+
+ return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
+ tm (s.st_atime, ansec<struct stat> (&s, true))};
+ }
+
// Return the modification and access times of a regular file or directory.
//
static entry_time
@@ -230,14 +214,7 @@ namespace butl
if (dir ? !S_ISDIR (s.st_mode) : !S_ISREG (s.st_mode))
return {timestamp_nonexistent, timestamp_nonexistent};
- auto tm = [] (time_t sec, auto nsec) -> timestamp
- {
- return system_clock::from_time_t (sec) +
- chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
- };
-
- return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
- tm (s.st_atime, ansec<struct stat> (&s, true))};
+ return entry_tm (s);
}
// Set the modification and access times for a regular file or directory.
@@ -339,16 +316,15 @@ namespace butl
// Open a filesystem entry for reading and optionally writing its
// meta-information and return the entry handle and meta-information if the
- // path refers to an existing entry and nullhandle otherwise. Follow reparse
- // points by default. Underlying OS errors are reported by throwing
- // std::system_error, unless ignore_error is true in which case nullhandle
- // is returned. In the latter case the error code can be obtained by calling
- // GetLastError().
+ // path refers to an existing entry and nullhandle otherwise. Underlying OS
+ // errors are reported by throwing std::system_error, unless ignore_error is
+ // true in which case nullhandle is returned. In the latter case the error
+ // code can be obtained by calling GetLastError().
//
static inline pair<win32::auto_handle, BY_HANDLE_FILE_INFORMATION>
entry_info_handle (const char* p,
bool write,
- bool fr = true,
+ bool follow_reparse_points,
bool ie = false)
{
// Open the entry for reading/writing its meta-information. Follow reparse
@@ -363,7 +339,7 @@ namespace butl
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS | // Required for a directory.
- (fr ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
+ (follow_reparse_points ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
nullptr));
if (h == nullhandle)
@@ -388,13 +364,15 @@ namespace butl
}
// Return a flag indicating whether the path is to an existing filesystem
- // entry and its meta-information if so. Follow reparse points by default.
+ // entry and its meta-information if so.
//
static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
- path_entry_info (const char* p, bool fr = true, bool ie = false)
+ path_entry_handle_info (const char* p,
+ bool follow_reparse_points,
+ bool ie = false)
{
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p, false /* write */, fr, ie));
+ entry_info_handle (p, false /* write */, follow_reparse_points, ie));
if (hi.first == nullhandle)
return make_pair (false, BY_HANDLE_FILE_INFORMATION ());
@@ -406,9 +384,34 @@ namespace butl
}
static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
- path_entry_info (const path& p, bool fr = true, bool ie = false)
+ path_entry_handle_info (const path& p, bool fr, bool ie = false)
{
- return path_entry_info (p.string ().c_str (), fr, ie);
+ return path_entry_handle_info (p.string ().c_str (), fr, ie);
+ }
+
+ // Return a flag indicating whether the path is to an existing filesystem
+ // entry and its extended attributes if so. Don't follow reparse points.
+ //
+ static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+ path_entry_info (const char* p, bool ie = false)
+ {
+ WIN32_FILE_ATTRIBUTE_DATA r;
+ if (!GetFileAttributesExA (p, GetFileExInfoStandard, &r))
+ {
+ DWORD ec;
+ if (ie || error_file_not_found (ec = GetLastError ()))
+ return make_pair (false, WIN32_FILE_ATTRIBUTE_DATA ());
+
+ throw_system_error (ec);
+ }
+
+ return make_pair (true, r);
+ }
+
+ static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+ path_entry_info (const path& p, bool ie = false)
+ {
+ return path_entry_info (p.string ().c_str (), ie);
}
// Reparse point data.
@@ -644,8 +647,48 @@ namespace butl
return reparse_point_entry (p.string ().c_str (), ie);
}
- pair<bool, entry_stat>
- path_entry (const char* p, bool fl, bool ie)
+ static inline timestamp
+ to_timestamp (const FILETIME& t)
+ {
+ // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+ // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
+ // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+ //
+ uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
+ t.dwLowDateTime);
+
+ nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
+ nsec *= 100; // Now in nanoseconds.
+
+ return timestamp (
+ chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+ }
+
+ static inline FILETIME
+ to_filetime (timestamp t)
+ {
+ // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+ // (1601-01-01T00:00:00Z). To convert "UNIX epoch" (1970-01-01T00:00:00Z)
+ // to it we need to add 11644473600 seconds.
+ //
+ uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
+ t.time_since_epoch ()).count ());
+
+ ticks /= 100; // Now in 100 nanosecond "ticks".
+ ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
+
+ FILETIME r;
+ r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
+ r.dwLowDateTime = ticks & 0xFFFFFFFF;
+ return r;
+ }
+
+ // If the entry type being returned is regular or directory and et is not
+ // NULL, then also save the entry modification and access times into the
+ // referenced variable.
+ //
+ static inline pair<bool, entry_stat>
+ path_entry (const char* p, bool fl, bool ie, entry_time* et)
{
// A path like 'C:', while being a root path in our terminology, is not as
// such for Windows, that maintains current directory for each drive, and
@@ -656,73 +699,105 @@ namespace butl
string d;
if (path::traits_type::root (p))
{
- d = p;
+ d = string (p); // GCC bug #105329.
d += path::traits_type::directory_separator;
p = d.c_str ();
}
// Stat the entry not following reparse points.
//
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
- path_entry_info (p, false /* follow_reparse_points */, ie));
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p, ie));
if (!pi.first)
return make_pair (false, entry_stat {entry_type::unknown, 0});
- if (reparse_point (pi.second.dwFileAttributes))
+ auto entry_info = [et] (const auto& ei)
{
- pair<entry_type, path> rp (reparse_point_entry (p, ie));
+ if (et != nullptr)
+ {
+ et->modification = to_timestamp (ei.ftLastWriteTime);
+ et->access = to_timestamp (ei.ftLastAccessTime);
+ }
+
+ if (directory (ei.dwFileAttributes))
+ return make_pair (true, entry_stat {entry_type::directory, 0});
+ else
+ return make_pair (
+ true,
+ entry_stat {entry_type::regular,
+ ((uint64_t (ei.nFileSizeHigh) << 32) | ei.nFileSizeLow)});
+ };
+
+ if (!reparse_point (pi.second.dwFileAttributes))
+ return entry_info (pi.second);
- if (rp.first == entry_type::symlink)
+ pair<entry_type, path> rp (reparse_point_entry (p, ie));
+
+ if (rp.first == entry_type::symlink)
+ {
+ // If following symlinks is requested, then follow the reparse point and
+ // return its target information. Otherwise, return the symlink entry
+ // type.
+ //
+ if (fl)
{
- // If following symlinks is requested, then follow the reparse point,
- // overwrite its own information with the resolved target information,
- // and fall through. Otherwise, return the symlink entry type.
- //
- if (fl)
- {
- pi = path_entry_info (p, true /* follow_reparse_points */, ie);
+ pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
+ path_entry_handle_info (p, true /* follow_reparse_points */, ie));
- if (!pi.first)
- return make_pair (false, entry_stat {entry_type::unknown, 0});
- }
- else
- return make_pair (true, entry_stat {entry_type::symlink, 0});
+ return pi.first
+ ? entry_info (pi.second)
+ : make_pair (false, entry_stat {entry_type::unknown, 0});
}
- else if (rp.first == entry_type::unknown)
- return make_pair (false, entry_stat {entry_type::unknown, 0});
- else // entry_type::other
- return make_pair (true, entry_stat {entry_type::other, 0});
+ else
+ return make_pair (true, entry_stat {entry_type::symlink, 0});
}
+ else if (rp.first == entry_type::unknown)
+ return make_pair (false, entry_stat {entry_type::unknown, 0});
+ else // entry_type::other
+ return make_pair (true, entry_stat {entry_type::other, 0});
+ }
- if (directory (pi.second.dwFileAttributes))
- return make_pair (true, entry_stat {entry_type::directory, 0});
- else
- return make_pair (
- true,
- entry_stat {entry_type::regular,
- ((uint64_t (pi.second.nFileSizeHigh) << 32) |
- pi.second.nFileSizeLow)});
+ static inline pair<bool, entry_stat>
+ path_entry (const path& p, bool fl, bool ie, entry_time* et)
+ {
+ return path_entry (p.string ().c_str (), fl, ie, et);
+ }
+
+ pair<bool, entry_stat>
+ path_entry (const char* p, bool fl, bool ie)
+ {
+ return path_entry (p, fl, ie, nullptr /* entry_time */);
}
permissions
path_permissions (const path& p)
{
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
+ // Let's optimize for the common case when the entry is not a reparse
+ // point.
+ //
+ auto attr_to_perm = [] (const auto& pi) -> permissions
+ {
+ if (!pi.first)
+ throw_generic_error (ENOENT);
- if (!pi.first)
- throw_generic_error (ENOENT);
+ // On Windows a filesystem entry is always readable. Also there is no
+ // notion of group/other permissions at OS level, so we extrapolate user
+ // permissions to group/other permissions (as the _stat() function
+ // does).
+ //
+ permissions r (permissions::ru | permissions::rg | permissions::ro);
- // On Windows a filesystem entry is always readable. Also there is no
- // notion of group/other permissions at OS level, so we extrapolate user
- // permissions to group/other permissions (as the _stat() function does).
- //
- permissions r (permissions::ru | permissions::rg | permissions::ro);
+ if (!readonly (pi.second.dwFileAttributes))
+ r |= permissions::wu | permissions::wg | permissions::wo;
- if (!readonly (pi.second.dwFileAttributes))
- r |= permissions::wu | permissions::wg | permissions::wo;
+ return r;
+ };
- return r;
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+ return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+ ? attr_to_perm (pi)
+ : attr_to_perm (
+ path_entry_handle_info (p, true /* follow_reparse_points */));
}
void
@@ -748,50 +823,26 @@ namespace butl
static entry_time
entry_tm (const char* p, bool dir)
{
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
-
- // If the entry is of the wrong type, then let's pretend that it doesn't
- // exists.
+ // Let's optimize for the common case when the entry is not a reparse
+ // point.
//
- if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
- return {timestamp_nonexistent, timestamp_nonexistent};
-
- auto tm = [] (const FILETIME& t) -> timestamp
+ auto attr_to_time = [dir] (const auto& pi) -> entry_time
{
- // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
- // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
- // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+ // If the entry is of the wrong type, then let's pretend that it doesn't
+ // exist.
//
- uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
- t.dwLowDateTime);
-
- nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
- nsec *= 100; // Now in nanoseconds.
+ if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
+ return entry_time {timestamp_nonexistent, timestamp_nonexistent};
- return timestamp (
- chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+ return entry_time {to_timestamp (pi.second.ftLastWriteTime),
+ to_timestamp (pi.second.ftLastAccessTime)};
};
- return {tm (pi.second.ftLastWriteTime), tm (pi.second.ftLastAccessTime)};
- }
-
- static inline FILETIME
- to_filetime (timestamp t)
- {
- // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
- // (1601-01-01T00:00:00Z). To convert "UNIX epoch"
- // (1970-01-01T00:00:00Z) to it we need to add 11644473600 seconds.
- //
- uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
- t.time_since_epoch ()).count ());
-
- ticks /= 100; // Now in 100 nanosecond "ticks".
- ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
-
- FILETIME r;
- r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
- r.dwLowDateTime = ticks & 0xFFFFFFFF;
- return r;
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+ return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+ ? attr_to_time (pi)
+ : attr_to_time (
+ path_entry_handle_info (p, true /* follow_reparse_points */));
}
// Set the modification and access times for a regular file or directory.
@@ -802,7 +853,9 @@ namespace butl
// See also touch_file() below.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p, true /* write */));
+ entry_info_handle (p,
+ true /* write */,
+ true /* follow_reparse_points */));
// If the entry is of the wrong type, then let's pretend that it doesn't
// exist.
@@ -887,7 +940,9 @@ namespace butl
// implicitly.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p.string ().c_str (), true /* write */));
+ entry_info_handle (p.string ().c_str (),
+ true /* write */,
+ true /* follow_reparse_points */));
if (hi.first != nullhandle)
{
@@ -1036,7 +1091,7 @@ namespace butl
//
try
{
- for (const dir_entry& de: dir_iterator (p, false /* ignore_dangling */))
+ for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow))
{
path ep (p / de.path ()); //@@ Would be good to reuse the buffer.
@@ -1063,8 +1118,8 @@ namespace butl
}
}
- rmfile_status
- try_rmfile (const path& p, bool ignore_error)
+ optional<rmfile_status>
+ try_rmfile_maybe_ignore_error (const path& p, bool ignore_error)
{
rmfile_status r (rmfile_status::success);
const char* f (p.string ().c_str ());
@@ -1087,12 +1142,12 @@ namespace butl
// failure (see mventry() for details). If that's the case, we will keep
// trying to move the file for two seconds.
//
- for (size_t i (0); i < 21; ++i)
+ for (size_t i (0); i < 41; ++i)
{
- // Sleep 100 milliseconds before the removal retry.
+ // Sleep 50 milliseconds before the removal retry.
//
if (i != 0)
- Sleep (100);
+ Sleep (50);
ur = _unlink (f);
@@ -1143,6 +1198,8 @@ namespace butl
r = rmfile_status::not_exist;
else if (!ignore_error)
throw_generic_error (errno);
+ else
+ return nullopt;
}
return r;
@@ -1447,7 +1504,7 @@ namespace butl
path r (p);
bool exists;
- for (size_t i (0); true; ++i)
+ for (size_t i (0);; ++i)
{
pair<bool, entry_stat> pe (path_entry (r));
@@ -1596,7 +1653,7 @@ namespace butl
rm = auto_rmfile (to);
- // Throws ios::failure on fdbuf read/write failures.
+ // Throws ios::failure on fdstreambuf read/write failures.
//
// Note that the eof check is important: if the stream is at eof (empty
// file) then this write will fail.
@@ -1638,9 +1695,12 @@ namespace butl
}
void
- cpfile (const path& from, const path& to, cpflags fl)
+ cpfile (const path& from,
+ const path& to,
+ cpflags fl,
+ optional<permissions> cperm)
{
- permissions perm (path_permissions (from));
+ permissions perm (cperm ? *cperm : path_permissions (from));
auto_rmfile rm;
cpfile<is_base_of<system_error, ios_base::failure>::value> (
@@ -1732,12 +1792,12 @@ namespace butl
// fdopen().
//
DWORD ec;
- for (size_t i (0); i < 21; ++i)
+ for (size_t i (0); i < 41; ++i)
{
- // Sleep 100 milliseconds before the move retry.
+ // Sleep 50 milliseconds before the move retry.
//
if (i != 0)
- Sleep (100);
+ Sleep (50);
if (MoveFileExA (f, t, mfl))
return;
@@ -1839,7 +1899,7 @@ namespace butl
h_ = x.h_;
x.h_ = nullptr;
- ignore_dangling_ = x.ignore_dangling_;
+ mode_ = x.mode_;
}
return *this;
}
@@ -1860,6 +1920,11 @@ namespace butl
entry_type dir_entry::
type (bool follow_symlinks) const
{
+ // Note that this function can only be used for resolving an entry type
+ // lazily and thus can't be used with the detect_dangling dir_iterator
+ // mode (see dir_iterator::next () implementation for details). Thus, we
+ // always throw on the stat()/lstat() failure.
+ //
path_type p (b_ / p_);
struct stat s;
if ((follow_symlinks
@@ -1867,7 +1932,18 @@ namespace butl
: lstat (p.string ().c_str (), &s)) != 0)
throw_generic_error (errno);
- return butl::type (s);
+ entry_type r (butl::type (s));
+
+ // While at it, also save the entry modification and access times.
+ //
+ if (r != entry_type::symlink)
+ {
+ entry_time t (entry_tm (s));
+ mtime_ = t.modification;
+ atime_ = t.access;
+ }
+
+ return r;
}
// dir_iterator
@@ -1878,8 +1954,8 @@ namespace butl
};
dir_iterator::
- dir_iterator (const dir_path& d, bool ignore_dangling)
- : ignore_dangling_ (ignore_dangling)
+ dir_iterator (const dir_path& d, mode m)
+ : mode_ (m)
{
unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ()));
h_ = h.get ();
@@ -1895,7 +1971,7 @@ namespace butl
}
template <typename D>
- static inline /*constexpr*/ entry_type
+ static inline /*constexpr*/ optional<entry_type>
d_type (const D* d, decltype(d->d_type)*)
{
switch (d->d_type)
@@ -1923,13 +1999,13 @@ namespace butl
#endif
return entry_type::other;
- default: return entry_type::unknown;
+ default: return nullopt;
}
}
template <typename D>
- static inline constexpr entry_type
- d_type (...) {return entry_type::unknown;}
+ static inline constexpr optional<entry_type>
+ d_type (...) {return nullopt;}
void dir_iterator::
next ()
@@ -1951,25 +2027,43 @@ namespace butl
e_.p_ = move (p);
e_.t_ = d_type<struct dirent> (de, nullptr);
- e_.lt_ = entry_type::unknown;
+ e_.lt_ = nullopt;
+
+ e_.mtime_ = timestamp_unknown;
+ e_.atime_ = timestamp_unknown;
// If requested, we ignore dangling symlinks, skipping ones with
- // non-existing or inaccessible targets.
+ // non-existing or inaccessible targets (ignore_dangling mode), or set
+ // the entry_type::unknown type for them (detect_dangling mode).
//
- if (ignore_dangling_)
+ if (mode_ != no_follow)
{
- // Note that ltype () can potentially lstat() (see d_type() for
+ bool dd (mode_ == detect_dangling);
+
+ // Note that ltype () can potentially lstat() (see type() for
// details) and so throw. We, however, need to skip the entry if it
// is already removed (due to a race) and throw on any other error.
//
path fp (e_.base () / e_.path ());
const char* p (fp.string ().c_str ());
- if (e_.t_ == entry_type::unknown)
+ if (!e_.t_)
{
struct stat s;
if (lstat (p, &s) != 0)
{
+ // Given that we have already enumerated the filesystem entry,
+ // these error codes can only mean that the entry doesn't exist
+ // anymore and so we always skip it.
+ //
+ // If errno is EACCES, then the permission to search a directory
+ // we currently iterate over has been revoked. Throwing in this
+ // case sounds like the best choice.
+ //
+ // Note that according to POSIX the filesystem entry we call
+ // lstat() on doesn't require any specific permissions to be
+ // granted.
+ //
if (errno == ENOENT || errno == ENOTDIR)
continue;
@@ -1977,21 +2071,53 @@ namespace butl
}
e_.t_ = type (s);
+
+ if (*e_.t_ != entry_type::symlink)
+ {
+ entry_time t (entry_tm (s));
+ e_.mtime_ = t.modification;
+ e_.atime_ = t.access;
+ }
}
- if (e_.t_ == entry_type::symlink)
+ // The entry type should be present and may not be
+ // entry_type::unknown.
+ //
+ //assert (e_.t_ && *e_.t_ != entry_type::unknown);
+
+ // Check if the symlink target exists and is accessible and set the
+ // target type.
+ //
+ if (*e_.t_ == entry_type::symlink)
{
struct stat s;
if (stat (p, &s) != 0)
{
if (errno == ENOENT || errno == ENOTDIR || errno == EACCES)
- continue;
-
- throw_generic_error (errno);
+ {
+ if (dd)
+ e_.lt_ = entry_type::unknown;
+ else
+ continue;
+ }
+ else
+ throw_generic_error (errno);
}
+ else
+ {
+ e_.lt_ = type (s);
- e_.lt_ = type (s); // While at it, set the target type.
+ entry_time t (entry_tm (s));
+ e_.mtime_ = t.modification;
+ e_.atime_ = t.access;
+ }
}
+
+ // The symlink target type should be present and in the
+ // ignore_dangling mode it may not be entry_type::unknown.
+ //
+ //assert (*e_.t_ != entry_type::symlink ||
+ // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
}
}
else if (errno == 0)
@@ -2012,11 +2138,49 @@ namespace butl
// dir_entry
//
+ entry_type dir_entry::
+ type (bool follow_symlinks) const
+ {
+ // Note that this function can only be used for resolving an entry type
+ // lazily and thus can't be used with the detect_dangling dir_iterator
+ // mode (see dir_iterator::next () implementation for details). Thus, we
+ // always throw if the entry info can't be retrieved.
+ //
+ // While at it, also save the entry modification and access times.
+ //
+ path_type p (base () / path ());
+ entry_time et;
+ pair<bool, entry_stat> e (
+ path_entry (p, follow_symlinks, false /* ignore_error */, &et));
+
+ if (!e.first)
+ throw_generic_error (ENOENT);
+
+ if (e.second.type == entry_type::regular ||
+ e.second.type == entry_type::directory)
+ {
+ mtime_ = et.modification;
+ atime_ = et.access;
+ }
+
+ return e.second.type;
+ }
+
+ // dir_iterator
+ //
+ static_assert(is_same<HANDLE, void*>::value, "HANDLE is not void*");
+
+ static inline HANDLE
+ to_handle (intptr_t h)
+ {
+ return reinterpret_cast<HANDLE> (h);
+ }
+
dir_iterator::
~dir_iterator ()
{
if (h_ != -1)
- _findclose (h_); // Ignore any errors.
+ FindClose (to_handle (h_)); // Ignore any errors.
}
dir_iterator& dir_iterator::
@@ -2026,56 +2190,32 @@ namespace butl
{
e_ = move (x.e_);
- if (h_ != -1 && _findclose (h_) == -1)
- throw_generic_error (errno);
+ if (h_ != -1 && !FindClose (to_handle (h_)))
+ throw_system_error (GetLastError ());
h_ = x.h_;
x.h_ = -1;
- ignore_dangling_ = x.ignore_dangling_;
+ mode_ = x.mode_;
}
return *this;
}
- entry_type dir_entry::
- type (bool follow_symlinks) const
- {
- path_type p (base () / path ());
- pair<bool, entry_stat> e (path_entry (p, follow_symlinks));
-
- if (!e.first)
- throw_generic_error (ENOENT);
-
- return e.second.type;
- }
-
- // dir_iterator
- //
- struct auto_dir
+ dir_iterator::
+ dir_iterator (const dir_path& d, mode m)
+ : mode_ (m)
{
- explicit
- auto_dir (intptr_t& h): h_ (&h) {}
-
- auto_dir (const auto_dir&) = delete;
- auto_dir& operator= (const auto_dir&) = delete;
-
- ~auto_dir ()
+ struct deleter
{
- if (h_ != nullptr && *h_ != -1)
- _findclose (*h_);
- }
-
- void release () {h_ = nullptr;}
+ void operator() (intptr_t* p) const
+ {
+ if (p != nullptr && *p != -1)
+ FindClose (to_handle (*p));
+ }
+ };
- private:
- intptr_t* h_;
- };
+ unique_ptr<intptr_t, deleter> h (&h_);
- dir_iterator::
- dir_iterator (const dir_path& d, bool ignore_dangling)
- : ignore_dangling_ (ignore_dangling)
- {
- auto_dir h (h_);
e_.b_ = d; // Used by next().
next ();
@@ -2088,31 +2228,37 @@ namespace butl
for (;;)
{
bool r;
- _finddata_t fi;
+ WIN32_FIND_DATA fi;
if (h_ == -1)
{
// The call is made from the constructor. Any other call with h_ == -1
// is illegal.
//
-
- // Check to distinguish non-existent vs empty directories.
+ // Note that we used to check for the directory existence before
+ // iterating over it. However, let's not pessimize things and only
+ // check for the directory existence if FindFirstFileExA() fails.
//
- if (!dir_exists (e_.base ()))
- throw_generic_error (ENOENT);
- h_ = _findfirst ((e_.base () / path ("*")).string ().c_str (), &fi);
- r = h_ != -1;
+ h_ = reinterpret_cast<intptr_t> (
+ FindFirstFileExA ((e_.base () / path ("*")).string ().c_str (),
+ FindExInfoBasic,
+ &fi,
+ FindExSearchNameMatch,
+ NULL,
+ 0));
+
+ r = (h_ != -1);
}
else
- r = _findnext (h_, &fi) == 0;
+ r = FindNextFileA (to_handle (h_), &fi);
if (r)
{
// We can accept some overhead for '.' and '..' (relying on short
// string optimization) in favor of a more compact code.
//
- path p (fi.name);
+ path p (fi.cFileName);
// Skip '.' and '..'.
//
@@ -2121,26 +2267,47 @@ namespace butl
e_.p_ = move (p);
- // Note that the entry type detection always requires to additionally
- // query the entry information. Thus, we evaluate its type lazily.
+ DWORD a (fi.dwFileAttributes);
+ bool rp (reparse_point (a));
+
+ // Evaluate the entry type lazily if this is a reparse point since
+ // determining it requires additionally querying the entry
+ // information (see reparse_point_entry() for details).
//
- e_.t_ = entry_type::unknown;
+ e_.t_ = rp ? nullopt :
+ directory (a) ? optional<entry_type> (entry_type::directory) :
+ optional<entry_type> (entry_type::regular) ;
+
+ e_.lt_ = nullopt;
- e_.lt_ = entry_type::unknown;
+ e_.mtime_ = rp ? timestamp_unknown : to_timestamp (fi.ftLastWriteTime);
+
+ // Note that according to MSDN for the FindFirstFile[Ex]() function
+ // "the NTFS file system delays updates to the last access time for a
+ // file by up to 1 hour after the last access" and "on the FAT file
+ // system access time has a resolution of 1 day".
+ //
+ e_.atime_ = timestamp_unknown;
// If requested, we ignore dangling symlinks and junctions, skipping
- // ones with non-existing or inaccessible targets.
+ // ones with non-existing or inaccessible targets (ignore_dangling
+ // mode), or set the entry_type::unknown type for them
+ // (detect_dangling mode).
//
- if (ignore_dangling_)
+ if (rp && mode_ != no_follow)
{
+ bool dd (mode_ == detect_dangling);
+
// Check the last error code throwing for codes other than "path not
- // found" and "access denied".
+ // found" and "access denied" and returning this error code
+ // otherwise.
//
auto verify_error = [] ()
{
DWORD ec (GetLastError ());
if (!error_file_not_found (ec) && ec != ERROR_ACCESS_DENIED)
throw_system_error (ec);
+ return ec;
};
// Note that ltype() queries the entry information due to the type
@@ -2151,48 +2318,50 @@ namespace butl
path fp (e_.base () / e_.path ());
const char* p (fp.string ().c_str ());
- DWORD a (GetFileAttributesA (p));
- if (a == INVALID_FILE_ATTRIBUTES)
- {
- // Note that sometimes trying to obtain attributes for a
- // filesystem entry that was potentially removed ends up with
- // ERROR_ACCESS_DENIED. One can argue that there can be another
- // reason for this error (antivirus, indexer, etc). However, given
- // that the entry is seen by a _find*() function and normally you
- // can retrieve attributes for a read-only entry and for an entry
- // opened in the non-shared mode (see the CreateFile() function
- // documentation for details) the only meaningful explanation for
- // ERROR_ACCESS_DENIED is that the entry is being removed. Also
- // the DeleteFile() documentation mentions such a possibility.
- //
- verify_error ();
- continue;
- }
+ pair<entry_type, path> rpe (
+ reparse_point_entry (p, true /* ignore_error */));
- if (reparse_point (a))
+ if (rpe.first == entry_type::unknown)
{
- pair<entry_type, path> rp (
- reparse_point_entry (p, true /* ignore_error */));
+ DWORD ec (verify_error ());
- if (rp.first == entry_type::unknown)
- {
- verify_error ();
+ // Silently skip the entry if it is not found (being already
+ // deleted) or we are in the ignore dangling mode. Otherwise, set
+ // the entry type to unknown.
+ //
+ // Note that sometimes trying to obtain information for a filesystem
+ // entry being removed ends up with ERROR_ACCESS_DENIED (see
+ // DeleteFile() and CreateFile() for details). Probably getting
+ // this error code while trying to obtain the reparse point
+ // information (involves calling CreateFile(FILE_READ_EA) and
+ // DeviceIoControl()) can also be interpreted differently. We,
+ // however, always treat it as "access denied" in the detect
+ // dangling mode for good measure. Let's see if that won't be too
+ // noisy.
+ //
+ if (ec != ERROR_ACCESS_DENIED || !dd)
continue;
- }
- e_.t_ = rp.first;
+ // Fall through.
}
- else
- e_.t_ = directory (a)
- ? entry_type::directory
- : entry_type::regular;
- if (e_.t_ == entry_type::symlink)
+ e_.t_ = rpe.first;
+
+ // In this mode the entry type should be present and in the
+ // ignore_dangling mode it cannot be entry_type::unknown.
+ //
+ //assert (e_.t_ && (dd || *e_.t_ != entry_type::unknown));
+
+ // Check if the symlink target exists and is accessible and set the
+ // target type.
+ //
+ if (*e_.t_ == entry_type::symlink)
{
// Query the target info.
//
// Note that we use entry_info_handle() rather than
- // path_entry_info() to be able to verify an error on failure.
+ // path_entry_handle_info() to be able to verify an error on
+ // failure.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> ti (
entry_info_handle (p,
@@ -2203,31 +2372,59 @@ namespace butl
if (ti.first == nullhandle)
{
verify_error ();
- continue;
+
+ if (dd)
+ e_.lt_ = entry_type::unknown;
+ else
+ continue;
}
+ else
+ {
+ ti.first.close (); // Checks for error.
- ti.first.close (); // Checks for error.
+ e_.lt_ = directory (ti.second.dwFileAttributes)
+ ? entry_type::directory
+ : entry_type::regular;
- // While at it, set the target type.
- //
- e_.lt_ = directory (ti.second.dwFileAttributes)
- ? entry_type::directory
- : entry_type::regular;
+ e_.mtime_ = to_timestamp (ti.second.ftLastWriteTime);
+ e_.atime_ = to_timestamp (ti.second.ftLastAccessTime);
+ }
}
+
+ // In this mode the symlink target type should be present and in the
+ // ignore_dangling mode it cannot be entry_type::unknown.
+ //
+ //assert (*e_.t_ != entry_type::symlink ||
+ // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
}
}
- else if (errno == ENOENT)
+ else
{
- // End of stream.
+ DWORD ec (GetLastError ());
+ bool first (h_ == -1);
+
+ // Check to distinguish non-existent vs empty directories.
+ //
+ // Note that dir_exists() handles not only the "filesystem entry does
+ // not exist" case but also the case when the entry exists but is not
+ // a directory.
//
- if (h_ != -1)
+ if (first && !dir_exists (e_.base ()))
+ throw_generic_error (ENOENT);
+
+ if (ec == (first ? ERROR_FILE_NOT_FOUND : ERROR_NO_MORE_FILES))
{
- _findclose (h_);
- h_ = -1;
+ // End of stream.
+ //
+ if (h_ != -1)
+ {
+ FindClose (to_handle (h_));
+ h_ = -1;
+ }
}
+ else
+ throw_system_error (ec);
}
- else
- throw_generic_error (errno);
break;
}
@@ -2235,14 +2432,27 @@ namespace butl
#endif
// Search for paths matching the pattern and call the specified function for
- // each matching path. Return false if the underlying func() call returns
- // false. Otherwise the function conforms to the path_search() description.
+ // each matching path. Return false if the underlying func() or
+ // dangling_func() call returns false. Otherwise the function conforms to
+ // the path_search() description.
//
// Note that the access to the traversed directory tree (real or virtual) is
// performed through the provided filesystem object.
//
static const string any_dir ("*/");
+ // Filesystem traversal callbacks.
+ //
+ // Called before entering a directory for the recursive traversal. If
+ // returns false, then the directory is not entered.
+ //
+ using preopen = function<bool (const dir_path&)>;
+
+ // Called before skipping a dangling link. If returns false, then the
+ // traversal is stopped.
+ //
+ using preskip = function<bool (const dir_entry&)>;
+
template <typename FS>
static bool
search (
@@ -2250,11 +2460,14 @@ namespace butl
dir_path pattern_dir,
path_match_flags fl,
const function<bool (path&&, const string& pattern, bool interm)>& func,
+ const function<bool (const dir_entry&)>& dangling_func,
FS& filesystem)
{
bool follow_symlinks ((fl & path_match_flags::follow_symlinks) !=
path_match_flags::none);
+ assert (follow_symlinks || dangling_func == nullptr);
+
// Fast-forward the leftmost pattern non-wildcard components. So, for
// example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
//
@@ -2301,17 +2514,47 @@ namespace butl
//
bool simple (pattern.simple ());
- // Note that we rely on "small function object" optimization here.
+ // If symlinks need to be followed, then pass the preskip callback for the
+ // filesystem iterator.
+ //
+ bool fs (follow_symlinks || !simple);
+ preskip ps;
+ bool dangling_stop (false);
+
+ if (fs)
+ {
+ if (dangling_func != nullptr)
+ {
+ // Note that we rely on the "small function object" optimization here.
+ //
+ ps = [&dangling_func, &dangling_stop] (const dir_entry& de) -> bool
+ {
+ dangling_stop = !dangling_func (de);
+ return !dangling_stop;
+ };
+ }
+ else
+ {
+ ps = [] (const dir_entry& de) -> bool
+ {
+ throw_generic_error (
+ de.ltype () == entry_type::symlink ? ENOENT : EACCES);
+ };
+ }
+ }
+
+ // Note that we rely on the "small function object" optimization here.
//
typename FS::iterator_type i (filesystem.iterator (
pattern_dir,
path_pattern_recursive (pcr),
path_pattern_self_matching (pcr),
- follow_symlinks || !simple,
+ fs,
[&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
{
return func (pattern_dir / p, any_dir, true);
- }));
+ },
+ move (ps)));
// Canonicalize the pattern component collapsing consecutive stars (used to
// express that it is recursive) into a single one.
@@ -2357,7 +2600,7 @@ namespace butl
// represented by the iterator as an empty path, and so we need to
// compute it (the leaf would actually be enough) for matching. This
// leaf can be acquired from the pattern_dir (if not empty) or
- // start_dir. We don't expect the start_dir to be empty, as the
+ // start_dir. We don't expect the start_dir to be empty, as the
// filesystem object must replace an empty start directory with the
// current one. This is the case when we search in the current directory
// (start_dir is empty) with a pattern that starts with a *** wildcard
@@ -2396,10 +2639,14 @@ namespace butl
pattern_dir / path_cast<dir_path> (move (p)),
fl,
func,
+ dangling_func,
filesystem))
return false;
}
+ if (dangling_stop)
+ return false;
+
// If requested, also search with the absent-matching pattern path
// component omitted, unless this is the only pattern component.
//
@@ -2407,8 +2654,15 @@ namespace butl
pc.to_directory () &&
(!pattern_dir.empty () || !simple) &&
pc.string ().find_first_not_of ('*') == string::npos &&
- !search (pattern.leaf (pc), pattern_dir, fl, func, filesystem))
+ !search (pattern.leaf (pc),
+ pattern_dir,
+ fl,
+ func,
+ dangling_func,
+ filesystem))
+ {
return false;
+ }
return true;
}
@@ -2417,8 +2671,6 @@ namespace butl
//
static const dir_path empty_dir;
- using preopen = function<bool (const dir_path&)>;
-
// Base for filesystem (see above) implementations.
//
// Don't copy start directory. It is expected to exist till the end of the
@@ -2468,13 +2720,17 @@ namespace butl
bool recursive,
bool self,
bool fs,
- preopen po)
+ preopen po,
+ preskip ps)
: start_ (move (p)),
recursive_ (recursive),
self_ (self),
follow_symlinks_ (fs),
- preopen_ (move (po))
+ preopen_ (move (po)),
+ preskip_ (move (ps))
{
+ assert (fs || ps == nullptr);
+
open (dir_path (), self_);
}
@@ -2484,12 +2740,16 @@ namespace butl
recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete;
recursive_dir_iterator (recursive_dir_iterator&&) = default;
- // Return false if no more entries left. Otherwise save the next entry path
- // and return true. The path is relative to the directory being
+ // Return false if no more entries left. Otherwise save the next entry
+ // path and return true. The path is relative to the directory being
// traversed and contains a trailing separator for sub-directories. Throw
// std::system_error in case of a failure (insufficient permissions,
// dangling symlink encountered, etc).
//
+ // If symlinks need to be followed, then skip inaccessible/dangling
+ // entries or, if the preskip callback is specified and returns false for
+ // such an entry, stop the entire traversal.
+ //
bool
next (path& p)
{
@@ -2498,44 +2758,64 @@ namespace butl
auto& i (iters_.back ());
- // If we got to the end of directory sub-entries, then go one level up
- // and return this directory path.
- //
- if (i.first == dir_iterator ())
+ for (;;) // Skip inaccessible/dangling entries.
{
- path d (move (i.second));
- iters_.pop_back ();
+ // If we got to the end of directory sub-entries, then go one level up
+ // and return this directory path.
+ //
+ if (i.first == dir_iterator ())
+ {
+ path d (move (i.second));
+ iters_.pop_back ();
+
+ // Return the path unless it is the last one (the directory we
+ // started to iterate from) and the self flag is not set.
+ //
+ if (iters_.empty () && !self_)
+ return false;
+
+ p = move (d);
+ return true;
+ }
+
+ const dir_entry& de (*i.first);
- // Return the path unless it is the last one (the directory we started
- // to iterate from) and the self flag is not set.
+ // Append separator if a directory. Note that dir_entry::type() can
+ // throw.
//
- if (iters_.empty () && !self_)
- return false;
+ entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
- p = move (d);
- return true;
- }
+ // If the entry turned out to be inaccessible/dangling, then skip it
+ // if the preskip function is not specified or returns true and stop
+ // the entire traversal otherwise.
+ //
+ if (et == entry_type::unknown)
+ {
+ if (preskip_ != nullptr && !preskip_ (de))
+ {
+ iters_.clear ();
+ return false;
+ }
- const dir_entry& de (*i.first);
+ ++i.first;
+ continue;
+ }
- // Append separator if a directory. Note that dir_entry::type() can
- // throw.
- //
- entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
- path pe (et == entry_type::directory
- ? path_cast<dir_path> (i.second / de.path ())
- : i.second / de.path ());
+ path pe (et == entry_type::directory
+ ? path_cast<dir_path> (i.second / de.path ())
+ : i.second / de.path ());
- ++i.first;
+ ++i.first;
- if (recursive_ && pe.to_directory ())
- {
- open (path_cast<dir_path> (move (pe)), true);
- return next (p);
- }
+ if (recursive_ && pe.to_directory ())
+ {
+ open (path_cast<dir_path> (move (pe)), true);
+ return next (p);
+ }
- p = move (pe);
- return true;
+ p = move (pe);
+ return true;
+ }
}
private:
@@ -2557,10 +2837,15 @@ namespace butl
{
dir_path d (start_ / p);
- // If we follow symlinks, then we ignore the dangling ones.
+ // If we follow symlinks, then we may need to skip the dangling
+ // ones. Note, however, that we will be skipping them not at the
+ // dir_iterator level but ourselves, after calling the preskip
+ // callback function (see next() for details).
//
i = dir_iterator (!d.empty () ? d : dir_path ("."),
- follow_symlinks_);
+ follow_symlinks_
+ ? dir_iterator::detect_dangling
+ : dir_iterator::no_follow);
}
iters_.emplace_back (move (i), move (p));
@@ -2590,6 +2875,7 @@ namespace butl
bool self_;
bool follow_symlinks_;
preopen preopen_;
+ preskip preskip_;
small_vector<pair<dir_iterator, dir_path>, 1> iters_;
};
@@ -2613,13 +2899,15 @@ namespace butl
bool recursive,
bool self,
bool follow_symlinks,
- preopen po) const
+ preopen po,
+ preskip ps) const
{
return iterator_type (start_ / p,
recursive,
self,
follow_symlinks,
- move (po));
+ move (po),
+ move (ps));
}
};
@@ -2628,10 +2916,11 @@ namespace butl
const path& pattern,
const function<bool (path&&, const string& pattern, bool interm)>& func,
const dir_path& start,
- path_match_flags flags)
+ path_match_flags flags,
+ const function<bool (const dir_entry&)>& dangling_func)
{
real_filesystem fs (pattern.relative () ? start : empty_dir);
- search (pattern, dir_path (), flags, func, fs);
+ search (pattern, dir_path (), flags, func, dangling_func, fs);
}
// Search path in the directory tree represented by a path.
@@ -2789,7 +3078,8 @@ namespace butl
bool recursive,
bool self,
bool /*follow_symlinks*/,
- preopen po)
+ preopen po,
+ preskip)
{
// If path and sub-path are non-empty, and both are absolute or relative,
// then no extra effort is required (prior to checking if one is a
@@ -2848,6 +3138,6 @@ namespace butl
path_match_flags flags)
{
path_filesystem fs (start, entry);
- search (pattern, dir_path (), flags, func, fs);
+ search (pattern, dir_path (), flags, func, nullptr /* dangle_func */, fs);
}
}
diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.hxx
index 935fc3f..0f5fb0b 100644
--- a/libbutl/filesystem.mxx
+++ b/libbutl/filesystem.hxx
@@ -1,9 +1,7 @@
-// file : libbutl/filesystem.mxx -*- C++ -*-
+// file : libbutl/filesystem.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
#include <errno.h> // E*
@@ -22,7 +20,6 @@
using mode_t = int;
#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstddef> // ptrdiff_t
#include <cstdint> // uint16_t, etc
@@ -30,37 +27,45 @@
#include <iterator> // input_iterator_tag
#include <functional>
-#include <chrono> //@@ MOD needed by timestamp module (no re-export).
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/timestamp.hxx>
+#include <libbutl/path-pattern.hxx> // path_match_flags
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.filesystem;
+#include <libbutl/export.hxx>
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
+namespace butl
+{
+ // Path permissions.
+ //
+ enum class permissions: std::uint16_t
+ {
+ // Note: matching POSIX values.
+ //
+ xo = 0001,
+ wo = 0002,
+ ro = 0004,
-import butl.path;
-import butl.timestamp;
-import butl.path_pattern; // path_match_flags
+ xg = 0010,
+ wg = 0020,
+ rg = 0040,
-import butl.utility; // operator<<(ostream,exception), throw_generic_error()
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/timestamp.mxx>
-#include <libbutl/path-pattern.mxx>
+ xu = 0100,
+ wu = 0200,
+ ru = 0400,
-#include <libbutl/utility.mxx>
-#endif
+ none = 0
+ };
-#include <libbutl/export.hxx>
+ inline permissions operator& (permissions, permissions);
+ inline permissions operator| (permissions, permissions);
+ inline permissions operator&= (permissions&, permissions);
+ inline permissions operator|= (permissions&, permissions);
-LIBBUTL_MODEXPORT namespace butl
-{
// Return true if the path is to an existing regular file. Note that by
// default this function follows symlinks. Underlying OS errors are reported
- // by throwing std::system_error, unless ignore_error is true.
+ // by throwing std::system_error, unless ignore_error is true (in which case
+ // erroneous entries are treated as non-existent).
//
LIBBUTL_SYMEXPORT bool
file_exists (const char*,
@@ -73,7 +78,8 @@ LIBBUTL_MODEXPORT namespace butl
// Return true if the path is to an existing directory. Note that this
// function follows symlinks. Underlying OS errors are reported by throwing
- // std::system_error, unless ignore_error is true.
+ // std::system_error, unless ignore_error is true (in which case erroneous
+ // entries are treated as non-existent).
//
LIBBUTL_SYMEXPORT bool
dir_exists (const char*, bool ignore_error = false);
@@ -84,7 +90,8 @@ LIBBUTL_MODEXPORT namespace butl
// Return true if the path is to an existing file system entry. Note that by
// default this function doesn't follow symlinks. Underlying OS errors are
- // reported by throwing std::system_error, unless ignore_error is true.
+ // reported by throwing std::system_error, unless ignore_error is true (in
+ // which case erroneous entries are treated as non-existent).
//
LIBBUTL_SYMEXPORT bool
entry_exists (const char*,
@@ -117,7 +124,10 @@ LIBBUTL_MODEXPORT namespace butl
// Return a flag indicating if the path is to an existing filesystem entry
// and its info if so. Note that by default this function doesn't follow
// symlinks. Underlying OS errors are reported by throwing
- // std::system_error, unless ignore_error is true.
+ // std::system_error, unless ignore_error is true (in which case erroneous
+ // entries are treated as non-existent).
+ //
+ // See also fdstat() in fdstream.
//
LIBBUTL_SYMEXPORT std::pair<bool, entry_stat>
path_entry (const char*,
@@ -206,9 +216,12 @@ LIBBUTL_MODEXPORT namespace butl
// is not atomic. It is also not atomic for the directory-type reparse point
// removal.
//
- LIBBUTL_SYMEXPORT rmfile_status
+ rmfile_status
try_rmfile (const path&, bool ignore_error = false);
+ optional<rmfile_status>
+ try_rmfile_ignore_error (const path&);
+
// Automatically try to remove a non-empty path on destruction unless
// cancelled. Since the non-cancelled destruction will normally happen as a
// result of an exception, the failure to remove the path is silently
@@ -228,8 +241,8 @@ LIBBUTL_MODEXPORT namespace butl
// Movable-only type. Move-assignment cancels the lhs object.
//
- auto_rm (auto_rm&&);
- auto_rm& operator= (auto_rm&&);
+ auto_rm (auto_rm&&) noexcept;
+ auto_rm& operator= (auto_rm&&) noexcept;
auto_rm (const auto_rm&) = delete;
auto_rm& operator= (const auto_rm&) = delete;
@@ -394,11 +407,13 @@ LIBBUTL_MODEXPORT namespace butl
inline cpflags operator&= (cpflags&, cpflags);
inline cpflags operator|= (cpflags&, cpflags);
- // Copy a regular file, including its permissions, and optionally timestamps.
- // Throw std::system_error on failure. Fail if the destination file exists
- // and the overwrite_content flag is not set. Leave permissions of an
- // existing destination file intact unless the overwrite_permissions flag is
- // set. Delete incomplete copies before throwing.
+ // Copy a regular file, including its permissions (unless custom permissions
+ // are specified), and optionally timestamps. Throw std::system_error on
+ // failure. Fail if the destination file exists and the overwrite_content
+ // flag is not set. Leave permissions of an existing destination file intact
+ // (including if custom permissions are specified) unless the
+ // overwrite_permissions flag is set. Delete incomplete copies before
+ // throwing.
//
// Note that in case of overwriting, the existing destination file gets
// truncated (not deleted) prior to being overwritten. As a side-effect,
@@ -410,7 +425,10 @@ LIBBUTL_MODEXPORT namespace butl
// fail.
//
LIBBUTL_SYMEXPORT void
- cpfile (const path& from, const path& to, cpflags = cpflags::none);
+ cpfile (const path& from,
+ const path& to,
+ cpflags = cpflags::none,
+ optional<permissions> perm = nullopt);
// Copy a regular file into (inside) an existing directory.
//
@@ -618,32 +636,6 @@ LIBBUTL_MODEXPORT namespace butl
return dir_atime (p.string ().c_str (), t);
}
- // Path permissions.
- //
- enum class permissions: std::uint16_t
- {
- // Note: matching POSIX values.
- //
- xo = 0001,
- wo = 0002,
- ro = 0004,
-
- xg = 0010,
- wg = 0020,
- rg = 0040,
-
- xu = 0100,
- wu = 0200,
- ru = 0400,
-
- none = 0
- };
-
- inline permissions operator& (permissions, permissions);
- inline permissions operator| (permissions, permissions);
- inline permissions operator&= (permissions&, permissions);
- inline permissions operator|= (permissions&, permissions);
-
// Get path permissions. Throw std::system_error on failure. Note that this
// function resolves symlinks.
//
@@ -665,12 +657,45 @@ LIBBUTL_MODEXPORT namespace butl
// Symlink target type in case of the symlink, ltype() otherwise.
//
+ // If type() returns entry_type::unknown then this entry is inaccessible
+ // (ltype() also returns entry_type::unknown) or is a dangling symlink
+ // (ltype() returns entry_type::symlink). Used with the detect_dangling
+ // dir_iterator mode. Note that on POSIX ltype() can never return unknown
+ // (because it is part of the directory iteration result).
+ //
entry_type
type () const;
entry_type
ltype () const;
+ // Modification and access times of the filesystem entry if it is not a
+ // symlink and of the symlink target otherwise.
+ //
+ // These are provided as an optimization if they can be obtained as a
+ // byproduct of work that is already being done anyway (iteration itself,
+ // calls to [l]type(), etc). If (not yet) available, timestamp_unknown is
+ // returned.
+ //
+ // Specifically:
+ //
+ // - On Windows mtime is always set by dir_iterator for entries other than
+ // reparse points.
+ //
+ // - On all platforms mtime and atime are always set for symlink targets
+ // by dir_iterator in the {detect,ignore}_dangling modes.
+ //
+ // - On all platforms mtime and atime can potentially be set by [l]type()
+ // if the stat() call is required to retrieve the type information (the
+ // native directory entry iterating API doesn't provide it, the type of
+ // the symlink target is queried, etc).
+ //
+ timestamp
+ mtime () const {return mtime_;}
+
+ timestamp
+ atime () const {return atime_;}
+
// Entry path (excluding the base). To get the full path, do
// base () / path ().
//
@@ -681,8 +706,17 @@ LIBBUTL_MODEXPORT namespace butl
base () const {return b_;}
dir_entry () = default;
- dir_entry (entry_type t, path_type p, dir_path b)
- : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {}
+
+ dir_entry (entry_type t,
+ path_type p,
+ dir_path b,
+ timestamp mt = timestamp_unknown,
+ timestamp at = timestamp_unknown)
+ : t_ (t),
+ mtime_ (mt),
+ atime_ (at),
+ p_ (std::move (p)),
+ b_ (std::move (b)) {}
private:
entry_type
@@ -691,8 +725,14 @@ LIBBUTL_MODEXPORT namespace butl
private:
friend class dir_iterator;
- mutable entry_type t_ = entry_type::unknown; // Lazy evaluation.
- mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation.
+ // Note: lazy evaluation.
+ //
+ mutable optional<entry_type> t_; // Entry type.
+ mutable optional<entry_type> lt_; // Symlink target type.
+
+ mutable timestamp mtime_ = timestamp_unknown;
+ mutable timestamp atime_ = timestamp_unknown;
+
path_type p_;
dir_path b_;
};
@@ -709,12 +749,15 @@ LIBBUTL_MODEXPORT namespace butl
~dir_iterator ();
dir_iterator () = default;
- // If it is requested to ignore dangling symlinks, then the increment
- // operator will skip symlinks that refer to non-existing or inaccessible
- // targets. That implies that it will always try to stat() symlinks.
+ // If the mode is either ignore_dangling or detect_dangling, then stat()
+ // the entry and either ignore an inaccessible/dangling entry or return it
+ // with the corresponding dir_entry type set to unknown (see dir_entry
+ // type()/ltype() for details).
//
+ enum mode {no_follow, detect_dangling, ignore_dangling};
+
explicit
- dir_iterator (const dir_path&, bool ignore_dangling);
+ dir_iterator (const dir_path&, mode);
dir_iterator (const dir_iterator&) = delete;
dir_iterator& operator= (const dir_iterator&) = delete;
@@ -740,10 +783,10 @@ LIBBUTL_MODEXPORT namespace butl
#ifndef _WIN32
DIR* h_ = nullptr;
#else
- intptr_t h_ = -1;
+ intptr_t h_ = -1; // INVALID_HANDLE_VALUE
#endif
- bool ignore_dangling_ = false;
+ mode mode_ = no_follow;
};
// Range-based for loop support.
@@ -769,7 +812,7 @@ LIBBUTL_MODEXPORT namespace butl
// Wildcard pattern search (aka glob).
//
- // For details on the wildcard patterns see <libbutl/path-pattern.mxx>
+ // For details on the wildcard patterns see <libbutl/path-pattern.hxx>
// Search for paths matching the pattern calling the specified function for
// each matching path (see below for details).
@@ -834,9 +877,20 @@ LIBBUTL_MODEXPORT namespace butl
// (a/b/, b*/, true)
// (a/b/c/, c*/, false)
//
- // Note that recursive iterating through directories currently goes
- // depth-first which make sense for the cleanup use cases. In future we may
- // want to make it controllable.
+ // Note that recursive iterating through directories currently goes depth-
+ // first which makes sense for the cleanup use cases. In the future we may
+ // want to make this controllable.
+ //
+ // If the match flags contain follow_symlinks, then call the dangling
+ // callback function for inaccessible/dangling entries if specified, and
+ // throw appropriate std::system_error otherwise. If the callback function
+ // returns true, then the inaccessible/dangling entry is ignored. Otherwise,
+ // the entire search is stopped.
+ //
+ // Note also that if pattern is not simple (that is, contains directory
+ // components), then some symlinks (those that are matched against the
+ // directory components) may still be followed and thus the dangling
+ // function called.
//
LIBBUTL_SYMEXPORT void
path_search (const path& pattern,
@@ -844,7 +898,8 @@ LIBBUTL_MODEXPORT namespace butl
const std::string& pattern,
bool interm)>&,
const dir_path& start = dir_path (),
- path_match_flags = path_match_flags::follow_symlinks);
+ path_match_flags = path_match_flags::follow_symlinks,
+ const std::function<bool (const dir_entry&)>& dangling = nullptr);
// Same as above, but behaves as if the directory tree being searched
// through contains only the specified entry. The start directory is used if
diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx
index f7c3777..b3f9224 100644
--- a/libbutl/filesystem.ixx
+++ b/libbutl/filesystem.ixx
@@ -1,6 +1,9 @@
// file : libbutl/filesystem.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
+#include <libbutl/utility.hxx> // operator<<(ostream,exception),
+ // throw_generic_error()
+
namespace butl
{
inline bool
@@ -8,7 +11,7 @@ namespace butl
{
// @@ Could 0 size be a valid and faster way?
//
- return dir_iterator (d, false /* ignore_dangling */) == dir_iterator ();
+ return dir_iterator (d, dir_iterator::no_follow) == dir_iterator ();
}
inline bool
@@ -38,6 +41,23 @@ namespace butl
return e ? rmdir_status::success : rmdir_status::not_exist;
}
+ LIBBUTL_SYMEXPORT optional<rmfile_status>
+ try_rmfile_maybe_ignore_error (const path&, bool ignore_error);
+
+ inline rmfile_status
+ try_rmfile (const path& p, bool ignore_error)
+ {
+ auto r (try_rmfile_maybe_ignore_error (p, ignore_error));
+ return r ? *r : rmfile_status::success;
+ }
+
+ inline optional<rmfile_status>
+ try_rmfile_ignore_error (const path& p)
+ {
+ return try_rmfile_maybe_ignore_error (p, true);
+ }
+
+
inline path
followsymlink (const path& p)
{
@@ -53,7 +73,7 @@ namespace butl
//
template <typename P>
inline auto_rm<P>::
- auto_rm (auto_rm&& x)
+ auto_rm (auto_rm&& x) noexcept
: path (std::move (x.path)), active (x.active)
{
x.active = false;
@@ -61,7 +81,7 @@ namespace butl
template <typename P>
inline auto_rm<P>& auto_rm<P>::
- operator= (auto_rm&& x)
+ operator= (auto_rm&& x) noexcept
{
if (this != &x)
{
@@ -117,54 +137,28 @@ namespace butl
static_cast<std::uint16_t> (y));
}
- // path_match_flags
- //
- inline path_match_flags operator& (path_match_flags x, path_match_flags y)
- {
- return x &= y;
- }
-
- inline path_match_flags operator| (path_match_flags x, path_match_flags y)
- {
- return x |= y;
- }
-
- inline path_match_flags operator&= (path_match_flags& x, path_match_flags y)
- {
- return x = static_cast<path_match_flags> (
- static_cast<std::uint16_t> (x) &
- static_cast<std::uint16_t> (y));
- }
-
- inline path_match_flags operator|= (path_match_flags& x, path_match_flags y)
- {
- return x = static_cast<path_match_flags> (
- static_cast<std::uint16_t> (x) |
- static_cast<std::uint16_t> (y));
- }
-
// dir_entry
//
inline entry_type dir_entry::
ltype () const
{
- return t_ != entry_type::unknown ? t_ : (t_ = type (false));
+ return t_ ? *t_ : *(t_ = type (false /* follow_symlinks */));
}
inline entry_type dir_entry::
type () const
{
entry_type t (ltype ());
- return t != entry_type::symlink
- ? t
- : lt_ != entry_type::unknown ? lt_ : (lt_ = type (true));
+ return t != entry_type::symlink ? t :
+ lt_ ? *lt_ :
+ *(lt_ = type (true /* follow_symlinks */));
}
// dir_iterator
//
inline dir_iterator::
dir_iterator (dir_iterator&& x) noexcept
- : e_ (std::move (x.e_)), h_ (x.h_), ignore_dangling_ (x.ignore_dangling_)
+ : e_ (std::move (x.e_)), h_ (x.h_), mode_ (x.mode_)
{
#ifndef _WIN32
x.h_ = nullptr;
diff --git a/libbutl/ft/lang.hxx b/libbutl/ft/lang.hxx
index 567f5a4..82971d2 100644
--- a/libbutl/ft/lang.hxx
+++ b/libbutl/ft/lang.hxx
@@ -7,9 +7,14 @@
// __cpp_thread_local (extension)
//
// If this macro is undefined then one may choose to fallback to __thread.
-// Note, however, that it only for values that do not require dynamic
+// Note, however, that it only works for values that do not require dynamic
// (runtime) initialization.
//
+// Note that thread_local with dynamic allocation/destruction appears to be
+// broken when we use our own implementation of C++14 threads on MinGW. So
+// we restrict ourselves to __thread which appears to be functioning, at
+// least in the POSIX threads GCC configuration.
+//
#ifndef __cpp_thread_local
//
// Apparently Apple's Clang "temporarily disabled" C++11 thread_local until
@@ -20,7 +25,7 @@
# if __apple_build_version__ >= 8000000
# define __cpp_thread_local 201103
# endif
-# else
+# elif !defined(LIBBUTL_MINGW_STDTHREAD)
# define __cpp_thread_local 201103
# endif
#endif
diff --git a/libbutl/git.cxx b/libbutl/git.cxx
index b9dd9bc..f37e16a 100644
--- a/libbutl/git.cxx
+++ b/libbutl/git.cxx
@@ -1,43 +1,11 @@
// file : libbutl/git.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/git.mxx>
-#endif
+#include <libbutl/git.hxx>
-// C includes.
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
-#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.git;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.semantic_version
-#endif
-
-import butl.utility; // digit()
-import butl.filesystem; // entry_exists()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx> // entry_exists()
+#include <libbutl/semantic-version.hxx>
using namespace std;
@@ -68,7 +36,9 @@ namespace butl
// MinGit: git version 2.16.1.windows.1
//
if (s.compare (0, 12, "git version ") == 0)
- return parse_semantic_version (s, 12, "" /* build_separators */);
+ return parse_semantic_version (s, 12,
+ semantic_version::allow_build,
+ "" /* build_separators */);
return nullopt;
}
diff --git a/libbutl/git.mxx b/libbutl/git.hxx
index 3f003be..add721e 100644
--- a/libbutl/git.mxx
+++ b/libbutl/git.hxx
@@ -1,35 +1,17 @@
-// file : libbutl/git.mxx -*- C++ -*-
+// file : libbutl/git.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.git;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.semantic_version;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/semantic-version.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Return true if the specified directory is a git repository root (contains
// the .git filesystem entry).
diff --git a/libbutl/host-os-release.cxx b/libbutl/host-os-release.cxx
new file mode 100644
index 0000000..f13f62c
--- /dev/null
+++ b/libbutl/host-os-release.cxx
@@ -0,0 +1,323 @@
+// file : libbutl/host-os-release.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/host-os-release.hxx>
+
+#include <sstream>
+#include <stdexcept> // runtime_error
+
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx> // file_exists()
+#include <libbutl/string-parser.hxx> // parse_quoted()
+
+#ifdef _WIN32
+# include <libbutl/win32-utility.hxx>
+#endif
+
+using namespace std;
+
+namespace butl
+{
+  // Parse os-release (f or standard locations). Note: exported for the test.
+  //
+  LIBBUTL_SYMEXPORT os_release
+  host_os_release_linux (path f = {})
+  {
+    os_release r;
+
+    // According to os-release(5), we should use /etc/os-release and fallback
+    // to /usr/lib/os-release if the former does not exist. It also lists the
+    // fallback values for individual variables, in case some are not present.
+    //
+    auto exists = [] (const path& f)
+    {
+      try
+      {
+        return file_exists (f);
+      }
+      catch (const system_error& e)
+      {
+        ostringstream os;
+        os << "unable to stat path " << f << ": " << e;
+        throw runtime_error (os.str ());
+      }
+    };
+
+    if (!f.empty ()
+        ? exists (f)
+        : (exists (f = path ("/etc/os-release")) ||
+           exists (f = path ("/usr/lib/os-release"))))
+    {
+      try
+      {
+        ifdstream ifs (f, ifdstream::badbit);
+
+        string l;
+        for (uint64_t ln (1); !eof (getline (ifs, l)); ++ln)
+        {
+          trim (l);
+
+          // Skip blank lines and comments.
+          //
+          if (l.empty () || l[0] == '#')
+            continue;
+
+          // The variable assignments are in the "shell style" and so can be
+          // quoted/escaped. For now we only handle quoting, which is what all
+          // the instances seen in the wild seem to use.
+          //
+          size_t p (l.find ('='));
+          if (p == string::npos)
+            continue; // Not a variable assignment.
+
+          string n (l, 0, p); // Variable name.
+          l.erase (0, p + 1); // Leave only the value.
+
+          using string_parser::parse_quoted;
+          using string_parser::invalid_string;
+
+          try
+          {
+            if (n == "ID_LIKE")
+            {
+              r.like_ids.clear (); // Override any previous ID_LIKE.
+
+              vector<string> vs (parse_quoted (l, true /* unquote */));
+              for (const string& v: vs)
+              {
+                for (size_t b (0), e (0); next_word (v, b, e); )
+                {
+                  r.like_ids.push_back (string (v, b, e - b));
+                }
+              }
+            }
+            else if (string* p = (n == "ID"               ? &r.name_id          :
+                                  n == "VERSION_ID"       ? &r.version_id       :
+                                  n == "VARIANT_ID"       ? &r.variant_id       :
+                                  n == "NAME"             ? &r.name             :
+                                  n == "VERSION_CODENAME" ? &r.version_codename :
+                                  n == "VARIANT"          ? &r.variant          :
+                                  nullptr))
+            {
+              vector<string> vs (parse_quoted (l, true /* unquote */));
+              switch (vs.size ())
+              {
+              case 0:  *p =  ""; break;
+              case 1:  *p = move (vs.front ()); break;
+              default: throw invalid_string (0, "multiple values");
+              }
+            }
+          }
+          catch (const invalid_string& e)
+          {
+            ostringstream os;
+            os << "invalid " << n << " value in " << f << ':' << ln << ": "
+               << e;
+            throw runtime_error (os.str ());
+          }
+        }
+
+        ifs.close ();
+      }
+      catch (const ios::failure& e)
+      {
+        ostringstream os;
+        os << "unable to read from " << f << ": " << e;
+        throw runtime_error (os.str ());
+      }
+    }
+
+    // Assign fallback values.
+    //
+    if (r.name_id.empty ()) r.name_id = "linux";
+    if (r.name.empty ())    r.name    = "Linux";
+
+    return r;
+  }
+
+  static os_release
+  host_os_release_macos ()
+  {
+    // Run sw_vers -productVersion to get Mac OS version.
+    //
+    try
+    {
+      process pr;
+      try
+      {
+        fdpipe pipe (fdopen_pipe ());
+
+        pr = process_start (0, pipe, 2, "sw_vers", "-productVersion");
+
+        pipe.out.close ();
+        ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit);
+
+        // The output should be one line containing the version.
+        //
+        optional<string> v;
+        for (string l; !eof (getline (is, l)); )
+        {
+          if (l.empty () || v)
+          {
+            v = nullopt; // Empty or extra line: treat as unexpected output.
+            break;
+          }
+
+          v = move (l);
+        }
+
+        is.close (); // Detect errors.
+
+        if (pr.wait ())
+        {
+          if (!v)
+            throw runtime_error ("unexpected sw_vers -productVersion output");
+
+          return os_release {"macos", {}, move (*v), "", "Mac OS", "", ""};
+        }
+
+      }
+      catch (const ios::failure& e)
+      {
+        if (pr.wait ()) // Only report the read error if the child succeeded.
+        {
+          ostringstream os;
+          os << "error reading sw_vers output: " << e;
+          throw runtime_error (os.str ());
+        }
+
+        // Fall through.
+      }
+
+      // We should only get here if the child exited with an error status.
+      //
+      assert (!pr.wait ());
+      throw runtime_error ("process sw_vers exited with non-zero code");
+    }
+    catch (const process_error& e)
+    {
+      ostringstream os;
+      os << "unable to execute sw_vers: " << e;
+      throw runtime_error (os.str ());
+    }
+  }
+
+  // Return the release information for the Windows host we are running on.
+  // Throw runtime_error if the version cannot be determined or is unknown
+  // (or if called in a non-Windows build).
+  //
+  static os_release
+  host_os_release_windows ()
+  {
+#ifdef _WIN32
+    // The straightforward way to get the version would be the GetVersionEx()
+    // Win32 function. However, if the application is built with a certain
+    // assembly manifest, this function will return the version the
+    // application was built for rather than what's actually running.
+    //
+    // The other plausible options are to call the `ver` program and parse its
+    // output (of questionable regularity) or to call RtlGetVersion(). The
+    // latter combined with GetProcAddress() seems to be a widely-used
+    // approach, so we are going with that (seeing that we employ a similar
+    // technique in quite a few places).
+    //
+    HMODULE nh (GetModuleHandle ("ntdll.dll"));
+    if (nh == nullptr)
+      throw runtime_error ("unable to get handle to ntdll.dll");
+
+    using RtlGetVersion = LONG /*NTSTATUS*/ (WINAPI*)(PRTL_OSVERSIONINFOW);
+
+    RtlGetVersion gv (
+      function_cast<RtlGetVersion> (
+        GetProcAddress (nh, "RtlGetVersion")));
+
+    // RtlGetVersion() is available from Windows 2000 which is way before
+    // anything we might possibly care about (e.g., XP or 7).
+    //
+    if (gv == nullptr)
+      throw runtime_error ("unable to get address of RtlGetVersion()");
+
+    RTL_OSVERSIONINFOW vi;
+    vi.dwOSVersionInfoSize = sizeof (vi);
+    gv (&vi); // Always succeeds, according to documentation.
+
+    // Ok, the real mess starts here. Here is how the commonly known Windows
+    // versions correspond to the major/minor/build numbers and how we will
+    // map them (note that there are also Server versions in the mix; see the
+    // OSVERSIONINFOEXW struct documentation for the complete picture):
+    //
+    //                          major  minor    build  mapped
+    // Windows 11                  10      0  >=22000      11
+    // Windows 10                  10      0   <22000      10
+    // Windows 8.1                  6      3              8.1
+    // Windows 8                    6      2                8
+    // Windows 7                    6      1                7
+    // Windows Vista                6      0                6
+    // Windows XP Pro/64-bit        5      2              5.2
+    // Windows XP                   5      1              5.1
+    // Windows 2000                 5      0                5
+    //
+    // Based on this it's probably not wise to try to map any future versions
+    // automatically.
+    //
+    string v;
+    if (vi.dwMajorVersion == 10 && vi.dwMinorVersion == 0)
+    {
+      v = vi.dwBuildNumber >= 22000 ? "11" : "10";
+    }
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 3) v = "8.1";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 2) v = "8";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 1) v = "7";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 0) v = "6";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 2) v = "5.2";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 1) v = "5.1";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 0) v = "5";
+    else
+    {
+      // Note: must throw runtime_error (and not a bare string) so that
+      // callers relying on the documented exception type can handle it.
+      //
+      throw runtime_error ("unknown windows version "            +
+                           std::to_string (vi.dwMajorVersion) + '.' +
+                           std::to_string (vi.dwMinorVersion) + '.' +
+                           std::to_string (vi.dwBuildNumber));
+    }
+
+    return os_release {"windows", {}, move (v), "", "Windows", "", ""};
+#else
+    throw runtime_error ("unexpected host operating system");
+#endif
+  }
+
+  optional<os_release>
+  host_os_release (const target_triplet& h)
+  {
+    const string& cls (h.class_);
+
+    // Platforms for which we query the running system.
+    //
+    if (cls == "linux")   return host_os_release_linux ();
+    if (cls == "macos")   return host_os_release_macos ();
+    if (cls == "windows") return host_os_release_windows ();
+
+    if (cls != "bsd")
+      return nullopt;
+
+    // @@ TODO: ideally we would want to run uname and obtain the actual
+    //    version we are running on rather than what we've been built for.
+    //    (Think also how this will affect tests).
+    //
+    const string& sys (h.system);
+
+    // Human-readable name for the known BSDs. For some other BSD we use the
+    // system name itself.
+    //
+    const char* pretty (sys == "freebsd" ? "FreeBSD" :
+                        sys == "netbsd"  ? "NetBSD"  :
+                        sys == "openbsd" ? "OpenBSD" :
+                        nullptr);
+
+    return os_release {sys,
+                       {},
+                       h.version,
+                       "",
+                       pretty != nullptr ? string (pretty) : sys,
+                       "",
+                       ""};
+  }
+}
diff --git a/libbutl/host-os-release.hxx b/libbutl/host-os-release.hxx
new file mode 100644
index 0000000..058afdc
--- /dev/null
+++ b/libbutl/host-os-release.hxx
@@ -0,0 +1,86 @@
+// file : libbutl/host-os-release.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <libbutl/optional.hxx>
+#include <libbutl/target-triplet.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ // Information extracted from /etc/os-release on Linux. See os-release(5)
+ // for background. For other platforms we derive the equivalent information
+ // from other sources. Some examples:
+ //
+ // {"debian", {}, "10", "",
+ // "Debian GNU/Linux", "buster", ""}
+ //
+ // {"fedora", {}, "35", "workstation",
+ // "Fedora Linux", "", "Workstation Edition"}
+ //
+ // {"ubuntu", {"debian"}, "20.04", "",
+ // "Ubuntu", "focal", ""}
+ //
+ // {"macos", {}, "12.5", "",
+ // "Mac OS", "", ""}
+ //
+ // {"freebsd", {}, "13.1", "",
+ // "FreeBSD", "", ""}
+ //
+ // {"windows", {}, "10", "",
+ // "Windows", "", ""}
+ //
+ // Note that for Mac OS, the version is the Mac OS version (as printed by
+ // sw_vers) rather than Darwin version (as printed by uname).
+ //
+ // For Windows we currently do not distinguish the Server edition and the
+ // version mapping is as follows:
+ //
+ // Windows 11 11
+ // Windows 10 10
+ // Windows 8.1 8.1
+ // Windows 8 8
+ // Windows 7 7
+ // Windows Vista 6
+ // Windows XP Pro/64-bit 5.2
+ // Windows XP 5.1
+ // Windows 2000 5
+ //
+ // Note that version_id may be empty, for example, on Debian testing:
+ //
+ // {"debian", {}, "", "",
+ // "Debian GNU/Linux", "", ""}
+ //
+ // Note also that we don't extract PRETTY_NAME because its content is
+ // unpredictable. For example, it may include variant, as in "Fedora Linux
+ // 35 (Workstation Edition)". Instead, construct it from the individual
+ // components as appropriate, normally "$name $version ($version_codename)".
+ //
+  struct os_release
+  {
+    std::string name_id;                // ID (e.g., "debian", "macos")
+    std::vector<std::string> like_ids;  // ID_LIKE (e.g., {"debian"} on Ubuntu)
+    std::string version_id;             // VERSION_ID (may be empty)
+    std::string variant_id;             // VARIANT_ID (e.g., "workstation")
+
+    std::string name;                   // NAME (e.g., "Debian GNU/Linux")
+    std::string version_codename;       // VERSION_CODENAME (e.g., "buster")
+    std::string variant;                // VARIANT (e.g., "Workstation Edition")
+  };
+
+  // Return the release information for the specified host or nullopt if the
+  // specified host is unknown/unsupported. Throw std::runtime_error if
+  // anything goes wrong.
+ //
+ // Note that "host" here implies that we may be running programs, reading
+ // files, examining environment variables, etc., of the machine we are
+ // running on.
+ //
+ LIBBUTL_SYMEXPORT optional<os_release>
+ host_os_release (const target_triplet& host);
+}
diff --git a/libbutl/json/event.hxx b/libbutl/json/event.hxx
new file mode 100644
index 0000000..77185cc
--- /dev/null
+++ b/libbutl/json/event.hxx
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace butl
+{
+ namespace json
+ {
+    // Parsing/serialization event.
+    //
+    enum class event: std::uint8_t
+    {
+      begin_object = 1, // Note: values start at 1, not 0.
+      end_object,
+      begin_array,
+      end_array,
+      name,
+      string,
+      number,
+      boolean,
+      null
+    };
+
+    constexpr std::size_t event_count = 9; // Number of enumerators in event.
+ }
+}
diff --git a/libbutl/json/parser.cxx b/libbutl/json/parser.cxx
new file mode 100644
index 0000000..8ef7422
--- /dev/null
+++ b/libbutl/json/parser.cxx
@@ -0,0 +1,645 @@
+#define PDJSON_SYMEXPORT static // See below.
+
+#include <libbutl/json/parser.hxx>
+
+#include <istream>
+
+// There is an issue (segfault) with using std::current_exception() and
+// std::rethrow_exception() with older versions of libc++ on Linux. While the
+// exact root cause hasn't been determined, the suspicion is that something
+// gets messed up if we "smuggle" std::exception_ptr through extern "C" call
+// frames (we cannot even destroy such an exception without a segfault). We
+// also could not determine in which version exactly this has been fixed but
+// we know that libc++ 6.0.0 doesn't appear to have this issue (though we are
+// not entirely sure the issue is (only) in libc++; libgcc_s could also be
+// involved).
+//
+// The workaround is to just catch (and note) the exception and then throw a
+// new instance of generic std::istream::failure. In order not to drag the
+// below test into the header, we wrap exception_ptr with optional<> and use
+// NULL to indicate the presence of the exception when the workaround is
+// required.
+//
+// Note that if/when we drop this workaround, we should also get rid of
+// optional<> in stream::exception member.
+//
+#undef LIBBUTL_JSON_NO_EXCEPTION_PTR
+
+#if defined (__linux__) && defined(__clang__)
+# if __has_include(<__config>)
+# include <__config> // _LIBCPP_VERSION
+# if _LIBCPP_VERSION < 6000
+# define LIBBUTL_JSON_NO_EXCEPTION_PTR 1
+# endif
+# endif
+#endif
+
+namespace butl
+{
+ namespace json
+ {
+ using namespace std;
+
+    parser::
+    ~parser ()
+    {
+      json_close (impl_); // Close the pdjson stream opened in the constructor.
+    }
+
+    static int
+    stream_get (void* x)
+    {
+      auto& st (*static_cast<parser::stream*> (x));
+
+      // Reading of whitespaces/separators in the multi-value mode is shared
+      // between our code and pdjson's so we may be called again after EOF
+      // has already been reached, which iostream does not handle gracefully.
+      // So bail out early in this case.
+      //
+      if (st.is->eof ())
+        return EOF;
+
+      try
+      {
+        // Peek first so that we don't trip failbit on EOF.
+        //
+        if (st.is->peek () == istream::traits_type::eof ())
+          return EOF;
+
+        return static_cast<char> (st.is->get ());
+      }
+      catch (...)
+      {
+        // Note the exception for the caller to deal with (see the
+        // LIBBUTL_JSON_NO_EXCEPTION_PTR workaround for details).
+        //
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+        st.exception = current_exception ();
+#else
+        st.exception = nullptr;
+#endif
+      }
+
+      return EOF;
+    }
+
+    static int
+    stream_peek (void* x)
+    {
+      auto& st (*static_cast<parser::stream*> (x));
+
+      // Don't touch the stream once EOF has been reached.
+      //
+      if (st.is->eof ())
+        return EOF;
+
+      try
+      {
+        const auto next (st.is->peek ());
+
+        if (next != istream::traits_type::eof ())
+          return static_cast<char> (next);
+      }
+      catch (...)
+      {
+        // Note the exception for the caller to deal with.
+        //
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+        st.exception = current_exception ();
+#else
+        st.exception = nullptr;
+#endif
+      }
+
+      return EOF;
+    }
+
+    // NOTE: watch out for exception safety (specifically, doing anything that
+    // might throw after opening the stream).
+    //
+    parser::
+    parser (istream& is, const char* n, bool mv, const char* sep) noexcept
+        : input_name (n),
+          stream_ {&is, nullopt}, // No pending exception.
+          multi_value_ (mv),
+          separators_ (sep),
+          raw_s_ (nullptr),       // No raw value until something is parsed.
+          raw_n_ (0)
+    {
+      json_open_user (impl_, &stream_get, &stream_peek, &stream_);
+      json_set_streaming (impl_, multi_value_);
+    }
+
+    parser::
+    parser (const void* t,
+            size_t s,
+            const char* n,
+            bool mv,
+            const char* sep) noexcept
+        : input_name (n),
+          stream_ {nullptr, nullopt}, // Not reading from a stream.
+          multi_value_ (mv),
+          separators_ (sep),
+          raw_s_ (nullptr),           // No raw value until something is parsed.
+          raw_n_ (0)
+    {
+      json_open_buffer (impl_, t, s);
+      json_set_streaming (impl_, multi_value_);
+    }
+
+    optional<event> parser::
+    next ()
+    {
+      // Invalidate whatever has been cached for the previous event.
+      //
+      name_p_     = false;
+      value_p_    = false;
+      location_p_ = false;
+
+      // If there is no previously peeked event to consume, then parse the
+      // next one.
+      //
+      // Note that for now we don't worry about the state of the parser if
+      // next_impl() throws assuming it is not going to be reused.
+      //
+      if (!peeked_)
+        parsed_ = next_impl ();
+      else
+      {
+        parsed_ = peeked_;
+        peeked_ = nullopt;
+      }
+
+      return translate (*parsed_);
+    }
+
+    optional<event> parser::
+    peek ()
+    {
+      // Repeated peeks return the same event.
+      //
+      if (peeked_)
+        return translate (*peeked_);
+
+      // Save the data and location of the current event (if any) before
+      // parsing the next one.
+      //
+      if (parsed_)
+      {
+        cache_parsed_data ();
+        cache_parsed_location ();
+      }
+
+      peeked_ = next_impl ();
+
+      return translate (*peeked_);
+    }
+
+    // Return the description of the event for use in diagnostics (empty
+    // string if the value is not one of the enumerators).
+    //
+    static inline const char*
+    event_name (event e)
+    {
+      const char* r ("");
+
+      switch (e)
+      {
+      case event::begin_object: r = "beginning of object"; break;
+      case event::end_object:   r = "end of object";       break;
+      case event::begin_array:  r = "beginning of array";  break;
+      case event::end_array:    r = "end of array";        break;
+      case event::name:         r = "member name";         break;
+      case event::string:       r = "string value";        break;
+      case event::number:       r = "numeric value";       break;
+      case event::boolean:      r = "boolean value";       break;
+      case event::null:         r = "null value";          break;
+      }
+
+      return r;
+    }
+
+    bool parser::
+    next_expect (event p, optional<event> s)
+    {
+      const optional<event> e (next ());
+
+      // Return true for the primary event and false for the secondary (if
+      // specified). Anything else (including the end of input) is an error.
+      //
+      if (e)
+      {
+        if (*e == p)
+          return true;
+
+        if (s && *e == *s)
+          return false;
+      }
+
+      string d (string ("expected ") + event_name (p));
+
+      if (s)
+        d = d + " or " + event_name (*s);
+
+      if (e)
+        d = d + " instead of " + event_name (*e);
+
+      const char* in (input_name != nullptr ? input_name : "");
+
+      throw invalid_json_input (in, line (), column (), position (), move (d));
+    }
+
+    void parser::
+    next_expect_name (const char* n, bool su)
+    {
+      // Keep reading member names until we find the one we are looking for.
+      // Unless we were asked to skip unknown members (su), the very first
+      // name must match.
+      //
+      do
+      {
+        next_expect (event::name);
+
+        if (name () == n)
+          return;
+
+        if (su)
+          next_expect_value_skip ();
+      }
+      while (su);
+
+      string d (string ("expected object member name '") + n);
+      d += "' instead of '";
+      d += name ();
+      d += '\'';
+
+      const char* in (input_name != nullptr ? input_name : "");
+
+      throw invalid_json_input (in, line (), column (), position (), move (d));
+    }
+
+    void parser::
+    next_expect_value_skip ()
+    {
+      const optional<event> e (next ());
+
+      if (e)
+      {
+        const event first (*e);
+
+        // Simple value: nothing else to skip.
+        //
+        if (first == event::string  ||
+            first == event::number  ||
+            first == event::boolean ||
+            first == event::null)
+          return;
+
+        if (first == event::begin_object || first == event::begin_array)
+        {
+          // Skip until the matching end_object/array keeping track of
+          // nesting. We are going to rely on the fact that we should either
+          // get such an event or next() should throw.
+          //
+          const event open (first);
+          const event close (open == event::begin_object
+                             ? event::end_object
+                             : event::end_array);
+
+          for (size_t depth (1); depth != 0; )
+          {
+            const event c (*next ());
+
+            if (c == close)
+              --depth;
+            else if (c == open)
+              ++depth;
+          }
+
+          return;
+        }
+
+        // Otherwise it's name, end_object, or end_array, none of which is
+        // a value.
+      }
+
+      string d ("expected value");
+
+      if (e)
+        d = d + " instead of " + event_name (*e);
+
+      const char* in (input_name != nullptr ? input_name : "");
+
+      throw invalid_json_input (in, line (), column (), position (), move (d));
+    }
+
+    std::uint64_t parser::
+    line () const noexcept
+    {
+      // The location cached by peek() takes precedence.
+      //
+      if (location_p_)
+        return line_;
+
+      if (!parsed_) // Nothing has been parsed yet.
+        return 0;
+
+      assert (!peeked_);
+
+      json_stream* s (const_cast<json_stream*> (impl_));
+      return static_cast<uint64_t> (json_get_lineno (s));
+    }
+
+    std::uint64_t parser::
+    column () const noexcept
+    {
+      // The location cached by peek() takes precedence.
+      //
+      if (location_p_)
+        return column_;
+
+      if (!parsed_) // Nothing has been parsed yet.
+        return 0;
+
+      assert (!peeked_);
+
+      json_stream* s (const_cast<json_stream*> (impl_));
+      return static_cast<uint64_t> (json_get_column (s));
+    }
+
+    std::uint64_t parser::
+    position () const noexcept
+    {
+      // The location cached by peek() takes precedence.
+      //
+      if (location_p_)
+        return position_;
+
+      if (!parsed_) // Nothing has been parsed yet.
+        return 0;
+
+      assert (!peeked_);
+
+      json_stream* s (const_cast<json_stream*> (impl_));
+      return static_cast<uint64_t> (json_get_position (s));
+    }
+
+    json_type parser::
+    next_impl ()
+    {
+      raw_s_ = nullptr;
+      raw_n_ = 0;
+      json_type e;
+
+      // Read characters between values skipping required separators and JSON
+      // whitespaces. Return whether a required separator was encountered as
+      // well as the first non-separator/whitespace character (which, if EOF,
+      // should trigger a check for input/output errors).
+      //
+      // Note that the returned non-separator will not have been extracted
+      // from the input (so position, column, etc. will still refer to its
+      // predecessor).
+      //
+      auto skip_separators = [this] () -> pair<bool, int>
+      {
+        bool r (separators_ == nullptr); // True if none are required.
+
+        int c;
+        for (; (c = json_source_peek (impl_)) != EOF; json_source_get (impl_))
+        {
+          // User separator.
+          //
+          if (separators_ != nullptr && *separators_ != '\0')
+          {
+            if (strchr (separators_, c) != nullptr)
+            {
+              r = true;
+              continue;
+            }
+          }
+
+          // JSON separator.
+          //
+          if (json_isspace (c))
+          {
+            if (separators_ != nullptr && *separators_ == '\0')
+              r = true;
+
+            continue;
+          }
+
+          break;
+        }
+
+        return make_pair (r, c);
+      };
+
+      // In the multi-value mode skip any instances of required separators
+      // (and any other JSON whitespace) preceding the first JSON value.
+      //
+      if (multi_value_ && !parsed_ && !peeked_)
+      {
+        if (skip_separators ().second == EOF && stream_.is != nullptr)
+        {
+          if (stream_.exception) goto fail_rethrow;
+          if (stream_.is->fail ()) goto fail_stream;
+        }
+      }
+
+      e = json_next (impl_);
+
+      // First check for a pending input/output error.
+      //
+      if (stream_.is != nullptr)
+      {
+        if (stream_.exception) goto fail_rethrow;
+        if (stream_.is->fail ()) goto fail_stream;
+      }
+
+      // There are two ways to view separation between two values: as following
+      // the first value or as preceding the second value. And one aspect that
+      // is determined by this is whether a separation violation is a problem
+      // with the first value or with the second, which becomes important if
+      // the user bails out before parsing the second value.
+      //
+      // Consider these two unseparated values (yes, in JSON they are two
+      // values, leading zeros are not allowed in JSON numbers):
+      //
+      // 01
+      //
+      // If the user bails out after parsing 0 in a stream that should have
+      // been newline-delimited, they most likely would want to get an error
+      // since this is most definitely an invalid value rather than two
+      // values that are not properly separated. So in this light we handle
+      // separators at the end of the first value.
+      //
+      switch (e)
+      {
+      case JSON_DONE:
+        {
+          // Deal with the following value separators.
+          //
+          // Note that we must not do this for the second JSON_DONE (or the
+          // first one in case there are no values) that signals the end of
+          // input.
+          //
+          if (multi_value_ &&
+              (parsed_ || peeked_) &&
+              (peeked_ ? *peeked_ : *parsed_) != JSON_DONE)
+          {
+            auto p (skip_separators ());
+
+            if (p.second == EOF && stream_.is != nullptr)
+            {
+              if (stream_.exception) goto fail_rethrow;
+              if (stream_.is->fail ()) goto fail_stream;
+            }
+
+            // Note that we don't require separators after the last value.
+            //
+            if (!p.first && p.second != EOF)
+            {
+              json_source_get (impl_); // Consume to update column number.
+              goto fail_separation;
+            }
+
+            json_reset (impl_);
+          }
+          break;
+        }
+      case JSON_ERROR: goto fail_json;
+      case JSON_STRING:
+      case JSON_NUMBER:
+        raw_s_ = json_get_string (impl_, &raw_n_);
+        raw_n_--; // Includes terminating `\0`.
+        break;
+      case JSON_TRUE:  raw_s_ = "true";  raw_n_ = 4; break;
+      case JSON_FALSE: raw_s_ = "false"; raw_n_ = 5; break;
+      case JSON_NULL:  raw_s_ = "null";  raw_n_ = 4; break;
+      default: break;
+      }
+
+      return e;
+
+    fail_json:
+      throw invalid_json_input (
+        input_name != nullptr ? input_name : "",
+        static_cast<uint64_t> (json_get_lineno (impl_)),
+        static_cast<uint64_t> (json_get_column (impl_)),
+        static_cast<uint64_t> (json_get_position (impl_)),
+        json_get_error (impl_));
+
+    fail_separation:
+      throw invalid_json_input (
+        input_name != nullptr ? input_name : "",
+        static_cast<uint64_t> (json_get_lineno (impl_)),
+        static_cast<uint64_t> (json_get_column (impl_)),
+        static_cast<uint64_t> (json_get_position (impl_)),
+        "missing separator between JSON values");
+
+    fail_stream:
+      throw invalid_json_input (
+        input_name != nullptr ? input_name : "",
+        static_cast<uint64_t> (json_get_lineno (impl_)),
+        static_cast<uint64_t> (json_get_column (impl_)),
+        static_cast<uint64_t> (json_get_position (impl_)),
+        "unable to read JSON input text");
+
+    fail_rethrow:
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+      rethrow_exception (move (*stream_.exception));
+#else
+      throw istream::failure ("unable to read");
+#endif
+    }
+
+    optional<event> parser::
+    translate (json_type e) const noexcept
+    {
+      optional<event> r; // JSON_DONE is translated to nullopt.
+
+      switch (e)
+      {
+      case JSON_DONE:                                 break;
+      case JSON_OBJECT:     r = event::begin_object;  break;
+      case JSON_OBJECT_END: r = event::end_object;    break;
+      case JSON_ARRAY:      r = event::begin_array;   break;
+      case JSON_ARRAY_END:  r = event::end_array;     break;
+      case JSON_STRING:
+        {
+          // Inside an object this can be either a name or a value from the
+          // name/value pair. Otherwise, it's always a value.
+          //
+          size_t n;
+          const bool in_object (
+            json_get_context (const_cast<json_stream*> (impl_), &n) ==
+            JSON_OBJECT);
+
+          if (in_object && n % 2 == 1)
+            r = event::name;
+          else
+            r = event::string;
+
+          break;
+        }
+      case JSON_NUMBER:     r = event::number;        break;
+      case JSON_TRUE:
+      case JSON_FALSE:      r = event::boolean;       break;
+      case JSON_NULL:       r = event::null;          break;
+      case JSON_ERROR:
+        {
+          assert (false); // Should've been handled by caller.
+          break;
+        }
+      }
+
+      return r;
+    }
+
+    void parser::
+    cache_parsed_data ()
+    {
+      name_p_  = false;
+      value_p_ = false;
+
+      const optional<event> e (translate (*parsed_));
+
+      if (!e)
+        return;
+
+      // Copy the raw data of the last parsed event into the corresponding
+      // member (only names and values have data).
+      //
+      if (*e == event::name)
+      {
+        name_.assign (raw_s_, raw_n_);
+        name_p_ = true;
+      }
+      else if (value_event (e))
+      {
+        value_.assign (raw_s_, raw_n_);
+        value_p_ = true;
+      }
+    }
+
+    void parser::
+    cache_parsed_location () noexcept
+    {
+      line_ = static_cast<uint64_t> (json_get_lineno (impl_));
+      column_ = static_cast<uint64_t> (json_get_column (impl_));
+      position_ = static_cast<uint64_t> (json_get_position (impl_));
+      location_p_ = true; // line(), column(), position() now return the above.
+    }
+
+    // Return true if this is a simple value event (string, number, boolean,
+    // or null).
+    //
+    bool parser::
+    value_event (optional<event> e) noexcept
+    {
+      return e && (*e == event::string  ||
+                   *e == event::number  ||
+                   *e == event::boolean ||
+                   *e == event::null);
+    }
+
+    // Throw invalid_json_input for the value v of size n that cannot be
+    // interpreted as the specified type, using the current location.
+    //
+    [[noreturn]] void parser::
+    throw_invalid_value (const char* type, const char* v, size_t n) const
+    {
+      string d ("invalid ");
+      d += type;
+      d += " value: '";
+      d.append (v, n);
+      d += '\'';
+
+      const char* in (input_name != nullptr ? input_name : "");
+
+      throw invalid_json_input (in, line (), column (), position (), move (d));
+    }
+ } // namespace json
+} // namespace butl
+
+// Include the implementation into our translation unit (instead of compiling
+// it separately) to (hopefully) get function inlining without LTO.
+//
+// Let's keep it last since the implementation defines a couple of macros.
+//
+#if defined(__clang__) || defined(__GNUC__)
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+extern "C"
+{
+#define PDJSON_STACK_INC 16
+#define PDJSON_STACK_MAX 2048
+#include "pdjson.c"
+}
diff --git a/libbutl/json/parser.hxx b/libbutl/json/parser.hxx
new file mode 100644
index 0000000..95d9c4e
--- /dev/null
+++ b/libbutl/json/parser.hxx
@@ -0,0 +1,705 @@
+#pragma once
+
+#ifdef BUILD2_BOOTSTRAP
+# error JSON parser not available during bootstrap
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <utility> // pair
+#include <exception> // exception_ptr
+#include <stdexcept> // invalid_argument
+
+#include <libbutl/optional.hxx> // butl::optional is std::optional or similar.
+
+#include <libbutl/json/event.hxx>
+
+#include <libbutl/json/pdjson.h> // Implementation details.
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ // Using the RFC8259 terminology: JSON (input) text, JSON value, object
+ // member.
+ //
+ namespace json
+ {
+ // Exception used to report invalid JSON input.
+ //
+ // Besides the description (which is passed to std::invalid_argument and
+ // is thus available via what()), it carries the name of the input and
+ // the line, column, and position values supplied by the thrower.
+ //
+ class invalid_json_input: public std::invalid_argument
+ {
+ public:
+ // Input name and location as passed to the constructor. Note that the
+ // name may be empty.
+ //
+ std::string name;
+ std::uint64_t line;
+ std::uint64_t column;
+ std::uint64_t position;
+
+ // The two constructors take the same arguments and differ only in how
+ // the description is passed (std::string or C-string).
+ //
+ invalid_json_input (std::string name,
+ std::uint64_t line,
+ std::uint64_t column,
+ std::uint64_t position,
+ const std::string& description);
+
+ invalid_json_input (std::string name,
+ std::uint64_t line,
+ std::uint64_t column,
+ std::uint64_t position,
+ const char* description);
+ };
+
+ class LIBBUTL_SYMEXPORT parser
+ {
+ public:
+ const char* input_name;
+
+ // Construction.
+ //
+
+ // Parse JSON input text from std::istream.
+ //
+ // The name argument is used to identify the input being parsed. Note
+ // that the stream, name, and separators are kept as references so they
+ // must outlive the parser instance.
+ //
+ // If stream exceptions are enabled then the std::ios_base::failure
+ // exception is used to report input/output errors (badbit and failbit).
+ // Otherwise, those are reported as the invalid_json_input exception.
+ //
+ // If multi_value is true, enable the multi-value mode in which case the
+ // input stream may contain multiple JSON values (more precisely, zero
+ // or more). If false (the default), parsing will fail unless there is
+ // exactly one JSON value in the input stream.
+ //
+ // If multi_value is true, the separators argument specifies the
+ // required separator characters between JSON values. At least one of
+ // them must be present between every pair of JSON values (in addition
+ // to any number of JSON whitespaces). No separators are required after
+ // the last JSON value (but any found will be skipped).
+ //
+ // Specifically, if it is NULL, then no separation is required (that is,
+ // both `{...}{...}` and `{...} {...}` would be valid). If it is empty,
+ // then at least one JSON whitespace is required. And if it is non-
+ // empty, then at least one of its characters must be present (for
+ // example, "\n\t" would require at least one newline or TAB character
+ // between JSON values).
+ //
+ // Note that a separator need not be valid JSON whitespace: any
+ // character is acceptable (though it probably shouldn't be an object,
+ // array, or string delimiter and should not occur within a non-self-
+ // delimited top-level value, such as `true`, `false`, `null`, or a
+ // number). All instances of required separators before and after a
+ // value are skipped. Therefore JSON Text Sequences (RFC 7464; AKA
+ // Record Separator-delimited JSON), which requires the RS (0x1E)
+ // character before each value, can be handled as well.
+ //
+ parser (std::istream&,
+ const std::string& name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (std::istream&,
+ const char* name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (std::istream&,
+ std::string&&,
+ bool = false,
+ const char* = nullptr) = delete;
+
+ // Parse a memory buffer that contains the entire JSON input text.
+ //
+ // The name argument is used to identify the input being parsed. Note
+ // that the buffer, name, and separators are kept as references so they
+ // must outlive the parser instance.
+ //
+ parser (const void* text,
+ std::size_t size,
+ const std::string& name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const void* text,
+ std::size_t size,
+ const char* name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const void*,
+ std::size_t,
+ std::string&&,
+ bool = false,
+ const char* = nullptr) = delete;
+
+ // Similar to the above but parse a string.
+ //
+ parser (const std::string& text,
+ const std::string& name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const std::string& text,
+ const char* name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const std::string&,
+ std::string&&,
+ bool = false,
+ const char* = nullptr) = delete;
+
+ // Similar to the above but parse a C-string.
+ //
+ parser (const char* text,
+ const std::string& name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const char* text,
+ const char* name,
+ bool multi_value = false,
+ const char* separators = nullptr) noexcept;
+
+ parser (const char*,
+ std::string&&,
+ bool = false,
+ const char* = nullptr) = delete;
+
+ parser (parser&&) = delete;
+ parser (const parser&) = delete;
+
+ parser& operator= (parser&&) = delete;
+ parser& operator= (const parser&) = delete;
+
+ // Event iteration.
+ //
+
+ // Return the next event or nullopt if end of input is reached.
+ //
+ // In the single-value parsing mode (default) the parsing code could
+ // look like this:
+ //
+ // while (optional<event> e = p.next ())
+ // {
+ // switch (*e)
+ // {
+ // // ...
+ // }
+ // }
+ //
+ // In the multi-value mode the parser additionally returns nullopt after
+ // every JSON value parsed (so there will be two nullopt's after the
+ // last JSON value, the second indicating the end of input).
+ //
+ // One way to perform multi-value parsing is with the help of the peek()
+ // function (see below):
+ //
+ // while (p.peek ())
+ // {
+ // while (optional<event> e = p.next ())
+ // {
+ // switch (*e)
+ // {
+ // //...
+ // }
+ // }
+ // }
+ //
+ // Note that while the single-value mode will always parse exactly one
+ // value, the multi-value mode will accept zero values in which case a
+ // single nullopt is returned.
+ //
+ optional<event>
+ next ();
+
+ // The range-based for loop support.
+ //
+ // In the single-value parsing mode (default) the parsing code could
+ // look like this:
+ //
+ // for (event e: p)
+ // {
+ // switch (e)
+ // {
+ // //...
+ // }
+ // }
+ //
+ // And in the multi-value mode (see next() for more information) like
+ // this:
+ //
+ // while (p.peek ())
+ // {
+ // for (event e: p)
+ // {
+ // switch (e)
+ // {
+ // //...
+ // }
+ // }
+ // }
+ //
+ // Note that generally, the iterator interface doesn't make much sense
+ // for the parser so for now we have an implementation that is just
+ // enough for the range-based for.
+ //
+ struct iterator;
+
+      // Note that begin() calls next() and so consumes the first event.
+      //
+      iterator
+      begin ()
+      {
+        return iterator (this, next ());
+      }
+
+      // The end iterator holds no parser and no event.
+      //
+      iterator
+      end ()
+      {
+        return iterator (nullptr, nullopt);
+      }
+
+ // Return the next event without considering it parsed. In other words,
+ // after this call, any subsequent calls to peek() and the next call to
+ // next() (if any) will all return the same event.
+ //
+ // Note that the name, value, and line corresponding to the peeked event
+ // are not accessible with name(), value() and line(); these functions
+ // will still return values corresponding to the most recent call to
+ // next(). The peeked values, however, can be accessed in the raw form
+ // using data().
+ //
+ optional<event>
+ peek ();
+
+
+ // Event data access.
+ //
+
+ // Return the object member name.
+ //
+ const std::string&
+ name ();
+
+ // Any value (string, number, boolean, and null) can be retrieved as a
+ // string. Calling this function after any non-value events is illegal.
+ //
+ // Note that the value is returned as a non-const string reference and
+ // you are allowed to move the value out of it. However, this should not
+ // be done unnecessarily or in cases where the small string optimization
+ // is likely since the string's buffer is reused to store subsequent
+ // values.
+ //
+ std::string&
+ value ();
+
+ // Convert the value to an integer, floating point, or bool. Throw
+ // invalid_json_input if the conversion is impossible without a loss.
+ //
+ template <typename T>
+ T
+ value () const;
+
+ // Return the value or object member name in the raw form.
+ //
+ // Calling this function on non-value/name events is legal in which case
+ // NULL is returned. Note also that the returned data corresponds to the
+ // most recent event, whether peeked or parsed.
+ //
+      std::pair<const char*, std::size_t>
+      data () const
+      {
+        // Cached raw pointer and size.
+        //
+        return std::pair<const char*, std::size_t> (raw_s_, raw_n_);
+      }
+
+
+ // Higher-level API suitable for parsing specific JSON vocabularies.
+ //
+ // The API summary:
+ //
+ // void next_expect (event);
+ // bool next_expect (event primary, event secondary);
+ //
+ // void next_expect_name (string name, bool skip_unknown = false);
+ //
+ // std::string& next_expect_string ();
+ // T next_expect_string<T> ();
+ // std::string& next_expect_number ();
+ // T next_expect_number<T> ();
+ // std::string& next_expect_boolean ();
+ // T next_expect_boolean<T>();
+ //
+ // std::string* next_expect_string_null ();
+ // optional<T> next_expect_string_null<T> ();
+ // std::string* next_expect_number_null ();
+ // optional<T> next_expect_number_null<T> ();
+ // std::string* next_expect_boolean_null ();
+ // optional<T> next_expect_boolean_null<T>();
+ //
+ // std::string& next_expect_member_string (string name, bool = false);
+ // T next_expect_member_string<T> (string name, bool = false);
+ // std::string& next_expect_member_number (string name, bool = false);
+ // T next_expect_member_number<T> (string name, bool = false);
+ // std::string& next_expect_member_boolean (string name, bool = false);
+ // T next_expect_member_boolean<T>(string name, bool = false);
+ //
+ // std::string* next_expect_member_string_null (string, bool = false);
+ // optional<T> next_expect_member_string_null<T> (string, bool = false);
+ // std::string* next_expect_member_number_null (string, bool = false);
+ // optional<T> next_expect_member_number_null<T> (string, bool = false);
+ // std::string* next_expect_member_boolean_null (string, bool = false);
+ // optional<T> next_expect_member_boolean_null<T>(string, bool = false);
+ //
+ // void next_expect_member_object (string name, bool = false);
+ // bool next_expect_member_object_null(string name, bool = false);
+ //
+ // void next_expect_member_array (string name, bool = false);
+ // bool next_expect_member_array_null(string name, bool = false);
+ //
+ // void next_expect_value_skip();
+
+ // Get the next event and make sure that it's what's expected: primary
+ // or, if specified, secondary event. If it is not either, then throw
+ // invalid_json_input with appropriate description. Return true if it is
+ // primary.
+ //
+ // The secondary expected event is primarily useful for handling
+ // optional members. For example:
+ //
+ // while (p.next_expect (event::name, event::end_object))
+ // {
+ // // Handle object member.
+ // }
+ //
+ // Or homogeneous arrays:
+ //
+ // while (p.next_expect (event::string, event::end_array))
+ // {
+ // // Handle array element.
+ // }
+ //
+ // Or values that can be null:
+ //
+ // if (p.next_expect (event::begin_object, event::null))
+ // {
+ // // Parse object.
+ // }
+ //
+ bool
+ next_expect (event primary, optional<event> secondary = nullopt);
+
+ // Get the next event and make sure it is event::name and the object
+ // member matches the specified name. If either is not, then throw
+ // invalid_json_input with appropriate description. If skip_unknown is
+ // true, then skip over unknown member names until a match is found.
+ //
+ void
+ next_expect_name (const char* name, bool skip_unknown = false);
+
+ void
+ next_expect_name (const std::string&, bool = false);
+
+ // Get the next event and make sure it is event::<type> returning its
+ // value similar to the value() functions. If it is not, then throw
+ // invalid_json_input with appropriate description.
+ //
+ std::string&
+ next_expect_string ();
+
+ template <typename T>
+ T
+ next_expect_string ();
+
+ std::string&
+ next_expect_number ();
+
+ template <typename T>
+ T
+ next_expect_number ();
+
+ std::string&
+ next_expect_boolean ();
+
+ template <typename T>
+ T
+ next_expect_boolean ();
+
+ // Similar to next_expect_<type>() but in addition to event::<type> also
+ // allow event::null, in which case no value is returned.
+ //
+ std::string*
+ next_expect_string_null ();
+
+ template <typename T>
+ optional<T>
+ next_expect_string_null ();
+
+ std::string*
+ next_expect_number_null ();
+
+ template <typename T>
+ optional<T>
+ next_expect_number_null ();
+
+ std::string*
+ next_expect_boolean_null ();
+
+ template <typename T>
+ optional<T>
+ next_expect_boolean_null ();
+
+ // Call next_expect_name() followed by next_expect_<type>[_null]()
+ // returning its result. In other words, parse the entire object member
+ // with the specified name and of type <type>, returning its value.
+
+ // next_expect_member_string()
+ //
+ std::string&
+ next_expect_member_string (const char* name, bool skip_unknown = false);
+
+ std::string&
+ next_expect_member_string (const std::string&, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_string (const char*, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_string (const std::string&, bool = false);
+
+ // next_expect_member_number()
+ //
+ std::string&
+ next_expect_member_number (const char* name, bool skip_unknown = false);
+
+ std::string&
+ next_expect_member_number (const std::string&, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_number (const char*, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_number (const std::string&, bool = false);
+
+ // next_expect_member_boolean()
+ //
+ std::string&
+ next_expect_member_boolean (const char* name, bool skip_unknown = false);
+
+ std::string&
+ next_expect_member_boolean (const std::string&, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_boolean (const char*, bool = false);
+
+ template <typename T>
+ T
+ next_expect_member_boolean (const std::string&, bool = false);
+
+ // next_expect_member_string_null()
+ //
+ std::string*
+ next_expect_member_string_null (const char*, bool = false);
+
+ std::string*
+ next_expect_member_string_null (const std::string&, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_string_null (const char*, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_string_null (const std::string&, bool = false);
+
+ // next_expect_member_number_null()
+ //
+ std::string*
+ next_expect_member_number_null (const char*, bool = false);
+
+ std::string*
+ next_expect_member_number_null (const std::string&, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_number_null (const char*, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_number_null (const std::string&, bool = false);
+
+ // next_expect_member_boolean_null()
+ //
+ std::string*
+ next_expect_member_boolean_null (const char*, bool = false);
+
+ std::string*
+ next_expect_member_boolean_null (const std::string&, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_boolean_null (const char*, bool = false);
+
+ template <typename T>
+ optional<T>
+ next_expect_member_boolean_null (const std::string&, bool = false);
+
+ // Call next_expect_name() followed by next_expect(event::begin_object).
+ // In the _null version also allow event::null, in which case return
+ // false.
+ //
+ void
+ next_expect_member_object (const char* name, bool skip_unknown = false);
+
+ void
+ next_expect_member_object (const std::string&, bool = false);
+
+ bool
+ next_expect_member_object_null (const char*, bool = false);
+
+ bool
+ next_expect_member_object_null (const std::string&, bool = false);
+
+ // Call next_expect_name() followed by next_expect(event::begin_array).
+ // In the _null version also allow event::null, in which case return
+ // false.
+ //
+ void
+ next_expect_member_array (const char* name, bool skip_unknown = false);
+
+ void
+ next_expect_member_array (const std::string&, bool = false);
+
+ bool
+ next_expect_member_array_null (const char*, bool = false);
+
+ bool
+ next_expect_member_array_null (const std::string&, bool = false);
+
+ // Get the next event and make sure it is the beginning of a value
+ // (begin_object, begin_array, string, number, boolean, null). If it is
+ // not, then throw invalid_json_input with appropriate description.
+ // Otherwise, skip until the end of the value, recursively in case of
+ // object and array.
+ //
+ // This function is primarily useful for skipping unknown object
+ // members, for example:
+ //
+ // while (p.next_expect (event::name, event::end_object))
+ // {
+ // if (p.name () == "known")
+ // {
+ // // Handle known member.
+ // }
+ // else
+ // p.next_expect_value_skip ();
+ // }
+ //
+ void
+ next_expect_value_skip ();
+
+ // Parsing location.
+ //
+
+ // Return the line number (1-based) corresponding to the most recently
+ // parsed event or 0 if nothing has been parsed yet.
+ //
+ std::uint64_t
+ line () const noexcept;
+
+ // Return the column number (1-based) corresponding to the beginning of
+ // the most recently parsed event or 0 if nothing has been parsed yet.
+ //
+ std::uint64_t
+ column () const noexcept;
+
+ // Return the position (byte offset) pointing immediately after the most
+ // recently parsed event or 0 if nothing has been parsed yet.
+ //
+ std::uint64_t
+ position () const noexcept;
+
+ // Implementation details.
+ //
+ public:
+      // Minimal iterator, just enough for the range-based for loop.
+      //
+      // It holds the parser and the current event. Dereferencing returns the
+      // current event and incrementing fetches the next one with next().
+      //
+      struct iterator
+      {
+        using value_type = event;
+
+        explicit
+        iterator (parser* p = nullptr, optional<event> e = nullopt)
+            : p_ (p), e_ (e) {}
+
+        event
+        operator* () const
+        {
+          return *e_;
+        }
+
+        iterator&
+        operator++ ()
+        {
+          e_ = p_->next ();
+          return *this;
+        }
+
+        // Comparison only makes sense when comparing to end (eof): two
+        // iterators are equal if neither holds an event.
+        //
+        bool
+        operator== (iterator y) const
+        {
+          return !e_ && !y.e_;
+        }
+
+        bool
+        operator!= (iterator y) const
+        {
+          return !(*this == y);
+        }
+
+      private:
+        parser* p_;
+        optional<event> e_;
+      };
+
+ // Input stream state: the stream being read and an optional
+ // exception_ptr.
+ //
+ // NOTE(review): presumably the exception member holds an exception
+ // captured while reading from the stream so that it can be rethrown
+ // later -- confirm against parser.cxx.
+ //
+ struct stream
+ {
+ std::istream* is;
+ optional<std::exception_ptr> exception;
+ };
+
+ [[noreturn]] void
+ throw_invalid_value (const char* type, const char*, std::size_t) const;
+
+ ~parser ();
+
+ private:
+ // Functionality shared by next() and peek().
+ //
+ json_type
+ next_impl ();
+
+ // Translate the event produced by the most recent call to next_impl().
+ //
+ // Note that the underlying parser state determines whether name or
+ // value is returned when translating JSON_STRING.
+ //
+ optional<event>
+ translate (json_type) const noexcept;
+
+ // Cache state (name/value) produced by the most recent call to
+ // next_impl().
+ //
+ void
+ cache_parsed_data ();
+
+ // Cache the location numbers as determined by the most recent call to
+ // next_impl().
+ //
+ void
+ cache_parsed_location () noexcept;
+
+ // Return true if this is a value event (string, number, boolean, or
+ // null).
+ //
+ static bool
+ value_event (optional<event>) noexcept;
+
+ stream stream_;
+
+ bool multi_value_;
+ const char* separators_;
+
+ // The *_p_ members indicate whether the value is present (cached).
+ // Note: not using optional not to reallocate the string's buffer.
+ //
+ std::string name_; bool name_p_ = false;
+ std::string value_; bool value_p_ = false;
+ std::uint64_t line_, column_, position_; bool location_p_ = false;
+
+ optional<json_type> parsed_; // Current parsed event if any.
+ optional<json_type> peeked_; // Current peeked event if any.
+
+ ::json_stream impl_[1];
+
+ // Cached raw value.
+ //
+ const char* raw_s_;
+ std::size_t raw_n_;
+ };
+ }
+}
+
+#include <libbutl/json/parser.ixx>
diff --git a/libbutl/json/parser.ixx b/libbutl/json/parser.ixx
new file mode 100644
index 0000000..cf6dca3
--- /dev/null
+++ b/libbutl/json/parser.ixx
@@ -0,0 +1,552 @@
+#include <cerrno>
+#include <limits> // numeric_limits
+#include <utility> // move()
+#include <cassert>
+#include <cstdlib> // strto*()
+#include <type_traits> // enable_if, is_*
+#include <cstring> // strlen()
+
+namespace butl
+{
+ namespace json
+ {
+    // Construct from the description passed as std::string by delegating to
+    // the C-string version below.
+    //
+    // Note: std::move() is qualified explicitly rather than relying on ADL
+    // (consistent with the constructor below and avoids compiler warnings
+    // about unqualified calls to std::move()).
+    //
+    inline invalid_json_input::
+    invalid_json_input (std::string n,
+                        std::uint64_t l,
+                        std::uint64_t c,
+                        std::uint64_t p,
+                        const std::string& d)
+        : invalid_json_input (std::move (n), l, c, p, d.c_str ())
+    {
+    }
+
+    // Construct from the input name (n), line (l), column (c), position (p),
+    // and description (d). The description is passed to the
+    // std::invalid_argument base.
+    //
+    inline invalid_json_input::
+    invalid_json_input (std::string n,
+                        std::uint64_t l,
+                        std::uint64_t c,
+                        std::uint64_t p,
+                        const char* d)
+        : invalid_argument (d),
+          name (std::move (n)),
+          line (l), column (c), position (p)
+    {
+    }
+
+    // Delegating constructors.
+    //
+    // Each of them reduces its arguments to the form that takes the input
+    // name as a C-string (and, for the in-memory variants, the text as a
+    // pointer and size) and forwards the multi-value flag and separators
+    // unchanged.
+    //
+
+    // Stream input, name as std::string.
+    //
+    inline parser::
+    parser (std::istream& is,
+            const std::string& name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (is, name.c_str (), multi_value, separators)
+    {
+    }
+
+    // Memory buffer input, name as std::string.
+    //
+    inline parser::
+    parser (const void* text,
+            std::size_t size,
+            const std::string& name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (text, size, name.c_str (), multi_value, separators)
+    {
+    }
+
+    // String input, name as std::string.
+    //
+    inline parser::
+    parser (const std::string& text,
+            const std::string& name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (text.data (),
+                  text.size (),
+                  name.c_str (),
+                  multi_value,
+                  separators)
+    {
+    }
+
+    // String input, name as C-string.
+    //
+    inline parser::
+    parser (const std::string& text,
+            const char* name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (text.data (), text.size (), name, multi_value, separators)
+    {
+    }
+
+    // C-string input (size determined with strlen()), name as std::string.
+    //
+    inline parser::
+    parser (const char* text,
+            const std::string& name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (text,
+                  std::strlen (text),
+                  name.c_str (),
+                  multi_value,
+                  separators)
+    {
+    }
+
+    // C-string input (size determined with strlen()), name as C-string.
+    //
+    inline parser::
+    parser (const char* text,
+            const char* name,
+            bool multi_value,
+            const char* separators) noexcept
+        : parser (text, std::strlen (text), name, multi_value, separators)
+    {
+    }
+
+    // Return the cached member name, caching it first from the most
+    // recently parsed event if that hasn't been done yet.
+    //
+    inline const std::string& parser::
+    name ()
+    {
+      if (name_p_)
+        return name_;
+
+      // Not cached yet: there must be a parsed (and not peeked) event that
+      // hasn't been cached as a value.
+      //
+      assert (parsed_ && !peeked_ && !value_p_);
+      cache_parsed_data ();
+      assert (name_p_);
+
+      return name_;
+    }
+
+    // Return the cached value, caching it first from the most recently
+    // parsed event if that hasn't been done yet.
+    //
+    inline std::string& parser::
+    value ()
+    {
+      if (value_p_)
+        return value_;
+
+      // Not cached yet: there must be a parsed (and not peeked) event that
+      // hasn't been cached as a name.
+      //
+      assert (parsed_ && !peeked_ && !name_p_);
+      cache_parsed_data ();
+      assert (value_p_);
+
+      return value_;
+    }
+
+ // Note: one day we will be able to use C++17 from_chars() which was made
+ // exactly for this.
+ //
+    // Boolean: only the first character is examined ('t' means true and
+    // anything else means false). The size and parser are not used.
+    //
+    template <typename T>
+    inline typename std::enable_if<std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t, const parser&)
+    {
+      return b[0] == 't';
+    }
+
+    // Signed integers: convert with strtoll() (base 10) and fail via
+    // throw_invalid_value() unless all n characters were consumed and the
+    // result is representable as T.
+    //
+    template <typename T>
+    inline typename std::enable_if<
+      std::is_integral<T>::value &&
+      std::is_signed<T>::value &&
+      !std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      using limits = std::numeric_limits<T>;
+
+      errno = 0; // We must clear it according to POSIX.
+
+      char* end (nullptr);
+      const std::int64_t r (strtoll (b, &end, 10)); // Can't throw.
+
+      const bool consumed (end != b && end == b + n);
+      const bool in_range (errno != ERANGE &&
+                           r >= limits::min () &&
+                           r <= limits::max ());
+
+      if (!consumed || !in_range)
+        p.throw_invalid_value ("signed integer", b, n);
+
+      return static_cast<T> (r);
+    }
+
+    // Unsigned integers: convert with strtoull() (base 10) and fail via
+    // throw_invalid_value() unless all n characters were consumed and the
+    // result is representable as T.
+    //
+    template <typename T>
+    inline typename std::enable_if<
+      std::is_integral<T>::value &&
+      std::is_unsigned<T>::value &&
+      !std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      std::uint64_t v (strtoull (b, &e, 10)); // Can't throw.
+
+      // Note that strtoull() accepts a leading minus sign, in which case it
+      // returns the negated value converted to unsigned without setting
+      // ERANGE (so, for example, `-1` becomes 2^64-1). Such a value cannot
+      // be represented without a loss so detect and reject it (but still
+      // allow the negative zero).
+      //
+      // Since by the time we check for the minus sign we know the entire
+      // [b, b + n) range was consumed by strtoull(), a `-` anywhere in it
+      // can only be the sign.
+      //
+      if (e == b || e != b + n || errno == ERANGE ||
+          (v != 0 && std::memchr (b, '-', n) != nullptr) ||
+          v > std::numeric_limits<T>::max ())
+        p.throw_invalid_value ("unsigned integer", b, n);
+
+      return static_cast<T> (v);
+    }
+
+    // Floating point: convert with the strto*() function corresponding to
+    // the type and fail via throw_invalid_value() unless all n characters
+    // were consumed and no range error was reported.
+    //
+
+    // float
+    //
+    template <typename T>
+    inline typename std::enable_if<std::is_same<T, float>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      errno = 0; // We must clear it according to POSIX.
+
+      char* end (nullptr);
+      const T v (std::strtof (b, &end));
+
+      const bool bad (end == b || end != b + n || errno == ERANGE);
+
+      if (bad)
+        p.throw_invalid_value ("float", b, n);
+
+      return v;
+    }
+
+    // double
+    //
+    template <typename T>
+    inline typename std::enable_if<std::is_same<T, double>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      errno = 0; // We must clear it according to POSIX.
+
+      char* end (nullptr);
+      const T v (std::strtod (b, &end));
+
+      const bool bad (end == b || end != b + n || errno == ERANGE);
+
+      if (bad)
+        p.throw_invalid_value ("double", b, n);
+
+      return v;
+    }
+
+    // long double
+    //
+    template <typename T>
+    inline typename std::enable_if<std::is_same<T, long double>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      errno = 0; // We must clear it according to POSIX.
+
+      char* end (nullptr);
+      const T v (std::strtold (b, &end));
+
+      const bool bad (end == b || end != b + n || errno == ERANGE);
+
+      if (bad)
+        p.throw_invalid_value ("long double", b, n);
+
+      return v;
+    }
+
+    // Convert the current value to T using parse_value(). If the value has
+    // already been cached as a string, convert that. Otherwise, convert the
+    // raw data of the most recently parsed event directly (which must be a
+    // value event).
+    //
+    template <typename T>
+    inline T parser::
+    value () const
+    {
+      if (value_p_)
+        return parse_value<T> (value_.data (), value_.size (), *this);
+
+      assert (parsed_ && !peeked_ && value_event (translate (*parsed_)));
+
+      return parse_value<T> (raw_s_, raw_n_, *this);
+    }
+
+ // Forward to the C-string version passing the name as n.c_str() and the
+ // skip-unknown flag unchanged.
+ //
+ inline void parser::
+ next_expect_name (const std::string& n, bool su)
+ {
+ next_expect_name (n.c_str (), su);
+ }
+
+ // next_expect_<type>()
+ //
+ // Each function calls next_expect() with the corresponding event and then
+ // returns the value: the non-template versions as a string reference via
+ // value() and the template versions converted to T via value<T>().
+ //
+
+ // Expect event::string.
+ //
+ inline std::string& parser::
+ next_expect_string ()
+ {
+ next_expect (event::string);
+ return value ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_string ()
+ {
+ next_expect (event::string);
+ return value<T> ();
+ }
+
+ // Expect event::number.
+ //
+ inline std::string& parser::
+ next_expect_number ()
+ {
+ next_expect (event::number);
+ return value ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_number ()
+ {
+ next_expect (event::number);
+ return value<T> ();
+ }
+
+ // Expect event::boolean.
+ //
+ inline std::string& parser::
+ next_expect_boolean ()
+ {
+ next_expect (event::boolean);
+ return value ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_boolean ()
+ {
+ next_expect (event::boolean);
+ return value<T> ();
+ }
+
+    // next_expect_<type>_null()
+    //
+    // Each function calls next_expect() with the corresponding event as
+    // primary and event::null as secondary. If the result is true (primary),
+    // then return the value (pointer to the string for the non-template
+    // versions, converted to T for the template versions). Otherwise return
+    // nullptr/nullopt.
+    //
+
+    // event::string or event::null
+    //
+    inline std::string* parser::
+    next_expect_string_null ()
+    {
+      if (next_expect (event::string, event::null))
+        return &value ();
+
+      return nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_string_null ()
+    {
+      if (next_expect (event::string, event::null))
+        return optional<T> (value<T> ());
+
+      return nullopt;
+    }
+
+    // event::number or event::null
+    //
+    inline std::string* parser::
+    next_expect_number_null ()
+    {
+      if (next_expect (event::number, event::null))
+        return &value ();
+
+      return nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_number_null ()
+    {
+      if (next_expect (event::number, event::null))
+        return optional<T> (value<T> ());
+
+      return nullopt;
+    }
+
+    // event::boolean or event::null
+    //
+    inline std::string* parser::
+    next_expect_boolean_null ()
+    {
+      if (next_expect (event::boolean, event::null))
+        return &value ();
+
+      return nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_boolean_null ()
+    {
+      if (next_expect (event::boolean, event::null))
+        return optional<T> (value<T> ());
+
+      return nullopt;
+    }
+
+ // next_expect_member_string()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of next_expect_string()
+ // (or next_expect_string<T>()). The std::string overloads forward to
+ // their C-string counterparts.
+ //
+ inline std::string& parser::
+ next_expect_member_string (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_string ();
+ }
+
+ inline std::string& parser::
+ next_expect_member_string (const std::string& n, bool su)
+ {
+ return next_expect_member_string (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_string (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_string<T> ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_string (const std::string& n, bool su)
+ {
+ return next_expect_member_string<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_number()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of next_expect_number()
+ // (or next_expect_number<T>()). The std::string overloads forward to
+ // their C-string counterparts.
+ //
+ inline std::string& parser::
+ next_expect_member_number (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_number ();
+ }
+
+ inline std::string& parser::
+ next_expect_member_number (const std::string& n, bool su)
+ {
+ return next_expect_member_number (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_number (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_number<T> ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_number (const std::string& n, bool su)
+ {
+ return next_expect_member_number<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_boolean()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of next_expect_boolean()
+ // (or next_expect_boolean<T>()). The std::string overloads forward to
+ // their C-string counterparts.
+ //
+ inline std::string& parser::
+ next_expect_member_boolean (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_boolean ();
+ }
+
+ inline std::string& parser::
+ next_expect_member_boolean (const std::string& n, bool su)
+ {
+ return next_expect_member_boolean (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_boolean (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_boolean<T> ();
+ }
+
+ template <typename T>
+ inline T parser::
+ next_expect_member_boolean (const std::string& n, bool su)
+ {
+ return next_expect_member_boolean<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_string_null()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of
+ // next_expect_string_null() (or next_expect_string_null<T>()). The
+ // std::string overloads forward to their C-string counterparts.
+ //
+ inline std::string* parser::
+ next_expect_member_string_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_string_null ();
+ }
+
+ inline std::string* parser::
+ next_expect_member_string_null (const std::string& n, bool su)
+ {
+ return next_expect_member_string_null (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_string_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_string_null<T> ();
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_string_null (const std::string& n, bool su)
+ {
+ return next_expect_member_string_null<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_number_null()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of
+ // next_expect_number_null() (or next_expect_number_null<T>()). The
+ // std::string overloads forward to their C-string counterparts.
+ //
+ inline std::string* parser::
+ next_expect_member_number_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_number_null ();
+ }
+
+ inline std::string* parser::
+ next_expect_member_number_null (const std::string& n, bool su)
+ {
+ return next_expect_member_number_null (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_number_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_number_null<T> ();
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_number_null (const std::string& n, bool su)
+ {
+ return next_expect_member_number_null<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_boolean_null()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then return the result of
+ // next_expect_boolean_null() (or next_expect_boolean_null<T>()). The
+ // std::string overloads forward to their C-string counterparts.
+ //
+ inline std::string* parser::
+ next_expect_member_boolean_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_boolean_null ();
+ }
+
+ inline std::string* parser::
+ next_expect_member_boolean_null (const std::string& n, bool su)
+ {
+ return next_expect_member_boolean_null (n.c_str (), su);
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_boolean_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect_boolean_null<T> ();
+ }
+
+ template <typename T>
+ inline optional<T> parser::
+ next_expect_member_boolean_null (const std::string& n, bool su)
+ {
+ return next_expect_member_boolean_null<T> (n.c_str (), su);
+ }
+
+ // next_expect_member_object[_null]()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then next_expect() with event::begin_object. The
+ // _null version additionally passes event::null as the secondary event
+ // and returns the result of next_expect(). The std::string overloads
+ // forward to their C-string counterparts.
+ //
+ inline void parser::
+ next_expect_member_object (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ next_expect (event::begin_object);
+ }
+
+ inline void parser::
+ next_expect_member_object (const std::string& n, bool su)
+ {
+ next_expect_member_object (n.c_str (), su);
+ }
+
+ inline bool parser::
+ next_expect_member_object_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect (event::begin_object, event::null);
+ }
+
+ inline bool parser::
+ next_expect_member_object_null (const std::string& n, bool su)
+ {
+ return next_expect_member_object_null (n.c_str (), su);
+ }
+
+ // next_expect_member_array[_null]()
+ //
+ // The C-string overloads call next_expect_name() with the name and the
+ // skip-unknown flag and then next_expect() with event::begin_array. The
+ // _null version additionally passes event::null as the secondary event
+ // and returns the result of next_expect(). The std::string overloads
+ // forward to their C-string counterparts.
+ //
+ inline void parser::
+ next_expect_member_array (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ next_expect (event::begin_array);
+ }
+
+ inline void parser::
+ next_expect_member_array (const std::string& n, bool su)
+ {
+ next_expect_member_array (n.c_str (), su);
+ }
+
+ inline bool parser::
+ next_expect_member_array_null (const char* n, bool su)
+ {
+ next_expect_name (n, su);
+ return next_expect (event::begin_array, event::null);
+ }
+
+ inline bool parser::
+ next_expect_member_array_null (const std::string& n, bool su)
+ {
+ return next_expect_member_array_null (n.c_str (), su);
+ }
+ }
+}
diff --git a/libbutl/json/pdjson.c b/libbutl/json/pdjson.c
new file mode 100644
index 0000000..ae10c95
--- /dev/null
+++ b/libbutl/json/pdjson.c
@@ -0,0 +1,1044 @@
+#ifndef _POSIX_C_SOURCE
+# define _POSIX_C_SOURCE 200112L
+#elif _POSIX_C_SOURCE < 200112L
+# error incompatible _POSIX_C_SOURCE level
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifndef PDJSON_H
+# include "pdjson.h"
+#endif
+
+#define JSON_FLAG_ERROR (1u << 0)
+#define JSON_FLAG_STREAMING (1u << 1)
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+
+#define json_error(json, format, ...) \
+ if (!(json->flags & JSON_FLAG_ERROR)) { \
+ json->flags |= JSON_FLAG_ERROR; \
+ _snprintf_s(json->errmsg, sizeof(json->errmsg), \
+ _TRUNCATE, \
+ format, \
+ __VA_ARGS__); \
+ } \
+
+#else
+
+#define json_error(json, format, ...) \
+ if (!(json->flags & JSON_FLAG_ERROR)) { \
+ json->flags |= JSON_FLAG_ERROR; \
+ snprintf(json->errmsg, sizeof(json->errmsg), \
+ format, \
+ __VA_ARGS__); \
+ } \
+
+#endif /* _MSC_VER */
+
+/* See also PDJSON_STACK_MAX below. */
+#ifndef PDJSON_STACK_INC
+# define PDJSON_STACK_INC 4
+#endif
+
+/* One entry of the nesting stack: an enclosing object or array. */
+struct json_stack {
+ enum json_type type; /* JSON_OBJECT or JSON_ARRAY, as pushed by read_value(). */
+ long count; /* Events seen at this level so far; incremented by json_next(). */
+};
+
+/* Push a new nesting level of the given type onto the stack, growing the
+ * stack by PDJSON_STACK_INC entries if it is full.
+ *
+ * Returns type on success. On failure (PDJSON_STACK_MAX exceeded or the
+ * allocator returned NULL) records the error and returns JSON_ERROR.
+ *
+ * The new top index is only stored in json->stack_top once the entry is
+ * known to exist. A failed push therefore leaves the stack exactly as it
+ * was, and json_get_depth()/json_get_context() never index past the
+ * allocated entries.
+ */
+static enum json_type
+push(json_stream *json, enum json_type type)
+{
+    /* stack_top is (size_t)-1 for an empty stack, so this wraps to 0. */
+    size_t top = json->stack_top + 1;
+
+#ifdef PDJSON_STACK_MAX
+    if (top > PDJSON_STACK_MAX) {
+        json_error(json, "%s", "maximum depth of nesting reached");
+        return JSON_ERROR;
+    }
+#endif
+
+    if (top >= json->stack_size) {
+        struct json_stack *stack;
+        size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack);
+        stack = (struct json_stack *)json->alloc.realloc(json->stack, size);
+        if (stack == NULL) {
+            json_error(json, "%s", "out of memory");
+            return JSON_ERROR;
+        }
+
+        json->stack_size += PDJSON_STACK_INC;
+        json->stack = stack;
+    }
+
+    json->stack_top = top;
+    json->stack[top].type = type;
+    json->stack[top].count = 0;
+
+    return type;
+}
+
+/* Leave the current nesting level, which must be of the expected type, and
+ * return the matching end event (JSON_ARRAY_END or JSON_OBJECT_END). If
+ * there is no stack or the top entry has a different type, record an error
+ * mentioning the byte c and return JSON_ERROR.
+ *
+ * Note: c is assumed not to be EOF.
+ */
+static enum json_type
+pop(json_stream *json, int c, enum json_type expected)
+{
+    if (json->stack != NULL && json->stack[json->stack_top].type == expected) {
+        json->stack_top--;
+        if (expected == JSON_ARRAY)
+            return JSON_ARRAY_END;
+        return JSON_OBJECT_END;
+    }
+
+    json_error(json, "unexpected byte '%c'", c);
+    return JSON_ERROR;
+}
+
+/* Return the byte at the current position of an in-memory buffer source
+ * without consuming it, or EOF once the position reaches the buffer length.
+ *
+ * The byte is converted through unsigned char so that the result is always
+ * in the 0-255 range, the same way fgetc() reports bytes. Returning the
+ * plain char would, where char is signed, yield negative values for bytes
+ * >= 0x80, and the byte 0xFF could compare equal to EOF.
+ */
+static int buffer_peek(struct json_source *source)
+{
+    if (source->position < source->source.buffer.length)
+        return (unsigned char)source->source.buffer.buffer[source->position];
+    else
+        return EOF;
+}
+
+/* Consume and return the next byte of a buffer source: peek at it through
+ * source->peek and, unless that yields EOF, advance the position by one.
+ */
+static int buffer_get(struct json_source *source)
+{
+    const int byte = source->peek(source);
+
+    if (byte == EOF)
+        return EOF;
+
+    source->position++;
+    return byte;
+}
+
+/* Consume and return the next byte of a FILE source using fgetc(),
+ * advancing the position by one unless EOF was returned.
+ */
+static int stream_get(struct json_source *source)
+{
+    const int byte = fgetc(source->source.stream.stream);
+
+    if (byte == EOF)
+        return EOF;
+
+    source->position++;
+    return byte;
+}
+
+/* Return the next byte of a FILE source without consuming it: read it with
+ * fgetc() and hand it straight back to the stream with ungetc(). The
+ * position is not changed.
+ */
+static int stream_peek(struct json_source *source)
+{
+    FILE *fp = source->source.stream.stream;
+    int byte = fgetc(fp);
+
+    ungetc(byte, fp);
+    return byte;
+}
+
+/* Put a stream into its initial state. This is shared by all the
+ * json_open_*() functions, which then fill in the source callbacks.
+ */
+static void init(json_stream *json)
+{
+    /* Default allocator: the C library. */
+    json->alloc.malloc = malloc;
+    json->alloc.realloc = realloc;
+    json->alloc.free = free;
+
+    /* Nothing read yet. */
+    json->source.position = 0;
+    json->ntokens = 0;
+    json->next = (enum json_type)0;
+
+    /* Location tracking: first line, no column cached. */
+    json->lineno = 1;
+    json->linepos = 0;
+    json->lineadj = 0;
+    json->linecon = 0;
+    json->colno = 0;
+
+    /* Streaming mode is on by default; no error recorded. */
+    json->flags = JSON_FLAG_STREAMING;
+    json->errmsg[0] = '\0';
+
+    /* Empty nesting stack ((size_t)-1 means "no entries"). */
+    json->stack = NULL;
+    json->stack_size = 0;
+    json->stack_top = -1;
+
+    /* The string/number buffer is allocated lazily by init_string(). */
+    json->data.string = NULL;
+    json->data.string_size = 0;
+    json->data.string_fill = 0;
+}
+
+/* Consume the remaining characters of a literal (pattern is the text after
+ * the already-read first character, e.g. "ull" for null). Return type if
+ * the input matches and JSON_ERROR, with an error recorded, at the first
+ * byte that differs or on end of input.
+ */
+static enum json_type
+is_match(json_stream *json, const char *pattern, enum json_type type)
+{
+    const char *want = pattern;
+
+    while (*want != '\0') {
+        int got = json->source.get(&json->source);
+
+        if (got != *want) {
+            if (got == EOF) {
+                json_error(json, "expected '%c' instead of end of text", *want);
+            } else {
+                json_error(json, "expected '%c' instead of byte '%c'", *want, got);
+            }
+            return JSON_ERROR;
+        }
+
+        want++;
+    }
+
+    return type;
+}
+
+/* Append one byte to the string/number buffer, doubling the buffer with
+ * the stream's realloc when it is full. Returns 0 on success and -1, with
+ * an error recorded, if the buffer could not be grown.
+ */
+static int pushchar(json_stream *json, int c)
+{
+    if (json->data.string_fill == json->data.string_size) {
+        size_t grown = json->data.string_size * 2;
+        char *bigger = (char *)json->alloc.realloc(json->data.string, grown);
+
+        if (bigger == NULL) {
+            json_error(json, "%s", "out of memory");
+            return -1;
+        }
+
+        json->data.string = bigger;
+        json->data.string_size = grown;
+    }
+
+    json->data.string[json->data.string_fill] = c;
+    json->data.string_fill++;
+    return 0;
+}
+
+/* Reset the string/number buffer to empty, allocating its initial 1024
+ * bytes on first use. Returns 0 on success and -1, with an error recorded,
+ * if the allocation fails.
+ */
+static int init_string(json_stream *json)
+{
+    json->data.string_fill = 0;
+
+    if (json->data.string != NULL) {
+        /* Reuse the existing buffer. */
+        json->data.string[0] = '\0';
+        return 0;
+    }
+
+    json->data.string_size = 1024;
+    json->data.string = (char *)json->alloc.malloc(json->data.string_size);
+    if (json->data.string == NULL) {
+        json_error(json, "%s", "out of memory");
+        return -1;
+    }
+
+    json->data.string[0] = '\0';
+    return 0;
+}
+
+/* Append the UTF-8 encoding of codepoint c to the string buffer.
+ *
+ * Returns 0 on success and -1 on any failure: a surrogate codepoint
+ * (d800-dfff), a codepoint of 0x110000 or above, or a failed pushchar().
+ * Every failure path returns -1, so the result follows the same 0/-1
+ * convention as the other helpers in this file.
+ */
+static int encode_utf8(json_stream *json, unsigned long c)
+{
+    unsigned char seq[4];
+    int len;
+    int i;
+
+    if (c < 0x80UL) {
+        seq[0] = (unsigned char)c;
+        len = 1;
+    } else if (c < 0x0800UL) {
+        seq[0] = (unsigned char)((c >> 6 & 0x1F) | 0xC0);
+        seq[1] = (unsigned char)((c >> 0 & 0x3F) | 0x80);
+        len = 2;
+    } else if (c < 0x010000UL) {
+        if (c >= 0xd800 && c <= 0xdfff) {
+            json_error(json, "invalid codepoint %06lx", c);
+            return -1;
+        }
+        seq[0] = (unsigned char)((c >> 12 & 0x0F) | 0xE0);
+        seq[1] = (unsigned char)((c >> 6 & 0x3F) | 0x80);
+        seq[2] = (unsigned char)((c >> 0 & 0x3F) | 0x80);
+        len = 3;
+    } else if (c < 0x110000UL) {
+        seq[0] = (unsigned char)((c >> 18 & 0x07) | 0xF0);
+        seq[1] = (unsigned char)((c >> 12 & 0x3F) | 0x80);
+        seq[2] = (unsigned char)((c >> 6 & 0x3F) | 0x80);
+        seq[3] = (unsigned char)((c >> 0 & 0x3F) | 0x80);
+        len = 4;
+    } else {
+        json_error(json, "unable to encode %06lx as UTF-8", c);
+        return -1;
+    }
+
+    /* Stop at the first byte that cannot be stored. */
+    for (i = 0; i < len; i++) {
+        if (pushchar(json, seq[i]) != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+/* Return the value (0-15) of the hexadecimal digit c, accepting both
+ * lower and upper case letters, or -1 if c is not a hexadecimal digit.
+ */
+static int hexchar(int c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+
+    return -1;
+}
+
+/* Read the four hexadecimal digits of a \uXXXX escape and return their
+ * value (0-0xffff). Returns -1, with an error recorded, on end of input
+ * or on a byte that is not a hexadecimal digit.
+ */
+static long
+read_unicode_cp(json_stream *json)
+{
+    long value = 0;
+    int ndigits;
+
+    for (ndigits = 0; ndigits < 4; ndigits++) {
+        int c = json->source.get(&json->source);
+        int digit;
+
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        }
+
+        digit = hexchar(c);
+        if (digit == -1) {
+            json_error(json, "invalid escape Unicode byte '%c'", c);
+            return -1;
+        }
+
+        /* Most significant digit first. */
+        value = (value << 4) | digit;
+    }
+
+    return value;
+}
+
+/* Decode a \uXXXX escape (the "\u" has already been consumed) and append
+ * the codepoint to the string buffer as UTF-8. A high surrogate must be
+ * followed by a second \uXXXX escape holding a low surrogate, and the two
+ * are combined into one codepoint. Returns 0 on success and non-zero, with
+ * an error recorded, otherwise.
+ */
+static int read_unicode(json_stream *json)
+{
+    long cp = read_unicode_cp(json);
+
+    if (cp == -1)
+        return -1;
+
+    /* A low surrogate may not appear on its own. */
+    if (cp >= 0xdc00 && cp <= 0xdfff) {
+        json_error(json, "dangling surrogate \\u%04lx", cp);
+        return -1;
+    }
+
+    if (cp >= 0xd800 && cp <= 0xdbff) {
+        /* High surrogate: the low half must follow as another escape. */
+        long hi = cp;
+        long lo;
+        int c;
+
+        c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        }
+        if (c != '\\') {
+            json_error(json, "invalid continuation for surrogate pair '%c', "
+                             "expected '\\'", c);
+            return -1;
+        }
+
+        c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        }
+        if (c != 'u') {
+            json_error(json, "invalid continuation for surrogate pair '%c', "
+                             "expected 'u'", c);
+            return -1;
+        }
+
+        lo = read_unicode_cp(json);
+        if (lo == -1)
+            return -1;
+
+        if (lo < 0xdc00 || lo > 0xdfff) {
+            json_error(json, "surrogate pair continuation \\u%04lx out "
+                             "of range (dc00-dfff)", lo);
+            return -1;
+        }
+
+        cp = ((hi - 0xd800) * 0x400) + ((lo - 0xdc00) + 0x10000);
+    }
+
+    return encode_utf8(json, cp);
+}
+
+/* Decode the escape sequence following a backslash inside a string and
+ * append the resulting character(s) to the string buffer. Returns 0 on
+ * success and -1, with an error recorded, on end of input, an unknown
+ * escape character, or a failure further down.
+ */
+static int
+read_escaped(json_stream *json)
+{
+    int c = json->source.get(&json->source);
+    int decoded;
+
+    switch (c) {
+    case EOF:
+        json_error(json, "%s", "unterminated string literal in escape");
+        return -1;
+    case 'u':
+        return read_unicode(json) != 0 ? -1 : 0;
+    case '\\': decoded = '\\'; break;
+    case 'b':  decoded = '\b'; break;
+    case 'f':  decoded = '\f'; break;
+    case 'n':  decoded = '\n'; break;
+    case 'r':  decoded = '\r'; break;
+    case 't':  decoded = '\t'; break;
+    case '/':  decoded = '/';  break;
+    case '"':  decoded = '"';  break;
+    default:
+        json_error(json, "invalid escaped byte '%c'", c);
+        return -1;
+    }
+
+    return pushchar(json, decoded) != 0 ? -1 : 0;
+}
+
+/* Return 1 if c may not appear unescaped inside a JSON string, that is, a
+ * control character below 0x20, a double quote, or a backslash. Return 0
+ * otherwise, including for negative values.
+ */
+static int
+char_needs_escaping(int c)
+{
+    if (c < 0)
+        return 0;
+
+    return (c < 0x20 || c == '"' || c == '\\') ? 1 : 0;
+}
+
+/* Given the first byte of a UTF-8 sequence, return the total length of the
+ * sequence in bytes (1-4), or 0 if the byte cannot start a sequence.
+ */
+static int
+utf8_seq_length(char byte)
+{
+    const unsigned char lead = (unsigned char) byte;
+
+    /* ASCII. */
+    if (lead < 0x80)
+        return 1;
+
+    /* 0x80-0xBF are continuation bytes; 0xC0 and 0xC1 could only start an
+     * overlong encoding of an ASCII byte. */
+    if (lead < 0xC2)
+        return 0;
+
+    if (lead <= 0xDF)
+        return 2;
+
+    if (lead <= 0xEF)
+        return 3;
+
+    if (lead <= 0xF4)
+        return 4;
+
+    /* 0xF5 and above: restricted or invalid in UTF-8. */
+    return 0;
+}
+
+/* Check that the length bytes starting at bytes form one well-formed UTF-8
+ * sequence. Returns non-zero if so and 0 otherwise, including for a null
+ * pointer, a zero length, or a length outside 1-4.
+ *
+ * The bytes are examined from the last one backwards: the switch is entered
+ * at the case for the sequence length and each case falls through to the
+ * next, so a 4-byte sequence has its fourth, third, second and first bytes
+ * checked in that order.
+ */
+static int
+is_legal_utf8(const unsigned char *bytes, int length)
+{
+ if (0 == bytes || 0 == length) return 0;
+
+ unsigned char a;
+ const unsigned char* srcptr = bytes + length;
+ switch (length)
+ {
+ default:
+ return 0;
+ // Everything else falls through when true.
+ case 4:
+ /* Fourth byte must be in the continuation range 0x80-0xBF. */
+ if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ /* FALLTHRU */
+ case 3:
+ /* Third byte must be in the continuation range 0x80-0xBF. */
+ if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ /* FALLTHRU */
+ case 2:
+ /* The allowed range of the second byte depends on the first byte. */
+ a = (*--srcptr);
+ switch (*bytes)
+ {
+ case 0xE0:
+ if (a < 0xA0 || a > 0xBF) return 0;
+ break;
+ case 0xED:
+ if (a < 0x80 || a > 0x9F) return 0;
+ break;
+ case 0xF0:
+ if (a < 0x90 || a > 0xBF) return 0;
+ break;
+ case 0xF4:
+ if (a < 0x80 || a > 0x8F) return 0;
+ break;
+ default:
+ if (a < 0x80 || a > 0xBF) return 0;
+ break;
+ }
+ /* FALLTHRU */
+ case 1:
+ /* First byte may not be a continuation byte or 0xC0/0xC1. */
+ if (*bytes >= 0x80 && *bytes < 0xC2) return 0;
+ }
+ /* First byte may not be above 0xF4. */
+ return *bytes <= 0xF4;
+}
+
+/* Read the rest of a multi-byte UTF-8 sequence whose first byte,
+ * next_char, has already been consumed, validate it, and append it to the
+ * string buffer. Each continuation byte read is added to lineadj so that
+ * the sequence counts as a single column. Returns 0 on success and -1,
+ * with an error recorded, otherwise.
+ */
+static int
+read_utf8(json_stream* json, int next_char)
+{
+    char seq[4];
+    int len;
+    int got;
+    int k;
+
+    len = utf8_seq_length(next_char);
+    if (len == 0)
+    {
+        json_error(json, "%s", "invalid UTF-8 character");
+        return -1;
+    }
+
+    seq[0] = next_char;
+    got = 1;
+    while (got < len)
+    {
+        int b = json->source.get(&json->source);
+        if (b == EOF)
+            break;
+
+        seq[got++] = b;
+        json->lineadj++;
+    }
+
+    /* Either the input ended early or the bytes are not well-formed. */
+    if (got < len || !is_legal_utf8((unsigned char*) seq, len))
+    {
+        json_error(json, "%s", "invalid UTF-8 text");
+        return -1;
+    }
+
+    for (k = 0; k < len; ++k)
+    {
+        if (pushchar(json, seq[k]) != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+/* Read a string value (the opening quote has already been consumed) into
+ * the string buffer, decoding escapes and validating UTF-8 on the way.
+ * Returns JSON_STRING once the closing quote is reached and the buffer is
+ * NUL-terminated, or JSON_ERROR with an error recorded.
+ */
+static enum json_type
+read_string(json_stream *json)
+{
+    if (init_string(json) != 0)
+        return JSON_ERROR;
+
+    for (;;) {
+        int c = json->source.get(&json->source);
+        int rc;
+
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal");
+            return JSON_ERROR;
+        }
+
+        /* Closing quote: terminate the buffer and finish. */
+        if (c == '"')
+            return pushchar(json, '\0') == 0 ? JSON_STRING : JSON_ERROR;
+
+        if (c == '\\') {
+            rc = read_escaped(json);
+        } else if ((unsigned) c >= 0x80) {
+            rc = read_utf8(json, c);
+        } else if (char_needs_escaping(c)) {
+            json_error(json, "%s", "unescaped control character in string");
+            return JSON_ERROR;
+        } else {
+            rc = pushchar(json, c);
+        }
+
+        if (rc != 0)
+            return JSON_ERROR;
+    }
+}
+
+/* Return non-zero if c is one of the ASCII digits '0' (48) to '9' (57). */
+static int
+is_digit(int c)
+{
+    return !(c < 48 /*0*/ || c > 57 /*9*/);
+}
+
+/* Consume a run of one or more digits from the source and append them to
+ * the string buffer. Returns 0 on success and -1 if a digit could not be
+ * stored or if there was no digit at all (in which case an error is
+ * recorded).
+ */
+static int
+read_digits(json_stream *json)
+{
+    int c;
+    unsigned nread;
+
+    for (nread = 0; ; nread++) {
+        c = json->source.peek(&json->source);
+        if (!is_digit(c))
+            break;
+
+        if (pushchar(json, json->source.get(&json->source)) != 0)
+            return -1;
+    }
+
+    if (nread != 0)
+        return 0;
+
+    if (c != EOF) {
+        json_error(json, "expected digit instead of byte '%c'", c);
+    } else {
+        json_error(json, "%s", "expected digit instead of end of text");
+    }
+    return -1;
+}
+
+/* Read a number whose first character c ('-' or a digit) has already been
+ * consumed, copying its text into the string buffer. The caller is expected
+ * to have called init_string(). Returns JSON_NUMBER once the text has been
+ * NUL-terminated, or JSON_ERROR with an error recorded.
+ *
+ * Characters following the number are only peeked at, not consumed, except
+ * for the invalid character after an exponent marker (see below).
+ */
+static enum json_type
+read_number(json_stream *json, int c)
+{
+ if (pushchar(json, c) != 0)
+ return JSON_ERROR;
+ if (c == '-') {
+ /* A minus sign must be followed by a digit; recurse to read the rest. */
+ c = json->source.get(&json->source);
+ if (is_digit(c)) {
+ return read_number(json, c);
+ } else {
+ if (c != EOF) {
+ json_error(json, "unexpected byte '%c' in number", c);
+ } else {
+ json_error(json, "%s", "unexpected end of text in number");
+ }
+ return JSON_ERROR;
+ }
+ } else if (strchr("123456789", c) != NULL) {
+ /* A non-zero first digit may be followed by more digits; a leading '0'
+ * skips this branch, so no further integer digits are read after it. */
+ c = json->source.peek(&json->source);
+ if (is_digit(c)) {
+ if (read_digits(json) != 0)
+ return JSON_ERROR;
+ }
+ }
+ /* Up to decimal or exponent has been read. */
+ c = json->source.peek(&json->source);
+ if (strchr(".eE", c) == NULL) {
+ if (pushchar(json, '\0') != 0)
+ return JSON_ERROR;
+ else
+ return JSON_NUMBER;
+ }
+ if (c == '.') {
+ /* Fraction: '.' followed by at least one digit. */
+ json->source.get(&json->source); // consume .
+ if (pushchar(json, c) != 0)
+ return JSON_ERROR;
+ if (read_digits(json) != 0)
+ return JSON_ERROR;
+ }
+ /* Check for exponent. */
+ c = json->source.peek(&json->source);
+ if (c == 'e' || c == 'E') {
+ json->source.get(&json->source); // consume e/E
+ if (pushchar(json, c) != 0)
+ return JSON_ERROR;
+ /* Optional sign, then at least one digit. */
+ c = json->source.peek(&json->source);
+ if (c == '+' || c == '-') {
+ json->source.get(&json->source); // consume
+ if (pushchar(json, c) != 0)
+ return JSON_ERROR;
+ if (read_digits(json) != 0)
+ return JSON_ERROR;
+ } else if (is_digit(c)) {
+ if (read_digits(json) != 0)
+ return JSON_ERROR;
+ } else {
+ json->source.get(&json->source); // consume (for column)
+ if (c != EOF) {
+ json_error(json, "unexpected byte '%c' in number", c);
+ } else {
+ json_error(json, "%s", "unexpected end of text in number");
+ }
+ return JSON_ERROR;
+ }
+ }
+ if (pushchar(json, '\0') != 0)
+ return JSON_ERROR;
+ else
+ return JSON_NUMBER;
+}
+
+/* Return true if c is one of the four JSON whitespace characters: tab
+ * (0x09), line feed (0x0a), carriage return (0x0d), or space (0x20).
+ */
+bool
+json_isspace(int c)
+{
+    return c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20;
+}
+
+/* Start a new line for location tracking: bump the line number, remember
+ * the current source position as the start of the line, and clear the
+ * per-line UTF-8 adjustments.
+ */
+static void newline(json_stream *json)
+{
+    json->linepos = json->source.position;
+    json->lineno++;
+    json->linecon = 0;
+    json->lineadj = 0;
+}
+
+/* Skip whitespace and return the first non-whitespace byte of the source
+ * (or EOF).
+ *
+ * Apart from the user-facing json_source_get(), this is the only place
+ * that has to do the newline housekeeping.
+ */
+static int next(json_stream *json)
+{
+    int c;
+
+    do {
+        c = json->source.get(&json->source);
+        if (c == '\n')
+            newline(json);
+    } while (json_isspace(c));
+
+    return c;
+}
+
+/* Read one value whose first (non-whitespace) byte c has already been
+ * consumed and return its event type: JSON_OBJECT or JSON_ARRAY for an
+ * opening brace/bracket (after pushing a nesting level), JSON_STRING,
+ * JSON_NUMBER, JSON_TRUE, JSON_FALSE or JSON_NULL for a scalar, or
+ * JSON_ERROR with an error recorded. Each call counts as one token.
+ */
+static enum json_type
+read_value(json_stream *json, int c)
+{
+ enum json_type type;
+ /* Column of the value's first byte, captured before reading the rest. */
+ size_t colno = json_get_column(json);
+
+ json->ntokens++;
+
+ switch (c) {
+ case EOF:
+ json_error(json, "%s", "unexpected end of text");
+ type = JSON_ERROR;
+ break;
+ case '{':
+ type = push(json, JSON_OBJECT);
+ break;
+ case '[':
+ type = push(json, JSON_ARRAY);
+ break;
+ case '"':
+ type = read_string(json);
+ break;
+ case 'n':
+ type = is_match(json, "ull", JSON_NULL);
+ break;
+ case 'f':
+ type = is_match(json, "alse", JSON_FALSE);
+ break;
+ case 't':
+ type = is_match(json, "rue", JSON_TRUE);
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '-':
+ type = init_string(json) == 0 ? read_number(json, c) : JSON_ERROR;
+ break;
+ default:
+ type = JSON_ERROR;
+ json_error(json, "unexpected byte '%c' in value", c);
+ break;
+ }
+
+ /* On success report the start column of the value; on error colno is left
+ * as is so that json_get_column() reflects the current position. */
+ if (type != JSON_ERROR)
+ json->colno = colno;
+
+ return type;
+}
+
+/* Return the next event without consuming it. The event is read with
+ * json_next() on the first call and cached in json->next, from where the
+ * following json_next() call returns it.
+ */
+enum json_type json_peek(json_stream *json)
+{
+    if (!json->next)
+        json->next = json_next(json);
+
+    return json->next;
+}
+
+/* Advance the parser and return the next event.
+ *
+ * Returns JSON_ERROR once an error has been recorded (and on every call
+ * after that until json_reset()), JSON_DONE when the top-level value is
+ * complete, and otherwise the type of the value, member name, or
+ * object/array begin/end that was read.
+ *
+ * Inside an object or array the count of the top stack entry drives what is
+ * expected next: in an array, a value or ']' first and then ',' or ']'; in
+ * an object, an odd count means a member name has just been read and ':'
+ * plus a value must follow, while a non-zero even count means ',' or '}'
+ * must follow.
+ */
+enum json_type json_next(json_stream *json)
+{
+ if (json->flags & JSON_FLAG_ERROR)
+ return JSON_ERROR;
+ /* Hand out the event cached by json_peek(), if any. */
+ if (json->next != 0) {
+ enum json_type next = json->next;
+ json->next = (enum json_type)0;
+ return next;
+ }
+
+ /* Forget the cached start column of the previous event. */
+ json->colno = 0;
+
+ /* A top-level value has been read and all nesting has been closed. */
+ if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
+
+ /* In the streaming mode leave any trailing whitespaces in the stream.
+ * This allows the user to validate any desired separation between
+ * values (such as newlines) using json_source_get/peek() with any
+ * remaining whitespaces ignored as leading when we parse the next
+ * value. */
+ if (!(json->flags & JSON_FLAG_STREAMING)) {
+ int c = next(json);
+ if (c != EOF) {
+ json_error(json, "expected end of text instead of byte '%c'", c);
+ return JSON_ERROR;
+ }
+ }
+
+ return JSON_DONE;
+ }
+ int c = next(json);
+ /* Not inside any object or array: read the top-level value. */
+ if (json->stack_top == (size_t)-1) {
+ if (c == EOF && (json->flags & JSON_FLAG_STREAMING))
+ return JSON_DONE;
+
+ return read_value(json, c);
+ }
+ if (json->stack[json->stack_top].type == JSON_ARRAY) {
+ if (json->stack[json->stack_top].count == 0) {
+ /* First element or an empty array. */
+ if (c == ']') {
+ return pop(json, c, JSON_ARRAY);
+ }
+ json->stack[json->stack_top].count++;
+ return read_value(json, c);
+ } else if (c == ',') {
+ json->stack[json->stack_top].count++;
+ return read_value(json, next(json));
+ } else if (c == ']') {
+ return pop(json, c, JSON_ARRAY);
+ } else {
+ if (c != EOF) {
+ json_error(json, "unexpected byte '%c'", c);
+ } else {
+ json_error(json, "%s", "unexpected end of text");
+ }
+ return JSON_ERROR;
+ }
+ } else if (json->stack[json->stack_top].type == JSON_OBJECT) {
+ if (json->stack[json->stack_top].count == 0) {
+ if (c == '}') {
+ return pop(json, c, JSON_OBJECT);
+ }
+
+ /* No member name/value pairs yet. */
+ enum json_type value = read_value(json, c);
+ if (value != JSON_STRING) {
+ /* read_value() has already recorded its own error for JSON_ERROR. */
+ if (value != JSON_ERROR)
+ json_error(json, "%s", "expected member name or '}'");
+ return JSON_ERROR;
+ } else {
+ json->stack[json->stack_top].count++;
+ return value;
+ }
+ } else if ((json->stack[json->stack_top].count % 2) == 0) {
+ /* Expecting comma followed by member name. */
+ if (c != ',' && c != '}') {
+ json_error(json, "%s", "expected ',' or '}' after member value");
+ return JSON_ERROR;
+ } else if (c == '}') {
+ return pop(json, c, JSON_OBJECT);
+ } else {
+ enum json_type value = read_value(json, next(json));
+ if (value != JSON_STRING) {
+ if (value != JSON_ERROR)
+ json_error(json, "%s", "expected member name");
+ return JSON_ERROR;
+ } else {
+ json->stack[json->stack_top].count++;
+ return value;
+ }
+ }
+ } else if ((json->stack[json->stack_top].count % 2) == 1) {
+ /* Expecting colon followed by value. */
+ if (c != ':') {
+ json_error(json, "%s", "expected ':' after member name");
+ return JSON_ERROR;
+ } else {
+ json->stack[json->stack_top].count++;
+ return read_value(json, next(json));
+ }
+ }
+ }
+ /* The top stack entry is neither an array nor an object. */
+ json_error(json, "%s", "invalid parser state");
+ return JSON_ERROR;
+}
+
+/* Prepare the stream for parsing another top-level value from the same
+ * source: empty the nesting stack, zero the token count, and clear the
+ * error flag and message. The source position, line tracking, and the
+ * allocated buffers are left untouched.
+ *
+ * NOTE(review): json->next is not cleared here, so an event cached by
+ * json_peek() but not yet consumed (including JSON_ERROR or JSON_DONE)
+ * would still be returned by the next json_next() call; confirm that
+ * callers always consume a peeked event before resetting.
+ */
+void json_reset(json_stream *json)
+{
+ json->stack_top = -1;
+ json->ntokens = 0;
+ json->flags &= ~JSON_FLAG_ERROR;
+ json->errmsg[0] = '\0';
+}
+
+/* Skip over the next value in its entirety: if the next event opens an
+ * object or array, keep reading until the matching end event. Returns the
+ * type of the first event read, or JSON_ERROR/JSON_DONE as soon as either
+ * is encountered.
+ */
+enum json_type json_skip(json_stream *json)
+{
+    enum json_type first = json_next(json);
+    enum json_type cur = first;
+    size_t arrays = 0;
+    size_t objects = 0;
+
+    for (;;) {
+        if (cur == JSON_ERROR || cur == JSON_DONE)
+            return cur;
+
+        switch (cur) {
+        case JSON_ARRAY:
+            ++arrays;
+            break;
+        case JSON_ARRAY_END:
+            if (arrays > 0)
+                --arrays;
+            break;
+        case JSON_OBJECT:
+            ++objects;
+            break;
+        case JSON_OBJECT_END:
+            if (objects > 0)
+                --objects;
+            break;
+        default:
+            break;
+        }
+
+        /* Everything that was opened has been closed again. */
+        if (arrays == 0 && objects == 0)
+            break;
+
+        cur = json_next(json);
+    }
+
+    return first;
+}
+
+/* Call json_skip() repeatedly until it returns the given type. Returns
+ * type in that case, or JSON_ERROR/JSON_DONE as soon as json_skip()
+ * returns either of them.
+ */
+enum json_type json_skip_until(json_stream *json, enum json_type type)
+{
+    enum json_type skipped;
+
+    do {
+        skipped = json_skip(json);
+
+        if (skipped == JSON_ERROR || skipped == JSON_DONE)
+            return skipped;
+    } while (skipped != type);
+
+    return type;
+}
+
+/* Return the text of the most recently read string or number, or "" if
+ * the buffer has never been allocated. If length is not NULL, it is set
+ * to the number of bytes in the buffer, which for a completed string or
+ * number includes the terminating NUL.
+ */
+const char *json_get_string(json_stream *json, size_t *length)
+{
+    const char *text = json->data.string;
+
+    if (length != NULL)
+        *length = json->data.string_fill;
+
+    return text != NULL ? text : "";
+}
+
+/* Convert the text currently in the string/number buffer to a double with
+ * strtod(). Returns 0 if the buffer has never been allocated.
+ */
+double json_get_number(json_stream *json)
+{
+    const char *text = json->data.string;
+
+    if (text == NULL)
+        return 0;
+
+    return strtod(text, NULL);
+}
+
+const char *json_get_error(json_stream *json)
+{
+ return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL;
+}
+
+/* Return the current line number (starts at 1, incremented by newline()). */
+size_t json_get_lineno(json_stream *json)
+{
+ return json->lineno;
+}
+
+/* Return the source position, i.e., the number of bytes consumed so far. */
+size_t json_get_position(json_stream *json)
+{
+ return json->source.position;
+}
+
+/* Return the column: the cached start column of the last value event if
+ * there is one, otherwise the column computed from the current position
+ * (1 if nothing has been read yet).
+ */
+size_t json_get_column(json_stream *json)
+{
+    if (json->colno != 0)
+        return json->colno;
+
+    if (json->source.position == 0)
+        return 1;
+
+    return json->source.position - json->linepos - json->lineadj;
+}
+
+/* Return the current nesting depth: the number of entries on the stack.
+ * stack_top is (size_t)-1 for an empty stack, so the result is then 0.
+ */
+size_t json_get_depth(json_stream *json)
+{
+ return json->stack_top + 1;
+}
+
+/* Return the current parsing context: JSON_OBJECT when inside an object,
+   JSON_ARRAY when inside an array, and JSON_DONE when in neither (not yet
+   or not anymore).
+
+   For the object and array cases, if count is not NULL, it is set to the
+   number of parsing events already observed at this level with
+   json_next/peek(). In particular, inside an object an odd number means
+   that the JSON_STRING event just observed is a member name. For JSON_DONE
+   count is left unchanged.
+*/
+enum json_type json_get_context(json_stream *json, size_t *count)
+{
+    const struct json_stack *top;
+
+    if (json->stack_top == (size_t)-1)
+        return JSON_DONE;
+
+    top = &json->stack[json->stack_top];
+
+    if (count != NULL)
+        *count = top->count;
+
+    return top->type;
+}
+
+/* Read and return one byte (or EOF) directly from the underlying source on
+ * behalf of the caller, keeping the line and column bookkeeping up to date.
+ *
+ * NOTE(review): the 0xC2-0xF4 test below only matches if the source's get
+ * callback reports bytes as non-negative values; confirm for all sources.
+ */
+int json_source_get(json_stream *json)
+{
+ /* If the caller reads a multi-byte UTF-8 sequence, we expect them to read
+ * it in its entirety. We also assume that any invalid bytes within such a
+ * sequence belong to the same column (as opposed to starting a new column
+ * or some such). */
+
+ int c = json->source.get(&json->source);
+ if (json->linecon > 0) {
+ /* Expecting a continuation byte within a multi-byte UTF-8 sequence. */
+ json->linecon--;
+ if (c != EOF)
+ json->lineadj++;
+ } else if (c == '\n')
+ newline(json);
+ else if (c >= 0xC2 && c <= 0xF4) /* First in multi-byte UTF-8 sequence. */
+ json->linecon = utf8_seq_length(c) - 1;
+
+ return c;
+}
+
+/* Return the next byte of the underlying source (or EOF) as reported by
+ * the source's peek callback. No line or column bookkeeping is done.
+ */
+int json_source_peek(json_stream *json)
+{
+ return json->source.peek(&json->source);
+}
+
+/* Initialize the stream to read from the size bytes at buffer. Only the
+ * pointer is stored; the data is not copied here.
+ */
+void json_open_buffer(json_stream *json, const void *buffer, size_t size)
+{
+ init(json);
+ json->source.get = buffer_get;
+ json->source.peek = buffer_peek;
+ json->source.source.buffer.buffer = (const char *)buffer;
+ json->source.source.buffer.length = size;
+}
+
+/* Initialize the stream to read from a NUL-terminated string: the same as
+ * json_open_buffer() with strlen(string) as the size.
+ */
+void json_open_string(json_stream *json, const char *string)
+{
+ json_open_buffer(json, string, strlen(string));
+}
+
+/* Initialize the stream to read from the given FILE using fgetc()/ungetc()
+ * (see stream_get() and stream_peek()).
+ */
+void json_open_stream(json_stream *json, FILE * stream)
+{
+ init(json);
+ json->source.get = stream_get;
+ json->source.peek = stream_peek;
+ json->source.source.stream.stream = stream;
+}
+
+/* Consume and return the next byte of a user-supplied source by calling
+ * the user's get callback with the user pointer, advancing the position by
+ * one unless EOF was returned.
+ */
+static int user_get(struct json_source *json)
+{
+    const int byte = json->source.user.get(json->source.user.ptr);
+
+    if (byte == EOF)
+        return EOF;
+
+    json->position++;
+    return byte;
+}
+
+/* Return the result of the user's peek callback, called with the user
+ * pointer. The position is not changed.
+ */
+static int user_peek(struct json_source *json)
+{
+ return json->source.user.peek(json->source.user.ptr);
+}
+
+/* Initialize the stream to read through user-supplied callbacks: get and
+ * peek are each called with user as their only argument (see user_get()
+ * and user_peek()).
+ */
+void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user)
+{
+ init(json);
+ json->source.get = user_get;
+ json->source.peek = user_peek;
+ json->source.source.user.ptr = user;
+ json->source.source.user.get = get;
+ json->source.source.user.peek = peek;
+}
+
+/* Replace the stream's allocator functions with a copy of *a. Note that the
+ * json_open_*() functions reset the allocator to malloc/realloc/free via
+ * init(), so this has to be called after opening the stream.
+ */
+void json_set_allocator(json_stream *json, json_allocator *a)
+{
+ json->alloc = *a;
+}
+
+/* Turn the streaming mode flag on or off. init() turns it on by default. */
+void json_set_streaming(json_stream *json, bool streaming)
+{
+    json->flags = streaming
+        ? (json->flags | JSON_FLAG_STREAMING)
+        : (json->flags & ~JSON_FLAG_STREAMING);
+}
+
+/* Release the memory owned by the stream: the nesting stack and the
+ * string/number buffer, both freed with the stream's allocator.
+ *
+ * The freed pointers and their bookkeeping are cleared afterwards so that
+ * the stream does not keep dangling pointers: calling json_close() again
+ * passes NULL to free instead of freeing the same blocks twice, and
+ * json_get_string() returns "" rather than freed memory.
+ */
+void json_close(json_stream *json)
+{
+    json->alloc.free(json->stack);
+    json->stack = NULL;
+    json->stack_size = 0;
+    json->stack_top = (size_t)-1;
+
+    json->alloc.free(json->data.string);
+    json->data.string = NULL;
+    json->data.string_size = 0;
+    json->data.string_fill = 0;
+}
diff --git a/libbutl/json/pdjson.h b/libbutl/json/pdjson.h
new file mode 100644
index 0000000..ac698e4
--- /dev/null
+++ b/libbutl/json/pdjson.h
@@ -0,0 +1,147 @@
+#ifndef PDJSON_H
+#define PDJSON_H
+
+#ifndef PDJSON_SYMEXPORT
+# define PDJSON_SYMEXPORT
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#else
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+ #include <stdbool.h>
+#else
+ #ifndef bool
+ #define bool int
+ #define true 1
+ #define false 0
+ #endif /* bool */
+#endif /* __STDC_VERSION__ */
+#endif /* __cplusplus */
+
+#include <stdio.h>
+
+/* Event types returned by json_next()/json_peek(). The values start at 1,
+ * which leaves 0 free to mean "no event" (json_stream::next uses it that
+ * way). JSON_OBJECT and JSON_ARRAY mark the beginning of an object/array
+ * and the *_END values mark its end.
+ */
+enum json_type {
+ JSON_ERROR = 1, JSON_DONE,
+ JSON_OBJECT, JSON_OBJECT_END, JSON_ARRAY, JSON_ARRAY_END,
+ JSON_STRING, JSON_NUMBER, JSON_TRUE, JSON_FALSE, JSON_NULL
+};
+
+/* Memory allocation functions used by a stream, with the same signatures
+ * as the C library malloc(), realloc() and free(), which are the defaults.
+ * Install a different set with json_set_allocator().
+ */
+struct json_allocator {
+ void *(*malloc)(size_t);
+ void *(*realloc)(void *, size_t);
+ void (*free)(void *);
+};
+
+typedef int (*json_user_io)(void *user);
+
+typedef struct json_stream json_stream;
+typedef struct json_allocator json_allocator;
+
+PDJSON_SYMEXPORT void json_open_buffer(json_stream *json, const void *buffer, size_t size);
+PDJSON_SYMEXPORT void json_open_string(json_stream *json, const char *string);
+PDJSON_SYMEXPORT void json_open_stream(json_stream *json, FILE *stream);
+PDJSON_SYMEXPORT void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user);
+PDJSON_SYMEXPORT void json_close(json_stream *json);
+
+PDJSON_SYMEXPORT void json_set_allocator(json_stream *json, json_allocator *a);
+PDJSON_SYMEXPORT void json_set_streaming(json_stream *json, bool mode);
+
+PDJSON_SYMEXPORT enum json_type json_next(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_peek(json_stream *json);
+PDJSON_SYMEXPORT void json_reset(json_stream *json);
+PDJSON_SYMEXPORT const char *json_get_string(json_stream *json, size_t *length);
+PDJSON_SYMEXPORT double json_get_number(json_stream *json);
+
+PDJSON_SYMEXPORT enum json_type json_skip(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_skip_until(json_stream *json, enum json_type type);
+
+PDJSON_SYMEXPORT size_t json_get_lineno(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_position(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_column(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_depth(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_get_context(json_stream *json, size_t *count);
+PDJSON_SYMEXPORT const char *json_get_error(json_stream *json);
+
+PDJSON_SYMEXPORT int json_source_get(json_stream *json);
+PDJSON_SYMEXPORT int json_source_peek(json_stream *json);
+PDJSON_SYMEXPORT bool json_isspace(int c);
+
+/* internal */
+
+/* Input source of a stream: a pair of callbacks plus the state for
+ * whichever kind of source was opened.
+ */
+struct json_source {
+ int (*get)(struct json_source *); /* Consume and return next byte or EOF. */
+ int (*peek)(struct json_source *); /* Return next byte or EOF, not consumed. */
+ size_t position; /* Number of bytes consumed so far. */
+ union {
+ /* Set by json_open_stream(). */
+ struct {
+ FILE *stream;
+ } stream;
+ /* Set by json_open_buffer()/json_open_string(). */
+ struct {
+ const char *buffer;
+ size_t length;
+ } buffer;
+ /* Set by json_open_user(); ptr is passed to get and peek. */
+ struct {
+ void *ptr;
+ json_user_io get;
+ json_user_io peek;
+ } user;
+ } source;
+};
+
+/* Complete parser state. Although the definition is visible so that the
+ * structure can be allocated by the caller, it is listed under "internal"
+ * in this header; use the json_*() functions to access it.
+ */
+struct json_stream {
+ size_t lineno; /* Current line number, starting at 1. */
+
+ /* While counting lines is straightforward, columns are tricky because we
+ * have to count codepoints, not bytes. We could have peppered the code
+ * with increments in all the relevant places but that seems inelegant.
+ * So instead we calculate the column dynamically, based on the current
+ * position.
+ *
+ * Specifically, we will remember the position at the beginning of each
+ * line (linepos) and, assuming only the ASCII characters on the line, the
+ * column will be the difference between the current position and linepos.
+ * Of course there could also be multi-byte UTF-8 sequences which we will
+ * handle by keeping an adjustment (lineadj) -- the number of continuation
+ * bytes encountered on this line so far. Finally, for json_source_get()
+ * we also have to keep the number of remaining continuation bytes in the
+ * current multi-byte UTF-8 sequence (linecon).
+ *
+ * This is not the end of the story, however: with only the just described
+ * approach we will always end up with the column of the latest character
+ * read which is not what we want when returning potentially multi-
+ * character value events (string, number, etc); in these cases we want to
+ * return the column of the first character (note that if the value itself
+ * is invalid and we are returning JSON_ERROR, we still want the current
+ * column). So to handle this we will cache the start column (colno) for
+ * such events.
+ */
+ size_t linepos; /* Position at the beginning of the current line. */
+ size_t lineadj; /* Adjustment for multi-byte UTF-8 sequences. */
+ size_t linecon; /* Number of remaining UTF-8 continuation bytes. */
+ size_t colno; /* Start column for value events or 0. */
+
+ struct json_stack *stack; /* Nesting stack or NULL if not yet allocated. */
+ size_t stack_top; /* Index of the top entry; (size_t)-1 if empty. */
+ size_t stack_size; /* Number of allocated stack entries. */
+ enum json_type next; /* Event cached by json_peek() or 0 if none. */
+ unsigned flags; /* JSON_FLAG_* bits (error, streaming). */
+
+ /* Text of the most recently read string or number. */
+ struct {
+ char *string; /* Buffer or NULL if not yet allocated. */
+ size_t string_fill; /* Number of bytes used. */
+ size_t string_size; /* Number of bytes allocated. */
+ } data;
+
+ size_t ntokens; /* Values read since open/json_reset(). */
+
+ struct json_source source;
+ struct json_allocator alloc;
+ char errmsg[128]; /* Message of the first error, if any. */
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif
diff --git a/libbutl/json/serializer.cxx b/libbutl/json/serializer.cxx
new file mode 100644
index 0000000..fbd569a
--- /dev/null
+++ b/libbutl/json/serializer.cxx
@@ -0,0 +1,671 @@
+#include <cstdio> // snprintf
+#include <cstdarg> // va_list
+#include <cstring> // memcpy
+#include <ostream>
+
+#include <libbutl/json/serializer.hxx>
+
+using namespace std;
+
+namespace butl
+{
+ namespace json
+ {
+ using buffer = buffer_serializer::buffer;
+ using error_code = invalid_json_output::error_code;
+
+ // Overflow callback for a dynamically-sized contiguous container T (used
+ // below with std::string and std::vector<char>): enlarge the container
+ // passed as user data and re-point the buffer at its storage.
+ //
+ template <typename T>
+ static void
+ dynarray_overflow (void* d, event, buffer& b, size_t ex)
+ {
+ auto* c (static_cast<T*> (d));
+
+ // First make room for the extra bytes requested, then claim whatever
+ // additional capacity the container has ended up with.
+ //
+ c->resize (b.capacity + ex);
+ c->resize (c->capacity ());
+
+ // Note: the const_cast is required for std::string pre C++17.
+ //
+ b.capacity = c->size ();
+ b.data = const_cast<typename T::value_type*> (c->data ());
+ }
+
+ // Flush callback for a dynamically-sized contiguous container T: trim the
+ // container passed as user data to the amount of output text written and
+ // update the buffer to match.
+ //
+ template <typename T>
+ static void
+ dynarray_flush (void* d, event, buffer& b)
+ {
+ auto* c (static_cast<T*> (d));
+ c->resize (b.size);
+
+ b.capacity = b.size;
+ b.data = const_cast<typename T::value_type*> (c->data ());
+ }
+
+ // Serialize to std::string growing it as necessary. The initial buffer is
+ // the string's current contents with both the size and the capacity set to
+ // the string's current size, so output is written past what is already
+ // there.
+ //
+ buffer_serializer::
+ buffer_serializer (string& s, size_t i)
+ : buffer_serializer (const_cast<char*> (s.data ()), size_, s.size (),
+ dynarray_overflow<string>,
+ dynarray_flush<string>,
+ &s,
+ i)
+ {
+ // Note: size_ is only bound by reference above and is assigned its value
+ // here, after the delegated-to constructor has run.
+ //
+ size_ = s.size ();
+ }
+
+ // Serialize to std::vector<char> growing it as necessary. The initial
+ // buffer is the vector's current contents with both the size and the
+ // capacity set to the vector's current size, so output is written past
+ // what is already there.
+ //
+ buffer_serializer::
+ buffer_serializer (vector<char>& v, size_t i)
+ : buffer_serializer (v.data (), size_, v.size (),
+ dynarray_overflow<vector<char>>,
+ dynarray_flush<vector<char>>,
+ &v,
+ i)
+ {
+ // Note: size_ is only bound by reference above and is assigned its value
+ // here, after the delegated-to constructor has run.
+ //
+ size_ = v.size ();
+ }
+
+ // Overflow callback for stream_serializer: write everything accumulated in
+ // the buffer to the std::ostream passed as user data and mark the buffer as
+ // empty. The extra space hint is ignored.
+ //
+ static void
+ ostream_overflow (void* d, event e, buffer& b, size_t)
+ {
+ auto& os (*static_cast<ostream*> (d));
+
+ if (os.write (static_cast<char*> (b.data), b.size).fail ())
+ {
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "unable to write JSON output text");
+ }
+
+ b.size = 0;
+ }
+
+ // Flush callback for stream_serializer: write out the buffered text and
+ // then flush the std::ostream passed as user data.
+ //
+ static void
+ ostream_flush (void* d, event e, buffer& b)
+ {
+ // Get the buffer contents into the stream first.
+ //
+ ostream_overflow (d, e, b, 0);
+
+ if (static_cast<ostream*> (d)->flush ().fail ())
+ {
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "unable to write JSON output text");
+ }
+ }
+
+ // Serialize to std::ostream using the tmp_ member array as the intermediate
+ // buffer. The buffer contents are written to the stream when the buffer
+ // runs out of space (ostream_overflow) and the stream is flushed after
+ // each complete top-level value (ostream_flush).
+ //
+ stream_serializer::
+ stream_serializer (ostream& os, size_t i)
+ : buffer_serializer (tmp_, sizeof (tmp_),
+ ostream_overflow,
+ ostream_flush,
+ &os,
+ i)
+ {
+ }
+
+ // Serialize the next event (see the header for the interface description).
+ //
+ // The implementation validates the event against the current state (the
+ // innermost open array or object, if any), works out the separator that
+ // must precede it, writes both via write(), and then updates the state.
+ // Return true if we are still inside an array or object and false
+ // otherwise. All the failures are reported by throwing invalid_json_output
+ // (see the fail_* labels at the end).
+ //
+ bool buffer_serializer::
+ next (optional<event> e, pair<const char*, size_t> val, bool check)
+ {
+ // The counter reaches 2 after two consecutive absent events (or after a
+ // single one at the very beginning since it is initialized to 1). Nothing
+ // can be serialized after that.
+ //
+ if (absent_ == 2)
+ goto fail_complete;
+
+ if (e == nullopt)
+ {
+ // An absent event is only valid outside of any array or object.
+ //
+ if (!state_.empty ())
+ goto fail_incomplete;
+
+ absent_++;
+ return false;
+ }
+
+ absent_ = 0; // Clear inter-value absent event.
+
+ {
+ // State of the innermost array or object we are in or NULL if we are at
+ // the top level.
+ //
+ state* st (state_.empty () ? nullptr : &state_.back ());
+
+ // Inside an object the events alternate between names (even count) and
+ // values (odd count).
+ //
+ auto name_expected = [] (const state& s)
+ {
+ return s.type == event::begin_object && s.count % 2 == 0;
+ };
+
+ auto make_str = [] (const char* s, size_t n)
+ {
+ return make_pair (s, n);
+ };
+
+ // When it comes to pretty-printing, the common way to do it is along
+ // these lines:
+ //
+ // {
+ // "str": "value",
+ // "obj": {
+ // "arr": [
+ // 1,
+ // 2,
+ // 3
+ // ]
+ // },
+ // "num": 123
+ // }
+ //
+ // Empty objects and arrays are printed without a newline:
+ //
+ // {
+ // "obj": {},
+ // "arr": []
+ // }
+ //
+ // There are two types of separators: between name and value, which is
+ // always ": ", and before/after value inside an object or array which
+ // is either newline followed by indentation, or comma followed by
+ // newline followed by indentation (we also have separation between
+ // top-level values but that's orthogonal to pretty-printing).
+ //
+ // Based on this observation, we are going to handle the latter case by
+ // starting with the ",\n" string (in this->sep_) and pushing/popping
+ // indentation spaces as we enter/leave objects and arrays. We handle
+ // the cases where we don't need the comma by simply skipping it in the
+ // C-string pointer.
+ //
+ bool pp (indent_ != 0);
+
+ pair<const char*, size_t> sep;
+ if (st != nullptr)
+ {
+ // The name-value separator.
+ //
+ if (st->type == event::begin_object && st->count % 2 == 1)
+ {
+ sep = !pp ? make_str (":", 1) : make_str (": ", 2);
+ }
+ // We don't need the comma if we are closing the object or array.
+ //
+ else if (e == event::end_array || e == event::end_object)
+ {
+ // But in this case we need to unindent one level prior to writing
+ // the brace. Also handle the empty object/array as a special case.
+ //
+ sep = !pp || st->count == 0
+ ? make_str (nullptr, 0)
+ : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_);
+ }
+ // Or if this is the first value (note: must come after end_*).
+ //
+ else if (st->count == 0)
+ {
+ sep = !pp
+ ? make_str (nullptr, 0)
+ : make_str (sep_.c_str () + 1, sep_.size () - 1);
+ }
+ else
+ {
+ sep = !pp
+ ? make_str (",", 1)
+ : make_str (sep_.c_str (), sep_.size ());
+ }
+ }
+ else if (values_ != 0) // Subsequent top-level value.
+ {
+ // Top-level value separation. For now we always separate them with
+ // newlines, which is the most common/sensible way.
+ //
+ sep = make_str ("\n", 1);
+ }
+
+ switch (*e)
+ {
+ case event::begin_array:
+ case event::begin_object:
+ {
+ // An array or object is a value so it cannot appear where a member
+ // name is expected.
+ //
+ if (st != nullptr && name_expected (*st))
+ goto fail_unexpected_event;
+
+ write (*e,
+ sep,
+ make_str (e == event::begin_array ? "[" : "{", 1),
+ false);
+
+ if (st != nullptr)
+ st->count++;
+
+ // Indent one more level for whatever is nested.
+ //
+ if (pp)
+ sep_.append (indent_, ' ');
+
+ state_.push_back (state {*e, 0});
+ break;
+ }
+ case event::end_array:
+ case event::end_object:
+ {
+ // The end event must match the innermost open type and an object can
+ // only be closed where a name is expected (that is, not between a
+ // name and its value).
+ //
+ if (st == nullptr || (e == event::end_array
+ ? st->type != event::begin_array
+ : !name_expected (*st)))
+ goto fail_unexpected_event;
+
+ write (*e,
+ sep,
+ make_str (e == event::end_array ? "]" : "}", 1),
+ false);
+
+ // Drop the indentation level added by the corresponding begin event.
+ //
+ if (pp)
+ sep_.erase (sep_.size () - indent_);
+
+ state_.pop_back ();
+ break;
+ }
+ case event::name:
+ case event::string:
+ {
+ // A name is only valid where a name is expected and a string value
+ // only where it is not.
+ //
+ if (e == event::name
+ ? (st == nullptr || !name_expected (*st))
+ : (st != nullptr && name_expected (*st)))
+ goto fail_unexpected_event;
+
+ write (*e, sep, val, check, '"');
+
+ if (st != nullptr)
+ st->count++;
+ break;
+ }
+ case event::null:
+ case event::boolean:
+ {
+ // Supply the text for a null event without a value. Otherwise, if
+ // requested, verify the value is one of the valid literals.
+ //
+ if (e == event::null && val.first == nullptr)
+ val = {"null", 4};
+ else if (check)
+ {
+ auto eq = [&val] (const char* v, size_t n)
+ {
+ return val.second == n && memcmp (val.first, v, n) == 0;
+ };
+
+ if (e == event::null)
+ {
+ if (!eq ("null", 4))
+ goto fail_null;
+ }
+ else
+ {
+ if (!eq ("true", 4) && !eq ("false", 5))
+ goto fail_bool;
+ }
+ }
+ }
+ // Fall through.
+ case event::number:
+ {
+ // Note: this event is also used by value_json_text().
+
+ if (st != nullptr && name_expected (*st))
+ goto fail_unexpected_event;
+
+ write (*e, sep, val, check);
+
+ if (st != nullptr)
+ st->count++;
+ break;
+ }
+ }
+ }
+
+ // If we are back at the top level, then we have a complete value: count
+ // it and give the flush function (if any) a chance to write it out.
+ //
+ if (state_.empty ())
+ {
+ values_++;
+ if (flush_ != nullptr)
+ flush_ (data_, *e, buf_);
+
+ return false;
+ }
+
+ return true;
+
+ fail_complete:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "value sequence is complete");
+ fail_incomplete:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "value is incomplete");
+ fail_null:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "invalid null value");
+ fail_bool:
+ throw invalid_json_output (
+ e, error_code::invalid_value, "invalid boolean value");
+ fail_unexpected_event:
+ throw invalid_json_output (
+ e, error_code::unexpected_event, "unexpected event");
+ }
+
+ // JSON escape sequences for control characters <= 0x1F.
+ //
+ // The table is indexed by the character's code (0x00-0x1F). Each entry is
+ // either one of the two-character escapes (\b, \t, \n, \f, \r) or the
+ // six-character \u00XX form. The two are told apart by looking at the
+ // second character of the entry (see chunk_checked() in write()).
+ //
+ static const char* json_escapes[] =
+ {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
+ "\\u0006", "\\u0007", "\\b", "\\t", "\\n", "\\u000B",
+ "\\f", "\\r", "\\u000E", "\\u000F", "\\u0010", "\\u0011",
+ "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
+ "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D",
+ "\\u001E", "\\u001F"};
+
+ // Write the separator (if any) followed by the value to the output buffer,
+ // growing the buffer via the overflow function as necessary.
+ //
+ // If the quote character q is not '\0', then the value is enclosed in
+ // double quotes. If check is true, then the value is validated as UTF-8 and
+ // characters that must be escaped in a JSON string are escaped. Otherwise
+ // the value is written as is.
+ //
+ // Throw invalid_json_output if there is not enough space in the buffer (and
+ // it cannot be grown) or if the value contains invalid UTF-8. In the latter
+ // case the exception's offset is the position of the invalid sequence in
+ // the value.
+ //
+ void buffer_serializer::
+ write (event e,
+ pair<const char*, size_t> sep,
+ pair<const char*, size_t> val,
+ bool check,
+ char q)
+ {
+ // Assumptions:
+ //
+ // 1. A call to overflow should be able to provide enough capacity to
+ // write the entire separator (in other words, we are not going to
+ // bother with chunking the separator).
+ //
+ // 2. Similarly, a call to overflow should be able to provide enough
+ // capacity to write an entire UTF-8 multi-byte sequence.
+ //
+ // 3. Performance-wise, we do not expect very long contiguous sequences
+ // of characters that require escaping.
+
+ // Total number of bytes remaining to be written and the capacity
+ // currently available.
+ //
+ size_t size (sep.second + val.second + (q != '\0' ? 2 : 0));
+ size_t cap (buf_.capacity - buf_.size);
+
+ // Call the overflow function (if any) asking for enough extra space to
+ // fit everything that remains to be written (plus the specified extra) but
+ // not less than min. Return true if at least min bytes are now available.
+ //
+ auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0)
+ {
+ if (overflow_ == nullptr)
+ return false;
+
+ extra += size;
+ extra -= cap;
+ overflow_ (data_, e, buf_, extra > min ? extra : min);
+ cap = buf_.capacity - buf_.size;
+
+ return cap >= min;
+ };
+
+ // Copy the specified bytes to the end of the output text and adjust the
+ // remaining size and available capacity accordingly. The caller is
+ // responsible for making sure there is enough capacity.
+ //
+ auto append = [this, &cap, &size] (const char* d, size_t s)
+ {
+ memcpy (static_cast<char*> (buf_.data) + buf_.size, d, s);
+ buf_.size += s;
+ cap -= s;
+ size -= s;
+ };
+
+ // Return the longest chunk of input that fits into the buffer and does
+ // not end in the middle of a multi-byte UTF-8 sequence. Assume value
+ // size and capacity are not 0. Return NULL in first if no chunk could
+ // be found that fits into the remaining space. In this case, second is
+ // the additional (to size) required space (used to handle escapes in
+ // the checked version).
+ //
+ // The basic idea is to seek in the input buffer to the capacity of the
+ // output buffer (unless the input is shorter than the output). If we
+ // ended up in the middle of a multi-byte UTF-8 sequence, then seek back
+ // until we end up at the UTF-8 sequence boundary. Note that this
+ // implementation assumes valid UTF-8.
+ //
+ auto chunk = [&cap, &val] () -> pair<const char*, size_t>
+ {
+ pair<const char*, size_t> r (nullptr, 0);
+
+ if (cap >= val.second)
+ r = val;
+ else
+ {
+ // Start from the character past capacity and search for a UTF-8
+ // sequence boundary.
+ //
+ for (const char* p (val.first + cap); p != val.first; --p)
+ {
+ const auto u (static_cast<uint8_t> (*p));
+ if (u < 0x80 || u > 0xBF) // Not a continuation byte
+ {
+ r = {val.first, p - val.first};
+ break;
+ }
+ }
+ }
+
+ // Consume the chunk (this is a noop if none was found).
+ //
+ val.first += r.second;
+ val.second -= r.second;
+
+ return r;
+ };
+
+ // Escaping and UTF-8-validating version of chunk().
+ //
+ // There are three classes of mandatory escapes in a JSON string:
+ //
+ // - \\ and \"
+ //
+ // - \b \f \n \r \t for popular control characters
+ //
+ // - \u00NN for other control characters <= 0x1F
+ //
+ // If the input begins with a character that must be escaped, return
+ // only its escape sequence. Otherwise validate and return everything up
+ // to the end of input or buffer capacity, but cutting it short before
+ // the next character that must be escaped or the first UTF-8 sequence
+ // that would not fit.
+ //
+ // Return string::npos in second in case of a stray continuation byte or
+ // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte
+ // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence
+ // that would encode a codepoint beyond the U+10FFFF Unicode limit).
+ //
+ auto chunk_checked = [&cap, &size, &val] () -> pair<const char*, size_t>
+ {
+ pair<const char*, size_t> r (nullptr, 0);
+
+ // Check whether the first character needs to be escaped.
+ //
+ const uint8_t c (val.first[0]);
+ if (c == '"')
+ r = {"\\\"", 2};
+ else if (c == '\\')
+ r = {"\\\\", 2};
+ else if (c <= 0x1F)
+ {
+ // The escape is either the 6-character \uXXXX or a 2-character one.
+ //
+ auto s (json_escapes[c]);
+ r = {s, s[1] == 'u' ? 6 : 2};
+ }
+
+ if (r.first != nullptr)
+ {
+ // Return in second the additional (to size) space required.
+ //
+ if (r.second > cap)
+ return {nullptr, r.second - 1};
+
+ // If we had to escape the character then adjust size accordingly
+ // (see append() above).
+ //
+ size += r.second - 1;
+
+ val.first += 1;
+ val.second -= 1;
+ return r;
+ }
+
+ // First character doesn't need to be escaped. Return as much of the
+ // rest of the input as possible.
+ //
+ size_t i (0);
+ for (size_t n (min (cap, val.second)); i != n; i++)
+ {
+ const uint8_t c1 (val.first[i]);
+
+ if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped.
+ break;
+ else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence.
+ {
+ size_t i1 (i); // Position of the first byte.
+
+ // The control flow here is to continue if valid and to fall
+ // through to return on error.
+ //
+ if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence.
+ {
+ if (i + 2 <= val.second) // Sequence is complete in JSON value.
+ {
+ if (i + 2 > cap) // Sequence won't fit.
+ break;
+
+ const uint8_t c2 (val.first[++i]);
+
+ if (c2 >= 0x80 && c2 <= 0xBF)
+ continue;
+ }
+ }
+ else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence.
+ {
+ if (i + 3 <= val.second)
+ {
+ if (i + 3 > cap)
+ break;
+
+ const uint8_t c2 (val.first[++i]), c3 (val.first[++i]);
+
+ // The valid range of the second byte depends on the first.
+ //
+ if (c3 >= 0x80 && c3 <= 0xBF)
+ {
+ switch (c1)
+ {
+ case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break;
+ case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break;
+ default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+ }
+ }
+ }
+ }
+ else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence.
+ {
+ if (i + 4 <= val.second)
+ {
+ if (i + 4 > cap)
+ break;
+
+ const uint8_t c2 (val.first[++i]),
+ c3 (val.first[++i]),
+ c4 (val.first[++i]);
+
+ // The valid range of the second byte depends on the first.
+ //
+ if (c3 >= 0x80 && c3 <= 0xBF &&
+ c4 >= 0x80 && c4 <= 0xBF)
+ {
+ switch (c1)
+ {
+ case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break;
+ case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break;
+ default: if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+ }
+ }
+ }
+ }
+
+ r = {val.first, string::npos};
+
+ // Update val to point to the beginning of the invalid sequence.
+ //
+ val.first += i1;
+ val.second -= i1;
+
+ return r;
+ }
+ }
+
+ if (i != 0) // We have a chunk.
+ {
+ r = {val.first, i};
+
+ val.first += i;
+ val.second -= i;
+ }
+
+ return r;
+ };
+
+ // Value's original size (used to calculate the offset of the errant
+ // character in case of a validation failure).
+ //
+ const size_t vn (val.second);
+
+ // Write the separator, if any.
+ //
+ if (sep.second != 0)
+ {
+ if (cap < sep.second && !grow (sep.second))
+ goto fail_nospace;
+
+ append (sep.first, sep.second);
+ }
+
+ // Write the value's opening quote, if requested.
+ //
+ if (q != '\0')
+ {
+ if (cap == 0 && !grow (1))
+ goto fail_nospace;
+
+ append ("\"", 1);
+ }
+
+ // Write the value, unless empty.
+ //
+ while (val.second != 0)
+ {
+ pair<const char*, size_t> ch (nullptr, 0);
+
+ if (cap != 0)
+ ch = check ? chunk_checked () : chunk ();
+
+ if (ch.first == nullptr)
+ {
+ // The minimum extra bytes we need the overflow function to be able
+ // to provide is based on these sequences that we do not break:
+ //
+ // - 4 bytes for a UTF-8 sequence
+ // - 6 bytes for an escaped Unicode sequence (\uXXXX).
+ //
+ if (!grow (6, ch.second))
+ goto fail_nospace;
+ }
+ else if (ch.second != string::npos)
+ append (ch.first, ch.second);
+ else
+ goto fail_utf8;
+ }
+
+ // Write the value's closing quote, if requested.
+ //
+ if (q != '\0')
+ {
+ if (cap == 0 && !grow (1))
+ goto fail_nospace;
+
+ append ("\"", 1);
+ }
+
+ return;
+
+ // Note: keep descriptions consistent with the parser.
+ //
+ fail_utf8:
+ throw invalid_json_output (e,
+ e == event::name ? error_code::invalid_name
+ : error_code::invalid_value,
+ "invalid UTF-8 text",
+ vn - val.second);
+
+ fail_nospace:
+ throw invalid_json_output (
+ e, error_code::buffer_overflow, "insufficient space in buffer");
+ }
+
+ // printf-style helper for the to_chars() overloads: format the arguments
+ // into the buffer b of size n according to the format string f and return
+ // the number of characters written (not counting the terminating NUL).
+ //
+ // Throw invalid_json_output if the formatting fails or if the result
+ // (including the terminating NUL) does not fit into the buffer.
+ //
+ size_t buffer_serializer::
+ to_chars_impl (char* b, size_t n, const char* f, ...)
+ {
+ va_list a;
+ va_start (a, f);
+ const int r (vsnprintf (b, n, f, a));
+ va_end (a);
+
+ // Note that we compare in the size_t domain (after ruling out the
+ // negative error value) so that a buffer size greater than INT_MAX is not
+ // mangled by a narrowing conversion to int.
+ //
+ if (r < 0 || static_cast<size_t> (r) >= n)
+ {
+ throw invalid_json_output (event::number,
+ error_code::invalid_value,
+ "unable to convert number to string");
+ }
+
+ return static_cast<size_t> (r);
+ }
+ }
+}
diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx
new file mode 100644
index 0000000..5192cb4
--- /dev/null
+++ b/libbutl/json/serializer.hxx
@@ -0,0 +1,413 @@
+#pragma once
+
+#ifdef BUILD2_BOOTSTRAP
+# error JSON serializer not available during bootstrap
+#endif
+
+#include <array>
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <cstddef> // size_t, nullptr_t
+#include <utility> // pair
+#include <stdexcept> // invalid_argument
+#include <type_traits> // enable_if, is_*
+
+#include <libbutl/optional.hxx> // butl::optional is std::optional or similar.
+
+#include <libbutl/json/event.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ // Using the RFC8259 terminology: JSON (output) text, JSON value, object
+ // member.
+ //
+ namespace json
+ {
+ // Exception thrown by the serializer if it is given an invalid event or
+ // value or if the output text cannot be written.
+ //
+ class invalid_json_output: public std::invalid_argument
+ {
+ public:
+ using event_type = json::event;
+
+ // Error category. In the serializer implementation these are used as
+ // follows:
+ //
+ // buffer_overflow -- out of buffer space or unable to write to stream
+ // unexpected_event -- event is not valid in the current state
+ // invalid_name -- invalid UTF-8 in object member name
+ // invalid_value -- invalid value (UTF-8, null, boolean, number) or
+ // incomplete/complete value sequence
+ //
+ enum class error_code
+ {
+ buffer_overflow,
+ unexpected_event,
+ invalid_name,
+ invalid_value
+ };
+
+ // Note that the description becomes the what() string of the underlying
+ // std::invalid_argument.
+ //
+ invalid_json_output (optional<event_type> event,
+ error_code code,
+ const char* description,
+ std::size_t offset = std::string::npos);
+
+ invalid_json_output (optional<event_type> event,
+ error_code code,
+ const std::string& description,
+ std::size_t offset = std::string::npos);
+
+ // Event that triggered the error. If the error is in the value, then
+ // offset points to the offending byte (for example, the beginning of an
+ // invalid UTF-8 byte sequence). Otherwise, offset is string::npos.
+ //
+ optional<event_type> event;
+ error_code code;
+ std::size_t offset;
+ };
+
+ // The serializer makes sure the resulting JSON is syntactically but not
+ // necessarily semantically correct. For example, it's possible to
+ // serialize a number event with non-numeric data.
+ //
+ // Note that unlike the parser, the serializer is always in the multi-
+ // value mode allowing the serialization of zero or more values. Note also
+ // that while values are separated with newlines, there is no trailing
+ // newline after the last (or only) value and the user is expected to add
+ // it manually if needed.
+ //
+ // Also note that while RFC8259 recommends object members to have unique
+ // names, the serializer does not enforce this.
+ //
+ class LIBBUTL_SYMEXPORT buffer_serializer
+ {
+ public:
+ // Serialize to string growing it as necessary.
+ //
+ // The indentation argument specifies the number of indentation spaces
+ // that should be used for pretty-printing. If 0 is passed, no
+ // pretty-printing is performed.
+ //
+ explicit
+ buffer_serializer (std::string&, std::size_t indentation = 2);
+
+ // Serialize to vector of characters growing it as necessary.
+ //
+ explicit
+ buffer_serializer (std::vector<char>&, std::size_t indentation = 2);
+
+ // Serialize to a fixed array.
+ //
+ // The length of the output text written is tracked in the size
+ // argument.
+ //
+ // If the array is not big enough to store the entire output text, the
+ // next() call that reaches the limit will throw invalid_json_output.
+ //
+ template <std::size_t N>
+ buffer_serializer (std::array<char, N>&, std::size_t& size,
+ std::size_t indentation = 2);
+
+ // Serialize to a fixed buffer.
+ //
+ // The length of the output text written is tracked in the size
+ // argument.
+ //
+ // If the buffer is not big enough to store the entire output text, the
+ // next() call that reaches the limit will throw invalid_json_output.
+ //
+ buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+ std::size_t indentation = 2);
+
+ // The overflow function is called when the output buffer is out of
+ // space. The extra argument is a hint indicating the extra space likely
+ // to be required.
+ //
+ // Possible strategies include re-allocating a larger buffer or flushing
+ // the contents of the original buffer to the output destination. In
+ // case of a reallocation, the implementation is responsible for copying
+ // the contents of the original buffer over.
+ //
+ // The flush function is called when the complete JSON value has been
+ // serialized to the buffer. It can be used to write the contents of the
+ // buffer to the output destination. Note that flush is not called after
+ // the second absent (nullopt) event (or the only absent event; see
+ // next() for details).
+ //
+ // Both functions are passed the original buffer, its size (the amount
+ // of output text), and its capacity. They return (by modifying the
+ // argument) the replacement buffer and its size and capacity (these may
+ // refer to the original buffer). If space cannot be made available, the
+ // implementation can throw an appropriate exception (for example,
+ // std::bad_alloc or std::ios_base::failure). Any exception thrown is
+ // propagated to the user.
+ //
+ struct buffer
+ {
+ void* data;
+ std::size_t& size;
+ std::size_t capacity;
+ };
+
+ using overflow_function = void (void* data,
+ event,
+ buffer&,
+ std::size_t extra);
+ using flush_function = void (void* data, event, buffer&);
+
+ // Serialize using a custom buffer and overflow/flush functions (both
+ // are optional).
+ //
+ buffer_serializer (void* buf, std::size_t capacity,
+ overflow_function*,
+ flush_function*,
+ void* data,
+ std::size_t indentation = 2);
+
+ // As above but the length of the output text written is tracked in the
+ // size argument.
+ //
+ buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+ overflow_function*,
+ flush_function*,
+ void* data,
+ std::size_t indentation = 2);
+
+ // Begin/end an object.
+ //
+ // The member_begin_object() version is a shortcut for:
+ //
+ // member_name (name, check);
+ // begin_object ();
+ //
+ void
+ begin_object ();
+
+ void
+ member_begin_object (const char*, bool check = true);
+
+ void
+ member_begin_object (const std::string&, bool check = true);
+
+ void
+ end_object ();
+
+ // Serialize an object member (name and value).
+ //
+ // If check is false, then don't check whether the name (or value, if
+ // it's a string) is valid UTF-8 and don't escape any characters.
+ //
+ template <typename T>
+ void
+ member (const char* name, const T& value, bool check = true);
+
+ template <typename T>
+ void
+ member (const std::string& name, const T& value, bool check = true);
+
+ // Serialize an object member name.
+ //
+ // If check is false, then don't check whether the name is valid UTF-8
+ // and don't escape any characters.
+ //
+ void
+ member_name (const char*, bool check = true);
+
+ void
+ member_name (const std::string&, bool check = true);
+
+ // Begin/end an array.
+ //
+ // The member_begin_array() version is a shortcut for:
+ //
+ // member_name (name, check);
+ // begin_array ();
+ //
+ void
+ begin_array ();
+
+ void
+ member_begin_array (const char*, bool check = true);
+
+ void
+ member_begin_array (const std::string&, bool check = true);
+
+ void
+ end_array ();
+
+ // Serialize a string.
+ //
+ // If check is false, then don't check whether the value is valid UTF-8
+ // and don't escape any characters.
+ //
+ // Note that a NULL C-string pointer is serialized as a null value.
+ //
+ void
+ value (const char*, bool check = true);
+
+ void
+ value (const std::string&, bool check = true);
+
+ // Serialize a number.
+ //
+ template <typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_floating_point<T>::value>::type
+ value (T);
+
+ // Serialize a boolean value.
+ //
+ void
+ value (bool);
+
+ // Serialize a null value.
+ //
+ void
+ value (std::nullptr_t);
+
+ // Serialize value as a pre-serialized JSON value.
+ //
+ // Note that the value is expected to be a valid (and suitable) UTF-8-
+ // encoded JSON text. Note also that if pretty-printing is enabled,
+ // the resulting output may not be correctly indented.
+ //
+ void
+ value_json_text (const char*);
+
+ void
+ value_json_text (const std::string&);
+
+ // Serialize next JSON event.
+ //
+ // If check is false, then don't check whether the value is valid UTF-8
+ // and don't escape any characters.
+ //
+ // Return true if more events are required to complete the (top-level)
+ // value (that is, it is currently incomplete) and false otherwise.
+ // Throw invalid_json_output exception in case of an invalid event or
+ // value.
+ //
+ // At the end of the value an optional absent (nullopt) event can be
+ // serialized to verify the value is complete. If it is incomplete an
+ // invalid_json_output exception is thrown. An optional followup absent
+ // event can be serialized to indicate the completion of a multi-value
+ // sequence (one and only absent event indicates a zero value sequence).
+ // If anything is serialized to a complete value sequence an
+ // invalid_json_output exception is thrown.
+ //
+ // Note that this function was designed to be easily invoked with the
+ // output from parser::next() and parser::data(). For example, for a
+ // single-value mode:
+ //
+ // optional<event> e;
+ // do
+ // {
+ // e = p.next ();
+ // s.next (e, p.data ());
+ // }
+ // while (e);
+ //
+ // For a multi-value mode:
+ //
+ // while (p.peek ())
+ // {
+ // optional<event> e;
+ // do
+ // {
+ // e = p.next ();
+ // s.next (e, p.data ());
+ // }
+ // while (e);
+ // }
+ // s.next (nullopt); // End of value sequence.
+ //
+ bool
+ next (optional<event> event,
+ std::pair<const char*, std::size_t> value = {},
+ bool check = true);
+
+ private:
+ // Write the separator followed by the value, optionally enclosing the
+ // latter in quotes, to the output buffer.
+ //
+ void
+ write (event,
+ std::pair<const char*, std::size_t> sep,
+ std::pair<const char*, std::size_t> val,
+ bool check, char quote = '\0');
+
+ // Forward a value(v, check) call to value(v) ignoring the check
+ // argument. Used in the member() implementation.
+ //
+ template <typename T>
+ void
+ value (const T& v, bool /*check*/)
+ {
+ value (v);
+ }
+
+ // Convert numbers to string.
+ //
+ static std::size_t to_chars (char*, std::size_t, int);
+ static std::size_t to_chars (char*, std::size_t, long);
+ static std::size_t to_chars (char*, std::size_t, long long);
+ static std::size_t to_chars (char*, std::size_t, unsigned int);
+ static std::size_t to_chars (char*, std::size_t, unsigned long);
+ static std::size_t to_chars (char*, std::size_t, unsigned long long);
+ static std::size_t to_chars (char*, std::size_t, double);
+ static std::size_t to_chars (char*, std::size_t, long double);
+
+ static std::size_t to_chars_impl (char*, std::size_t, const char* fmt, ...);
+
+ // Note that buf_.size is a reference which, for the constructors that do
+ // not take the size argument, is bound to size_ below.
+ //
+ // NOTE(review): as a result, a copy of the serializer would appear to
+ // keep referring to the original's size_ -- confirm that instances are
+ // never copied or moved.
+ //
+ buffer buf_;
+ std::size_t size_;
+ overflow_function* overflow_;
+ flush_function* flush_;
+ void* data_;
+
+ // State of a "structured type" (array or object; as per the RFC
+ // terminology).
+ //
+ struct state
+ {
+ const event type; // Type kind (begin_array or begin_object).
+ std::size_t count; // Number of events serialized inside this type.
+ };
+
+ // Stack of nested structured type states.
+ //
+ // @@ TODO: would have been nice to use small_vector.
+ //
+ std::vector<state> state_;
+
+ // The number of consecutive absent events (nullopt) serialized thus
+ // far.
+ //
+ // Note: initialized to 1 to naturally handle a single absent event
+ // (declares an empty value sequence complete).
+ //
+ std::size_t absent_ = 1;
+
+ // The number of spaces with which to indent (once for each level of
+ // nesting). If zero, pretty-printing is disabled.
+ //
+ std::size_t indent_;
+
+ // Separator and indentation before/after value inside an object or
+ // array (see pretty-printing implementation for details).
+ //
+ std::string sep_;
+
+ // The number of complete top-level values serialized thus far.
+ //
+ std::size_t values_ = 0;
+ };
+
+ class LIBBUTL_SYMEXPORT stream_serializer: public buffer_serializer
+ {
+ public:
+ // Serialize to std::ostream.
+ //
+ // If stream exceptions are enabled then the std::ios_base::failure
+ // exception is used to report input/output errors (badbit and failbit).
+ // Otherwise, those are reported as the invalid_json_output exception.
+ //
+ explicit
+ stream_serializer (std::ostream&, std::size_t indentation = 2);
+
+ protected:
+ // Intermediate buffer: the output text is accumulated here before being
+ // written to the stream.
+ //
+ char tmp_[4096];
+ };
+ }
+}
+
+#include <libbutl/json/serializer.ixx>
diff --git a/libbutl/json/serializer.ixx b/libbutl/json/serializer.ixx
new file mode 100644
index 0000000..a719ef6
--- /dev/null
+++ b/libbutl/json/serializer.ixx
@@ -0,0 +1,247 @@
+#include <cstring> // strlen()
+
+namespace butl
+{
+ namespace json
+ {
+ // Construct the exception for event e with error code c, description d
+ // (which becomes the std::invalid_argument's what() string), and offset o.
+ //
+ inline invalid_json_output::
+ invalid_json_output (optional<event_type> e,
+ error_code c,
+ const char* d,
+ std::size_t o)
+ : std::invalid_argument (d), event (e), code (c), offset (o)
+ {
+ }
+
+ // As above but with the description as std::string (delegates to the
+ // C-string version).
+ //
+ inline invalid_json_output::
+ invalid_json_output (optional<event_type> e,
+ error_code c,
+ const std::string& d,
+ std::size_t o)
+ : invalid_json_output (e, c, d.c_str (), o)
+ {
+ }
+
+ // The "primary" constructor to which all the others delegate: custom
+ // buffer b with capacity c, externally-tracked size s, optional overflow
+ // and flush functions with their user data d, and indentation i.
+ //
+ // Note that the separator string starts as ",\n" if pretty-printing is
+ // enabled (non-zero indentation) and as empty otherwise.
+ //
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t& s, std::size_t c,
+ overflow_function* o, flush_function* f, void* d,
+ std::size_t i)
+ : buf_ {b, s, c},
+ overflow_ (o),
+ flush_ (f),
+ data_ (d),
+ indent_ (i),
+ sep_ (indent_ != 0 ? ",\n" : "")
+ {
+ }
+
+ // Fixed array: no overflow or flush functions, so running out of space is
+ // an error.
+ //
+ template <std::size_t N>
+ inline buffer_serializer::
+ buffer_serializer (std::array<char, N>& a, std::size_t& s, std::size_t i)
+ : buffer_serializer (a.data (), s, a.size (),
+ nullptr, nullptr, nullptr,
+ i)
+ {
+ }
+
+ // Fixed buffer: no overflow or flush functions, so running out of space is
+ // an error.
+ //
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t& s, std::size_t c, std::size_t i)
+ : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i)
+ {
+ }
+
+ // Custom buffer with the size tracked internally (in size_).
+ //
+ inline buffer_serializer::
+ buffer_serializer (void* b, std::size_t c,
+ overflow_function* o, flush_function* f, void* d,
+ std::size_t i)
+ : buffer_serializer (b, size_, c, o, f, d, i)
+ {
+ // Note: size_ is only bound by reference above and is assigned its value
+ // here, after the delegated-to constructor has run.
+ //
+ size_ = 0;
+ }
+
+ // Begin an object (serialize the begin_object event).
+ //
+ inline void buffer_serializer::
+ begin_object ()
+ {
+ next (event::begin_object);
+ }
+
+ // End the current object (serialize the end_object event).
+ //
+ inline void buffer_serializer::
+ end_object ()
+ {
+ next (event::end_object);
+ }
+
+ // Serialize an object member name (the name event). Note that a NULL
+ // C-string pointer is passed on as a NULL pointer with zero length.
+ //
+ inline void buffer_serializer::
+ member_name (const char* n, bool c)
+ {
+ next (event::name, {n, n != nullptr ? std::strlen (n) : 0}, c);
+ }
+
+ // As above but for the name as std::string.
+ //
+ inline void buffer_serializer::
+ member_name (const std::string& n, bool c)
+ {
+ next (event::name, {n.c_str (), n.size ()}, c);
+ }
+
+ // Serialize an object member name and begin an object as its value.
+ //
+ inline void buffer_serializer::
+ member_begin_object (const char* n, bool c)
+ {
+ member_name (n, c);
+ begin_object ();
+ }
+
+ // As above but for the name as std::string.
+ //
+ inline void buffer_serializer::
+ member_begin_object (const std::string& n, bool c)
+ {
+ member_name (n, c);
+ begin_object ();
+ }
+
+ // Serialize an object member: the name followed by the value. The check
+ // argument is passed to both the member_name() and value() calls.
+ //
+ template <typename T>
+ inline void buffer_serializer::
+ member (const char* n, const T& v, bool c)
+ {
+ member_name (n, c);
+ value (v, c);
+ }
+
+ // As above but for the name as std::string.
+ //
+ template <typename T>
+ inline void buffer_serializer::
+ member (const std::string& n, const T& v, bool c)
+ {
+ member_name (n, c);
+ value (v, c);
+ }
+
+ // Begin an array (serialize the begin_array event).
+ //
+ inline void buffer_serializer::
+ begin_array ()
+ {
+ next (event::begin_array);
+ }
+
+ // Serialize an object member name and begin an array as its value.
+ //
+ inline void buffer_serializer::
+ member_begin_array (const char* n, bool c)
+ {
+ member_name (n, c);
+ begin_array ();
+ }
+
+ // As above but for the name as std::string.
+ //
+ inline void buffer_serializer::
+ member_begin_array (const std::string& n, bool c)
+ {
+ member_name (n, c);
+ begin_array ();
+ }
+
+ // End the current array (serialize the end_array event).
+ //
+ inline void buffer_serializer::
+ end_array ()
+ {
+ next (event::end_array);
+ }
+
+ // Serialize a C-string as a string value or, if the pointer is NULL, as a
+ // null value (in which case the check argument is not used).
+ //
+ inline void buffer_serializer::
+ value (const char* v, bool c)
+ {
+ if (v == nullptr)
+ {
+ next (event::null);
+ return;
+ }
+
+ next (event::string, {v, std::strlen (v)}, c);
+ }
+
+ // Serialize std::string as a string value.
+ //
+ inline void buffer_serializer::
+ value (const std::string& v, bool c)
+ {
+ next (event::string, {v.c_str (), v.size ()}, c);
+ }
+
+ // Serialize an integral or floating point value as a number: convert it to
+ // text with the matching to_chars() overload and pass the result on as the
+ // number event.
+ //
+ template <typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_floating_point<T>::value>::type
+ buffer_serializer::
+ value (T v)
+ {
+ // The largest 128-bit integer has 39 digits, and long floating point
+ // numbers will fit because they are output in scientific notation.
+ //
+ char b[40];
+ const std::size_t n (to_chars (b, sizeof (b), v));
+ next (event::number, {b, n});
+ }
+
+ // Serialize a boolean value as the true or false literal.
+ //
+ inline void buffer_serializer::
+ value (bool b)
+ {
+ if (b)
+ next (event::boolean, {"true", 4});
+ else
+ next (event::boolean, {"false", 5});
+ }
+
+ // Serialize a null value (the null event without a value).
+ //
+ inline void buffer_serializer::
+ value (std::nullptr_t)
+ {
+ next (event::null);
+ }
+
+ // Serialize a pre-serialized JSON value as is.
+ //
+ // Note that, unlike in value(const char*), the pointer must not be NULL
+ // (strlen() is called on it unconditionally).
+ //
+ inline void buffer_serializer::
+ value_json_text (const char* v)
+ {
+ // Use event::number (which doesn't involve any quoting) with a disabled
+ // check.
+ //
+ next (event::number, {v, std::strlen (v)}, false /* check */);
+ }
+
+ // As above but for the text as std::string.
+ //
+ inline void buffer_serializer::
+ value_json_text (const std::string& v)
+ {
+ next (event::number, {v.c_str (), v.size ()}, false /* check */);
+ }
+
+ // Number to text conversions: each overload formats the value into the
+ // buffer b of size s using the printf conversion matching its type and
+ // returns the number of characters written (see to_chars_impl() for
+ // details).
+ //
+ // Note that we use the qualified std::size_t (as everywhere else in this
+ // file) since the <c*> headers are not guaranteed to declare it in the
+ // global namespace.
+ //
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, int v)
+ {
+ return to_chars_impl (b, s, "%d", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, long v)
+ {
+ return to_chars_impl (b, s, "%ld", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, long long v)
+ {
+ return to_chars_impl (b, s, "%lld", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, unsigned v)
+ {
+ return to_chars_impl (b, s, "%u", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, unsigned long v)
+ {
+ return to_chars_impl (b, s, "%lu", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, unsigned long long v)
+ {
+ return to_chars_impl (b, s, "%llu", v);
+ }
+
+ // Note that the floating point values are formatted with at most 10
+ // significant digits.
+ //
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, double v)
+ {
+ return to_chars_impl (b, s, "%.10g", v);
+ }
+
+ inline std::size_t buffer_serializer::
+ to_chars (char* b, std::size_t s, long double v)
+ {
+ return to_chars_impl (b, s, "%.10Lg", v);
+ }
+ }
+}
diff --git a/libbutl/lz4-stream.cxx b/libbutl/lz4-stream.cxx
new file mode 100644
index 0000000..8001770
--- /dev/null
+++ b/libbutl/lz4-stream.cxx
@@ -0,0 +1,281 @@
+// file : libbutl/lz4-stream.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/lz4-stream.hxx>
+
+#include <cstring> // memcpy()
+#include <stdexcept> // invalid_argument
+
+#include <libbutl/utility.hxx> // eof()
+
+using namespace std;
+
+namespace butl
+{
+ namespace lz4
+ {
+ // istream
+ //
+
+ // Fill the buffer b with up to c bytes from the underlying input stream.
+ // Return the number of bytes actually stored together with a flag that is
+ // true if the last read attempt reported EOF.
+ //
+ pair<size_t, bool> istreambuf::
+ read (char* b, size_t c)
+ {
+   size_t got (0);
+   bool hit_eof;
+
+   // @@ TODO: would it be faster to do a direct buffer copy if input
+   //    stream is bufstreambuf-based (see sha*.cxx for code)?
+   //
+   // Keep reading until the buffer is full or EOF is reported. Note that
+   // at least one read attempt is always made.
+   //
+   for (;;)
+   {
+     hit_eof = eof (is_->read (b + got, c - got));
+     got += static_cast<size_t> (is_->gcount ());
+
+     if (hit_eof || got == c)
+       break;
+   }
+
+   return make_pair (got, hit_eof);
+ }
+
+ // Attach to the input stream, read the compressed content header, and
+ // allocate the decompressor buffers. Return the value stored by the
+ // decompressor's begin() (absent if it stored nothing).
+ //
+ optional<uint64_t> istreambuf::
+ open (std::istream& is, bool end)
+ {
+   assert (is.exceptions () == std::istream::badbit);
+
+   is_ = &is;
+   end_ = end;
+
+   // Read in the header.
+   //
+   // Both hitting EOF here and begin() returning 0 are possible: 0-byte
+   // content compresses to 15 bytes (with or without content size; 1-byte
+   // -- to 20/28 bytes). The EOF can be ignored since an attempt to read
+   // more will result in another EOF. And our load() is prepared to handle
+   // the 0 hint.
+   //
+   // @@ We could end up leaving some of the input content from the header
+   //    in the input buffer which the caller will have no way of using
+   //    (e.g., in a stream of compressed contents). Doesn't look like
+   //    there is much we can do (our streams don't support putback) other
+   //    than document this limitation.
+   //
+   optional<uint64_t> size;
+
+   const pair<size_t, bool> hr (read (d_.hb, sizeof (d_.hb)));
+   d_.hn = hr.first;
+   h_ = d_.begin (&size);
+
+   // Allocate the buffers (the capacities are only known after begin()).
+   //
+   d_.ib = new char[d_.ic];
+   ib_.reset (d_.ib);
+
+   d_.ob = new char[d_.oc];
+   ob_.reset (d_.ob);
+
+   // Carry over whatever is left in the header buffer into the input
+   // buffer.
+   //
+   d_.in = d_.hn;
+   memcpy (d_.ib, d_.hb, d_.in);
+
+   // Start with an empty get area so that the first read calls
+   // underflow().
+   //
+   setg (d_.ob, d_.ob, d_.ob);
+   return size;
+ }
+
+ // Detach from the underlying input stream (noop if not open). Note that
+ // nothing else is reset here.
+ //
+ void istreambuf::
+ close ()
+ {
+   if (!is_open ())
+     return;
+
+   is_ = nullptr;
+ }
+
+ // Return the next character without consuming it, decompressing more
+ // data if the get area is exhausted. Return EOF if not open or if there
+ // is no more data.
+ //
+ istreambuf::int_type istreambuf::
+ underflow ()
+ {
+   if (!is_open ())
+     return traits_type::eof ();
+
+   // Only load more data if the get area is exhausted.
+   //
+   if (gptr () >= egptr () && !load ())
+     return traits_type::eof ();
+
+   return traits_type::to_int_type (*gptr ());
+ }
+
+ bool istreambuf::
+ load ()
+ {
+ // Note that the first call to this function may be with h_ == 0 (see
+ // open() for details). In which case we just need to verify there is
+ // no junk after the compressed content.
+ //
+ bool r; // True if the get area has been refilled with decompressed data.
+
+ if (h_ == 0)
+ r = false; // EOF
+ else
+ {
+ // Note: next() may just buffer the data.
+ //
+ do
+ {
+ // Note that on the first call we may already have some data in the
+ // input buffer (leftover header data).
+ //
+ if (h_ > d_.in)
+ {
+ pair<size_t, bool> p (read (d_.ib + d_.in, h_ - d_.in));
+
+ d_.in += p.first;
+
+ if (p.second && d_.in != h_)
+ throw invalid_argument ("incomplete LZ4 compressed content");
+ }
+
+ h_ = d_.next (); // Clears d_.in. The result is the next chunk hint (0 is treated as no more content below).
+
+ } while (d_.on == 0 && h_ != 0); // Repeat until some output is produced or no more input is expected.
+
+ setg (d_.ob, d_.ob, d_.ob + d_.on);
+ off_ += d_.on;
+ r = (d_.on != 0);
+ }
+
+ // If we don't expect any more compressed content and we were asked to
+ // end the underlying input stream, then verify there is no more input.
+ //
+ if (h_ == 0 && end_)
+ {
+ end_ = false; // Make sure this verification is only performed once.
+
+ if (d_.in != 0 ||
+ (!is_->eof () &&
+ is_->good () &&
+ is_->peek () != istream::traits_type::eof ()))
+ throw invalid_argument ("junk after LZ4 compressed content");
+ }
+
+ return r;
+ }
+
+ // ostream
+ //
+
+ void ostreambuf::
+ write (char* b, std::size_t n)
+ {
+ os_->write (b, static_cast<streamsize> (n)); // Forward n bytes starting at b to the underlying output stream.
+ }
+
+ // Attach to the output stream, initialize the compressor, and allocate
+ // its buffers. The stream must be set to throw on both badbit and
+ // failbit.
+ //
+ void ostreambuf::
+ open (std::ostream& os,
+       int level,
+       int block_id,
+       optional<std::uint64_t> content_size)
+ {
+   const std::ostream::iostate expected (std::ostream::badbit |
+                                         std::ostream::failbit);
+   assert (os.exceptions () == expected);
+
+   os_ = &os;
+
+   // The required buffer capacities are only known after begin().
+   //
+   c_.begin (level, block_id, content_size);
+
+   c_.ib = new char[c_.ic];
+   ib_.reset (c_.ib);
+
+   c_.ob = new char[c_.oc];
+   ob_.reset (c_.ob);
+
+   // Make the put area one byte shorter than the input buffer, reserving
+   // the last byte for the character passed to overflow().
+   //
+   setp (c_.ib, c_.ib + c_.ic - 1);
+   end_ = false;
+ }
+
+ // Finish the compressed content (unless already finished) and detach
+ // from the underlying output stream. Noop if not open.
+ //
+ void ostreambuf::
+ close ()
+ {
+   if (!is_open ())
+     return;
+
+   // Unless the last save() has already ended the content, do it now.
+   //
+   if (!end_)
+     save ();
+
+   os_ = nullptr;
+ }
+
+ ostreambuf::
+ ~ostreambuf ()
+ {
+ close (); // NOTE(review): may call save() and thus write to the stream; if that write throws here, the exception escapes a destructor (std::terminate) -- prefer calling close() explicitly first.
+ }
+
+ // Called when the put area is full: store the character c in the byte
+ // reserved for it and compress the now full input buffer. Return EOF if
+ // not open or if c is EOF, and c otherwise.
+ //
+ ostreambuf::int_type ostreambuf::
+ overflow (int_type c)
+ {
+   if (!is_open () || c == traits_type::eof ())
+     return traits_type::eof ();
+
+   // Store the last character in the space we have reserved in open().
+   // Note that pbump() doesn't do any checks.
+   //
+   *pptr () = traits_type::to_char_type (c);
+   pbump (1);
+
+   save ();
+   return c;
+ }
+
+ void ostreambuf::
+ save ()
+ {
+ c_.in = pptr () - pbase (); // Number of bytes accumulated in the put area.
+ off_ += c_.in;
+
+ // We assume this is the end if the input buffer is not full.
+ //
+ end_ = (c_.in != c_.ic);
+ c_.next (end_);
+
+ if (c_.on != 0) // next() may just buffer the data.
+ write (c_.ob, c_.on);
+
+ setp (c_.ib, c_.ib + c_.ic - 1); // Reset the put area, again keeping space for overflow()'s char.
+ }
+
+ // Write sn characters from s, compressing every time the input buffer
+ // fills up. Return 0 if not open or if the compressed content has already
+ // been ended, and sn otherwise.
+ //
+ streamsize ostreambuf::
+ xsputn (const char_type* s, streamsize sn)
+ {
+   if (!is_open () || end_)
+     return 0;
+
+   // The plan is to keep copying the data into the input buffer and
+   // calling save() (our compressor API currently has no way of avoiding
+   // the copy).
+   //
+   // Note that the remaining count is kept as size_t to avoid further
+   // 'signed/unsigned comparison' compiler warnings.
+   //
+   for (size_t left (static_cast<size_t> (sn)); left != 0; )
+   {
+     // Amount of free space in the buffer (including the extra byte we
+     // have reserved for overflow()).
+     //
+     const size_t room (epptr () - pptr () + 1);
+     const size_t chunk (left < room ? left : room);
+
+     memcpy (pptr (), s, chunk);
+     pbump (static_cast<int> (chunk));
+
+     // If the buffer is not full, then everything fitted with at least 1
+     // byte left and we are done.
+     //
+     if (chunk < room)
+       break;
+
+     save ();
+
+     s += chunk;
+     left -= chunk;
+   }
+
+   return sn;
+ }
+ }
+}
diff --git a/libbutl/lz4-stream.hxx b/libbutl/lz4-stream.hxx
new file mode 100644
index 0000000..b11c0a2
--- /dev/null
+++ b/libbutl/lz4-stream.hxx
@@ -0,0 +1,280 @@
+// file : libbutl/lz4-stream.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <memory> // unique_ptr
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <utility> // move()
+#include <istream>
+#include <ostream>
+#include <cassert>
+
+#include <libbutl/lz4.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/bufstreambuf.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ namespace lz4
+ {
+ // istream
+ //
+
+ class LIBBUTL_SYMEXPORT istreambuf: public bufstreambuf // Input stream buffer that decompresses LZ4 content read from another stream.
+ {
+ public:
+ optional<std::uint64_t>
+ open (std::istream&, bool end); // If end is true, verify there is no input past the compressed content.
+
+ bool
+ is_open () const {return is_ != nullptr;}
+
+ void
+ close ();
+
+ public:
+ using base = bufstreambuf;
+
+ // basic_streambuf input interface.
+ //
+ public:
+ virtual int_type
+ underflow () override;
+
+ // Direct access to the get area. Use with caution.
+ //
+ using base::gptr;
+ using base::egptr;
+ using base::gbump;
+
+ // Return the (logical) position of the next byte to be read.
+ //
+ using base::tellg;
+
+ private:
+ std::pair<std::size_t, bool>
+ read (char*, std::size_t); // Returns the number of bytes read and the eof flag.
+
+ bool
+ load (); // Refills the get area; returns false if no more data.
+
+ private:
+ std::istream* is_ = nullptr; // Underlying input stream (NULL if not open).
+ bool end_; // Set by open(); cleared once the end verification is done.
+ decompressor d_;
+ std::unique_ptr<char[]> ib_; // Decompressor input buffer.
+ std::unique_ptr<char[]> ob_; // Decompressor output buffer.
+ std::size_t h_; // Decompressor next chunk hint.
+ };
+
+ // Typical usage:
+ //
+ // try
+ // {
+ // ifdstream ifs (..., fdopen_mode::binary, ifdstream::badbit);
+ // lz4::istream izs (ifs, true /* end */);
+ // ... // Read from izs.
+ // }
+ // catch (const invalid_argument& e)
+ // {
+ // ... // Invalid compressed content, call e.what() for description.
+ // }
+ // catch (/* ifdstream exceptions */)
+ // {
+ // ...
+ // }
+ //
+ // See class decompressor for details on semantics and exceptions thrown.
+ //
+ // @@ TODO: get rid of badbit-only requirement.
+ // @@ TODO: re-opening support (will need decompressor reset).
+ //
+ class LIBBUTL_SYMEXPORT istream: public std::istream
+ {
+ public:
+ explicit
+ istream (iostate e = badbit | failbit)
+ : std::istream (&buf_)
+ {
+ assert (e & badbit); // The exception mask must include badbit.
+ exceptions (e);
+ }
+
+ // The underlying input stream is expected to throw on badbit but not
+ // failbit. If end is true, then on reaching the end of compressed data
+ // verify there is no more input.
+ //
+ // Note that this implementation does not support handling streams of
+ // compressed contents (end is false) that may include individual
+ // contents that uncompress to 0 bytes (see istreambuf::open()
+ // implementation for details).
+ //
+ istream (std::istream& is, bool end, iostate e = badbit | failbit)
+ : istream (e)
+ {
+ open (is, end);
+ }
+
+ // Return decompressed content size, if available.
+ //
+ optional<std::uint64_t>
+ open (std::istream& is, bool end)
+ {
+ return buf_.open (is, end);
+ }
+
+ bool
+ is_open () const
+ {
+ return buf_.is_open ();
+ }
+
+ // Signal that no further uncompressed input will be read.
+ //
+ void
+ close ()
+ {
+ return buf_.close ();
+ }
+
+ private:
+ istreambuf buf_;
+ };
+
+ // ostream
+ //
+
+ class LIBBUTL_SYMEXPORT ostreambuf: public bufstreambuf // Output stream buffer that LZ4-compresses what is written and passes the result to another stream.
+ {
+ public:
+ void
+ open (std::ostream&,
+ int compression_level,
+ int block_size_id,
+ optional<std::uint64_t> content_size);
+
+ bool
+ is_open () const {return os_ != nullptr;}
+
+ void
+ close (); // Compresses any remaining buffered data unless the content has already been ended.
+
+ virtual
+ ~ostreambuf () override; // Calls close().
+
+ public:
+ using base = bufstreambuf;
+
+ // basic_streambuf output interface.
+ //
+ // Note that syncing the input buffer before the end doesn't make much
+ // sense (it will just get buffered in the compressor). In fact, it can
+ // break our single-shot compression arrangement (for compatibility with
+ // the lz4 utility). Thus we inherit noop sync() from the base.
+ //
+ public:
+ virtual int_type
+ overflow (int_type) override;
+
+ virtual std::streamsize
+ xsputn (const char_type*, std::streamsize) override;
+
+ // Return the (logical) position of the next byte to be written.
+ //
+ using base::tellp;
+
+ private:
+ void
+ write (char*, std::size_t); // Writes to the underlying output stream.
+
+ void
+ save (); // Compresses the put area contents and writes any resulting output.
+
+ private:
+ std::ostream* os_ = nullptr; // Underlying output stream (NULL if not open).
+ bool end_; // True once save() has seen a non-full input buffer (assumed to be the end).
+ compressor c_;
+ std::unique_ptr<char[]> ib_; // Compressor input buffer.
+ std::unique_ptr<char[]> ob_; // Compressor output buffer.
+ };
+
+ // Typical usage:
+ //
+ // try
+ // {
+ // ofdstream ofs (..., fdopen_mode::binary);
+ // lz4::ostream ozs (ofs, 9, 4 /* 64KB */, nullopt /* content_size */);
+ //
+ // ... // Write to ozs.
+ //
+ // ozs.close ();
+ // ofs.close ();
+ // }
+ // catch (/* ofdstream exceptions */)
+ // {
+ // ...
+ // }
+ //
+ // See class compressor for details on semantics and exceptions thrown.
+ //
+ // @@ TODO: re-opening support (will need compressor reset).
+ //
+ class LIBBUTL_SYMEXPORT ostream: public std::ostream
+ {
+ public:
+ explicit
+ ostream (iostate e = badbit | failbit)
+ : std::ostream (&buf_)
+ {
+ assert (e & badbit); // The exception mask must include badbit.
+ exceptions (e);
+ }
+
+ // The underlying output stream is expected to throw on badbit or
+ // failbit.
+ //
+ // See compress() for the description of the compression level, block
+ // size and content size arguments.
+ //
+ ostream (std::ostream& os,
+ int compression_level,
+ int block_size_id,
+ optional<std::uint64_t> content_size,
+ iostate e = badbit | failbit)
+ : ostream (e)
+ {
+ open (os, compression_level, block_size_id, content_size);
+ }
+
+ void
+ open (std::ostream& os,
+ int compression_level,
+ int block_size_id,
+ optional<std::uint64_t> content_size)
+ {
+ buf_.open (os, compression_level, block_size_id, content_size);
+ }
+
+ bool
+ is_open () const
+ {
+ return buf_.is_open ();
+ }
+
+ // Signal that no further uncompressed output will be written. Note that
+ // this does not close the underlying output stream.
+ //
+ void
+ close ()
+ {
+ return buf_.close ();
+ }
+
+ private:
+ ostreambuf buf_;
+ };
+ }
+}
diff --git a/libbutl/lz4.c b/libbutl/lz4.c
new file mode 100644
index 0000000..3f0e430
--- /dev/null
+++ b/libbutl/lz4.c
@@ -0,0 +1,2495 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Copyright (C) 2011-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/*-************************************
+* Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+# define LZ4_HEAPMODE 0
+#endif
+
+/*
+ * LZ4_ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
+
+
+/*-************************************
+* CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ * It can generate buggy code on targets which assembly generation depends on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
+# if defined(__GNUC__) && \
+ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define LZ4_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+# define LZ4_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
+# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+
+/*-************************************
+* Dependency
+**************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+# define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
+#include "lz4.h"
+/* see also "memory routines" below */
+
+
+/*-************************************
+* Compiler Options
+**************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
+# include <intrin.h> /* only present in VS2005+ */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif /* _MSC_VER */
+
+#ifndef LZ4_FORCE_INLINE
+# ifdef _MSC_VER /* Visual Studio */
+# define LZ4_FORCE_INLINE static __forceinline
+# else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define LZ4_FORCE_INLINE static inline
+# endif
+# else
+# define LZ4_FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+# endif /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+# undef LZ4_FORCE_INLINE
+# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
+#else
+# define LZ4_FORCE_O2
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#ifndef likely
+#define likely(expr) expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
+#define unlikely(expr) expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
+
+/*-************************************
+* Memory routines
+**************************************/
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void LZ4_free(void* p);
+# define ALLOC(s) LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p) LZ4_free(p)
+#else
+# include <stdlib.h> /* malloc, calloc, free */
+# define ALLOC(s) malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p) free(p)
+#endif
+
+#include <string.h> /* memset, memcpy */
+#define MEM_INIT(p,v,s) memset((p),(v),(s))
+
+
+/*-************************************
+* Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
+# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+* Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+# include <stdio.h>
+ static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+ return ((size_t)ptr & (alignment -1)) == 0; /* non-zero if no bit of (alignment-1) is set in the address; a mask test, so only meaningful for power-of-2 alignment */
+}
+
+
+/*-************************************
+* Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef size_t uptrval; /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+ typedef U64 reg_t; /* 64-bits in x32 mode */
+#else
+ typedef size_t reg_t; /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+ notLimited = 0,
+ limitedOutput = 1,
+ fillOutput = 2
+} limitedOutput_directive;
+
+
+/*-************************************
+* Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
+static unsigned LZ4_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0]; /* the first byte in memory holds the 1 only when the least significant byte is stored first */
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* method 2 : direct access through a cast pointer.
+ * lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* method 1 : access through a packed union.
+ * __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign;
+
+static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
+static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
+
+#else /* method 0 : safe and portable access using memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)
+{
+ U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+ U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static reg_t LZ4_read_ARCH(const void* memPtr)
+{
+ reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+static void LZ4_write32(void* memPtr, U32 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+ if (LZ4_isLittleEndian()) {
+ return LZ4_read16(memPtr); /* host order already matches : read directly */
+ } else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)((U16)p[0] + (p[1]<<8)); /* assemble from bytes : p[0] is the low byte, p[1] the high byte */
+ }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+ if (LZ4_isLittleEndian()) {
+ LZ4_write16(memPtr, value); /* host order already matches : write directly */
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE) value; /* low byte first */
+ p[1] = (BYTE)(value>>8); /* then high byte */
+ }
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd.
+ * Copies in whole 8-byte steps and always performs at least one step
+ * (do/while), even when dstPtr >= dstEnd. */
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+}
+
+static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+# define LZ4_FAST_DEC_LOOP 1
+# elif defined(__aarch64__) && !defined(__clang__)
+ /* On aarch64, we disable this optimization for clang because on certain
+ * mobile chipsets, performance is reduced with clang. For information
+ * refer to https://github.com/lz4/lz4/pull/707 */
+# define LZ4_FAST_DEC_LOOP 1
+# else
+# define LZ4_FAST_DEC_LOOP 0
+# endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+ assert(srcPtr + offset == dstPtr); /* the source lies exactly offset bytes before the destination */
+ if (offset < 8) {
+ LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
+ dstPtr[0] = srcPtr[0]; /* byte-wise : the two 4-byte ranges overlap when offset < 4 */
+ dstPtr[1] = srcPtr[1];
+ dstPtr[2] = srcPtr[2];
+ dstPtr[3] = srcPtr[3];
+ srcPtr += inc32table[offset]; /* per-offset source adjustment before the second 4 bytes */
+ LZ4_memcpy(dstPtr+4, srcPtr, 4);
+ srcPtr -= dec64table[offset]; /* per-offset source adjustment before the 8-byte steps below */
+ dstPtr += 8;
+ } else {
+ LZ4_memcpy(dstPtr, srcPtr, 8);
+ dstPtr += 8;
+ srcPtr += 8;
+ }
+
+ LZ4_wildCopy8(dstPtr, srcPtr, dstEnd); /* always copies at least 8 more bytes (see LZ4_wildCopy8) */
+}
+
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16.
+ * Always performs at least one 32-byte step (do/while). */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+}
+
+/* LZ4_memcpy_using_offset() presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd
+ * For offsets 1, 2 and 4 an 8-byte pattern is built in v[] from the source
+ * and then stored repeatedly; any other offset is delegated to
+ * LZ4_memcpy_using_offset_base(). */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+ BYTE v[8];
+
+ assert(dstEnd >= dstPtr + MINMATCH);
+
+ switch(offset) {
+ case 1:
+ MEM_INIT(v, *srcPtr, 8); /* the same byte 8 times */
+ break;
+ case 2:
+ LZ4_memcpy(v, srcPtr, 2);
+ LZ4_memcpy(&v[2], srcPtr, 2);
+ LZ4_memcpy(&v[4], v, 4); /* the 2-byte pattern 4 times */
+ break;
+ case 4:
+ LZ4_memcpy(v, srcPtr, 4);
+ LZ4_memcpy(&v[4], srcPtr, 4); /* the 4-byte pattern twice */
+ break;
+ default:
+ LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+ return;
+ }
+
+ LZ4_memcpy(dstPtr, v, 8); /* the first 8 bytes are stored unconditionally */
+ dstPtr += 8;
+ while (dstPtr < dstEnd) {
+ LZ4_memcpy(dstPtr, v, 8);
+ dstPtr += 8;
+ }
+}
+#endif
+
+
+/*-************************************
+* Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (reg_t val)
+{
+ assert(val != 0); /* val is a non-zero word (LZ4_count() passes the XOR of two words); the result is its number of trailing zero bytes on little-endian, leading zero bytes on big-endian */
+ if (LZ4_isLittleEndian()) {
+ if (sizeof(val) == 8) {
+# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+ return (unsigned)_tzcnt_u64(val) >> 3;
+# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64(&r, (U64)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctzll((U64)val) >> 3; /* bit count to byte count */
+# else
+ const U64 m = 0x0101010101010101ULL;
+ val ^= val - 1;
+ return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
+# endif
+ } else /* 32 bits */ {
+# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward(&r, (U32)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctz((U32)val) >> 3;
+# else
+ const U32 m = 0x01010101;
+ return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+# endif
+ }
+ } else /* Big Endian CPU */ {
+ if (sizeof(val)==8) {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clzll((U64)val) >> 3;
+# else
+#if 1
+ /* this method is probably faster,
+ * but adds a 128 bytes lookup table */
+ static const unsigned char ctz7_tab[128] = {
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ };
+ U64 const mask = 0x0101010101010101ULL;
+ U64 const t = (((val >> 8) - mask) | val) & mask;
+ return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+ /* this method doesn't consume memory space like the previous one,
+ * but it contains several branches,
+ * that may end up slowing execution */
+ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+ Note that this code path is never triggered in 32-bits mode. */
+ unsigned r;
+ if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+#endif
+# endif
+ } else /* 32 bits */ {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clz((U32)val) >> 3;
+# else
+ val >>= 8;
+ val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+ (val + 0x00FF0000)) >> 24;
+ return (unsigned)val ^ 3;
+# endif
+ }
+ }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+/* LZ4_count() :
+ * Counts how many consecutive bytes are equal at pIn and pMatch,
+ * without ever reading pIn at or beyond pInLimit.
+ * Whole reg_t words are compared first; the remaining tail is finished
+ * with 4-byte (64-bit only), 2-byte and 1-byte comparisons.
+ * @return : number of identical leading bytes */
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const origin = pIn;
+
+    /* First word is handled apart : pIn has not moved yet,
+     * so the number of common bytes is directly the result */
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const delta = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (delta) return LZ4_NbCommonBytes(delta);
+        pIn += STEPSIZE;
+        pMatch += STEPSIZE;
+    }
+
+    /* Following words */
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const delta = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (delta) {
+            pIn += LZ4_NbCommonBytes(delta);
+            return (unsigned)(pIn - origin);
+        }
+        pIn += STEPSIZE;
+        pMatch += STEPSIZE;
+    }
+
+    /* Fewer than STEPSIZE bytes remain before pInLimit */
+    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+        pIn += 4;
+        pMatch += 4;
+    }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+        pIn += 2;
+        pMatch += 2;
+    }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) {
+        pIn++;
+    }
+    return (unsigned)(pIn - origin);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+* Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1)); /* inputs strictly smaller than this are compressed with a byU16 table (see LZ4_compress_fast_extState) */
+static const U32 LZ4_skipTrigger = 6; /* Increasing this value ==> compression runs slower on incompressible data */
+
+
+/*-************************************
+* Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; /* kind of entry stored in the hash table : const BYTE* pointers (byPtr), U32 indexes (byU32) or U16 indexes (byU16); clearedTable == table not used yet */
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current
+ * blob being compressed are valid and refer to the preceding
+ * content (of length ctx->dictSize), which is available
+ * contiguously preceding in memory the content currently
+ * being compressed.
+ * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
+ * else in memory, starting at ctx->dictionary with length
+ * ctx->dictSize.
+ * - usingDictCtx : Like usingExtDict, but everything concerning the preceding
+ * content is in a separate context, pointed to by
+ * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ * entries in the current context that refer to positions
+ * preceding the beginning of the current compression are
+ * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ * ->dictSize describe the location and size of the preceding
+ * content, and matches are found by looking in the ctx
+ * ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; /* dictSmall : the compressor rejects candidates whose index is below (startIndex - dictSize) */
+
+
+/*-************************************
+* Local Utils
+**************************************/
+/* Library version, as the LZ4_VERSION_NUMBER integer. */
+int LZ4_versionNumber (void)
+{
+    return LZ4_VERSION_NUMBER;
+}
+
+/* Library version, as the LZ4_VERSION_STRING text. */
+const char* LZ4_versionString(void)
+{
+    return LZ4_VERSION_STRING;
+}
+
+/* Function form of the LZ4_COMPRESSBOUND() macro. */
+int LZ4_compressBound(int isize)
+{
+    return LZ4_COMPRESSBOUND(isize);
+}
+
+/* Size of a compression state, i.e. LZ4_STREAMSIZE. */
+int LZ4_sizeofState(void)
+{
+    return LZ4_STREAMSIZE;
+}
+
+
+/*-************************************
+* Internal Definitions used in Tests
+**************************************/
+#if defined (__cplusplus)
+extern "C" { /* the two prototypes below get C linkage when this file is built as C++ */
+#endif
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+ int compressedSize, int maxOutputSize,
+ const void* dictStart, size_t dictSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*-******************************
+* Compression functions
+********************************/
+/* LZ4_hash4() :
+ * Multiplicative hash of a 32-bit sequence.
+ * The result is LZ4_HASHLOG bits wide, or LZ4_HASHLOG+1 bits for a byU16 table. */
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+{
+    int const hashBits = (tableType == byU16) ? (LZ4_HASHLOG+1) : LZ4_HASHLOG;
+    return ((sequence * 2654435761U) >> ((MINMATCH*8)-hashBits));
+}
+
+/* LZ4_hash5() :
+ * Hash of a 64-bit sequence from which 24 bits are shifted out first,
+ * so that only 40 bits contribute; the shift direction and the multiplier
+ * depend on endianness.
+ * The result is LZ4_HASHLOG bits wide, or LZ4_HASHLOG+1 bits for a byU16 table. */
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+    const U32 hashBits = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+
+    if (!LZ4_isLittleEndian()) {
+        const U64 bigEndianPrime = 11400714785074694791ULL;
+        return (U32)(((sequence >> 24) * bigEndianPrime) >> (64 - hashBits));
+    }
+
+    {   const U64 littleEndianPrime = 889523592379ULL;
+        return (U32)(((sequence << 24) * littleEndianPrime) >> (64 - hashBits));
+    }
+}
+
+/* LZ4_hashPosition() :
+ * Hash of the bytes found at p.
+ * LZ4_hash5() is used when reg_t is 8 bytes wide and the table is not byU16;
+ * LZ4_hash4() is used in every other case. */
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+{
+    if ((sizeof(reg_t)!=8) || (tableType == byU16)) {
+        return LZ4_hash4(LZ4_read32(p), tableType);
+    }
+    return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+}
+
+/* LZ4_clearHash() :
+ * Erases entry h of the hash table : NULL for a byPtr table, 0 for byU32 / byU16.
+ * Any other tableType (clearedTable included) is illegal and trips assert(0). */
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byPtr) {
+        const BYTE** const ptrTable = (const BYTE**)tableBase;
+        ptrTable[h] = NULL;
+    } else if (tableType == byU32) {
+        U32* const idxTable = (U32*) tableBase;
+        idxTable[h] = 0;
+    } else if (tableType == byU16) {
+        U16* const idxTable = (U16*) tableBase;
+        idxTable[h] = 0;
+    } else {
+        assert(0);   /* illegal! */
+    }
+}
+
+/* LZ4_putIndexOnHash() :
+ * Stores index idx into entry h of an index-based hash table.
+ * Only byU32 and byU16 are legal; with byU16, idx must be < 65536.
+ * Any other tableType trips assert(0) and stores nothing. */
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byU32) {
+        U32* const idxTable = (U32*) tableBase;
+        idxTable[h] = idx;
+    } else if (tableType == byU16) {
+        U16* const idxTable = (U16*) tableBase;
+        assert(idx < 65536);
+        idxTable[h] = (U16)idx;
+    } else {
+        assert(0);   /* illegal! (clearedTable, byPtr, or unknown value) */
+    }
+}
+
+/* LZ4_putPositionOnHash() :
+ * Records position p in entry h of the hash table :
+ * the pointer itself for byPtr, the distance (p - srcBase) for byU32 / byU16.
+ * clearedTable is illegal and trips assert(0). */
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                                  const BYTE* srcBase)
+{
+    if (tableType == byPtr) {
+        const BYTE** const ptrTable = (const BYTE**)tableBase;
+        ptrTable[h] = p;
+    } else if (tableType == byU32) {
+        U32* const idxTable = (U32*) tableBase;
+        idxTable[h] = (U32)(p-srcBase);
+    } else if (tableType == byU16) {
+        U16* const idxTable = (U16*) tableBase;
+        idxTable[h] = (U16)(p-srcBase);
+    } else if (tableType == clearedTable) {
+        assert(0);   /* illegal! */
+    }
+}
+
+/* LZ4_putPosition() :
+ * Hashes the bytes at p, then records p in the matching hash table entry. */
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    LZ4_putPositionOnHash(p, LZ4_hashPosition(p, tableType),
+                          tableBase, tableType, srcBase);
+}
+
+/* LZ4_getIndexOnHash() :
+ * Returns the index stored in entry h of an index-based hash table.
+ * The matching position is obtained by adding this index to base, or to dictBase.
+ * Assumption 1 : tableType is byU32 or byU16; anything else trips assert(0) and yields 0.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    switch (tableType)
+    {
+    case byU32:
+        {   const U32* const idxTable = (const U32*) tableBase;
+            assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+            return idxTable[h];
+        }
+    case byU16:
+        {   const U16* const idxTable = (const U16*) tableBase;
+            assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+            return idxTable[h];
+        }
+    default:
+        break;
+    }
+    assert(0);   /* forbidden case */
+    return 0;
+}
+
+/* LZ4_getPositionOnHash() :
+ * Returns the position recorded in entry h of the hash table :
+ * the stored pointer for byPtr, srcBase + stored index otherwise.
+ * Every tableType other than byPtr and byU32 is read as a U16 table. */
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr:
+        return ((const BYTE* const*) tableBase)[h];
+    case byU32:
+        return srcBase + ((const U32*) tableBase)[h];
+    default:   /* U16 table; also guarantees a return value */
+        return srcBase + ((const U16*) tableBase)[h];
+    }
+}
+
+/* LZ4_getPosition() :
+ * Hashes the bytes at p, then returns the position recorded for that hash. */
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
+{
+    return LZ4_getPositionOnHash(LZ4_hashPosition(p, tableType),
+                                 tableBase, tableType, srcBase);
+}
+
+/* LZ4_prepareTable() :
+ * Gets cctx ready for an independent compression of inputSize bytes using
+ * tableType : the hash table is either kept as is or wiped, and any
+ * dictionary / history reference is dropped. */
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+                 const int inputSize,
+                 const tableType_t tableType)
+{
+    tableType_t const previousType = (tableType_t)cctx->tableType;
+
+    /* A table that was never used is guaranteed to be all zeroes, hence
+     * usable in any mode. A used table is kept only when none of the
+     * conditions below requires wiping it. */
+    if (previousType != clearedTable) {
+        int mustReset;
+        assert(inputSize >= 0);
+        mustReset = (previousType != tableType)
+                 || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+                 || ((tableType == byU32) && cctx->currentOffset > 1 GB)
+                 || (tableType == byPtr)
+                 || (inputSize >= 4 KB);
+        if (mustReset) {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = (U32)clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Inserting a gap, so that every older entry ends up more than
+     * LZ4_DISTANCE_MAX behind, is faster than compressing without a gap.
+     * Compressing with currentOffset == 0 is faster still, so that case
+     * is left untouched. */
+    if ((tableType == byU32) && (cctx->currentOffset != 0)) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Last step : forget any history */
+    cctx->dictSize = 0;
+    cctx->dictionary = NULL;
+    cctx->dictCtx = NULL;
+}
+
+/** LZ4_compress_generic_validated() :
+ * Core block compressor shared by all fast-compression entry points.
+ * inlined, to ensure branches are decided at compilation time.
+ * Presumed already validated at this stage:
+ * - source != NULL
+ * - inputSize > 0
+ *
+ * cctx : compression context; its dictSize, currentOffset and tableType
+ * (and dictCtx when dictDirective == usingDictCtx) are updated
+ * before the main loop starts.
+ * inputConsumed : only written when outputDirective == fillOutput; receives
+ * the number of source bytes actually encoded.
+ * maxOutputSize : capacity of dest; only enforced when outputDirective is
+ * limitedOutput or fillOutput.
+ * Inputs shorter than LZ4_minLength are stored as a single run of literals.
+ *
+ * @return : number of bytes written into dest,
+ * or 0 when nothing can be produced : fillOutput with maxOutputSize < 1,
+ * byU16 with inputSize >= LZ4_64Klimit, or limitedOutput when the
+ * compressed data does not fit within maxOutputSize.
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
+ LZ4_stream_t_internal* const cctx,
+ const char* const source,
+ char* const dest,
+ const int inputSize,
+ int *inputConsumed, /* only written when outputDirective == fillOutput */
+ const int maxOutputSize,
+ const limitedOutput_directive outputDirective,
+ const tableType_t tableType,
+ const dict_directive dictDirective,
+ const dictIssue_directive dictIssue,
+ const int acceleration)
+{
+ int result;
+ const BYTE* ip = (const BYTE*) source;
+
+ U32 const startIndex = cctx->currentOffset;
+ const BYTE* base = (const BYTE*) source - startIndex; /* so that (position - base) is the index stored in the hash table */
+ const BYTE* lowLimit;
+
+ const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+ const BYTE* const dictionary =
+ dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+ const U32 dictSize =
+ dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+ const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */
+
+ int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+ U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictIssue == dictSmall */
+ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
+ const BYTE* anchor = (const BYTE*) source;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+
+ /* the dictCtx currentOffset is indexed on the start of the dictionary,
+ * while a dictionary in the current context precedes the currentOffset */
+ const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
+ dictionary + dictSize - dictCtx->currentOffset :
+ dictionary + dictSize - startIndex;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const olimit = op + maxOutputSize;
+
+ U32 offset = 0;
+ U32 forwardH;
+
+ DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+ assert(ip != NULL);
+ /* If init conditions are not met, we don't have to mark stream
+ * as having dirty context, since no action was taken yet */
+ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
+ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
+ assert(acceleration >= 1);
+
+ lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+ /* Update context state */
+ if (dictDirective == usingDictCtx) {
+ /* Subsequent linked blocks can't use the dictionary. */
+ /* Instead, they use the block we just compressed. */
+ cctx->dictCtx = NULL;
+ cctx->dictSize = (U32)inputSize;
+ } else {
+ cctx->dictSize += (U32)inputSize;
+ }
+ cctx->currentOffset += (U32)inputSize;
+ cctx->tableType = (U32)tableType;
+
+ if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* First Byte */
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+ /* Main Loop */
+ for ( ; ; ) {
+ const BYTE* match;
+ BYTE* token;
+ const BYTE* filledIp;
+
+ /* Find a match */
+ if (tableType == byPtr) {
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+ } while ( (match+LZ4_DISTANCE_MAX < ip)
+ || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+ } else { /* byU32, byU16 */
+
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ U32 const current = (U32)(forwardIp - base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex <= current);
+ assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ assert(tableType == byU32);
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ matchIndex += dictDelta; /* make dictCtx index comparable with current context */
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else if (dictDirective==usingExtDict) {
+ if (matchIndex < startIndex) {
+ DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
+ assert(startIndex - matchIndex >= MINMATCH);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else { /* single continuous memory segment */
+ match = base + matchIndex;
+ }
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+ DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
+ if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
+ assert(matchIndex < current);
+ if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+ && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+ continue;
+ } /* too far */
+ assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
+
+ if (LZ4_read32(match) == LZ4_read32(ip)) {
+ if (maybe_extMem) offset = current - matchIndex;
+ break; /* match found */
+ }
+
+ } while(1);
+ }
+
+ /* Catch up : extend the match backward while the preceding bytes are equal */
+ filledIp = ip;
+ while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+ /* Encode Literals */
+ { unsigned const litLength = (unsigned)(ip - anchor);
+ token = op++;
+ if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
+ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ if ((outputDirective == fillOutput) &&
+ (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+ op--;
+ goto _last_literals;
+ }
+ if (litLength >= RUN_MASK) {
+ int len = (int)(litLength - RUN_MASK);
+ *token = (RUN_MASK<<ML_BITS);
+ for(; len >= 255 ; len-=255) *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(litLength<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op+litLength);
+ op+=litLength;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
+ }
+
+_next_match:
+ /* at this stage, the following variables must be correctly set :
+ * - ip : at start of LZ operation
+ * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
+ * - offset : if maybe_extMem==1 (constant)
+ * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+ * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+ */
+
+ if ((outputDirective == fillOutput) &&
+ (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+ /* the match was too close to the end, rewind and go to last literals */
+ op = token;
+ goto _last_literals;
+ }
+
+ /* Encode Offset */
+ if (maybe_extMem) { /* static test */
+ DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+ assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+ LZ4_writeLE16(op, (U16)offset); op+=2;
+ } else {
+ DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
+ assert(ip-match <= LZ4_DISTANCE_MAX);
+ LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+ }
+
+ /* Encode MatchLength */
+ { unsigned matchCode;
+
+ if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+ && (lowLimit==dictionary) /* match within extDict */ ) {
+ const BYTE* limit = ip + (dictEnd-match);
+ assert(dictEnd > match);
+ if (limit > matchlimit) limit = matchlimit;
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+ ip += (size_t)matchCode + MINMATCH;
+ if (ip==limit) {
+ unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
+ matchCode += more;
+ ip += more;
+ }
+ DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
+ } else {
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+ ip += (size_t)matchCode + MINMATCH;
+ DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
+ }
+
+ if ((outputDirective) && /* Check output buffer overflow */
+ (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+ if (outputDirective == fillOutput) {
+ /* Match description too long : reduce it */
+ U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+ ip -= matchCode - newMatchCode;
+ assert(newMatchCode < matchCode);
+ matchCode = newMatchCode;
+ if (unlikely(ip <= filledIp)) {
+ /* We have already filled up to filledIp so if ip ends up less than filledIp
+ * we have positions in the hash table beyond the current position. This is
+ * a problem if we reuse the hash table. So we have to remove these positions
+ * from the hash table.
+ */
+ const BYTE* ptr;
+ DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+ for (ptr = ip; ptr <= filledIp; ++ptr) {
+ U32 const h = LZ4_hashPosition(ptr, tableType);
+ LZ4_clearHash(h, cctx->hashTable, tableType);
+ }
+ }
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ if (matchCode >= ML_MASK) {
+ *token += ML_MASK;
+ matchCode -= ML_MASK;
+ LZ4_write32(op, 0xFFFFFFFF);
+ while (matchCode >= 4*255) {
+ op+=4;
+ LZ4_write32(op, 0xFFFFFFFF);
+ matchCode -= 4*255;
+ }
+ op += matchCode / 255;
+ *op++ = (BYTE)(matchCode % 255);
+ } else
+ *token += (BYTE)(matchCode);
+ }
+ /* Ensure we have enough space for the last literals. */
+ assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
+
+ anchor = ip;
+
+ /* Test end of chunk */
+ if (ip >= mflimitPlusOne) break;
+
+ /* Fill table */
+ LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+ /* Test next position */
+ if (tableType == byPtr) {
+
+ match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ if ( (match+LZ4_DISTANCE_MAX >= ip)
+ && (LZ4_read32(match) == LZ4_read32(ip)) )
+ { token=op++; *token=0; goto _next_match; }
+
+ } else { /* byU32, byU16 */
+
+ U32 const h = LZ4_hashPosition(ip, tableType);
+ U32 const current = (U32)(ip-base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ matchIndex += dictDelta;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else if (dictDirective==usingExtDict) {
+ if (matchIndex < startIndex) {
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else { /* single memory segment */
+ match = base + matchIndex;
+ }
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+ && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+ && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+ token=op++;
+ *token=0;
+ if (maybe_extMem) offset = current - matchIndex;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+ goto _next_match;
+ }
+ }
+
+ /* Prepare next loop */
+ forwardH = LZ4_hashPosition(++ip, tableType);
+
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRun = (size_t)(iend - anchor);
+ if ( (outputDirective) && /* Check output buffer overflow */
+ (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+ if (outputDirective == fillOutput) {
+ /* adapt lastRun to fill 'dst' */
+ assert(olimit >= op);
+ lastRun = (size_t)(olimit-op) - 1/*token*/;
+ lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+ if (lastRun >= RUN_MASK) {
+ size_t accumulator = lastRun - RUN_MASK;
+ *op++ = RUN_MASK << ML_BITS;
+ for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRun<<ML_BITS);
+ }
+ LZ4_memcpy(op, anchor, lastRun);
+ ip = anchor + lastRun;
+ op += lastRun;
+ }
+
+ if (outputDirective == fillOutput) {
+ *inputConsumed = (int) (((const char*)ip)-source);
+ }
+ result = (int)(((char*)op) - dest);
+ assert(result > 0);
+ DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+ return result;
+}
+
+/** LZ4_compress_generic() :
+ * Front door of the block compressor;
+ * inlined, so that branches are resolved at compilation time.
+ * Rejects unsupported sizes, handles the empty input itself
+ * (src == NULL is accepted when srcSize == 0),
+ * and hands everything else over to LZ4_compress_generic_validated().
+ * @return : number of bytes written into dst, or 0 on failure */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    /* Unsupported srcSize : too large, or negative */
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }
+
+    if (srcSize != 0) {
+        /* regular case */
+        assert(src != NULL);
+        return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                    inputConsumed, /* only written into if outputDirective == fillOutput */
+                    dstCapacity, outputDirective,
+                    tableType, dictDirective, dictIssue, acceleration);
+    }
+
+    /* Empty input : the block is a single zero token */
+    if (outputDirective != notLimited && dstCapacity <= 0) {
+        return 0;   /* no output, can't write anything */
+    }
+    DEBUGLOG(5, "Generating an empty block");
+    assert(outputDirective == notLimited || dstCapacity >= 1);
+    assert(dst != NULL);
+    dst[0] = 0;
+    if (outputDirective == fillOutput) {
+        assert (inputConsumed != NULL);
+        *inputConsumed = 0;
+    }
+    return 1;
+}
+
+
+/* LZ4_compress_fast_extState() :
+ * One-shot compression using a caller-provided state, which is fully
+ * (re)initialized here through LZ4_initStream().
+ * acceleration is clamped to [1 (via LZ4_ACCELERATION_DEFAULT), LZ4_ACCELERATION_MAX].
+ * Output bound checks are skipped when maxOutputSize >= LZ4_compressBound(inputSize).
+ * Every call passes literal directives so the inlined compressor can specialize. */
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (maxOutputSize < LZ4_compressBound(inputSize)) {
+        /* dest may be too small : bounded output */
+        if (inputSize >= LZ4_64Klimit) {
+            const tableType_t wideTable = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, wideTable, noDict, noDictIssue, acceleration);
+        }
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+    }
+
+    /* dest is large enough for the worst case : no bound checks */
+    if (inputSize >= LZ4_64Klimit) {
+        const tableType_t wideTable = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, wideTable, noDict, noDictIssue, acceleration);
+    }
+    return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * Same job as LZ4_compress_fast_extState(), minus the expensive
+ * initialization step : the state is only prepared with LZ4_prepareTable().
+ *
+ * Only safe to call when the state buffer is known to be correctly
+ * initialized already (see comment in lz4.h on LZ4_resetStream_fast()
+ * for a definition of "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (dstCapacity < LZ4_compressBound(srcSize)) {
+        /* dst may be too small : bounded output */
+        if (srcSize >= LZ4_64Klimit) {
+            const tableType_t wideTable = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, wideTable);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, wideTable, noDict, noDictIssue, acceleration);
+        }
+        LZ4_prepareTable(ctx, srcSize, byU16);
+        if (ctx->currentOffset == 0) {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        }
+        /* table kept from a previous use : stale entries must be filtered out */
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, dictSmall, acceleration);
+    }
+
+    /* dst is large enough for the worst case : no bound checks */
+    if (srcSize >= LZ4_64Klimit) {
+        const tableType_t wideTable = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        LZ4_prepareTable(ctx, srcSize, wideTable);
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, wideTable, noDict, noDictIssue, acceleration);
+    }
+    LZ4_prepareTable(ctx, srcSize, byU16);
+    if (ctx->currentOffset == 0) {
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+    }
+    /* table kept from a previous use : stale entries must be filtered out */
+    return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, dictSmall, acceleration);
+}
+
+
+/* LZ4_compress_fast() :
+ * One-shot compression of inputSize bytes from source into dest, using a
+ * temporary state that lives on the heap when LZ4_HEAPMODE is set and on the
+ * stack otherwise.
+ * @return : whatever LZ4_compress_fast_extState() returns,
+ *           or 0 if the heap allocation of the state fails */
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    int result;
+#if (LZ4_HEAPMODE)
+    /* explicit cast : ALLOC() yields void*, which C++ does not convert implicitly */
+    LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctxPtr == NULL) return 0;
+#else
+    LZ4_stream_t ctx;
+    LZ4_stream_t* const ctxPtr = &ctx;
+#endif
+    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+
+#if (LZ4_HEAPMODE)
+    FREEMEM(ctxPtr);
+#endif
+    return result;
+}
+
+
+/* LZ4_compress_default() :
+ * LZ4_compress_fast() with the acceleration factor fixed to 1. */
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
+{
+    int const acceleration = 1;
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, acceleration);
+}
+
+
+/* LZ4_compress_destSize_extState() :
+ * Compresses as much of src as fits into targetDstSize bytes of dst.
+ * *srcSizePtr is the input size on entry; on the fillOutput path it is
+ * overwritten with the number of input bytes consumed.
+ * Warning : the stream is left in an unclean/broken state!
+ * The same state must be reset before being handed to a _fastReset()
+ * or _continue() call. */
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    void* const initialized = LZ4_initStream(state, sizeof (*state));
+    assert(initialized != NULL); (void)initialized;
+
+    /* dst can hold the worst case : compression success is guaranteed */
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    }
+
+    if (*srcSizePtr >= LZ4_64Klimit) {
+        tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
+    }
+    return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+}
+
+
+/* LZ4_compress_destSize() :
+ * Runs LZ4_compress_destSize_extState() with a temporary state that lives on
+ * the heap when LZ4_HEAPMODE is set and on the stack otherwise.
+ * @return : whatever LZ4_compress_destSize_extState() returns,
+ *           or 0 if the heap allocation of the state fails */
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    int result;   /* declared first : no declaration after a statement, whatever LZ4_HEAPMODE is */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctx == NULL) return 0;
+#else
+    LZ4_stream_t ctxBody;
+    LZ4_stream_t* const ctx = &ctxBody;
+#endif
+
+    result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
+
+#if (LZ4_HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+
+
+/*-******************************
+* Streaming functions
+********************************/
+
+/* LZ4_createStream() :
+ * Allocates a compression stream with ALLOC() and initializes it.
+ * @return : the new stream, or NULL if the allocation failed */
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    DEBUGLOG(4, "LZ4_createStream %p", stream);
+    if (stream != NULL) {
+        LZ4_initStream(stream, sizeof(*stream));
+    }
+    return stream;
+}
+
+/* LZ4_stream_t_alignment() :
+ * Value handed to LZ4_isAligned() by LZ4_initStream().
+ * With LZ4_ALIGN_TEST, it is the padding the compiler inserts between a
+ * leading char and an LZ4_stream_t member; otherwise it is 1. */
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char pad; LZ4_stream_t stream; } alignProbe_t;
+    return sizeof(alignProbe_t) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+/* LZ4_initStream() :
+ * Turns a caller-provided buffer into a zeroed compression stream.
+ * @return : `buffer` cast to LZ4_stream_t*, or NULL when `buffer` is NULL,
+ *           `size` is smaller than sizeof(LZ4_stream_t),
+ *           or `buffer` fails the alignment test.
+ */
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    /* checks are evaluated left to right : alignment is only tested on a non-NULL, large enough buffer */
+    if ( (buffer == NULL)
+      || (size < sizeof(LZ4_stream_t))
+      || (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) ) {
+        return NULL;
+    }
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
+    return (LZ4_stream_t*)buffer;
+}
+
+/* LZ4_resetStream() :
+ * Deprecated; LZ4_initStream() is more general and should be preferred.
+ * Zeroes the internal state of `LZ4_stream`.
+ */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    size_t const stateSize = sizeof(LZ4_stream_t_internal);
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+    MEM_INIT(LZ4_stream, 0, stateSize);
+}
+
+/* LZ4_resetStream_fast() :
+ * Hands the stream's internal state to LZ4_prepareTable(),
+ * with an input size of 0 and the byU32 table type.
+ */
+void LZ4_resetStream_fast(LZ4_stream_t* ctx)
+{
+    LZ4_stream_t_internal* const state = &ctx->internal_donotuse;
+    LZ4_prepareTable(state, 0, byU32);
+}
+
+/* LZ4_freeStream() :
+ * Releases a stream through FREEMEM(). A NULL stream is accepted and ignored.
+ * @return : always 0.
+ */
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    if (LZ4_stream != NULL) {
+        DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
+        FREEMEM(LZ4_stream);
+    }
+    return 0;
+}
+
+
+#define HASH_UNIT sizeof(reg_t)
+/* LZ4_loadDict() :
+ * Resets `LZ4_dict`, then references (at most) the last 64 KB of `dictionary`
+ * and registers positions of that range into the hash table, one every 3 bytes.
+ * @return : number of dictionary bytes retained,
+ *           or 0 when `dictSize` is smaller than HASH_UNIT
+ *           (the stream is still reset and its offset advanced in that case).
+ */
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* const ctx = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;
+    const BYTE* cursor = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = cursor + dictSize;
+    const BYTE* base;
+
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* A full reset is required here (prepareTable() would not be enough),
+     * so that compressing with this dictionary
+     * can never generate an overflowing matchIndex. */
+    LZ4_resetStream(LZ4_dict);
+
+    /* The offset always advances by a whole 64 KB window :
+     * a longer dictionary gets truncated to its last 64 KB,
+     * and a shorter one still moves by a full window, which guarantees that
+     * the window only contains valid offsets. LZ4_compress_fast_continue()
+     * relies on this to select noDictIssue even when the dictionary
+     * is smaller than 64 KB. */
+    ctx->currentOffset += 64 KB;
+
+    if (dictSize < (int)HASH_UNIT) { return 0; }
+
+    if ((dictEnd - cursor) > 64 KB) { cursor = dictEnd - 64 KB; }
+    base = dictEnd - ctx->currentOffset;
+    ctx->dictionary = cursor;
+    ctx->dictSize = (U32)(dictEnd - cursor);
+    ctx->tableType = (U32)tableType;
+
+    /* stop once fewer than HASH_UNIT bytes remain */
+    for ( ; cursor <= dictEnd-HASH_UNIT; cursor += 3) {
+        LZ4_putPosition(cursor, ctx->hashTable, tableType, base);
+    }
+
+    return (int)ctx->dictSize;
+}
+
+/* LZ4_attach_dictionary() :
+ * Makes `workingStream` reference the internal state of `dictionaryStream`
+ * as its dictionary context.
+ * A NULL `dictionaryStream`, or one whose dictionary is empty, detaches instead
+ * (dictCtx is set to NULL).
+ */
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
+{
+    LZ4_stream_t_internal* const working = &workingStream->internal_donotuse;
+    const LZ4_stream_t_internal* attached = NULL;
+    if (dictionaryStream != NULL) { attached = &(dictionaryStream->internal_donotuse); }
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             attached != NULL ? attached->dictSize : 0);
+
+    if (attached != NULL) {
+        /* With a current offset of zero, the external dictionary context
+         * would never be searched, because no table entry value could then
+         * denote a miss. Bump the offset to a non-zero value in that case. */
+        if (working->currentOffset == 0) {
+            working->currentOffset = 64 KB;
+        }
+
+        /* An empty dictionary is not attached at all. */
+        if (attached->dictSize == 0) {
+            attached = NULL;
+        }
+    }
+    working->dictCtx = attached;
+}
+
+
+/* LZ4_renormDictT() :
+ * When currentOffset + nextSize would exceed 0x80000000, rebases the stream :
+ * every hash table entry is lowered by (currentOffset - 64 KB), entries below
+ * that amount are cleared, currentOffset becomes 64 KB, and the dictionary
+ * is trimmed to its last 64 KB at most. Does nothing otherwise.
+ */
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+    assert(nextSize >= 0);
+    if (!(LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000)) {
+        return;   /* no potential ptrdiff_t overflow (32-bits mode) */
+    }
+
+    /* rescale hash table */
+    {   U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* const previousDictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int slot;
+        DEBUGLOG(4, "LZ4_renormDictT");
+        for (slot = 0; slot < LZ4_HASH_SIZE_U32; slot++) {
+            LZ4_dict->hashTable[slot] = (LZ4_dict->hashTable[slot] < delta)
+                                      ? 0
+                                      : LZ4_dict->hashTable[slot] - delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) { LZ4_dict->dictSize = 64 KB; }
+        LZ4_dict->dictionary = previousDictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+/* LZ4_compress_fast_continue() :
+ * Compresses `inputSize` bytes of `source` into `dest` (limited to
+ * `maxOutputSize` bytes), using the history recorded in `LZ4_stream`.
+ * `acceleration` below 1 is replaced by LZ4_ACCELERATION_DEFAULT,
+ * and values above LZ4_ACCELERATION_MAX are capped.
+ * Two modes exist :
+ * - prefix mode, when `source` starts exactly where the dictionary ends;
+ * - external dictionary mode otherwise, after which `source` itself
+ *   is recorded as the dictionary for the next call.
+ * @return : value returned by LZ4_compress_generic().
+ */
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
+{
+    const tableType_t tableType = byU32;
+    LZ4_stream_t_internal* const ctx = &LZ4_stream->internal_donotuse;
+    const BYTE* const srcStart = (const BYTE*)source;
+    const BYTE* dictEnd = ctx->dictionary + ctx->dictSize;
+    int smallDict;
+    int result;
+
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+
+    LZ4_renormDictT(ctx, inputSize);   /* avoid index overflow */
+    if (acceleration < 1) { acceleration = LZ4_ACCELERATION_DEFAULT; }
+    if (acceleration > LZ4_ACCELERATION_MAX) { acceleration = LZ4_ACCELERATION_MAX; }
+
+    /* invalidate tiny dictionaries (1 to 3 bytes), unless the input directly follows them */
+    if ( (ctx->dictSize >= 1) && (ctx->dictSize <= 3)
+      && (dictEnd != srcStart) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", ctx->dictSize, ctx->dictionary);
+        ctx->dictSize = 0;
+        ctx->dictionary = srcStart;
+        dictEnd = srcStart;
+    }
+
+    /* Input ending inside the dictionary space :
+     * only the dictionary bytes located after the input end are kept */
+    {   const BYTE* const srcEnd = srcStart + inputSize;
+        if ((srcEnd > ctx->dictionary) && (srcEnd < dictEnd)) {
+            U32 keptSize = (U32)(dictEnd - srcEnd);
+            if (keptSize > 64 KB) { keptSize = 64 KB; }
+            if (keptSize < 4) { keptSize = 0; }
+            ctx->dictSize = keptSize;
+            ctx->dictionary = dictEnd - keptSize;
+        }
+    }
+
+    /* dictionary shorter than both a full window and the current offset */
+    smallDict = (ctx->dictSize < 64 KB) && (ctx->dictSize < ctx->currentOffset);
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == srcStart) {
+        if (smallDict) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
+        }
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
+    }
+
+    /* external dictionary mode */
+    if (ctx->dictCtx == NULL) {
+        if (smallDict) {
+            result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+        } else {
+            result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+        }
+    } else if (inputSize > 4 KB) {
+        /* Attached dictionary context, large input.
+         * dictCtx'es produced by LZ4_loadDict guarantee that their tables hold
+         * no reference to offsets between dictCtx->currentOffset - 64 KB and
+         * dictCtx->currentOffset - dictCtx->dictSize, hence noDictIssue is safe
+         * even when the dictionary is smaller than 64 KB.
+         * For large blobs, copying the dictionary's tables into the active
+         * context is worth the setup cost : the compression loop then
+         * searches a single table. */
+        LZ4_memcpy(ctx, ctx->dictCtx, sizeof(*ctx));
+        result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+    } else {
+        /* Attached dictionary context, small input : search it in place
+         * (noDictIssue is safe for the same reason as above). */
+        result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+    }
+
+    /* the input just processed becomes the dictionary of the next call */
+    ctx->dictionary = srcStart;
+    ctx->dictSize = (U32)inputSize;
+    return result;
+}
+
+
+/* LZ4_compress_forceExtDict() :
+ * Hidden debug function, which always compresses in external dictionary mode,
+ * with no output limit (notLimited) and an acceleration of 1.
+ * `source` is recorded as the dictionary for the next call.
+ */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
+{
+    LZ4_stream_t_internal* const ctx = &LZ4_dict->internal_donotuse;
+    int smallDict;
+    int compressedSize;
+
+    LZ4_renormDictT(ctx, srcSize);
+
+    smallDict = (ctx->dictSize < 64 KB) && (ctx->dictSize < ctx->currentOffset);
+    if (smallDict) {
+        compressedSize = LZ4_compress_generic(ctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+    } else {
+        compressedSize = LZ4_compress_generic(ctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    }
+
+    ctx->dictionary = (const BYTE*)source;
+    ctx->dictSize = (U32)srcSize;
+
+    return compressedSize;
+}
+
+
+/*! LZ4_saveDict() :
+ * If previously compressed data block is not guaranteed to remain available at its memory location,
+ * save it into a safer place (char* safeBuffer).
+ * At most 64 KB are saved, and never more than the current dictionary size.
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
+ * Note : safeBuffer may only be NULL when no byte ends up being saved
+ * (enforced by assert() only).
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
+
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
+
+    if (safeBuffer == NULL) assert(dictSize == 0);
+    if (dictSize > 0) {
+        /* The end of the previous dictionary is only computed when there is
+         * something to copy : a stream without any dictionary has a NULL
+         * dictionary pointer, on which pointer arithmetic is undefined. */
+        const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
+        assert(dict->dictionary != NULL);
+        /* memmove : safeBuffer is allowed to overlap the previous dictionary */
+        memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
+    }
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/*-*******************************
+ * Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; /* selects whether decoding is bounded by the output size or by the input size */
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; /* full-block decoding vs. partial decoding */
+
+#undef MIN
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* note : each argument may be evaluated twice */
+
+/* read_variable_length() :
+ * Decodes the extension of a literal or match length : bytes are summed
+ * until one differs from 255 (that last byte is part of the sum).
+ *
+ * ip            - address of the input pointer; advanced past every byte read.
+ * lencheck      - input limit used by the two optional checks below.
+ * loop_check    - when set, `*ip >= lencheck` is tested after each byte read;
+ *                 on failure, *error is set to loop_error.
+ * initial_check - when set, `*ip >= lencheck` is tested before reading anything;
+ *                 on failure, *error is set to initial_error.
+ * error         - (output) only written on failure : set it to `ok` before the call.
+ *
+ * @return : the sum accumulated so far (0 when the initial check fails).
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+                     int loop_check, int initial_check,
+                     variable_length_error* error)
+{
+    U32 total = 0;
+
+    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+        *error = initial_error;
+        return 0;
+    }
+
+    for (;;) {
+        U32 const byteValue = *(*ip)++;
+        total += byteValue;
+        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+            *error = loop_error;
+            break;
+        }
+        if (byteValue != 255) { break; }   /* any byte below 255 ends the sequence */
+    }
+
+    return total;
+}
+
+/*! LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really get inlined,
+ * in order to remove useless branches during compilation optimization.
+ *
+ * Structure : an optional fast loop (compiled only when LZ4_FAST_DEC_LOOP is set),
+ * followed by the main loop, which decodes the remaining sequences
+ * and performs the end-of-block checks.
+ * Empty-output and empty-input cases are resolved before any decoding (see "Special cases").
+ *
+ * @return : when endOnInput is set, the number of bytes written into dst;
+ * otherwise, the number of bytes read from src.
+ * A negative value signals an error :
+ * -1 when src is NULL or outputSize is negative,
+ * else -(number of input bytes consumed when the error was detected) - 1.
+ */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+ const char* const src,
+ char* const dst,
+ int srcSize,
+ int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
+
+ endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */
+ earlyEnd_directive partialDecoding, /* full, partial */
+ dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
+ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
+ const BYTE* const dictStart, /* only if dict==usingExtDict */
+ const size_t dictSize /* note : = 0 if noDict */
+ )
+{
+ if ((src == NULL) || (outputSize < 0)) { return -1; } /* invalid arguments are rejected up front */
+
+ { const BYTE* ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
+
+ BYTE* op = (BYTE*) dst;
+ BYTE* const oend = op + outputSize;
+ BYTE* cpy;
+
+ const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+ const int safeDecode = (endOnInput==endOnInputSize);
+ const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+ /* Set up the "end" pointers for the shortcut. */
+ const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+ const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+ const BYTE* match;
+ size_t offset;
+ unsigned token;
+ size_t length;
+
+
+ DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+ /* Special cases */
+ assert(lowPrefix <= op);
+ if ((endOnInput) && (unlikely(outputSize==0))) {
+ /* Empty output buffer */
+ if (partialDecoding) return 0;
+ return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+ }
+ if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+ if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
+
+ /* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+ if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+ DEBUGLOG(6, "skip fast decode loop");
+ goto safe_decode;
+ }
+
+ /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+ assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+ if (endOnInput) { assert(ip < iend); }
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+ if (error == initial_error) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+ /* copy literals */
+ cpy = op+length;
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if (endOnInput) { /* LZ4_decompress_safe() */
+ if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+ LZ4_wildCopy32(op, ip, cpy);
+ } else { /* LZ4_decompress_fast() */
+ if (cpy>oend-8) { goto safe_literal_copy; }
+ LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+ * it doesn't know input length, and only relies on end-of-block properties */
+ }
+ ip += length; op = cpy;
+ } else {
+ cpy = op+length;
+ if (endOnInput) { /* LZ4_decompress_safe() */
+ DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+ /* We don't need to check oend, since we check it once for each loop below */
+ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+ /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+ LZ4_memcpy(op, ip, 16);
+ } else { /* LZ4_decompress_fast() */
+ /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+ * it doesn't know input length, and relies on end-of-block properties */
+ LZ4_memcpy(op, ip, 8);
+ if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
+ }
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+ assert(match <= op);
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ if (length == ML_MASK) {
+ variable_length_error error = ok;
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+ if (error != ok) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+ length += MINMATCH;
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+ } else {
+ length += MINMATCH;
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+
+ /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+ if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+ if (offset >= 8) {
+ assert(match >= lowPrefix);
+ assert(match <= op);
+ assert(op + 18 <= oend);
+
+ LZ4_memcpy(op, match, 8);
+ LZ4_memcpy(op+8, match+8, 8);
+ LZ4_memcpy(op+16, match+16, 2);
+ op += length;
+ continue;
+ } } }
+
+ if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) {
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+ length = MIN(length, (size_t)(oend-op));
+ } else {
+ goto _output_error; /* end-of-block condition violated */
+ } }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) { *op++ = *copyFrom++; }
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+
+ /* copy match within block */
+ cpy = op + length;
+
+ assert((op <= oend) && (oend-op >= 32));
+ if (unlikely(offset<16)) {
+ LZ4_memcpy_using_offset(op, match, cpy, offset);
+ } else {
+ LZ4_wildCopy32(op, match, cpy);
+ }
+
+ op = cpy; /* wildcopy correction */
+ }
+ safe_decode:
+#endif
+
+ /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+ /* A two-stage shortcut for the most common case:
+ * 1) If the literal length is 0..14, and there is enough space,
+ * enter the shortcut and copy 16 bytes on behalf of the literals
+ * (in the fast mode, only 8 bytes can be safely copied this way).
+ * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+ * manner; but we ensure that there's enough space in the output for
+ * those 18 bytes earlier, upon entering the shortcut (in other words,
+ * there is a combined check for both stages).
+ */
+ if ( (endOnInput ? length != RUN_MASK : length <= 8)
+ /* strictly "less than" on input, to re-enter the loop with at least one byte */
+ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+ /* Copy the literals */
+ LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+ op += length; ip += length;
+
+ /* The second stage: prepare for match copying, decode full info.
+ * If it doesn't work out, the info won't be wasted. */
+ length = token & ML_MASK; /* match length */
+ offset = LZ4_readLE16(ip); ip += 2;
+ match = op - offset;
+ assert(match <= op); /* check overflow */
+
+ /* Do not deal with overlapping matches. */
+ if ( (length != ML_MASK)
+ && (offset >= 8)
+ && (dict==withPrefix64k || match >= lowPrefix) ) {
+ /* Copy the match. */
+ LZ4_memcpy(op + 0, match + 0, 8);
+ LZ4_memcpy(op + 8, match + 8, 8);
+ LZ4_memcpy(op +16, match +16, 2);
+ op += length + MINMATCH;
+ /* Both stages worked, load the next token. */
+ continue;
+ }
+
+ /* The second stage didn't work out, but the info is ready.
+ * Propel it right to the point of match copying. */
+ goto _copy_match;
+ }
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+ if (error == initial_error) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+ }
+
+ /* copy literals */
+ cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+ safe_literal_copy:
+#endif
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+ || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+ {
+ /* We've either hit the input parsing restriction or the output parsing restriction.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+ */
+ if (partialDecoding) {
+ /* Since we are partial decoding we may be in this block because of the output parsing
+ * restriction, which is not valid since the output buffer is allowed to be undersized.
+ */
+ assert(endOnInput);
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
+ */
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
+ */
+ if (cpy > oend) {
+ cpy = oend;
+ assert(op<=oend);
+ length = (size_t)(oend-op);
+ }
+ } else {
+ /* We must be on the last sequence because of the parsing limitations so check
+ * that we exactly regenerate the original size (must be exact when !endOnInput).
+ */
+ if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+ /* We must be on the last sequence (or invalid) because of the parsing limitations
+ * so check that we exactly consume the input and don't overrun the output buffer.
+ */
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+ DEBUGLOG(6, "should have been last run of literals")
+ DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+ DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+ goto _output_error;
+ }
+ }
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
+ ip += length;
+ op += length;
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
+ */
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+ break;
+ }
+ } else {
+ LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ _copy_match:
+ if (length == ML_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+ if (error != ok) goto _output_error;
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
+ }
+ length += MINMATCH;
+
+#if LZ4_FAST_DEC_LOOP
+ safe_match_copy:
+#endif
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+ else goto _output_error; /* doesn't respect parsing restriction */
+ }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) *op++ = *copyFrom++;
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+ assert(match >= lowPrefix);
+
+ /* copy match within block */
+ cpy = op + length;
+
+ /* partialDecoding : may end anywhere within the block */
+ assert(op<=oend);
+ if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ size_t const mlen = MIN(length, (size_t)(oend-op));
+ const BYTE* const matchEnd = match + mlen;
+ BYTE* const copyEnd = op + mlen;
+ if (matchEnd > op) { /* overlap copy */
+ while (op < copyEnd) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, mlen);
+ }
+ op = copyEnd;
+ if (op == oend) { break; }
+ continue;
+ }
+
+ if (unlikely(offset<8)) {
+ LZ4_write32(op, 0); /* silence msan warning when offset==0 */
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += inc32table[offset];
+ LZ4_memcpy(op+4, match, 4);
+ match -= dec64table[offset];
+ } else {
+ LZ4_memcpy(op, match, 8);
+ match += 8;
+ }
+ op += 8;
+
+ if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+ if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+ if (op < oCopyLimit) {
+ LZ4_wildCopy8(op, match, oCopyLimit);
+ match += oCopyLimit - op;
+ op = oCopyLimit;
+ }
+ while (op < cpy) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, 8);
+ if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
+ }
+ op = cpy; /* wildcopy correction */
+ }
+
+ /* end of decoding */
+ if (endOnInput) {
+ DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
+ return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
+ } else {
+ return (int) (((const char*)ip)-src); /* Nb of input bytes read */
+ }
+
+ /* Overflow error detected */
+ _output_error:
+ return (int) (-(((const char*)ip)-src))-1; /* negative : -(input bytes consumed so far) - 1 */
+ }
+}
+
+
+/*===== Instantiate the API decoding functions. =====*/
+
+/* LZ4_decompress_safe() :
+ * Full-block decoding bounded by the input size, without any dictionary :
+ * the prefix starts at `dest` itself.
+ */
+LZ4_FORCE_O2
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest;
+    return LZ4_decompress_generic(source, dest,
+                                  compressedSize, maxDecompressedSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_safe_partial() :
+ * Partial decoding bounded by the input size, without any dictionary.
+ * The output limit is the smaller of `targetOutputSize` and `dstCapacity`.
+ */
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    int const outputLimit = (targetOutputSize < dstCapacity) ? targetOutputSize : dstCapacity;
+    const BYTE* const lowPrefix = (const BYTE*)dst;
+    return LZ4_decompress_generic(src, dst,
+                                  compressedSize, outputLimit,
+                                  endOnInputSize, partial_decode, noDict,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_fast() :
+ * Full-block decoding bounded by the output size (`originalSize`);
+ * the input size is unknown, hence passed as 0.
+ * Decodes in withPrefix64k mode, with the prefix declared 64 KB before `dest`.
+ */
+LZ4_FORCE_O2
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest - 64 KB;
+    return LZ4_decompress_generic(source, dest,
+                                  0, originalSize,
+                                  endOnOutputSize, decode_full_block, withPrefix64k,
+                                  lowPrefix, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+/* LZ4_decompress_safe_withPrefix64k() :
+ * Exported, an obsolete API function.
+ * Input-bounded full-block decoding in withPrefix64k mode,
+ * with the prefix declared 64 KB before `dest`.
+ */
+LZ4_FORCE_O2
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest - 64 KB;
+    return LZ4_decompress_generic(source, dest,
+                                  compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_fast_withPrefix64k() :
+ * Obsolete API function, counterpart of LZ4_decompress_safe_withPrefix64k().
+ * Since LZ4_decompress_fast() doesn't validate match offsets,
+ * it serves well with any prefixed dictionary, and is simply forwarded to.
+ */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    int const readSize = LZ4_decompress_fast(source, dest, originalSize);
+    return readSize;
+}
+
+/* LZ4_decompress_safe_withSmallPrefix() :
+ * Input-bounded full-block decoding, where the `prefixSize` bytes
+ * located right before `dest` are declared as prefix (noDict mode).
+ */
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest,
+                                  compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  lowPrefix, NULL, 0);
+}
+
+/* LZ4_decompress_safe_forceExtDict() :
+ * Input-bounded full-block decoding, using the external dictionary
+ * [dictStart, dictStart + dictSize) and no prefix before `dest`.
+ */
+LZ4_FORCE_O2
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest,
+                                  compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  lowPrefix, extDict, dictSize);
+}
+
+/* LZ4_decompress_fast_extDict() :
+ * Output-bounded full-block decoding (input size passed as 0), using the
+ * external dictionary [dictStart, dictStart + dictSize) and no prefix before `dest`.
+ */
+LZ4_FORCE_O2
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest,
+                                  0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  lowPrefix, extDict, dictSize);
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+/* LZ4_decompress_safe_doubleDict() :
+ * Input-bounded full-block decoding with two history sources :
+ * the `prefixSize` bytes right before `dest`,
+ * and the external dictionary [dictStart, dictStart + dictSize).
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest - prefixSize;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest,
+                                  compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  lowPrefix, extDict, dictSize);
+}
+
+/* LZ4_decompress_fast_doubleDict() :
+ * Output-bounded full-block decoding (input size passed as 0)
+ * with two history sources : the `prefixSize` bytes right before `dest`,
+ * and the external dictionary [dictStart, dictStart + dictSize).
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    const BYTE* const lowPrefix = (const BYTE*)dest - prefixSize;
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest,
+                                  0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  lowPrefix, extDict, dictSize);
+}
+
+/*===== streaming decompression functions =====*/
+
+/* LZ4_createStreamDecode() :
+ * Allocates a streaming decompression state through ALLOC_AND_ZERO().
+ * @return : the allocation result, passed through unchecked
+ *           (presumably NULL on allocation failure - confirm against ALLOC_AND_ZERO).
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
+    return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+}
+
+/* LZ4_freeStreamDecode() :
+ * Releases a streaming decompression state through FREEMEM().
+ * A NULL pointer is accepted and ignored.
+ * @return : always 0.
+ */
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    if (LZ4_stream != NULL) {
+        FREEMEM(LZ4_stream);
+    }
+    return 0;
+}
+
+/*! LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary);
+ * `dictionary` may then be NULL.
+ * The dictionary is registered as prefix, and any external dictionary reference is cleared.
+ * @return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    lz4sd->prefixSize = (size_t) dictSize;
+    if (dictSize) {
+        assert(dictionary != NULL);
+        lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    } else {
+        /* No pointer arithmetic in the "no dictionary" case :
+         * adding an offset, even 0, to a NULL pointer is undefined in C. */
+        lz4sd->prefixEnd = (const BYTE*) dictionary;
+    }
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize = 0;
+    return 1;
+}
+
+/*! LZ4_decoderRingBufferSize() :
+ * For the (optional) scenario where streaming decompression
+ * operates over a ring buffer : provides the minimum size of that buffer
+ * which is compatible with any source respecting the maxBlockSize condition.
+ * Note : in a ring buffer scenario,
+ * blocks are presumed decompressed next to each other.
+ * When not enough space remains for next block (remainingSize < maxBlockSize),
+ * decoding resumes from beginning of ring buffer.
+ * Block sizes below 16 are treated as 16.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    int effectiveBlockSize;
+    if ((maxBlockSize < 0) || (maxBlockSize > LZ4_MAX_INPUT_SIZE)) { return 0; }
+    effectiveBlockSize = (maxBlockSize < 16) ? 16 : maxBlockSize;
+    return LZ4_DECODER_RING_BUFFER_SIZE(effectiveBlockSize);
+}
+
+/*
+*_continue() :
+ These decoding functions decompress multiple blocks in "streaming" mode.
+ Blocks decoded previously must remain available at the memory position where they were decoded.
+ When that is not possible, save the relevant part of the decoded data into a safe buffer,
+ and indicate where it stands using LZ4_setStreamDecode()
+
+ LZ4_decompress_safe_continue() selects among three scenarios :
+ no history yet, `dest` directly following the previous output,
+ or `dest` located elsewhere (previous output then becomes an external dictionary).
+ A result <= 0 is returned as is, without recording the block as new history.
+*/
+LZ4_FORCE_O2
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* const sd = &LZ4_streamDecode->internal_donotuse;
+    int decoded;
+
+    /* The first call, no dictionary yet. */
+    if (sd->prefixSize == 0) {
+        assert(sd->extDictSize == 0);
+        decoded = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (decoded <= 0) { return decoded; }
+        sd->prefixSize = (size_t)decoded;
+        sd->prefixEnd = (BYTE*)dest + decoded;
+        return decoded;
+    }
+
+    /* They're rolling the current segment. */
+    if (sd->prefixEnd == (BYTE*)dest) {
+        if (sd->prefixSize >= 64 KB - 1) {
+            decoded = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        } else if (sd->extDictSize == 0) {
+            decoded = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                          sd->prefixSize);
+        } else {
+            decoded = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                     sd->prefixSize, sd->externalDict, sd->extDictSize);
+        }
+        if (decoded <= 0) { return decoded; }
+        sd->prefixSize += (size_t)decoded;
+        sd->prefixEnd += decoded;
+        return decoded;
+    }
+
+    /* The buffer wraps around, or they're switching to another buffer :
+     * the former prefix is converted into the external dictionary
+     * (this happens before decoding, and is kept even if decoding fails). */
+    sd->extDictSize = sd->prefixSize;
+    sd->externalDict = sd->prefixEnd - sd->extDictSize;
+    decoded = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                               sd->externalDict, sd->extDictSize);
+    if (decoded <= 0) { return decoded; }
+    sd->prefixSize = (size_t)decoded;
+    sd->prefixEnd = (BYTE*)dest + decoded;
+    return decoded;
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+ int result;
+ assert(originalSize >= 0);
+
+ if (lz4sd->prefixSize == 0) { /* no prefix recorded yet : plain decode */
+ assert(lz4sd->extDictSize == 0);
+ result = LZ4_decompress_fast(source, dest, originalSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)originalSize;
+ lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+ } else if (lz4sd->prefixEnd == (BYTE*)dest) { /* dest starts right where the prefix ends : extend it */
+ if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+ result = LZ4_decompress_fast(source, dest, originalSize);
+ else
+ result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+ lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize += (size_t)originalSize; /* advance by originalSize rather than by result */
+ lz4sd->prefixEnd += originalSize;
+ } else { /* dest is elsewhere : the old prefix becomes the external dictionary */
+ lz4sd->extDictSize = lz4sd->prefixSize;
+ lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+ result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+ lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)originalSize;
+ lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+ }
+
+ return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+ These decoding functions work the same as "_continue" ones,
+ the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+ if (dictSize==0)
+ return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+ if (dictStart+dictSize == dest) {
+ if (dictSize >= 64 KB - 1) {
+ return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+ }
+ assert(dictSize >= 0);
+ return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+ }
+ assert(dictSize >= 0);
+ return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+ if (dictSize==0 || dictStart+dictSize == dest)
+ return LZ4_decompress_fast(source, dest, originalSize);
+ assert(dictSize >= 0);
+ return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+* Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{
+ return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+ return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+ return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+ return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+ return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+ return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+ return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+ (void)inputBuffer;
+ LZ4_resetStream((LZ4_stream_t*)state);
+ return 0;
+}
+
+void* LZ4_create (char* inputBuffer)
+{
+ (void)inputBuffer;
+ return LZ4_createStream();
+}
+
+char* LZ4_slideInputBuffer (void* state)
+{
+ /* avoid const char * -> char * conversion warning */
+ return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
+}
+
+#endif /* LZ4_COMMONDEFS_ONLY */
diff --git a/libbutl/lz4.cxx b/libbutl/lz4.cxx
new file mode 100644
index 0000000..2db7af2
--- /dev/null
+++ b/libbutl/lz4.cxx
@@ -0,0 +1,555 @@
+// file : libbutl/lz4.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/lz4.hxx>
+
+// This careful macro dance makes sure that all the LZ4 C API functions are
+// made static while making sure we include the headers in the same way as the
+// implementation files that we include below.
+//
+#define LZ4LIB_VISIBILITY static
+#define LZ4_STATIC_LINKING_ONLY
+#define LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS
+#include "lz4.h"
+#include "lz4hc.h"
+
+#define LZ4FLIB_VISIBILITY static
+#define LZ4F_STATIC_LINKING_ONLY
+#define LZ4F_PUBLISH_STATIC_FUNCTIONS
+#define LZ4F_DISABLE_DEPRECATE_WARNINGS
+#include "lz4frame.h"
+
+#include <new> // bad_alloc
+#include <memory> // unique_ptr
+#include <cstring> // memcpy()
+#include <cassert>
+#include <stdexcept> // invalid_argument, logic_error
+
+#include <libbutl/utility.hxx> // eos()
+
+#if 0
+#include <libbutl/lz4-stream.hxx>
+#endif
+
+using namespace std;
+
+namespace butl
+{
+ namespace lz4
+ {
+ static inline size_t
+ block_size (LZ4F_blockSizeID_t id)
+ {
+ return (id == LZ4F_max4MB ? 4 * 1024 * 1024 :
+ id == LZ4F_max1MB ? 1 * 1024 * 1024 :
+ id == LZ4F_max256KB ? 256 * 1024 :
+ id == LZ4F_max64KB ? 64 * 1024 : 0);
+ }
+
+ [[noreturn]] static void
+ throw_exception (LZ4F_errorCodes c)
+ {
+ using i = invalid_argument;
+
+ switch (c)
+ {
+ case LZ4F_ERROR_GENERIC: throw i ("generic LZ4 error");
+ case LZ4F_ERROR_maxBlockSize_invalid: throw i ("invalid LZ4 block size");
+ case LZ4F_ERROR_blockMode_invalid: throw i ("invalid LZ4 block mode");
+ case LZ4F_ERROR_contentChecksumFlag_invalid: throw i ("invalid LZ4 content checksum flag");
+ case LZ4F_ERROR_compressionLevel_invalid: throw i ("invalid LZ4 compression level");
+ case LZ4F_ERROR_headerVersion_wrong: throw i ("wrong LZ4 header version");
+ case LZ4F_ERROR_blockChecksum_invalid: throw i ("invalid LZ4 block checksum");
+ case LZ4F_ERROR_reservedFlag_set: throw i ("reserved LZ4 flag set");
+ case LZ4F_ERROR_srcSize_tooLarge: throw i ("LZ4 input too large");
+ case LZ4F_ERROR_dstMaxSize_tooSmall: throw i ("LZ4 output too small");
+ case LZ4F_ERROR_frameHeader_incomplete: throw i ("incomplete LZ4 frame header");
+ case LZ4F_ERROR_frameType_unknown: throw i ("unknown LZ4 frame type");
+ case LZ4F_ERROR_frameSize_wrong: throw i ("wrong LZ4 frame size");
+ case LZ4F_ERROR_decompressionFailed: throw i ("invalid LZ4 compressed content");
+ case LZ4F_ERROR_headerChecksum_invalid: throw i ("invalid LZ4 header checksum");
+ case LZ4F_ERROR_contentChecksum_invalid: throw i ("invalid LZ4 content checksum");
+
+ case LZ4F_ERROR_allocation_failed: throw bad_alloc ();
+
+ // These seem to be programming errors.
+ //
+ case LZ4F_ERROR_srcPtr_wrong: // NULL pointer.
+ case LZ4F_ERROR_frameDecoding_alreadyStarted: // Incorrect call seq.
+
+ // We should never get these.
+ //
+ case LZ4F_OK_NoError:
+ case LZ4F_ERROR_maxCode:
+ case _LZ4F_dummy_error_enum_for_c89_never_used:
+ break;
+ }
+
+ assert (false);
+ throw logic_error (LZ4F_getErrorName ((LZ4F_errorCode_t)(-c)));
+ }
+
+ // As above but for erroneous LZ4F_*() function result.
+ //
+ [[noreturn]] static inline void
+ throw_exception (size_t r)
+ {
+ throw_exception (LZ4F_getErrorCode (r));
+ }
+
+ // compression
+ //
+
+ compressor::
+ ~compressor ()
+ {
+ if (LZ4F_cctx* ctx = static_cast<LZ4F_cctx*> (ctx_))
+ {
+ LZ4F_errorCode_t e (LZ4F_freeCompressionContext (ctx));
+ assert (!LZ4F_isError (e));
+ }
+ }
+
+ inline void compressor::
+ init_preferences (void* vp) const
+ {
+ LZ4F_preferences_t* p (static_cast<LZ4F_preferences_t*> (vp));
+
+ p->autoFlush = 1;
+ p->favorDecSpeed = 0;
+ p->compressionLevel = level_;
+ p->frameInfo.blockMode = LZ4F_blockLinked;
+ p->frameInfo.blockSizeID = static_cast<LZ4F_blockSizeID_t> (block_id_);
+ p->frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum;
+ p->frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
+ p->frameInfo.contentSize = content_size_
+ ? static_cast<unsigned long long> (*content_size_)
+ : 0;
+ }
+
+ void compressor::
+ begin (int level,
+ int block_id,
+ optional<uint64_t> content_size)
+ {
+ assert (block_id >= 4 && block_id <= 7);
+
+ level_ = level;
+ block_id_ = block_id;
+ content_size_ = content_size;
+
+ LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
+ init_preferences (&prefs);
+
+ // Input/output buffer capacities.
+ //
+ // To be binary compatible with the lz4 utility we have to compress
+ // files that fit into the block with a single *_compressFrame() call
+ // instead of *_compressBegin()/*_compressUpdate(). And to determine the
+ // output buffer capacity we must use *_compressFrameBound() instead of
+ // *_compressBound(). The problem is, at this stage (before filling the
+ // input buffer), we don't know which case it will be.
+ //
+ // However, in our case (autoFlush=1), *Bound() < *FrameBound() and so
+ // we can always use the latter at the cost of slight overhead. Also,
+ // using *FrameBound() allows us to call *Begin() and *Update() without
+ // flushing the buffer in between (this insight is based on studying the
+ // implementation of the *Bound() functions).
+ //
+ // Actually, we can use content_size (we can get away with much smaller
+ // buffers for small inputs). We just need to verify the caller is not
+ // lying to us (failing that, we may end up with strange errors like
+ // insufficient output buffer space).
+ //
+ ic = block_size (prefs.frameInfo.blockSizeID);
+
+ if (content_size_ && *content_size_ < ic)
+ {
+ // This is nuanced: we need to add an extra byte in order to detect
+ // EOF.
+ //
+ ic = static_cast<size_t> (*content_size_) + 1;
+ }
+
+ oc = LZ4F_compressFrameBound (ic, &prefs);
+
+ begin_ = true;
+ }
+
+ void compressor::
+ next (bool end)
+ {
+ LZ4F_cctx* ctx;
+
+ // Unlike the decompression case below, compression cannot fail due to
+ // invalid content. So any LZ4F_*() function failure is either due to a
+ // programming bug or argument inconsistencies (e.g., content size does
+ // not match actual).
+
+ if (begin_)
+ {
+ begin_ = false;
+
+ LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
+ init_preferences (&prefs);
+
+ // If we've allocated smaller buffers based on content_size_, then
+ // verify the input size matches what's promised.
+ //
+ // Note also that LZ4F_compressFrame() does not fail if it doesn't
+ // match instead replacing it with the actual value.
+ //
+ size_t bs (block_size (prefs.frameInfo.blockSizeID));
+ if (content_size_ && *content_size_ < bs)
+ {
+ if (!end || in != *content_size_)
+ throw_exception (LZ4F_ERROR_frameSize_wrong);
+ }
+
+ // Must be < for lz4 compatibility (see EOF nuance above for the
+ // likely reason).
+ //
+ if (end && in < bs)
+ {
+ on = LZ4F_compressFrame (ob, oc, ib, in, &prefs);
+ if (LZ4F_isError (on))
+ throw_exception (on);
+
+ in = 0; // All consumed.
+ return;
+ }
+ else
+ {
+ if (LZ4F_isError (LZ4F_createCompressionContext (&ctx, LZ4F_VERSION)))
+ throw bad_alloc ();
+
+ ctx_ = ctx;
+
+ // Write the header.
+ //
+ on = LZ4F_compressBegin (ctx, ob, oc, &prefs);
+ if (LZ4F_isError (on))
+ throw_exception (on);
+
+ // Fall through.
+ }
+ }
+ else
+ {
+ ctx = static_cast<LZ4F_cctx*> (ctx_);
+ on = 0;
+ }
+
+ size_t n;
+
+ if (in != 0)
+ {
+ n = LZ4F_compressUpdate (ctx, ob + on, oc - on, ib, in, nullptr);
+ if (LZ4F_isError (n))
+ throw_exception (n);
+
+ in = 0; // All consumed.
+ on += n;
+ }
+
+ // Write the end marker.
+ //
+ if (end)
+ {
+ // Note that this call also verifies specified and actual content
+ // sizes match.
+ //
+ n = LZ4F_compressEnd (ctx, ob + on, oc - on, nullptr);
+ if (LZ4F_isError (n))
+ throw_exception (n);
+
+ on += n;
+ }
+ }
+
+ uint64_t
+ compress (ofdstream& os, ifdstream& is,
+ int level,
+ int block_id,
+ optional<uint64_t> content_size)
+ {
+#if 0
+ char buf[1024 * 3 + 7];
+ ostream cos (os, level, block_id, content_size);
+
+ for (bool e (false); !e; )
+ {
+ e = eof (is.read (buf, sizeof (buf)));
+ cos.write (buf, is.gcount ());
+ //for (streamsize i (0), n (is.gcount ()); i != n; ++i)
+ // cos.put (buf[i]);
+ }
+
+ cos.close ();
+ return content_size ? *content_size : 0;
+#else
+ compressor c;
+
+ // Input/output buffer guards.
+ //
+ unique_ptr<char[]> ibg;
+ unique_ptr<char[]> obg;
+
+ // First determine required buffer capacities.
+ //
+ c.begin (level, block_id, content_size);
+
+ ibg.reset ((c.ib = new char[c.ic]));
+ obg.reset ((c.ob = new char[c.oc]));
+
+ // Read into the input buffer updating the eof flag.
+ //
+ // Note that we could try to do direct fd read/write but that would
+ // complicate things quite a bit (error handling, stream state, etc).
+ //
+ bool eof (false);
+ auto read = [&is, &c, &eof] ()
+ {
+ eof = butl::eof (is.read (c.ib, c.ic));
+ c.in = static_cast<size_t> (is.gcount ());
+ };
+
+ // Write from the output buffer updating the total written.
+ //
+ uint64_t ot (0);
+ auto write = [&os, &c, &ot] ()
+ {
+ os.write (c.ob, static_cast<streamsize> (c.on));
+ ot += c.on;
+ };
+
+ // Keep reading, compressing, and writing chunks of content.
+ //
+ while (!eof)
+ {
+ read ();
+
+ c.next (eof);
+
+ if (c.on != 0) // next() may just buffer the data.
+ write ();
+ }
+
+ return ot;
+#endif
+ }
+
+ // decompression
+ //
+
+ static_assert (sizeof (decompressor::hb) == LZ4F_HEADER_SIZE_MAX,
+ "LZ4 header size mismatch");
+
+ decompressor::
+ ~decompressor ()
+ {
+ if (LZ4F_dctx* ctx = static_cast<LZ4F_dctx*> (ctx_))
+ {
+ LZ4F_errorCode_t e (LZ4F_freeDecompressionContext (ctx));
+ assert (!LZ4F_isError (e));
+ }
+ }
+
+ size_t decompressor::
+ begin (optional<uint64_t>* content_size)
+ {
+ LZ4F_dctx* ctx;
+
+ if (LZ4F_isError (LZ4F_createDecompressionContext (&ctx, LZ4F_VERSION)))
+ throw bad_alloc ();
+
+ ctx_ = ctx;
+
+ LZ4F_frameInfo_t info = LZ4F_INIT_FRAMEINFO;
+
+ // Input hint and end as signalled by the LZ4F_*() functions.
+ //
+ size_t h, e;
+
+ h = LZ4F_getFrameInfo (ctx, &info, hb, &(e = hn));
+ if (LZ4F_isError (h))
+ throw_exception (h);
+
+ if (content_size != nullptr)
+ {
+ if (info.contentSize != 0)
+ *content_size = static_cast<uint64_t> (info.contentSize);
+ else
+ *content_size = nullopt;
+ }
+
+ // Use the block size for the output buffer capacity and compressed
+ // bound plus the header size for the input. The expectation is that
+ // LZ4F_decompress() should never hint for more than that.
+ //
+ oc = block_size (info.blockSizeID);
+ ic = LZ4F_compressBound (oc, nullptr) + LZ4F_BLOCK_HEADER_SIZE;
+
+ assert (h <= ic);
+
+ // Move over whatever is left in the header buffer to the beginning.
+ //
+ hn -= e;
+ memmove (hb, hb + e, hn);
+
+ return h;
+ }
+
+ size_t decompressor::
+ next ()
+ {
+ LZ4F_dctx* ctx (static_cast<LZ4F_dctx*> (ctx_));
+
+ size_t h, e;
+
+ // Note that LZ4F_decompress() verifies specified and actual content
+ // sizes match (similar to compression).
+ //
+ h = LZ4F_decompress (ctx, ob, &(on = oc), ib, &(e = in), nullptr);
+ if (LZ4F_isError (h))
+ throw_exception (h);
+
+ // We expect LZ4F_decompress() to consume what it asked for.
+ //
+ assert (e == in && h <= ic);
+ in = 0; // All consumed.
+
+ return h;
+ }
+
+ uint64_t
+ decompress (ofdstream& os, ifdstream& is)
+ {
+ // Write the specified number of bytes from the output buffer updating
+ // the total written.
+ //
+ uint64_t ot (0);
+ auto write = [&os, &ot] (char* b, size_t n)
+ {
+ os.write (b, static_cast<streamsize> (n));
+ ot += n;
+ };
+
+#if 0
+ char buf[1024 * 3 + 7];
+ istream dis (is, true, istream::badbit);
+
+ for (bool e (false); !e; )
+ {
+ e = eof (dis.read (buf, sizeof (buf)));
+ write (buf, static_cast<size_t> (dis.gcount ()));
+ }
+#else
+ // Read into the specified buffer returning the number of bytes read and
+ // updating the eof flag.
+ //
+ bool eof (false);
+ auto read = [&is, &eof] (char* b, size_t c) -> size_t
+ {
+ size_t n (0);
+ do
+ {
+ eof = butl::eof (is.read (b + n, c - n));
+ n += static_cast<size_t> (is.gcount ());
+ }
+ while (!eof && n != c);
+
+ return n;
+ };
+
+ decompressor d;
+
+ // Input/output buffer guards.
+ //
+ unique_ptr<char[]> ibg;
+ unique_ptr<char[]> obg;
+
+ size_t h; // Input hint.
+
+ // First read in the header and allocate the buffers.
+ //
+ // What if we hit EOF here? And could begin() return 0? Turns out the
+ // answer to both questions is yes: 0-byte content compresses to 15
+ // bytes (with or without content size; 1-byte -- to 20/28 bytes). We
+ // can ignore EOF here since an attempt to read more will result in
+ // another EOF. And code below is prepared to handle 0 initial hint.
+ //
+ // @@ We could end up leaving some of the input content from the
+ // header in the input buffer which the caller will have no way
+ // of using/detecting.
+ //
+ d.hn = read (d.hb, sizeof (d.hb));
+ h = d.begin ();
+
+ ibg.reset ((d.ib = new char[d.ic]));
+ obg.reset ((d.ob = new char[d.oc]));
+
+ // Copy over whatever is left in the header buffer and read up to
+ // the hinted size.
+ //
+ memcpy (d.ib, d.hb, (d.in = d.hn));
+
+ if (h > d.in)
+ d.in += read (d.ib + d.in, h - d.in);
+
+ // Keep decompressing, writing, and reading chunks of compressed
+ // content.
+ //
+ while (h != 0)
+ {
+ h = d.next ();
+
+ if (d.on != 0) // next() may just buffer the data.
+ write (d.ob, d.on);
+
+ if (h != 0)
+ {
+ if (eof)
+ throw invalid_argument ("incomplete LZ4 compressed content");
+
+ d.in = read (d.ib, h);
+ }
+ }
+#endif
+
+ return ot;
+ }
+ }
+}
+
+// Include the implementation into our translation unit. Let's keep it last
+// since the implementation defines a bunch of macros.
+//
+#if defined(__clang__) || defined(__GNUC__)
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+// This header is only included in the implementation so we can include it
+// here instead of the above.
+//
+#define XXH_PRIVATE_API // Makes API static and includes xxhash.c.
+#include "xxhash.h"
+
+// Clang targeting MSVC prior to version 10 has difficulty with _tzcnt_u64()
+// (see Clang bug 47099 for a potentially related issue). Including relevant
+// headers (<immintrin.h>, <intrin.h>) does not appear to help. So for now we
+// just disable the use of _tzcnt_u64().
+//
+#if defined(_MSC_VER) && defined(__clang__) && __clang_major__ < 10
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+// Note that the order of inclusion is important (see *_SRC_INCLUDED macros).
+//
+extern "C"
+{
+#include "lz4.c"
+#include "lz4hc.c"
+#include "lz4frame.c"
+}
diff --git a/libbutl/lz4.h b/libbutl/lz4.h
new file mode 100644
index 0000000..7ab1e48
--- /dev/null
+++ b/libbutl/lz4.h
@@ -0,0 +1,774 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Header File
+ * Copyright (C) 2011-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h> /* size_t */
+
+
+/**
+ Introduction
+
+ LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+ scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+ multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+ The LZ4 compression library provides in-memory compression and decompression functions.
+ It gives full buffer control to user.
+ Compression can be done in:
+ - a single step (described as Simple Functions)
+ - a single step, reusing a context (described in Advanced Functions)
+ - unbounded multiple steps (described as Streaming compression)
+
+ lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+ Decompressing such a compressed block requires additional metadata.
+ Exact metadata depends on exact decompression function.
+ For the typical case of LZ4_decompress_safe(),
+ metadata includes block's compressed size, and maximum bound of decompressed size.
+ Each application is free to encode and pass such metadata in whichever way it wants.
+
+ lz4.h only handles blocks; it cannot generate Frames.
+
+ Blocks are different from Frames (doc/lz4_Frame_format.md).
+ Frames bundle both blocks and metadata in a specified manner.
+ Embedding metadata is required for compressed data to be self-contained and portable.
+ Frame format is delivered through a companion API, declared in lz4frame.h.
+ The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+* Export parameters
+*****************************************************************/
+/*
+* LZ4_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+* LZ4LIB_VISIBILITY :
+* Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define LZ4LIB_VISIBILITY
+# endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+# define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*------ Version ------*/
+#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
+#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
+
+LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
+
+
+/*-************************************
+* Tuning parameter
+**************************************/
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage may improve speed, thanks to better cache locality.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
+
+
+/*-************************************
+* Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+ * Compresses 'srcSize' bytes from buffer 'src'
+ * into already allocated 'dst' buffer of size 'dstCapacity'.
+ * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'src' into a more limited 'dst' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * In which case, 'dst' content is undefined (invalid).
+ * srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ * dstCapacity : size of buffer 'dst' (which must be already allocated)
+ * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ * or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ * compressedSize : is the exact complete size of the compressed block.
+ * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ * If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ * it will never write outside 'dst' buffer, nor read outside 'source' buffer,
+ * even if the compressed block is maliciously modified to order the decoder to do these actions.
+ * In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ * The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+* Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+ Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+ This function is primarily useful for memory allocation purposes (destination buffer size).
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+ Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+ inputSize : max supported value is LZ4_MAX_INPUT_SIZE
+ return : maximum output size in a "worst case" scenario
+ or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+ Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+ The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+ It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+ An acceleration value of "1" is the same as regular LZ4_compress_default()
+ Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+ Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ * Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ * Use LZ4_sizeofState() to know how much memory must be allocated,
+ * and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ * Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ * Reverse the logic : compresses as much data as possible from 'src' buffer
+ * into already allocated buffer 'dst', of size >= 'targetDstSize'.
+ * This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ * or fill 'dst' buffer completely with as much data as possible from 'src'.
+ * note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ * New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ * the produced compressed content could, in specific circumstances,
+ * require to be decompressed into a destination buffer larger
+ * by at least 1 byte than the content to decompress.
+ * If an application uses `LZ4_compress_destSize()`,
+ * it's highly recommended to update liblz4 to v1.9.2 or better.
+ * If this can't be done or ensured,
+ * the receiving decompression function should provide
+ * a dstCapacity which is > decompressedSize, by at least 1 byte.
+ * See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ * into destination buffer 'dst' of size 'dstCapacity'.
+ * Up to 'targetOutputSize' bytes will be decoded.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ * If source stream is detected malformed, function returns a negative result.
+ *
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ * so dstCapacity is kind of redundant.
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ * it could write more bytes, though only up to dstCapacity.
+ * Some "margin" used to be required for this operation to work properly.
+ * Thankfully, this is no longer necessary.
+ * The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ * Note 4 : If srcSize is the exact size of the block,
+ * then targetOutputSize can be any value,
+ * including larger than the block's decompressed size.
+ * The function will, at most, generate block's decompressed size.
+ *
+ * Note 5 : If srcSize is _larger_ than block's compressed size,
+ * then targetOutputSize **MUST** be <= block's decompressed size.
+ * Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+* Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
+
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ * (e.g., LZ4_compress_fast_continue()).
+ *
+ * An LZ4_stream_t must be initialized once before usage.
+ * This is automatically done when created by LZ4_createStream().
+ * However, should the LZ4_stream_t be simply declared on stack (for example),
+ * it's necessary to initialize it first, using LZ4_initStream().
+ *
+ * After init, start any new stream with LZ4_resetStream_fast().
+ * A same LZ4_stream_t can be re-used multiple times consecutively
+ * and compress multiple streams,
+ * provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ * LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ * but is not compatible with memory regions containing garbage data.
+ *
+ * Note: it's only useful to call LZ4_resetStream_fast()
+ * in the context of streaming compression.
+ * The *extState* functions perform their own resets.
+ * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ * Use this function to reference a static dictionary into LZ4_stream_t.
+ * The dictionary must remain available during compression.
+ * LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ * The same dictionary will have to be loaded on decompression side for successful decoding.
+ * Dictionaries are useful for better compression of small data (KB range).
+ * While LZ4 accepts any input as dictionary,
+ * results are generally better when using Zstandard's Dictionary Builder.
+ * Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ * Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ * or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ * Each block has precise boundaries.
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ * Make sure that buffers are separated, by at least one byte.
+ * This construction ensures that each block only depends on previous block.
+ *
+ * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ * If last 64KB data cannot be guaranteed to remain available at its current memory location,
+ * save it into a safer place (char* safeBuffer).
+ * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+* Streaming Decompression Functions
+* Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ * creation / destruction of streaming decompression tracking context.
+ * A tracking context can be re-used multiple times.
+ */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ * An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ * Use this function to start decompression of a new stream of blocks.
+ * A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ * Note : in a ring buffer scenario (optional),
+ * blocks are presumed decompressed next to each other
+ * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ * at which stage it resumes from beginning of ring buffer.
+ * When setting such a ring buffer for streaming decompression,
+ * provides the minimum size of this ring buffer
+ * to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ * These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ * A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ * Decompression functions only accept one block at a time.
+ * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ * If less than 64KB of data has been decoded, all the data must be present.
+ *
+ * Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized.
+ * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ * - Synchronized mode :
+ * Decompression buffer size is _exactly_ the same as compression buffer size,
+ * and follows exactly same update rule (block boundaries at same positions),
+ * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized,
+ * and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ * Whenever these conditions are not possible,
+ * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ * These decoding functions work the same as
+ * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ * They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ * Performance tip : Decompression speed can be substantially increased
+ * when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+
+/*^*************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
+
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step.
+ * It is only safe to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ * From a high level, the difference is that
+ * this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ * This is an experimental API that allows
+ * efficient use of a static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ * working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDict() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionaryStream may be NULL,
+ * in which case any existing dictionary stream is unset.
+ *
+ * If a dictionary is provided, it replaces any pre-existing stream history.
+ * The dictionary contents are the only history that can be referenced and
+ * logically immediately precede the data compressed in the first subsequent
+ * compression call.
+ *
+ * The dictionary will only remain attached to the working stream through the
+ * first compression call, at the end of which it is cleared. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly constrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ * |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ * |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ * Note that it is a compile-time constant, so all compressions will apply this limit.
+ * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ * so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ * This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ * in which case, the return code will be 0 (zero).
+ * The caller must be ready for these cases to happen,
+ * and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
+# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif /* LZ4_STATIC_3504398509 */
+#endif /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ * Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; /* compression-side state embedded in union LZ4_stream_u (LZ4_stream_t) */
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; /* (1 << LZ4_HASHLOG) 32-bit entries */
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ const LZ4_byte* dictionary; /* NOTE(review): presumably the dictionary bytes given to LZ4_loadDict() - confirm in lz4.c */
+ const LZ4_stream_t_internal* dictCtx; /* NOTE(review): presumably the stream attached via LZ4_attach_dictionary() - confirm in lz4.c */
+ LZ4_u32 dictSize; /* NOTE(review): presumably the size of 'dictionary' in bytes - confirm in lz4.c */
+};
+
+typedef struct { /* decompression-side tracking state embedded in union LZ4_streamDecode_u (LZ4_streamDecode_t) */
+ const LZ4_byte* externalDict; /* NOTE(review): presumably the dictionary passed to LZ4_setStreamDecode() - confirm in lz4.c */
+ size_t extDictSize; /* NOTE(review): presumably the size of 'externalDict' in bytes - confirm in lz4.c */
+ const LZ4_byte* prefixEnd; /* NOTE(review): presumably the end of the previously decoded data - confirm in lz4.c */
+ size_t prefixSize; /* NOTE(review): presumably the size of that previously decoded data - confirm in lz4.c */
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ * Do not use below internal definitions directly !
+ * Declare or allocate an LZ4_stream_t instead.
+ * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ * The structure definition can be convenient for static allocation
+ * (on stack, or as part of larger structure).
+ * Init this structure with LZ4_initStream() before first use.
+ * note : only use this definition in association with static linking !
+ * this definition is not API/ABI safe, and may change in future versions.
+ */
+#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
+union LZ4_stream_u {
+ void* table[LZ4_STREAMSIZE_VOIDP]; /* LZ4_STREAMSIZE / sizeof(void*) pointers: reserves the static LZ4_STREAMSIZE footprint */
+ LZ4_stream_t_internal internal_donotuse; /* actual compression state; private, do not access members directly */
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is automatically done when invoking LZ4_createStream(),
+ * but it's not when the structure is simply declared on stack (for example).
+ *
+ * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ * It can also initialize any arbitrary buffer of sufficient size,
+ * and will @return a pointer of proper type upon initialization.
+ *
+ * Note : initialization fails if size and alignment conditions are not respected.
+ * In which case, the function will @return NULL.
+ * Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ * Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode() before first use.
+ * note : only use in association with static linking !
+ * this definition is not API/ABI safe,
+ * and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) /* 4 slots, or 6 when pointers are 16 bytes wide */
+#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) /* the same size expressed in bytes */
+union LZ4_streamDecode_u {
+ unsigned long long table[LZ4_STREAMDECODESIZE_U64]; /* fixes the union size in unsigned long long units */
+ LZ4_streamDecode_t_internal internal_donotuse;
+} ; /* previously typedef'd to LZ4_streamDecode_t */
+
+
+
+/*-************************************
+* Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+ *
+ * Deprecated functions make the compiler generate a warning when invoked.
+ * This is meant to invite users to update their source code.
+ * Should deprecation warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ * before including the header file.
+ */
+/* LZ4_DEPRECATED(message): declaration prefix that selects the deprecation
+ * syntax of the detected compiler. It expands to nothing when
+ * LZ4_DISABLE_DEPRECATE_WARNINGS is defined or no known syntax applies. */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define LZ4_DEPRECATED(message) [[deprecated(message)]]
+# elif defined(_MSC_VER)
+# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) /* clang, or GCC 4.5+ */
+# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) /* GCC 3.1+: attribute without a message */
+# define LZ4_DEPRECATED(message) __attribute__((deprecated))
+# else
+# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+# define LZ4_DEPRECATED(message) /* disabled */
+# endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ * These functions used to be faster than LZ4_decompress_safe(),
+ * but this is no longer the case. They are now slower.
+ * This is because LZ4_decompress_fast() doesn't know the input size,
+ * and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ * The last remaining LZ4_decompress_fast() specificity is that
+ * it can decompress a block without knowing its compressed size.
+ * Such functionality can be achieved in a more secure manner
+ * by employing LZ4_decompress_safe_partial().
+ *
+ * Parameters:
+ * originalSize : is the uncompressed size to regenerate.
+ * `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ * The function expects to finish at block's end exactly.
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ * Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ * These issues never happen if input (compressed) data is correct.
+ * But they may happen if input data is invalid (error or intentional tampering).
+ * As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is done with LZ4_initStream(), or LZ4_resetStream().
+ * Consider switching to LZ4_initStream(),
+ * invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+
+#endif /* LZ4_H_98237428734687 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/libbutl/lz4.hxx b/libbutl/lz4.hxx
new file mode 100644
index 0000000..7886788
--- /dev/null
+++ b/libbutl/lz4.hxx
@@ -0,0 +1,205 @@
+// file : libbutl/lz4.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+ namespace lz4
+ {
+ // Read the content from the input stream, compress it using the specified
+ // compression level and block size, and write the compressed content to
+ // the output stream. If content size is specified, then include it into
+ // the compressed content header. Return the compressed content size.
+ //
+ // This function may throw std::bad_alloc as well as exceptions thrown by
+ // fdstream read/write functions. It may also throw std::invalid_argument
+ // in case of argument inconsistencies (e.g., content size does not match
+ // actual) with what() returning the error description. The input stream
+ // is expected to throw on badbit (but not failbit). The output stream is
+ // expected to throw on badbit or failbit.
+ //
+ // The output and most likely the input streams must be in the binary
+ // mode.
+ //
+ // Valid values for the compression level are between 1 (fastest) and 12
+ // (best compression level) though, practically, after 9 returns are
+ // diminished.
+ //
+ // Valid block sizes and their IDs:
+ //
+ // 4: 64KB
+ // 5: 256KB
+ // 6: 1MB
+ // 7: 4MB
+ //
+ // Note that due to the underlying API limitations, 0 content size is
+ // treated as absent and it's therefore impossible to compress 0-byte
+ // content with content size.
+ //
+ // This function produces compressed content identical to:
+ //
+ // lz4 -z -<compression_level> -B<block_size_id> -BD [--content-size]
+ //
+ LIBBUTL_SYMEXPORT std::uint64_t
+ compress (ofdstream&,
+ ifdstream&,
+ int compression_level,
+ int block_size_id,
+ optional<std::uint64_t> content_size);
+
+ // Low-level iterative compression API.
+ //
+ // This API may throw std::bad_alloc in case of memory allocation errors
+ // and std::invalid_argument in case of argument inconsistencies (e.g.,
+ // content size does not match actual) with what() returning the error
+ // description.
+ //
+ // See the implementation of the compress() function above for usage
+ // example.
+ //
+ // @@ TODO: reset support.
+ //
+ // Call sequence: begin() once, then next() for every chunk of input.
+ //
+ struct LIBBUTL_SYMEXPORT compressor
+ {
+ // Buffer, current size (part filled with data), and capacity.
+ //
+ // Note that the default constructor does not initialize any of these
+ // members.
+ //
+ char* ib; std::size_t in, ic; // Input.
+ char* ob; std::size_t on, oc; // Output.
+
+ // As a first step call begin(). This function sets the required input
+ // and output buffer capacities (ic, oc).
+ //
+ // The caller normally allocates the input and output buffers and fills
+ // the input buffer.
+ //
+ void
+ begin (int compression_level,
+ int block_size_id,
+ optional<std::uint64_t> content_size);
+
+ // Then call next() to compress the next chunk of input passing true on
+ // reaching EOF. Note that the input buffer should be filled to capacity
+ // unless end is true and the output buffer must be flushed before each
+ // subsequent call to next().
+ //
+ void
+ next (bool end);
+
+ // Not copyable or movable.
+ //
+ compressor (const compressor&) = delete;
+ compressor (compressor&&) = delete;
+ compressor& operator= (const compressor&) = delete;
+ compressor& operator= (compressor&&) = delete;
+
+ // Implementation details.
+ //
+ // The default constructor only sets ctx_ to nullptr; level_, block_id_,
+ // and begin_ are left uninitialized by it.
+ //
+ compressor (): ctx_ (nullptr) {}
+ ~compressor ();
+
+ public:
+ void
+ init_preferences (void*) const;
+
+ void* ctx_; // Opaque context; nullptr until set (presumably by begin()).
+ int level_; // Presumably the compression_level passed to begin().
+ int block_id_; // Presumably the block_size_id passed to begin().
+ optional<std::uint64_t> content_size_; // Presumably the content_size passed to begin().
+ bool begin_; // Presumably true until the first next() call -- TODO confirm.
+ };
+
+
+ // Read the compressed content from the input stream, decompress it, and
+ // write the decompressed content to the output stream. Return the
+ // decompressed content size.
+ //
+ // This function may throw std::bad_alloc as well as exceptions thrown by
+ // fdstream read/write functions. It may also throw std::invalid_argument
+ // if the compressed content is invalid with what() returning the error
+ // description. The input stream is expected to throw on badbit but not
+ // failbit. The output stream is expected to throw on badbit or failbit.
+ //
+ // The input and most likely the output streams must be in the binary
+ // mode.
+ //
+ // Note that this function does not require the input stream to reach EOF
+ // at the end of compressed content. So if you have this requirement, you
+ // will need to enforce it yourself.
+ //
+ LIBBUTL_SYMEXPORT std::uint64_t
+ decompress (ofdstream&, ifdstream&);
+
+ // Low-level iterative decompression API.
+ //
+ // This API may throw std::bad_alloc in case of memory allocation errors
+ // and std::invalid_argument if the compressed content is invalid with
+ // what() returning the error description.
+ //
+ // See the implementation of the decompress() function above for usage
+ // example.
+ //
+ // The LZ4F_*() decompression functions return a hint of how much data
+ // they want on the next call. So the plan is to allocate the input
+ // buffer large enough to hold anything that can be asked for and then
+ // fill it in in the asked chunks. This way we avoid having to shift the
+ // unread data around.
+ //
+ // @@ TODO: reset support.
+ //
+ // Call sequence: fill the header buffer, call begin() once, then call
+ // next() until it returns 0.
+ //
+ struct LIBBUTL_SYMEXPORT decompressor
+ {
+ // Buffer, current size (part filled with data), and capacity.
+ //
+ // The header buffer size (19) is presumably the maximum LZ4 frame header
+ // size (LZ4F_HEADER_SIZE_MAX).
+ //
+ char hb[19]; std::size_t hn ; // Header.
+ char* ib; std::size_t in, ic; // Input.
+ char* ob; std::size_t on, oc; // Output.
+
+ // As a first step, fill in the header buffer and call begin(). This
+ // function sets the required input and output buffer capacities (ic,
+ // oc) and the number of bytes left in the header buffer (hn) and
+ // returns the number of bytes expected by the following call to next().
+ // If content_size is not NULL, then it is set to the decompressed
+ // content size, if available.
+ //
+ // The caller normally allocates the input and output buffers, copies
+ // remaining header buffer data over to the input buffer, and then fills
+ // in the remainder of the input buffer up to what's expected by the
+ // call to next().
+ //
+ std::size_t
+ begin (optional<std::uint64_t>* content_size = nullptr);
+
+ // Then call next() to decompress the next chunk of input. This function
+ // returns the number of bytes expected by the following call to next()
+ // or 0 if no further input is expected. Note that the output buffer
+ // must be flushed before each subsequent call to next().
+ //
+ std::size_t
+ next ();
+
+ // Not copyable or movable.
+ //
+ decompressor (const decompressor&) = delete;
+ decompressor (decompressor&&) = delete;
+ decompressor& operator= (const decompressor&) = delete;
+ decompressor& operator= (decompressor&&) = delete;
+
+ // Implementation details.
+ //
+ // The default constructor zeroes the fill counters (hn, in, on) and sets
+ // ctx_ to nullptr; the buffer pointers and capacities are left
+ // uninitialized by it.
+ //
+ decompressor (): hn (0), in (0), on (0), ctx_ (nullptr) {}
+ ~decompressor ();
+
+ public:
+ void* ctx_; // Opaque context; nullptr until set (presumably by begin()).
+ };
+ }
+}
diff --git a/libbutl/lz4frame.c b/libbutl/lz4frame.c
new file mode 100644
index 0000000..0db8c1e
--- /dev/null
+++ b/libbutl/lz4frame.c
@@ -0,0 +1,1899 @@
+/*
+ * LZ4 auto-framing library
+ * Copyright (C) 2011-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+ * in full conformance with specification v1.6.1 .
+ * This library relies upon memory management capabilities (malloc, free)
+ * provided either by <stdlib.h>,
+ * or redirected towards another library of user's choice
+ * (see Memory Routines below).
+ */
+
+
+/*-************************************
+* Compiler Options
+**************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/*-************************************
+* Tuning parameters
+**************************************/
+/*
+ * LZ4F_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4F_HEAPMODE
+# define LZ4F_HEAPMODE 0
+#endif
+
+
+/*-************************************
+* Memory routines
+**************************************/
+/*
+ * User may redirect invocations of
+ * malloc(), calloc() and free()
+ * towards another library or solution of their choice
+ * by modifying below section.
+ */
+#ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */
+# include <stdlib.h> /* malloc, calloc, free */
+# define ALLOC(s) malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,(s))
+# define FREEMEM(p) free(p)
+#endif
+
+#include <string.h> /* memset, memcpy, memmove */
+#ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */
+# define MEM_INIT(p,v,s) memset((p),(v),(s))
+#endif
+
+
+/*-************************************
+* Library declarations
+**************************************/
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#define LZ4_STATIC_LINKING_ONLY
+#include "lz4.h"
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/*-************************************
+* Debug
+**************************************/
+/* assert() is the real <assert.h> macro only when LZ4_DEBUG >= 1; otherwise it
+ * is defined as a no-op, unless an assert macro already exists. */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+/* Compile-time check: a false condition makes the enumerator initializer a division by zero, which does not compile. */
+#define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+/* DEBUGLOG(l, ...): with LZ4_DEBUG >= 2 (and no DEBUGLOG defined yet) it prints
+ * a printf-style message to stderr, prefixed with __FILE__, when l <= LZ4_DEBUG.
+ * In every other configuration it expands to an empty block. */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+
+/*-************************************
+* Basic Types
+**************************************/
+/* Short integer aliases used throughout this file. <stdint.h> types are used
+ * for C++ and C99 builds, except on VMS; otherwise built-in types are used,
+ * which are assumed (not checked here) to have the matching widths. */
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+
+/* Portable little-endian 32-bit load: the value is assembled one byte at a
+ * time, so the result does not depend on host byte order or on the alignment
+ * of src. Reads exactly 4 bytes. */
+static U32 LZ4F_readLE32 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U32 result = 0;
+    int i;
+    /* walk from the most significant byte (index 3) down to the least */
+    for (i = 3; i >= 0; i--)
+        result = (result << 8) | bytes[i];
+    return result;
+}
+
+/* Portable little-endian 32-bit store: writes value32 into dst[0..3],
+ * least significant byte first. */
+static void LZ4F_writeLE32 (void* dst, U32 value32)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned i;
+    for (i = 0; i < 4; i++)
+        out[i] = (BYTE)(value32 >> (8 * i));
+}
+
+/* Portable little-endian 64-bit load: assembles the value from src[0..7],
+ * with src[0] as the least significant byte. */
+static U64 LZ4F_readLE64 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U64 result = 0;
+    int i;
+    /* walk from the most significant byte (index 7) down to the least */
+    for (i = 7; i >= 0; i--)
+        result = (result << 8) | bytes[i];
+    return result;
+}
+
+/* Portable little-endian 64-bit store: writes value64 into dst[0..7],
+ * least significant byte first. */
+static void LZ4F_writeLE64 (void* dst, U64 value64)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned i;
+    for (i = 0; i < 8; i++)
+        out[i] = (BYTE)(value64 >> (8 * i));
+}
+
+
+/*-************************************
+* Constants
+**************************************/
+/* Postfix size multipliers: `64 KB` expands to `64 *(1<<10)`. */
+#ifndef LZ4_SRC_INCLUDED /* avoid double definition */
+# define KB *(1<<10)
+# define MB *(1<<20)
+# define GB *(1<<30)
+#endif
+
+/* Bit masks covering the lowest 1, 2, 3, 4 and 8 bits. */
+#define _1BIT 0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U /* top bit of a 32-bit value */
+#define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB /* used by LZ4F_getBlockSize() when the ID is 0 */
+
+/* Header and footer sizes as typed constants, taken from the LZ4F_* macros. */
+static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */
+static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */
+static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */
+static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */
+
+
+/*-************************************
+* Structures and local types
+**************************************/
+/* Frame compression context. LZ4F_compressFrame() builds one on the stack
+ * (zero-filled, then version / maxBufferSize / lz4Ctx* set by hand). Field
+ * notes marked "presumably" are inferred from names only. */
+typedef struct LZ4F_cctx_s
+{
+ LZ4F_preferences_t prefs;
+ U32 version;
+ U32 cStage;
+ const LZ4F_CDict* cdict;
+ size_t maxBlockSize;
+ size_t maxBufferSize;
+ BYTE* tmpBuff;
+ BYTE* tmpIn;
+ size_t tmpInSize; /* presumably the number of bytes held at tmpIn -- TODO confirm */
+ U64 totalInSize;
+ XXH32_state_t xxh;
+ void* lz4CtxPtr; /* underlying lz4 or lz4hc context, see the two fields below */
+ U16 lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+ U16 lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+} LZ4F_cctx_t;
+
+
+/*-************************************
+* Error management
+**************************************/
+/* Table of error names, built by stringifying every entry that
+ * LZ4F_LIST_ERRORS passes to the generator macro. LZ4F_getErrorName()
+ * indexes it with the negated error code. */
+#define LZ4F_GENERATE_STRING(STRING) #STRING,
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };
+
+
+/* Tells whether `code` is an error value: errors are the values strictly
+ * above -LZ4F_ERROR_maxCode once that is converted to LZ4F_errorCode_t.
+ * Returns 1 for an error, 0 otherwise. */
+unsigned LZ4F_isError(LZ4F_errorCode_t code)
+{
+    LZ4F_errorCode_t const threshold = (LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode);
+    return (code > threshold);
+}
+
+/* Maps an error code to its name from LZ4F_errorStrings (indexed by the
+ * negated code). Values that are not errors yield a fixed fallback string. */
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    return LZ4F_isError(code) ? LZ4F_errorStrings[-(int)(code)]
+                              : "Unspecified error code";
+}
+
+/* Converts a function result back into an LZ4F_errorCodes value:
+ * LZ4F_OK_NoError for a non-error result, otherwise the negated result. */
+LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult)
+{
+    if (LZ4F_isError(functionResult))
+        return (LZ4F_errorCodes)(-(ptrdiff_t)functionResult);
+    return LZ4F_OK_NoError;
+}
+
+/* Encodes an LZ4F_errorCodes value as a function result by negating it
+ * (the inverse of LZ4F_getErrorCode()). */
+static LZ4F_errorCode_t err0r(LZ4F_errorCodes code)
+{
+    ptrdiff_t const negatedCode = -(ptrdiff_t)code;
+    /* A compilation error here means sizeof(ptrdiff_t) is not large enough */
+    LZ4F_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
+    return (LZ4F_errorCode_t)negatedCode;
+}
+
+/* Reports the LZ4F_VERSION value this file was compiled with. */
+unsigned LZ4F_getVersion(void)
+{
+    return LZ4F_VERSION;
+}
+
+/* Reports the highest compression level, i.e. LZ4HC_CLEVEL_MAX. */
+int LZ4F_compressionLevel_max(void)
+{
+    return LZ4HC_CLEVEL_MAX;
+}
+
+/* Translates a block size ID into a size in bytes.
+ * An ID of 0 selects LZ4F_BLOCKSIZEID_DEFAULT. IDs outside
+ * [LZ4F_max64KB, LZ4F_max4MB] yield the LZ4F_ERROR_maxBlockSize_invalid
+ * error code (testable with LZ4F_isError()). */
+size_t LZ4F_getBlockSize(unsigned blockSizeID)
+{
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+    unsigned const effectiveID = (blockSizeID == 0) ? LZ4F_BLOCKSIZEID_DEFAULT : blockSizeID;
+
+    if (effectiveID >= LZ4F_max64KB && effectiveID <= LZ4F_max4MB)
+        return blockSizes[effectiveID - LZ4F_max64KB];   /* table starts at the 64 KB ID */
+    return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+}
+
+/*-************************************
+* Private functions
+**************************************/
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+
+/* One-byte header checksum: bits 8..15 of the XXH32 hash (seed 0) of the
+ * first `length` bytes at `header`. */
+static BYTE LZ4F_headerChecksum (const void* header, size_t length)
+{
+    return (BYTE)(XXH32(header, length, 0) >> 8);
+}
+
+
+/*-************************************
+* Simple-pass compression functions
+**************************************/
+/* Picks the smallest block size ID, starting at LZ4F_max64KB and never
+ * exceeding requestedBSID, whose block size can hold srcSize bytes.
+ * Falls back to requestedBSID when no smaller ID is large enough. */
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID,
+                                           const size_t srcSize)
+{
+    LZ4F_blockSizeID_t candidate;
+    size_t capacity = 64 KB;
+
+    for (candidate = LZ4F_max64KB;
+         candidate < requestedBSID;
+         candidate = (LZ4F_blockSizeID_t)((int)candidate + 1)) {
+        if (srcSize <= capacity)
+            return candidate;
+        capacity <<= 2;   /* each successive ID quadruples the block size */
+    }
+    return requestedBSID;
+}
+
+/*! LZ4F_compressBound_internal() :
+ * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
+ * prefsPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario.
+ * @return is always the same for a srcSize and prefsPtr, so it can be relied upon to size reusable buffers.
+ * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ * alreadyBuffered is added to srcSize after being capped at blockSize-1
+ * (presumably input already buffered by earlier calls).
+ */
+static size_t LZ4F_compressBound_internal(size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr,
+ size_t alreadyBuffered)
+{
+ LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES;
+ prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */
+ prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; /* worst case */
+ { const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+ /* a zero srcSize is treated like a flush request */
+ U32 const flush = prefsPtr->autoFlush | (srcSize==0);
+ LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
+ /* NOTE(review): the result is not tested with LZ4F_isError() here */
+ size_t const blockSize = LZ4F_getBlockSize(blockID);
+ size_t const maxBuffered = blockSize - 1;
+ size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered);
+ size_t const maxSrcSize = srcSize + bufferedSize;
+ unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize);
+ /* remainder via mask: relies on blockSize being a power of two */
+ size_t const partialBlockSize = maxSrcSize & (blockSize-1);
+ /* the trailing partial block only counts when flushing */
+ size_t const lastBlockSize = flush ? partialBlockSize : 0;
+ unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
+
+ /* per-block checksum size, and frame end: one block header plus optional content checksum */
+ size_t const blockCRCSize = BFSize * prefsPtr->frameInfo.blockChecksumFlag;
+ size_t const frameEnd = BHSize + (prefsPtr->frameInfo.contentChecksumFlag*BFSize);
+
+ return ((BHSize + blockCRCSize) * nbBlocks) +
+ (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
+ }
+}
+
+/*! LZ4F_compressFrameBound() :
+ * Upper bound on the size of a frame produced from srcSize bytes of input:
+ * the maximum frame header size plus LZ4F_compressBound_internal(),
+ * evaluated with autoFlush forced to 1 and nothing already buffered.
+ * preferencesPtr is optional : if NULL, zero-initialized preferences are used.
+ */
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefs;
+    size_t const headerSize = maxFHSize;      /* max header size, including optional fields */
+
+    /* work on a private copy so the caller's preferences are never modified */
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;
+    else MEM_INIT(&prefs, 0, sizeof(prefs));
+    prefs.autoFlush = 1;
+
+    return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);
+}
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ * Compress srcBuffer using a dictionary, in a single step.
+ * cdict can be NULL, in which case, no dictionary is used.
+ * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ * however, it's the only way to provide a dictID, so it's not recommended.
+ * @return : number of bytes written into dstBuffer,
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ LZ4F_preferences_t prefs;
+ LZ4F_compressOptions_t options;
+ BYTE* const dstStart = (BYTE*) dstBuffer;
+ BYTE* dstPtr = dstStart;
+ BYTE* const dstEnd = dstStart + dstCapacity;
+
+ /* work on a private copy of the preferences (zeroed when none are given) */
+ if (preferencesPtr!=NULL)
+ prefs = *preferencesPtr;
+ else
+ MEM_INIT(&prefs, 0, sizeof(prefs));
+ if (prefs.frameInfo.contentSize != 0)
+ prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */
+
+ /* shrink the block size ID to the smallest one able to hold the input */
+ prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);
+ prefs.autoFlush = 1;
+ if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+ prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* only one block => no need for inter-block link */
+
+ MEM_INIT(&options, 0, sizeof(options));
+ options.stableSrc = 1;
+
+ if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs)) /* condition to guarantee success */
+ return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+
+ /* stage 1: frame header */
+ { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs); /* write header */
+ if (LZ4F_isError(headerSize)) return headerSize;
+ dstPtr += headerSize; /* header size */ }
+
+ /* stage 2: the whole input in a single update call */
+ assert(dstEnd >= dstPtr);
+ { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, (size_t)(dstEnd-dstPtr), srcBuffer, srcSize, &options);
+ if (LZ4F_isError(cSize)) return cSize;
+ dstPtr += cSize; }
+
+ /* stage 3: frame end */
+ assert(dstEnd >= dstPtr);
+ { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, (size_t)(dstEnd-dstPtr), &options); /* flush last block, and generate suffix */
+ if (LZ4F_isError(tailSize)) return tailSize;
+ dstPtr += tailSize; }
+
+ /* NOTE(review): this compares dstEnd with dstStart, which holds by construction; dstPtr may have been intended */
+ assert(dstEnd >= dstStart);
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressFrame() :
+ * Compress an entire srcBuffer into a valid LZ4 frame, in a single step.
+ * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ * The compression context is heap-allocated when LZ4F_HEAPMODE is non-zero,
+ * otherwise it is a local variable of this function.
+ */
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ size_t result;
+#if (LZ4F_HEAPMODE)
+ LZ4F_cctx_t *cctxPtr;
+ result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION);
+ if (LZ4F_isError(result)) return result;
+#else
+ LZ4F_cctx_t cctx;
+ LZ4_stream_t lz4ctx;
+ LZ4F_cctx_t *cctxPtr = &cctx;
+
+ DEBUGLOG(4, "LZ4F_compressFrame");
+ MEM_INIT(&cctx, 0, sizeof(cctx));
+ cctx.version = LZ4F_VERSION;
+ cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+ /* below the HC threshold (or with no preferences): use the local lz4ctx as the lz4 context (1 = lz4 ctx) */
+ if (preferencesPtr == NULL ||
+ preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN)
+ {
+ LZ4_initStream(&lz4ctx, sizeof(lz4ctx));
+ cctxPtr->lz4CtxPtr = &lz4ctx;
+ cctxPtr->lz4CtxAlloc = 1;
+ cctxPtr->lz4CtxState = 1;
+ }
+#endif
+
+ result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+ srcBuffer, srcSize,
+ NULL, preferencesPtr);
+
+#if (LZ4F_HEAPMODE)
+ LZ4F_freeCompressionContext(cctxPtr);
+#else
+ /* HC levels: lz4CtxPtr was not pointed at the local lz4ctx above, so release whatever
+ * it holds (presumably allocated during compression; still NULL from MEM_INIT otherwise) */
+ if (preferencesPtr != NULL &&
+ preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN)
+ {
+ FREEMEM(cctxPtr->lz4CtxPtr);
+ }
+#endif
+ return result;
+}
+
+
+/*-***************************************************
+* Dictionary compression
+*****************************************************/
+
+/* Digested dictionary: a private copy of the dictionary bytes plus one fast
+ * and one HC stream that LZ4F_createCDict() loads with that copy. */
+struct LZ4F_CDict_s {
+ void* dictContent; /* copy of (at most the last 64 KB of) the dictionary */
+ LZ4_stream_t* fastCtx; /* created with LZ4_createStream() */
+ LZ4_streamHC_t* HCCtx; /* created with LZ4_createStreamHC() */
+}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */
+
+/*! LZ4F_createCDict() :
+ * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict
+ * Only the last 64 KB of `dictBuffer` are kept when dictSize is larger than that.
+ * @return : digested dictionary for compression, or NULL if failed */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
+{
+ const char* dictStart = (const char*)dictBuffer;
+ LZ4F_CDict* cdict = (LZ4F_CDict*) ALLOC(sizeof(*cdict));
+ DEBUGLOG(4, "LZ4F_createCDict");
+ if (!cdict) return NULL;
+ /* keep only the tail of an oversized dictionary */
+ if (dictSize > 64 KB) {
+ dictStart += dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ /* all three members are assigned before the check, so LZ4F_freeCDict() never sees an unset pointer */
+ cdict->dictContent = ALLOC(dictSize);
+ cdict->fastCtx = LZ4_createStream();
+ cdict->HCCtx = LZ4_createStreamHC();
+ if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {
+ LZ4F_freeCDict(cdict);
+ return NULL;
+ }
+ memcpy(cdict->dictContent, dictStart, dictSize);
+ /* dictSize is at most 64 KB at this point, so the casts to int do not truncate */
+ LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);
+ LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+ LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
+ return cdict;
+}
+
+/* LZ4F_freeCDict() :
+ * Releases a CDict together with its dictionary copy and both stream states.
+ * Passing NULL is allowed and does nothing. */
+void LZ4F_freeCDict(LZ4F_CDict* cdict)
+{
+    if (cdict != NULL) {
+        FREEMEM(cdict->dictContent);
+        LZ4_freeStream(cdict->fastCtx);
+        LZ4_freeStreamHC(cdict->HCCtx);
+        FREEMEM(cdict);
+    }
+}
+
+
+/*-*********************************
+* Advanced compression functions
+***********************************/
+
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as arguments the address of a context pointer and a version.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries.
+ * On success, *LZ4F_compressionContextPtr receives a zero-initialized context, ready for LZ4F_compressBegin().
+ * On allocation failure, *LZ4F_compressionContextPtr is set to NULL, so the caller never keeps a stale pointer
+ * (same contract as LZ4F_createDecompressionContext()).
+ * LZ4F_compressionContextPtr itself must not be NULL.
+ * @return : LZ4F_OK_NoError, or an error code (LZ4F_ERROR_allocation_failed) which can be tested using LZ4F_isError().
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t));
+    if (cctxPtr==NULL) {
+        /* publish the failure through the out-pointer too : it is safe to pass to LZ4F_freeCompressionContext() */
+        *LZ4F_compressionContextPtr = NULL;
+        return err0r(LZ4F_ERROR_allocation_failed);
+    }
+
+    cctxPtr->version = version;
+    cctxPtr->cStage = 0;   /* Next stage : init stream */
+
+    *LZ4F_compressionContextPtr = cctxPtr;
+
+    return LZ4F_OK_NoError;
+}
+
+
+/* LZ4F_freeCompressionContext() :
+ * Releases the context, its LZ4 / LZ4HC stream state and its tmp buffer.
+ * Passing NULL is allowed. Always returns LZ4F_OK_NoError. */
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctxPtr)
+{
+    if (cctxPtr == NULL) return LZ4F_OK_NoError;   /* support free on NULL */
+
+    FREEMEM(cctxPtr->lz4CtxPtr);   /* note: LZ4_streamHC_t and LZ4_stream_t are simple POD types */
+    FREEMEM(cctxPtr->tmpBuff);
+    FREEMEM(cctxPtr);
+    return LZ4F_OK_NoError;
+}
+
+
+/**
+ * Prepares the internal LZ4 or LZ4HC stream state for a new independent
+ * compression : resets it when needed and attaches the dictionary held by
+ * `cdict` (or detaches any dictionary when `cdict` is NULL).
+ *
+ * Must be invoked at the start of every independent stream, i.e. once per
+ * frame in blockLinked mode and once per block in blockIndependent mode.
+ *
+ * `ctx` is interpreted as LZ4_stream_t* when level < LZ4HC_CLEVEL_MIN,
+ * and as LZ4_streamHC_t* otherwise.
+ */
+static void LZ4F_initStream(void* ctx,
+                            const LZ4F_CDict* cdict,
+                            int level,
+                            LZ4F_blockMode_t blockMode) {
+    if (level >= LZ4HC_CLEVEL_MIN) {
+        LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ctx;
+        LZ4_resetStreamHC_fast(hcStream, level);
+        LZ4_attach_HC_dictionary(hcStream, cdict ? cdict->HCCtx : NULL);
+        return;
+    }
+
+    {   LZ4_stream_t* const fastStream = (LZ4_stream_t*)ctx;
+        /* A reset is only required when LZ4_compress_fast_continue() will be
+         * used afterwards (dictionary present, or linked blocks). The one-shot
+         * functions reset the state themselves, knowing which tableType they
+         * need, so resetting here would be wasted work. */
+        if (cdict != NULL || blockMode == LZ4F_blockLinked) {
+            LZ4_resetStream_fast(fastStream);
+        }
+        LZ4_attach_dictionary(fastStream, cdict ? cdict->fastCtx : NULL);
+    }
+}
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ * init streaming compression and writes frame header into dstBuffer.
+ * dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * Steps performed, in order :
+ * - copy the preferences into the context (all-zero preferences when preferencesPtr is NULL) ;
+ * - make sure an LZ4 / LZ4HC stream state of the right type is allocated and initialized ;
+ * - size the internal tmp buffer and reset the tmp input pointers and the content checksum state ;
+ * - remember `cdict`, and init the stream right away in blockLinked mode ;
+ * - serialize magic number, FLG byte, BD byte, optional content size, optional dictID and header checksum.
+ * On success, cctxPtr->cStage is set to 1.
+ * @return : number of bytes written into dstBuffer for the header
+ * or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr)
+{
+ LZ4F_preferences_t prefNull;
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* dstPtr = dstStart;
+ BYTE* headerStart;
+
+ if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+ MEM_INIT(&prefNull, 0, sizeof(prefNull));
+ if (preferencesPtr == NULL) preferencesPtr = &prefNull;
+ cctxPtr->prefs = *preferencesPtr;
+
+ /* Ctx Management */
+ { U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2; /* 1 = LZ4 (fast) stream state, 2 = LZ4HC stream state */
+ if (cctxPtr->lz4CtxAlloc < ctxTypeID) { /* allocated state (if any) is of a lower type than required : replace it */
+ FREEMEM(cctxPtr->lz4CtxPtr);
+ if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+ cctxPtr->lz4CtxPtr = LZ4_createStream();
+ } else {
+ cctxPtr->lz4CtxPtr = LZ4_createStreamHC();
+ }
+ if (cctxPtr->lz4CtxPtr == NULL)
+ return err0r(LZ4F_ERROR_allocation_failed);
+ cctxPtr->lz4CtxAlloc = ctxTypeID;
+ cctxPtr->lz4CtxState = ctxTypeID;
+ } else if (cctxPtr->lz4CtxState != ctxTypeID) {
+ /* otherwise, a sufficient buffer is allocated, but we need to
+ * reset it to the correct context type */
+ if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+ LZ4_initStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr, sizeof (LZ4_stream_t));
+ } else {
+ LZ4_initStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t));
+ LZ4_setCompressionLevel((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+ }
+ cctxPtr->lz4CtxState = ctxTypeID;
+ }
+ }
+
+ /* Buffer Management */
+ if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
+ cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+ cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+ { size_t const requiredBuffSize = preferencesPtr->autoFlush ?
+ ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 64 KB : 0) : /* only needs past data up to window size */
+ cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 128 KB : 0);
+
+ if (cctxPtr->maxBufferSize < requiredBuffSize) { /* grow tmpBuff ; maxBufferSize stays 0 if the allocation fails */
+ cctxPtr->maxBufferSize = 0;
+ FREEMEM(cctxPtr->tmpBuff);
+ cctxPtr->tmpBuff = (BYTE*)ALLOC_AND_ZERO(requiredBuffSize);
+ if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+ cctxPtr->maxBufferSize = requiredBuffSize;
+ } }
+ cctxPtr->tmpIn = cctxPtr->tmpBuff;
+ cctxPtr->tmpInSize = 0;
+ (void)XXH32_reset(&(cctxPtr->xxh), 0);
+
+ /* context init */
+ cctxPtr->cdict = cdict;
+ if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+ /* frame init only for blockLinked : blockIndependent will be init at each block */
+ LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
+ }
+ if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
+ LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
+ }
+
+ /* Magic Number */
+ LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+ dstPtr += 4;
+ headerStart = dstPtr; /* the header checksum covers the bytes from here (magic number excluded) */
+
+ /* FLG Byte */
+ *dstPtr++ = (BYTE)(((1 & _2BITS) << 6) /* Version('01') */
+ + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
+ + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
+ + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+ + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
+ + (cctxPtr->prefs.frameInfo.dictID > 0) );
+ /* BD Byte */
+ *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+ /* Optional Frame content size field */
+ if (cctxPtr->prefs.frameInfo.contentSize) {
+ LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize);
+ dstPtr += 8;
+ cctxPtr->totalInSize = 0; /* the input counter is only reset here, i.e. when a content size is declared */
+ }
+ /* Optional dictionary ID field */
+ if (cctxPtr->prefs.frameInfo.dictID) {
+ LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID);
+ dstPtr += 4;
+ }
+ /* Header CRC Byte */
+ *dstPtr = LZ4F_headerChecksum(headerStart, (size_t)(dstPtr - headerStart));
+ dstPtr++;
+
+ cctxPtr->cStage = 1; /* header written, now request input data block */
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressBegin() :
+ *  Starts a new frame without dictionary : thin wrapper around
+ *  LZ4F_compressBegin_usingCDict() with a NULL CDict.
+ *  dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ *  preferencesPtr can be NULL, in which case default parameters are selected.
+ * @return : number of header bytes written into dstBuffer,
+ *           or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    const LZ4F_CDict* const noDictionary = NULL;
+    return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                         noDictionary, preferencesPtr);
+}
+
+
+/* LZ4F_compressBound() :
+ * @return minimum capacity of dstBuffer for a given srcSize to handle worst case scenario.
+ *  preferencesPtr is optional : when NULL, the bound covers the worst case scenario.
+ *  With autoFlush enabled, the bound is computed with 0 as last argument of
+ *  LZ4F_compressBound_internal() ; otherwise (size_t)-1 is passed.
+ *  This function cannot fail.
+ */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    size_t const alreadyBuffered = (preferencesPtr && preferencesPtr->autoFlush) ? 0 : (size_t)-1;
+    return LZ4F_compressBound_internal(srcSize, preferencesPtr, alreadyBuffered);
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict); /* common signature of the block compressors handed to LZ4F_makeBlock(), which treats a 0 return value as "compression failed" */
+
+
+/*! LZ4F_makeBlock():
+ *  Produces one complete block into `dst` : block header, payload, optional checksum.
+ *  The payload is the output of `compress` ; when `compress` returns 0, the source is
+ *  stored raw instead and the header carries LZ4F_BLOCKUNCOMPRESSED_FLAG.
+ *  When `crcFlag` is set, an XXH32 of the payload follows it.
+ *  assumption : dst buffer capacity is >= BHSize + srcSize + crcSize
+ * @return : total number of bytes written into `dst`
+ */
+static size_t LZ4F_makeBlock(void* dst,
+                             const void* src, size_t srcSize,
+                             compressFunc_t compress, void* lz4ctx, int level,
+                             const LZ4F_CDict* cdict,
+                             LZ4F_blockChecksum_t crcFlag)
+{
+    BYTE* const blockHeader = (BYTE*)dst;
+    BYTE* const payload = blockHeader + BHSize;
+    /* compressed output is only accepted if it is strictly smaller than the source */
+    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)payload,
+                              (int)(srcSize), (int)(srcSize-1),
+                              level, cdict);
+    if (cSize != 0) {
+        LZ4F_writeLE32(blockHeader, cSize);
+    } else {  /* compression failed */
+        DEBUGLOG(5, "LZ4F_makeBlock: compression failed, creating a raw block (size %u)", (U32)srcSize);
+        cSize = (U32)srcSize;
+        LZ4F_writeLE32(blockHeader, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
+        memcpy(payload, src, srcSize);
+    }
+    if (crcFlag) {
+        U32 const crc32 = XXH32(payload, cSize, 0);  /* checksum of compressed data */
+        LZ4F_writeLE32(payload + cSize, crc32);
+    }
+    return BHSize + cSize + ((U32)crcFlag)*BFSize;
+}
+
+
+/* LZ4F_compressBlock() :
+ * Fast-mode compressor for independent blocks. The stream is (re)initialized for every block.
+ * A negative `level` is turned into an acceleration factor of (-level + 1). */
+static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    int acceleration = 1;
+    if (level < 0) acceleration = -level + 1;
+
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict == NULL) {
+        return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, acceleration);
+    }
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
+}
+
+/* LZ4F_compressBlock_continue() :
+ * Fast-mode compressor for linked blocks : continues the stream that was initialized
+ * once at the beginning of the frame, hence `cdict` is not used here. */
+static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4_stream_t* const fastStream = (LZ4_stream_t*)ctx;
+    int acceleration = 1;
+    if (level < 0) acceleration = -level + 1;
+    (void)cdict;  /* init once at beginning of frame */
+    return LZ4_compress_fast_continue(fastStream, src, dst, srcSize, dstCapacity, acceleration);
+}
+
+/* LZ4F_compressBlockHC() :
+ * High-compression compressor for independent blocks. The stream is (re)initialized for every block. */
+static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict == NULL) {
+        return LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level);
+    }
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+}
+
+/* LZ4F_compressBlockHC_continue() :
+ * High-compression compressor for linked blocks : continues the stream that was initialized
+ * once at the beginning of the frame, hence `level` and `cdict` are not used here. */
+static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ctx;
+    (void)level;  /* init once at beginning of frame */
+    (void)cdict;
+    return LZ4_compress_HC_continue(hcStream, src, dst, srcSize, dstCapacity);
+}
+
+/* LZ4F_selectCompression() :
+ * Picks the block compressor matching the compression level (fast below LZ4HC_CLEVEL_MIN, HC otherwise)
+ * and the block mode (independent blocks, or the *_continue variant for any other mode). */
+static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level)
+{
+    int const independent = (blockMode == LZ4F_blockIndependent);
+    if (level < LZ4HC_CLEVEL_MIN) {
+        return independent ? LZ4F_compressBlock : LZ4F_compressBlock_continue;
+    }
+    return independent ? LZ4F_compressBlockHC : LZ4F_compressBlockHC_continue;
+}
+
+/* LZ4F_localSaveDict() :
+ * Saves up to 64 KB of stream history at the start of tmpBuff,
+ * using the LZ4 or LZ4HC variant according to the compression level.
+ * @return : value returned by LZ4_saveDict() / LZ4_saveDictHC() */
+static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
+{
+    char* const safeBuffer = (char*)(cctxPtr->tmpBuff);
+    if (cctxPtr->prefs.compressionLevel >= LZ4HC_CLEVEL_MIN) {
+        return LZ4_saveDictHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), safeBuffer, 64 KB);
+    }
+    return LZ4_saveDict((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), safeBuffer, 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus; /* used by LZ4F_compressUpdate() to record where the most recently compressed block was read from (none yet / internal tmp buffer / caller's source buffer) */
+
+/*! LZ4F_compressUpdate() :
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * dstBuffer MUST be >= LZ4F_compressBound(srcSize, preferencesPtr).
+ * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The context must be in stage 1 (set by LZ4F_compressBegin_usingCDict()), otherwise LZ4F_ERROR_GENERIC is returned.
+ * Input is handled in this order :
+ * - complete a partially filled tmp block (and compress it once full) ;
+ * - compress every full block directly from srcBuffer ;
+ * - with autoFlush, compress the remainder too ; otherwise copy the remainder into the tmp buffer.
+ * In blockLinked mode, the dictionary is preserved when the last block was read from srcBuffer.
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+ LZ4F_compressOptions_t cOptionsNull;
+ size_t const blockSize = cctxPtr->maxBlockSize;
+ const BYTE* srcPtr = (const BYTE*)srcBuffer;
+ const BYTE* const srcEnd = srcPtr + srcSize;
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* dstPtr = dstStart;
+ LZ4F_lastBlockStatus lastBlockCompressed = notDone; /* records where the most recent compressed block was read from */
+ compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+ DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize);
+
+ if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
+ if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize))
+ return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+ MEM_INIT(&cOptionsNull, 0, sizeof(cOptionsNull));
+ if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; /* NULL options behave like all-zero options */
+
+ /* complete tmp buffer */
+ if (cctxPtr->tmpInSize > 0) { /* some data already within tmp buffer */
+ size_t const sizeToCopy = blockSize - cctxPtr->tmpInSize;
+ if (sizeToCopy > srcSize) {
+ /* add src to tmpIn buffer */
+ memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+ srcPtr = srcEnd;
+ cctxPtr->tmpInSize += srcSize;
+ /* still needs some CRC */
+ } else {
+ /* complete tmpIn block and then compress it */
+ lastBlockCompressed = fromTmpBuffer;
+ memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+ srcPtr += sizeToCopy;
+
+ dstPtr += LZ4F_makeBlock(dstPtr,
+ cctxPtr->tmpIn, blockSize,
+ compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+ cctxPtr->cdict,
+ cctxPtr->prefs.frameInfo.blockChecksumFlag);
+
+ if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize; /* linked mode : later input is written after the block just compressed */
+ cctxPtr->tmpInSize = 0;
+ }
+ }
+
+ while ((size_t)(srcEnd - srcPtr) >= blockSize) {
+ /* compress full blocks */
+ lastBlockCompressed = fromSrcBuffer;
+ dstPtr += LZ4F_makeBlock(dstPtr,
+ srcPtr, blockSize,
+ compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+ cctxPtr->cdict,
+ cctxPtr->prefs.frameInfo.blockChecksumFlag);
+ srcPtr += blockSize;
+ }
+
+ if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
+ /* compress remaining input < blockSize */
+ lastBlockCompressed = fromSrcBuffer;
+ dstPtr += LZ4F_makeBlock(dstPtr,
+ srcPtr, (size_t)(srcEnd - srcPtr),
+ compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+ cctxPtr->cdict,
+ cctxPtr->prefs.frameInfo.blockChecksumFlag);
+ srcPtr = srcEnd;
+ }
+
+ /* preserve dictionary if necessary */
+ if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) {
+ if (compressOptionsPtr->stableSrc) {
+ cctxPtr->tmpIn = cctxPtr->tmpBuff; /* stableSrc set : nothing is copied, tmpIn simply restarts at the beginning of tmpBuff */
+ } else {
+ int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+ if (realDictSize==0) return err0r(LZ4F_ERROR_GENERIC);
+ cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; /* new input goes right after the saved dictionary */
+ }
+ }
+
+ /* keep tmpIn within limits */
+ if (!(cctxPtr->prefs.autoFlush) &&
+ (cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */
+ {
+ int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+ cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+ }
+
+ /* some input data left, necessarily < blockSize */
+ if (srcPtr < srcEnd) {
+ /* fill tmp buffer */
+ size_t const sizeToCopy = (size_t)(srcEnd - srcPtr);
+ memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+ cctxPtr->tmpInSize = sizeToCopy;
+ }
+
+ if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+ (void)XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize); /* content checksum covers the whole input of this call, buffered or not */
+
+ cctxPtr->totalInSize += srcSize;
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_flush() :
+ * When compressed data must be sent immediately, without waiting for a block to be filled,
+ * invoke LZ4F_flush(), which will immediately compress any remaining data stored within LZ4F_cctx.
+ * The result of the function is the number of bytes written into dstBuffer.
+ * It can be zero, this means there was no data left within LZ4F_cctx.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ * LZ4F_compressOptions_t* is optional. NULL is a valid argument.
+ * note : the "nothing to flush" test is performed before the stage test,
+ * so 0 is returned whenever the tmp buffer is empty, whatever the stage.
+ */
+size_t LZ4F_flush(LZ4F_cctx* cctxPtr,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* dstPtr = dstStart;
+ compressFunc_t compress;
+
+ if (cctxPtr->tmpInSize == 0) return 0; /* nothing to flush */
+ if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
+ if (dstCapacity < (cctxPtr->tmpInSize + BHSize + BFSize)) /* room for block header + payload stored raw + block checksum */
+ return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+ (void)compressOptionsPtr; /* not yet useful */
+
+ /* select compression function */
+ compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+ /* compress tmp buffer */
+ dstPtr += LZ4F_makeBlock(dstPtr,
+ cctxPtr->tmpIn, cctxPtr->tmpInSize,
+ compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+ cctxPtr->cdict,
+ cctxPtr->prefs.frameInfo.blockChecksumFlag);
+ assert(((void)"flush overflows dstBuffer!", (size_t)(dstPtr - dstStart) <= dstCapacity));
+
+ if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked)
+ cctxPtr->tmpIn += cctxPtr->tmpInSize; /* linked mode : later input is written after the data just compressed */
+ cctxPtr->tmpInSize = 0;
+
+ /* keep tmpIn within limits */
+ if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) { /* necessarily LZ4F_blockLinked */
+ int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+ cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+ }
+
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressEnd() :
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4F_flush())
+ * but also properly finalize the frame, with an endMark and an (optional) checksum.
+ * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * When a content size was declared in the preferences, it is compared with the total amount of input received ;
+ * a mismatch returns LZ4F_ERROR_frameSize_wrong (the endMark and checksum have already been written at that point).
+ * @return: the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ * The context can then be used again to compress a new frame, starting with LZ4F_compressBegin().
+ */
+size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+ BYTE* const dstStart = (BYTE*)dstBuffer;
+ BYTE* dstPtr = dstStart;
+
+ size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
+ DEBUGLOG(5,"LZ4F_compressEnd: dstCapacity=%u", (unsigned)dstCapacity);
+ if (LZ4F_isError(flushSize)) return flushSize;
+ dstPtr += flushSize;
+
+ assert(flushSize <= dstCapacity);
+ dstCapacity -= flushSize; /* from here on, dstCapacity is the room left after the flushed block */
+
+ if (dstCapacity < 4) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+ LZ4F_writeLE32(dstPtr, 0);
+ dstPtr += 4; /* endMark */
+
+ if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
+ U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
+ if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); /* 4 bytes endMark + 4 bytes checksum */
+ DEBUGLOG(5,"Writing 32-bit content checksum");
+ LZ4F_writeLE32(dstPtr, xxh);
+ dstPtr+=4; /* content Checksum */
+ }
+
+ cctxPtr->cStage = 0; /* state is now re-usable (with identical preferences) */
+ cctxPtr->maxBufferSize = 0; /* reuse HC context */
+
+ if (cctxPtr->prefs.frameInfo.contentSize) {
+ if (cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize) /* note : cStage and maxBufferSize were already reset above, even when this check fails */
+ return err0r(LZ4F_ERROR_frameSize_wrong);
+ }
+
+ return (size_t)(dstPtr - dstStart);
+}
+
+
+/*-***************************************************
+* Frame Decompression
+*****************************************************/
+
+typedef enum { /* stages of the frame decoder ; LZ4F_getFrameInfo() relies on dstage_getFrameHeader < dstage_storeFrameHeader and treats any later stage as "frame header already decoded" */
+ dstage_getFrameHeader=0, dstage_storeFrameHeader,
+ dstage_init,
+ dstage_getBlockHeader, dstage_storeBlockHeader,
+ dstage_copyDirect, dstage_getBlockChecksum,
+ dstage_getCBlock, dstage_storeCBlock,
+ dstage_flushOut,
+ dstage_getSuffix, dstage_storeSuffix,
+ dstage_getSFrameSize, dstage_storeSFrameSize,
+ dstage_skipSkippable
+} dStage_t;
+
+struct LZ4F_dctx_s { /* decompression context, created by LZ4F_createDecompressionContext() */
+ LZ4F_frameInfo_t frameInfo; /* frame parameters, filled by LZ4F_decodeHeader() */
+ U32 version; /* version number given at creation */
+ dStage_t dStage; /* current stage of the decoder */
+ U64 frameRemainingSize; /* initialized to the declared content size when the frame header carries one */
+ size_t maxBlockSize; /* LZ4F_getBlockSize() of the frame's blockSizeID */
+ size_t maxBufferSize;
+ BYTE* tmpIn; /* owned buffer, released by LZ4F_freeDecompressionContext() */
+ size_t tmpInSize; /* while a frame header is incomplete : number of header bytes received so far */
+ size_t tmpInTarget; /* while a frame header is incomplete : number of header bytes expected */
+ BYTE* tmpOutBuffer; /* owned buffer, released by LZ4F_freeDecompressionContext() ; LZ4F_updateDict() may keep history in it */
+ const BYTE* dict; /* start of current history, maintained by LZ4F_updateDict() */
+ size_t dictSize; /* size of current history ; reset to 0 by LZ4F_resetDecompressionContext() */
+ BYTE* tmpOut;
+ size_t tmpOutSize;
+ size_t tmpOutStart;
+ XXH32_state_t xxh;
+ XXH32_state_t blockChecksum;
+ BYTE header[LZ4F_HEADER_SIZE_MAX]; /* staging area where an incomplete frame header is accumulated */
+}; /* typedef'd to LZ4F_dctx in lz4frame.h */
+
+
+/*! LZ4F_createDecompressionContext() :
+ *  Allocates a zero-initialized decompression context and stores `versionNumber` in it.
+ *  *LZ4F_decompressionContextPtr receives the new context, or NULL when allocation fails.
+ *  The context can later be released using LZ4F_freeDecompressionContext().
+ * @return : LZ4F_OK_NoError, or an error code (LZ4F_ERROR_allocation_failed).
+ */
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx* const fresh = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx));
+
+    /* publish the result first : it is NULL when the allocation failed */
+    *LZ4F_decompressionContextPtr = fresh;
+    if (fresh == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+
+    fresh->version = versionNumber;
+    return LZ4F_OK_NoError;
+}
+
+/* LZ4F_freeDecompressionContext() :
+ * Releases the context and its internal buffers. Passing NULL is allowed.
+ * @return : the decoder stage the context was in when released, cast to LZ4F_errorCode_t
+ *           (LZ4F_OK_NoError for a NULL context). */
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx)
+{
+    if (dctx == NULL) return LZ4F_OK_NoError;   /* can accept NULL input, like free() */
+
+    {   LZ4F_errorCode_t const lastStage = (LZ4F_errorCode_t)dctx->dStage;
+        FREEMEM(dctx->tmpIn);
+        FREEMEM(dctx->tmpOutBuffer);
+        FREEMEM(dctx);
+        return lastStage;
+    }
+}
+
+
+/*==--- Streaming Decompression operations ---==*/
+
+/* LZ4F_resetDecompressionContext() :
+ * Puts the context back into its "expecting a frame header" stage and forgets any history.
+ * Internal buffers are kept. */
+void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
+{
+    /* drop history */
+    dctx->dictSize = 0;
+    dctx->dict = NULL;
+    /* restart the state machine */
+    dctx->dStage = dstage_getFrameHeader;
+}
+
+
+/*! LZ4F_decodeHeader() :
+ * input : `src` points at the **beginning of the frame**
+ * output : set internal values of dctx, such as
+ * dctx->frameInfo and dctx->dStage.
+ * Also allocates internal buffers.
+ * Three outcomes are possible besides errors :
+ * - skippable frame detected : dStage moves to dstage_getSFrameSize or dstage_storeSFrameSize ;
+ * - header incomplete : received bytes are kept in dctx->header and dStage moves to dstage_storeFrameHeader ;
+ * - header complete and valid : frameInfo is filled and dStage moves to dstage_init.
+ * note : dctx->frameInfo is zeroed before validation, so it is modified even when an error is returned.
+ * @return : nb Bytes read from src (necessarily <= srcSize)
+ * or an error code (testable with LZ4F_isError())
+ */
+static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize)
+{
+ unsigned blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictIDFlag, blockSizeID;
+ size_t frameHeaderSize;
+ const BYTE* srcPtr = (const BYTE*)src;
+
+ DEBUGLOG(5, "LZ4F_decodeHeader");
+ /* need to decode header to get frameInfo */
+ if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete); /* minimal frame header size */
+ MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
+
+ /* special case : skippable frames */
+ if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
+ dctx->frameInfo.frameType = LZ4F_skippableFrame;
+ if (src == (void*)(dctx->header)) { /* input is the context's own header buffer : all of it is consumed and 8 bytes are targeted in store mode */
+ dctx->tmpInSize = srcSize;
+ dctx->tmpInTarget = 8;
+ dctx->dStage = dstage_storeSFrameSize;
+ return srcSize;
+ } else { /* input is the caller's buffer : only the 4-byte magic number is consumed */
+ dctx->dStage = dstage_getSFrameSize;
+ return 4;
+ }
+ }
+
+ /* control magic number */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) {
+ DEBUGLOG(4, "frame header error : unknown magic number");
+ return err0r(LZ4F_ERROR_frameType_unknown);
+ }
+#endif
+ dctx->frameInfo.frameType = LZ4F_frame;
+
+ /* Flags */
+ { U32 const FLG = srcPtr[4];
+ U32 const version = (FLG>>6) & _2BITS;
+ blockChecksumFlag = (FLG>>4) & _1BIT;
+ blockMode = (FLG>>5) & _1BIT;
+ contentSizeFlag = (FLG>>3) & _1BIT;
+ contentChecksumFlag = (FLG>>2) & _1BIT;
+ dictIDFlag = FLG & _1BIT;
+ /* validate */
+ if (((FLG>>1)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */
+ if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong); /* Version Number, only supported value */
+ }
+
+ /* Frame Header Size */
+ frameHeaderSize = minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0);
+
+ if (srcSize < frameHeaderSize) {
+ /* not enough input to fully decode frame header */
+ if (srcPtr != dctx->header)
+ memcpy(dctx->header, srcPtr, srcSize);
+ dctx->tmpInSize = srcSize;
+ dctx->tmpInTarget = frameHeaderSize;
+ dctx->dStage = dstage_storeFrameHeader;
+ return srcSize;
+ }
+
+ { U32 const BD = srcPtr[5];
+ blockSizeID = (BD>>4) & _3BITS;
+ /* validate */
+ if (((BD>>7)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */
+ if (blockSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid); /* 4-7 only supported values for the time being */
+ if (((BD>>0)&_4BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bits */
+ }
+
+ /* check header */
+ assert(frameHeaderSize > 5);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ { BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5); /* covers the bytes after the magic number, checksum byte excluded */
+ if (HC != srcPtr[frameHeaderSize-1])
+ return err0r(LZ4F_ERROR_headerChecksum_invalid);
+ }
+#endif
+
+ /* save */
+ dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
+ dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)blockChecksumFlag;
+ dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
+ dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
+ dctx->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
+ if (contentSizeFlag)
+ dctx->frameRemainingSize =
+ dctx->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
+ if (dictIDFlag)
+ dctx->frameInfo.dictID = LZ4F_readLE32(srcPtr + frameHeaderSize - 5); /* dictID occupies the 4 bytes right before the checksum byte */
+
+ dctx->dStage = dstage_init;
+
+ return frameHeaderSize;
+}
+
+
+/*! LZ4F_headerSize() :
+ *  Determines the size of a frame header from its first bytes.
+ *  At least LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH bytes must be provided.
+ *  Skippable frames always report a header of 8 bytes.
+ * @return : size of frame header
+ *           or an error code, which can be tested using LZ4F_isError()
+ */
+size_t LZ4F_headerSize(const void* src, size_t srcSize)
+{
+    if (src == NULL) return err0r(LZ4F_ERROR_srcPtr_wrong);
+
+    /* minimal srcSize to determine header size */
+    if (srcSize < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH)
+        return err0r(LZ4F_ERROR_frameHeader_incomplete);
+
+    {   U32 const magic = LZ4F_readLE32(src);
+
+        /* special case : skippable frames */
+        if ((magic & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START)
+            return 8;
+
+        /* control magic number */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (magic != LZ4F_MAGICNUMBER)
+            return err0r(LZ4F_ERROR_frameType_unknown);
+#endif
+    }
+
+    /* Frame Header Size : minimal header, plus the optional fields announced by the FLG byte */
+    {   BYTE const FLG = ((const BYTE*)src)[4];
+        U32 const contentSizeFlag = (FLG>>3) & _1BIT;
+        U32 const dictIDFlag = FLG & _1BIT;
+        size_t headerSize = minFHSize;
+        if (contentSizeFlag) headerSize += 8;
+        if (dictIDFlag) headerSize += 4;
+        return headerSize;
+    }
+}
+
+/*! LZ4F_getFrameInfo() :
+ *  Extracts frame parameters (max blockSize, frame checksum, etc.) into `*frameInfoPtr`.
+ *  Usage is optional. Objective is to provide relevant information for allocation purposes.
+ *  Three situations are distinguished :
+ *  - header already decoded : no input is read (*srcSizePtr is set to 0), parameters are copied from dctx,
+ *    and the result is the value returned by LZ4F_decompress() called without input nor output ;
+ *  - decoding stopped in the middle of a frame header : automatic failure
+ *    with LZ4F_ERROR_frameDecoding_alreadyStarted ;
+ *  - beginning of a new frame : the header is decoded from `srcBuffer`, which must contain the complete header.
+ *    A header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
+ *  The number of bytes consumed from srcBuffer is written into *srcSizePtr (necessarily <= original value,
+ *  and 0 in case of error). Decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError()
+ *  note : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                   LZ4F_frameInfo_t* frameInfoPtr,
+                                   const void* srcBuffer, size_t* srcSizePtr)
+{
+    LZ4F_STATIC_ASSERT(dstage_getFrameHeader < dstage_storeFrameHeader);
+
+    /* case 1 : frameInfo already decoded */
+    if (dctx->dStage > dstage_storeFrameHeader) {
+        size_t dstUnused = 0, srcUnused = 0;
+        *srcSizePtr = 0;
+        *frameInfoPtr = dctx->frameInfo;
+        /* returns : recommended nb of bytes for LZ4F_decompress() */
+        return LZ4F_decompress(dctx, NULL, &dstUnused, NULL, &srcUnused, NULL);
+    }
+
+    /* case 2 : frame decoding already started, in the middle of header => automatic fail */
+    if (dctx->dStage == dstage_storeFrameHeader) {
+        *srcSizePtr = 0;
+        return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted);
+    }
+
+    /* case 3 : beginning of a new frame */
+    {   size_t const headerSize = LZ4F_headerSize(srcBuffer, *srcSizePtr);
+        size_t outcome;
+
+        if (LZ4F_isError(headerSize)) {
+            *srcSizePtr = 0;
+            return headerSize;
+        }
+        if (*srcSizePtr < headerSize) {
+            *srcSizePtr = 0;
+            return err0r(LZ4F_ERROR_frameHeader_incomplete);
+        }
+
+        outcome = LZ4F_decodeHeader(dctx, srcBuffer, headerSize);
+        if (LZ4F_isError(outcome)) {
+            *srcSizePtr = 0;
+        } else {
+            *srcSizePtr = outcome;
+            outcome = BHSize;   /* block header size */
+        }
+        *frameInfoPtr = dctx->frameInfo;
+        return outcome;
+    }
+}
+
+
+/* LZ4F_updateDict() :
+ * only used for LZ4F_blockLinked mode
+ * Condition : dstPtr != NULL
+ * Updates dctx->dict / dctx->dictSize after `dstSize` bytes were produced at `dstPtr`.
+ * `dstBufferStart` is the start of the destination buffer that contains `dstPtr`.
+ * `withinTmp` tells that the new data is also present within tmpOutBuffer (see expectation below).
+ * Cases are tried in this order :
+ * 1. the new data directly follows the current dictionary : just extend it ;
+ * 2. dstBuffer holds at least 64 KB up to the end of the new data : reference dstBuffer ;
+ * 3. withinTmp, dictionary already in tmpOutBuffer : just extend it ;
+ * 4. withinTmp, dictionary elsewhere : copy its relevant end in front of tmpOut ;
+ * 5. dictionary in tmpOutBuffer, data elsewhere : append the data (keeping only recent history if the buffer is too small) ;
+ * 6. otherwise : join the end of the dictionary and the new data at the start of tmpOutBuffer.
+ */
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+ const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+ unsigned withinTmp)
+{
+ assert(dstPtr != NULL);
+ if (dctx->dictSize==0) {
+ dctx->dict = (const BYTE*)dstPtr; /* priority to prefix mode */
+ }
+ assert(dctx->dict != NULL);
+
+ if (dctx->dict + dctx->dictSize == dstPtr) { /* prefix mode, everything within dstBuffer */
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ assert(dstPtr >= dstBufferStart);
+ if ((size_t)(dstPtr - dstBufferStart) + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */
+ dctx->dict = (const BYTE*)dstBufferStart;
+ dctx->dictSize = (size_t)(dstPtr - dstBufferStart) + dstSize;
+ return;
+ }
+
+ assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOutBuffer */
+ assert(dctx->tmpOutBuffer != NULL);
+
+ if (withinTmp && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */
+ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
+ size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer); /* room available in front of tmpOut */
+ size_t copySize = 64 KB - dctx->tmpOutSize;
+ const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+ if (dctx->tmpOutSize > 64 KB) copySize = 0; /* the subtraction above wrapped around : nothing to copy */
+ if (copySize > preserveSize) copySize = preserveSize;
+
+ memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize;
+ return;
+ }
+
+ if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */
+ if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */
+ size_t const preserveSize = 64 KB - dstSize;
+ memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+ dctx->dictSize = preserveSize;
+ }
+ memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize);
+ dctx->dictSize += dstSize;
+ return;
+ }
+
+ /* join dict & dest into tmp */
+ { size_t preserveSize = 64 KB - dstSize;
+ if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize; /* cannot preserve more than the existing dictionary */
+ memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+ memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+ dctx->dict = dctx->tmpOutBuffer;
+ dctx->dictSize = preserveSize + dstSize;
+ }
+}
+
+
+
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate compressed data in srcBuffer.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ *  into dstBuffer of capacity *dstSizePtr.
+ *
+ *  The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ *  The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ *  If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ *  Remaining data will have to be presented again in a subsequent invocation.
+ *
+ *  The function result is a hint of the better srcSize to use for next call to LZ4F_decompress.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ *  Note that this is just a hint, and it's always possible to use any srcSize value.
+ *  When a frame is fully decoded, @return will be 0.
+ *  If decompression failed, @return is an error code which can be tested using LZ4F_isError().
+ *
+ *  dctx : decompression context, must not be NULL.
+ *  dstBuffer : may be NULL, in which case *dstSizePtr must be 0.
+ *  decompressOptionsPtr : optional; NULL is replaced by zeroed options.
+ *
+ *  Note : when an error code is returned, *srcSizePtr and *dstSizePtr have been reset to 0.
+ *  Note : a block which fails to decode is reported as LZ4F_ERROR_decompressionFailed,
+ *         whether it was decoded directly into dstBuffer or through the internal tmpOut buffer.
+ */
+size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    LZ4F_decompressOptions_t optionsNull;
+    const BYTE* const srcStart = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcStart + *srcSizePtr;
+    const BYTE* srcPtr = srcStart;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* const dstEnd = dstStart ? dstStart + *dstSizePtr : NULL;
+    BYTE* dstPtr = dstStart;
+    const BYTE* selectedIn = NULL;   /* where the current header / block / checksum is read from : src, or an internal buffer */
+    unsigned doAnotherStage = 1;
+    size_t nextSrcSizeHint = 1;
+
+
+    DEBUGLOG(5, "LZ4F_decompress : %p,%u => %p,%u",
+            srcBuffer, (unsigned)*srcSizePtr, dstBuffer, (unsigned)*dstSizePtr);
+    if (dstBuffer == NULL) assert(*dstSizePtr == 0);
+    MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
+    if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+    /* both sizes are only given their final value at the very end :
+     * any early `return` (error, or 0-size input) therefore reports 0 bytes consumed / produced */
+    *srcSizePtr = 0;
+    *dstSizePtr = 0;
+    assert(dctx != NULL);
+
+    /* behaves as a state machine */
+
+    while (doAnotherStage) {
+
+        switch(dctx->dStage)
+        {
+
+        case dstage_getFrameHeader:
+            DEBUGLOG(6, "dstage_getFrameHeader");
+            if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
+                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr));  /* will update dStage appropriately */
+                if (LZ4F_isError(hSize)) return hSize;
+                srcPtr += hSize;
+                break;
+            }
+            dctx->tmpInSize = 0;
+            if (srcEnd-srcPtr == 0) return minFHSize;   /* 0-size input */
+            dctx->tmpInTarget = minFHSize;   /* minimum size to decode header */
+            dctx->dStage = dstage_storeFrameHeader;
+            /* fall-through */
+
+        case dstage_storeFrameHeader:
+            DEBUGLOG(6, "dstage_storeFrameHeader");
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+            }
+            if (dctx->tmpInSize < dctx->tmpInTarget) {
+                nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
+                doAnotherStage = 0;   /* not enough src data, ask for some more */
+                break;
+            }
+            {   size_t const hSize = LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget);  /* will update dStage appropriately */
+                if (LZ4F_isError(hSize)) return hSize;
+            }
+            break;
+
+        case dstage_init:
+            DEBUGLOG(6, "dstage_init");
+            if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0);
+            /* internal buffers allocation */
+            {   size_t const bufferNeeded = dctx->maxBlockSize
+                    + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) ? 128 KB : 0);
+                if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
+                    dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
+                    FREEMEM(dctx->tmpIn);
+                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + BFSize /* block checksum */);
+                    if (dctx->tmpIn == NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    FREEMEM(dctx->tmpOutBuffer);
+                    dctx->tmpOutBuffer= (BYTE*)ALLOC(bufferNeeded);
+                    if (dctx->tmpOutBuffer== NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    dctx->maxBufferSize = bufferNeeded;
+            }   }
+            dctx->tmpInSize = 0;
+            dctx->tmpInTarget = 0;
+            dctx->tmpOut = dctx->tmpOutBuffer;
+            dctx->tmpOutStart = 0;
+            dctx->tmpOutSize = 0;
+
+            dctx->dStage = dstage_getBlockHeader;
+            /* fall-through */
+
+        case dstage_getBlockHeader:
+            if ((size_t)(srcEnd - srcPtr) >= BHSize) {
+                selectedIn = srcPtr;
+                srcPtr += BHSize;
+            } else {
+                /* not enough input to read cBlockSize field */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeBlockHeader;
+            }
+
+            /* the compound statement below is shared : it is the body of this `if`
+             * and also the direct entry point of the dstage_storeBlockHeader case */
+            if (dctx->dStage == dstage_storeBlockHeader)   /* can be skipped */
+        case dstage_storeBlockHeader:
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = BHSize - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+
+                if (dctx->tmpInSize < BHSize) {   /* not enough input for cBlockSize */
+                    nextSrcSizeHint = BHSize - dctx->tmpInSize;
+                    doAnotherStage  = 0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeBlockHeader) */
+
+        /* decode block header */
+            {   U32 const blockHeader = LZ4F_readLE32(selectedIn);
+                size_t const nextCBlockSize = blockHeader & 0x7FFFFFFFU;
+                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize;
+                if (blockHeader==0) {  /* frameEnd signal, no more block */
+                    DEBUGLOG(5, "end of frame");
+                    dctx->dStage = dstage_getSuffix;
+                    break;
+                }
+                if (nextCBlockSize > dctx->maxBlockSize) {
+                    return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+                }
+                if (blockHeader & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+                    /* next block is uncompressed */
+                    dctx->tmpInTarget = nextCBlockSize;
+                    DEBUGLOG(5, "next block is uncompressed (size %u)", (U32)nextCBlockSize);
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        (void)XXH32_reset(&dctx->blockChecksum, 0);
+                    }
+                    dctx->dStage = dstage_copyDirect;
+                    break;
+                }
+                /* next block is a compressed block */
+                dctx->tmpInTarget = nextCBlockSize + crcSize;
+                dctx->dStage = dstage_getCBlock;
+                if (dstPtr==dstEnd || srcPtr==srcEnd) {
+                    nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
+                    doAnotherStage = 0;
+                }
+                break;
+            }
+
+        case dstage_copyDirect:   /* uncompressed block */
+            DEBUGLOG(6, "dstage_copyDirect");
+            {   size_t sizeToCopy;
+                if (dstPtr == NULL) {
+                    sizeToCopy = 0;
+                } else {
+                    size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+                    sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
+                    memcpy(dstPtr, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                    }
+                    if (dctx->frameInfo.contentChecksumFlag)
+                        (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.contentSize)
+                        dctx->frameRemainingSize -= sizeToCopy;
+
+                    /* history management (linked blocks only)*/
+                    if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                        LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+                }   }
+
+                srcPtr += sizeToCopy;
+                dstPtr += sizeToCopy;
+                if (sizeToCopy == dctx->tmpInTarget) {   /* all done */
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        dctx->tmpInSize = 0;
+                        dctx->dStage = dstage_getBlockChecksum;
+                    } else
+                        dctx->dStage = dstage_getBlockHeader;  /* new block */
+                    break;
+                }
+                dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
+            }
+            /* remaining part of the block + optional block checksum + next block header */
+            nextSrcSizeHint = dctx->tmpInTarget
+                            + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+                            + BHSize /* next header size */;
+            doAnotherStage = 0;
+            break;
+
+        /* check block checksum for recently transferred uncompressed block */
+        case dstage_getBlockChecksum:
+            DEBUGLOG(6, "dstage_getBlockChecksum");
+            {   const void* crcSrc;
+                if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) {
+                    crcSrc = srcPtr;
+                    srcPtr += 4;
+                } else {
+                    size_t const stillToCopy = 4 - dctx->tmpInSize;
+                    size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr));
+                    memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                    dctx->tmpInSize += sizeToCopy;
+                    srcPtr += sizeToCopy;
+                    if (dctx->tmpInSize < 4) {  /* all input consumed */
+                        doAnotherStage = 0;
+                        break;
+                    }
+                    crcSrc = dctx->header;
+                }
+                {   U32 const readCRC = LZ4F_readLE32(crcSrc);
+                    U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                    DEBUGLOG(6, "compare block checksum");
+                    if (readCRC != calcCRC) {
+                        DEBUGLOG(4, "incorrect block checksum: %08X != %08X",
+                                readCRC, calcCRC);
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+                    }
+#else
+                    (void)readCRC;
+                    (void)calcCRC;
+#endif
+            }   }
+            dctx->dStage = dstage_getBlockHeader;  /* new block */
+            break;
+
+        case dstage_getCBlock:
+            DEBUGLOG(6, "dstage_getCBlock");
+            if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) {
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeCBlock;
+                break;
+            }
+            /* input large enough to read full block directly */
+            selectedIn = srcPtr;
+            srcPtr += dctx->tmpInTarget;
+
+            /* `if (0)` : when arriving from dstage_getCBlock the storing step is skipped;
+             * it only runs when the switch jumps directly to dstage_storeCBlock */
+            if (0)  /* always jump over next block */
+        case dstage_storeCBlock:
+            {   size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize;
+                size_t const inputLeft = (size_t)(srcEnd-srcPtr);
+                size_t const sizeToCopy = MIN(wantedData, inputLeft);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize)
+                                    + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+                                    + BHSize /* next header size */;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }
+
+            /* At this stage, input is large enough to decode a block */
+            if (dctx->frameInfo.blockChecksumFlag) {
+                dctx->tmpInTarget -= 4;
+                assert(selectedIn != NULL);  /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
+                {   U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
+                    U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                    if (readBlockCrc != calcBlockCrc)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+#else
+                    (void)readBlockCrc;
+                    (void)calcBlockCrc;
+#endif
+            }   }
+
+            if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
+                const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                assert(dstPtr != NULL);
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                /* enough capacity in `dst` to decompress directly there */
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dstPtr,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
+                if (decodedSize < 0)   /* decompression failed : same error code as the tmpOut path below */
+                    return err0r(LZ4F_ERROR_decompressionFailed);
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dstPtr, (size_t)decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked) {
+                    LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0);
+                }
+
+                dstPtr += decodedSize;
+                dctx->dStage = dstage_getBlockHeader;
+                break;
+            }
+
+            /* not enough place into dst : decode into tmpOut */
+            /* ensure enough place for tmpOut */
+            if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                if (dctx->dict == dctx->tmpOutBuffer) {
+                    if (dctx->dictSize > 128 KB) {
+                        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB);
+                        dctx->dictSize = 64 KB;
+                    }
+                    dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize;
+                } else {  /* dict not within tmp */
+                    size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
+                    dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
+            }   }
+
+            /* Decode block */
+            {   const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dctx->tmpOut,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
+                if (decodedSize < 0)  /* decompression failed */
+                    return err0r(LZ4F_ERROR_decompressionFailed);
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dctx->tmpOut, (size_t)decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
+                dctx->tmpOutSize = (size_t)decodedSize;
+                dctx->tmpOutStart = 0;
+                dctx->dStage = dstage_flushOut;
+            }
+            /* fall-through */
+
+        case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
+            DEBUGLOG(6, "dstage_flushOut");
+            if (dstPtr != NULL) {
+                size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+                memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
+
+                dctx->tmpOutStart += sizeToCopy;
+                dstPtr += sizeToCopy;
+            }
+            if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+                dctx->dStage = dstage_getBlockHeader;  /* get next block */
+                break;
+            }
+            /* could not flush everything : stop there, just request a block header */
+            doAnotherStage = 0;
+            nextSrcSizeHint = BHSize;
+            break;
+
+        case dstage_getSuffix:
+            if (dctx->frameRemainingSize)
+                return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
+            if (!dctx->frameInfo.contentChecksumFlag) {  /* no checksum, frame is completed */
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+            if ((srcEnd - srcPtr) < 4) {  /* not enough size for entire CRC */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeSuffix;
+            } else {
+                selectedIn = srcPtr;
+                srcPtr += 4;
+            }
+
+            if (dctx->dStage == dstage_storeSuffix)   /* can be skipped */
+        case dstage_storeSuffix:
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = 4 - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */
+                    nextSrcSizeHint = 4 - dctx->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeSuffix) */
+
+        /* case dstage_checkSuffix: */   /* no direct entry, avoid initialization risks */
+            {   U32 const readCRC = LZ4F_readLE32(selectedIn);
+                U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                if (readCRC != resultCRC)
+                    return err0r(LZ4F_ERROR_contentChecksum_invalid);
+#else
+                (void)readCRC;
+                (void)resultCRC;
+#endif
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getSFrameSize:
+            if ((srcEnd - srcPtr) >= 4) {
+                selectedIn = srcPtr;
+                srcPtr += 4;
+            } else {
+                /* not enough input to read cBlockSize field */
+                dctx->tmpInSize = 4;
+                dctx->tmpInTarget = 8;
+                dctx->dStage = dstage_storeSFrameSize;
+            }
+
+            if (dctx->dStage == dstage_storeSFrameSize)
+        case dstage_storeSFrameSize:
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+                                             (size_t)(srcEnd - srcPtr) );
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) {
+                    /* not enough input to get full sBlockSize; wait for more */
+                    nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctx->header + 4;
+            }   /* if (dctx->dStage == dstage_storeSFrameSize) */
+
+        /* case dstage_decodeSFrameSize: */   /* no direct entry */
+            {   size_t const SFrameSize = LZ4F_readLE32(selectedIn);
+                dctx->frameInfo.contentSize = SFrameSize;
+                dctx->tmpInTarget = SFrameSize;
+                dctx->dStage = dstage_skipSkippable;
+                break;
+            }
+
+        case dstage_skipSkippable:
+            {   size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr));
+                srcPtr += skipSize;
+                dctx->tmpInTarget -= skipSize;
+                doAnotherStage = 0;
+                nextSrcSizeHint = dctx->tmpInTarget;
+                if (nextSrcSizeHint) break;  /* still more to skip */
+                /* frame fully skipped : prepare context for a new frame */
+                LZ4F_resetDecompressionContext(dctx);
+                break;
+            }
+        }   /* switch (dctx->dStage) */
+    }   /* while (doAnotherStage) */
+
+    /* preserve history within tmp whenever necessary */
+    LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2);
+    if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked)  /* next block will use up to 64KB from previous ones */
+      && (dctx->dict != dctx->tmpOutBuffer)             /* dictionary is not already within tmp */
+      && (dctx->dict != NULL)                           /* dictionary exists */
+      && (!decompressOptionsPtr->stableDst)             /* cannot rely on dst data to remain there for next call */
+      && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) )  /* valid stages : [init ... getSuffix[ */
+    {
+        if (dctx->dStage == dstage_flushOut) {
+            size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);
+            size_t copySize = 64 KB - dctx->tmpOutSize;
+            const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+            if (dctx->tmpOutSize > 64 KB) copySize = 0;
+            if (copySize > preserveSize) copySize = preserveSize;
+            assert(dctx->tmpOutBuffer != NULL);
+
+            memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = preserveSize + dctx->tmpOutStart;
+        } else {
+            const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize;
+            size_t const newDictSize = MIN(dctx->dictSize, 64 KB);
+
+            memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = newDictSize;
+            dctx->tmpOut = dctx->tmpOutBuffer + newDictSize;
+        }
+    }
+
+    *srcSizePtr = (size_t)(srcPtr - srcStart);
+    *dstSizePtr = (size_t)(dstPtr - dstStart);
+    return nextSrcSizeHint;
+}
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding.
+ *
+ *  `dict` and `dictSize` are only recorded while the context has not progressed
+ *  beyond dstage_init. On later calls they are ignored and the history already
+ *  tracked within dctx is used instead.
+ *
+ *  dctx must not be NULL.
+ *  All other parameters, and @return, are those of LZ4F_decompress().
+ */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    /* dctx is dereferenced right below, before LZ4F_decompress() gets a chance to check it */
+    assert(dctx != NULL);
+    if (dctx->dStage <= dstage_init) {
+        dctx->dict = (const BYTE*)dict;
+        dctx->dictSize = dictSize;
+    }
+    return LZ4F_decompress(dctx, dstBuffer, dstSizePtr,
+                           srcBuffer, srcSizePtr,
+                           decompressOptionsPtr);
+}
diff --git a/libbutl/lz4frame.h b/libbutl/lz4frame.h
new file mode 100644
index 0000000..4573317
--- /dev/null
+++ b/libbutl/lz4frame.h
@@ -0,0 +1,623 @@
+/*
+ LZ4 auto-framing library
+ Header File
+ Copyright (C) 2011-2017, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API able to create and decode LZ4 frames
+ * conformant with specification v1.6.1 in doc/lz4_Frame_format.md .
+ * Generated frames are compatible with `lz4` CLI.
+ *
+ * LZ4F also offers streaming capabilities.
+ *
+ * lz4.h is not required when using lz4frame.h,
+ * except to extract common constants such as LZ4_VERSION_NUMBER.
+ * */
+
+#ifndef LZ4F_H_09782039843
+#define LZ4F_H_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+#include <stddef.h> /* size_t */
+
+
+/**
+ Introduction
+
+ lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md).
+ lz4frame.h provides frame compression functions that take care
+ of encoding standard metadata alongside LZ4-compressed blocks.
+*/
+
+/*-***************************************************************
+ * Compiler specifics
+ *****************************************************************/
+/* LZ4_DLL_EXPORT :
+ * Enable exporting of functions when building a Windows DLL
+ * LZ4_DLL_IMPORT :
+ * When defined to 1 (and LZ4_DLL_EXPORT is not 1), declarations are marked __declspec(dllimport)
+ * LZ4FLIB_VISIBILITY :
+ * Control library symbols visibility.
+ * Can be pre-defined by the user; otherwise it is visibility("default") with GCC-compatible compilers (__GNUC__ >= 4), and empty elsewhere.
+ * LZ4FLIB_API :
+ * Prefix of every public declaration in this header : dllexport / dllimport decoration (if requested) followed by LZ4FLIB_VISIBILITY.
+ */
+#ifndef LZ4FLIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4FLIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define LZ4FLIB_VISIBILITY
+# endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+# define LZ4FLIB_API __declspec(dllexport) LZ4FLIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+# define LZ4FLIB_API __declspec(dllimport) LZ4FLIB_VISIBILITY
+#else
+# define LZ4FLIB_API LZ4FLIB_VISIBILITY
+#endif
+
+#ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS /* user opt-out : LZ4F_DEPRECATE(x) then expands to plain x */
+# define LZ4F_DEPRECATE(x) x
+#else
+# if defined(_MSC_VER)
+# define LZ4F_DEPRECATE(x) x /* __declspec(deprecated) x - only works with C++ */
+# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
+# define LZ4F_DEPRECATE(x) x __attribute__((deprecated)) /* attribute is placed after the wrapped name */
+# else
+# define LZ4F_DEPRECATE(x) x /* no deprecation warning for this compiler */
+# endif
+#endif
+
+
+/*-************************************
+ * Error management
+ **************************************/
+typedef size_t LZ4F_errorCode_t; /* error codes travel within a size_t function result; test a result with LZ4F_isError() */
+
+LZ4FLIB_API unsigned LZ4F_isError(LZ4F_errorCode_t code); /**< tells when a function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /**< return error code string; for debugging */
+
+
+/*-************************************
+ * Frame compression types
+ ************************************* */
+/* #define LZ4F_ENABLE_OBSOLETE_ENUMS // uncomment to enable obsolete enums */
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+# define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x /* appends an extra enumerator `x` (wrapped in LZ4F_DEPRECATE) with the same value as LZ4F_x */
+#else
+# define LZ4F_OBSOLETE_ENUM(x) /* expands to nothing : only the LZ4F_-prefixed names exist */
+#endif
+
+/* The larger the block size, the (slightly) better the compression ratio,
+ * though there are diminishing returns.
+ * Larger blocks also increase memory usage on both compression and decompression sides.
+ *
+ * LZ4F_default (0) requests the default block size.
+ * The LZ4F_OBSOLETE_ENUM() entries add the unprefixed aliases (max64KB, ...)
+ * only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined; they expand to nothing otherwise.
+ */
+typedef enum {
+ LZ4F_default=0,
+ LZ4F_max64KB=4,
+ LZ4F_max256KB=5,
+ LZ4F_max1MB=6,
+ LZ4F_max4MB=7
+ LZ4F_OBSOLETE_ENUM(max64KB)
+ LZ4F_OBSOLETE_ENUM(max256KB)
+ LZ4F_OBSOLETE_ENUM(max1MB)
+ LZ4F_OBSOLETE_ENUM(max4MB)
+} LZ4F_blockSizeID_t;
+
+/* Linked blocks sharply reduce inefficiencies when using small blocks,
+ * they compress better.
+ * However, some LZ4 decoders are only compatible with independent blocks.
+ * LZ4F_blockLinked is the zero value, hence the default of a zero-initialized LZ4F_frameInfo_t.
+ * The LZ4F_OBSOLETE_ENUM() entries only exist when LZ4F_ENABLE_OBSOLETE_ENUMS is defined. */
+typedef enum {
+ LZ4F_blockLinked=0,
+ LZ4F_blockIndependent
+ LZ4F_OBSOLETE_ENUM(blockLinked)
+ LZ4F_OBSOLETE_ENUM(blockIndependent)
+} LZ4F_blockMode_t;
+
+typedef enum { /* values of LZ4F_frameInfo_t.contentChecksumFlag */
+ LZ4F_noContentChecksum=0, /* disabled (default) */
+ LZ4F_contentChecksumEnabled /* frame terminated with a 32-bit checksum of the decompressed data */
+ LZ4F_OBSOLETE_ENUM(noContentChecksum) /* unprefixed aliases : only with LZ4F_ENABLE_OBSOLETE_ENUMS */
+ LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+
+typedef enum { /* values of LZ4F_frameInfo_t.blockChecksumFlag */
+ LZ4F_noBlockChecksum=0, /* disabled (default) */
+ LZ4F_blockChecksumEnabled /* each block followed by a checksum of the block's compressed data */
+} LZ4F_blockChecksum_t;
+
+typedef enum { /* values of LZ4F_frameInfo_t.frameType, which is a read-only field */
+ LZ4F_frame=0, /* zero value, used by LZ4F_INIT_FRAMEINFO */
+ LZ4F_skippableFrame
+ LZ4F_OBSOLETE_ENUM(skippableFrame) /* unprefixed alias : only with LZ4F_ENABLE_OBSOLETE_ENUMS */
+} LZ4F_frameType_t;
+
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS /* unprefixed type names, companions of the unprefixed enumerators */
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+/*! LZ4F_frameInfo_t :
+ * makes it possible to set or read frame parameters.
+ * Structure must first be initialized to 0, using memset() or LZ4F_INIT_FRAMEINFO,
+ * setting all parameters to default.
+ * It's then possible to selectively update some parameters.
+ * Note : LZ4F_INIT_FRAMEINFO lists its initializers in member declaration order. */
+typedef struct {
+ LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB; 0 == default */
+ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */
+ LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */
+ LZ4F_frameType_t frameType; /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+ unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */
+ unsigned dictID; /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */
+ LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
+} LZ4F_frameInfo_t;
+
+#define LZ4F_INIT_FRAMEINFO { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum } /* v1.8.3+ */
+
+/*! LZ4F_preferences_t :
+ * makes it possible to supply advanced compression instructions to streaming interface.
+ * Structure must first be initialized to 0, using memset() or LZ4F_INIT_PREFERENCES,
+ * setting all parameters to default.
+ * All reserved fields must be set to zero.
+ * Note : LZ4F_INIT_PREFERENCES lists its initializers in member declaration order. */
+typedef struct {
+ LZ4F_frameInfo_t frameInfo;
+ int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */
+ unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */
+ unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */
+ unsigned reserved[3]; /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+#define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0u, 0u, { 0u, 0u, 0u } } /* v1.8.3+ */
+
+
+/*-*********************************
+* Simple compression function
+***********************************/
+
+LZ4FLIB_API int LZ4F_compressionLevel_max(void); /* v1.8.0+ */
+
+/*! LZ4F_compressFrameBound() :
+ * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+ * Note : this result is only usable with LZ4F_compressFrame().
+ * It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
+ */
+LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+/*! LZ4F_compressFrame() :
+ * Compress an entire srcBuffer into a valid LZ4 frame.
+ * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_preferences_t* preferencesPtr);
+
+
+/*-***********************************
+* Advanced compression functions
+*************************************/
+typedef struct LZ4F_cctx_s LZ4F_cctx; /* incomplete type */
+typedef LZ4F_cctx* LZ4F_compressionContext_t; /* for compatibility with previous API version */
+
+typedef struct {
+ unsigned stableSrc; /* 1 == src content will remain present on future calls to LZ4F_compress(); skip copying src content within tmp buffer */
+ unsigned reserved[3]; /* NOTE(review): presumably must be zero, like LZ4F_preferences_t.reserved - confirm */
+} LZ4F_compressOptions_t;
+
+/*--- Resource Management ---*/
+
+#define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */
+LZ4FLIB_API unsigned LZ4F_getVersion(void);
+
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
+ * The function will provide a pointer to a fully allocated LZ4F_cctx object.
+ * If @return != zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
+
+
+/*---- Compression ----*/
+
+#define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected parameters */
+#define LZ4F_HEADER_SIZE_MAX 19 /* upper bound of the frame header size; LZ4F_compressBegin() requires at least this much dstCapacity */
+
+/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
+#define LZ4F_BLOCK_HEADER_SIZE 4
+
+/* Size in bytes of a block checksum footer in little-endian format. */
+#define LZ4F_BLOCK_CHECKSUM_SIZE 4
+
+/* Size in bytes of the content checksum. */
+#define LZ4F_CONTENT_CHECKSUM_SIZE 4
+
+/*! LZ4F_compressBegin() :
+ * will write the frame header into dstBuffer.
+ * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * @return : number of bytes written into dstBuffer for the header
+ * or an error code (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressBound() :
+ * Provides minimum dstCapacity required to guarantee success of
+ * LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario.
+ * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead.
+ * Note that the result is only valid for a single invocation of LZ4F_compressUpdate().
+ * When invoking LZ4F_compressUpdate() multiple times,
+ * if the output buffer is gradually filled up instead of emptied and re-used from its start,
+ * one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound().
+ * @return is always the same for a srcSize and prefsPtr.
+ * prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+ * tech details :
+ * @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ * It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
+ * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+ */
+LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressUpdate() :
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ * This value is provided by LZ4F_compressBound().
+ * If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+ * LZ4F_compressUpdate() doesn't guarantee error recovery.
+ * When an error occurs, compression context must be freed or resized.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const void* srcBuffer, size_t srcSize,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_flush() :
+ * When data must be generated and sent immediately, without waiting for a block to be completely filled,
+ * it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
+ * `dstCapacity` must be large enough to ensure the operation will be successful.
+ * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
+ * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ * Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ */
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_compressEnd() :
+ * To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ * It will flush whatever data remained within `cctx` (like LZ4F_flush())
+ * and properly finalize the frame, with an endMark and a checksum.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
+ * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
+ * or an error code if it fails (which can be tested using LZ4F_isError())
+ * Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ */
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_compressOptions_t* cOptPtr);
+
+
+/*-*********************************
+* Decompression functions
+***********************************/
+typedef struct LZ4F_dctx_s LZ4F_dctx; /* incomplete type */
+typedef LZ4F_dctx* LZ4F_decompressionContext_t; /* compatibility with previous API versions */
+
+typedef struct {
+ unsigned stableDst; /* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */
+ unsigned reserved[3]; /* must be set to zero for forward compatibility */
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+/*! LZ4F_createDecompressionContext() :
+ * Create an LZ4F_dctx object, to track all decompression operations.
+ * The version provided MUST be LZ4F_VERSION.
+ * The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+ * The result is an errorCode, which can be tested using LZ4F_isError().
+ * dctx memory can be released using LZ4F_freeDecompressionContext();
+ * Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+ * That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
+
+
+/*-***********************************
+* Streaming decompression functions
+*************************************/
+
+#define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5
+
+/*! LZ4F_headerSize() : v1.9.0+
+ * Provide the header size of a frame starting at `src`.
+ * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+ * which is enough to decode the header length.
+ * @return : size of frame header
+ * or an error code, which can be tested using LZ4F_isError()
+ * note : Frame header size is variable, but is guaranteed to be
+ * >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+ */
+LZ4FLIB_API size_t LZ4F_headerSize(const void* src, size_t srcSize);
+
+/*! LZ4F_getFrameInfo() :
+ * This function extracts frame parameters (max blockSize, dictID, etc.).
+ * Its usage is optional: user can call LZ4F_decompress() directly.
+ *
+ * Extracted information will fill an existing LZ4F_frameInfo_t structure.
+ * This can be useful for allocation and dictionary identification purposes.
+ *
+ * LZ4F_getFrameInfo() can work in the following situations :
+ *
+ * 1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+ * It will decode header from `srcBuffer`,
+ * consuming the header and starting the decoding process.
+ *
+ * Input size must be large enough to contain the full frame header.
+ * Frame header size can be known beforehand by LZ4F_headerSize().
+ * Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+ * and <= LZ4F_HEADER_SIZE_MAX bytes.
+ * Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+ * It's allowed to provide more input data than the header size,
+ * LZ4F_getFrameInfo() will only consume the header.
+ *
+ * If input size is not large enough,
+ * aka if it's smaller than header size,
+ * function will fail and return an error code.
+ *
+ * 2) After decoding has been started,
+ * it's possible to invoke LZ4F_getFrameInfo() anytime
+ * to extract already decoded frame parameters stored within dctx.
+ *
+ * Note that, if decoding has barely started,
+ * and not yet read enough information to decode the header,
+ * LZ4F_getFrameInfo() will fail.
+ *
+ * The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+ * LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+ * and when decoding the header has been successful.
+ * Decompression must then resume from (srcBuffer + *srcSizePtr).
+ *
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ * or an error code which can be tested using LZ4F_isError().
+ * note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+ * note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+ LZ4F_frameInfo_t* frameInfoPtr,
+ const void* srcBuffer, size_t* srcSizePtr);
+
+/*! LZ4F_decompress() :
+ * Call this function repetitively to regenerate data compressed in `srcBuffer`.
+ *
+ * The function requires a valid dctx state.
+ * It will read up to *srcSizePtr bytes from srcBuffer,
+ * and decompress data into dstBuffer, of capacity *dstSizePtr.
+ *
+ * The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+ * The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+ *
+ * The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
+ * Unconsumed source data must be presented again in subsequent invocations.
+ *
+ * `dstBuffer` can freely change between each consecutive function invocation.
+ * `dstBuffer` content will be overwritten.
+ *
+ * @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+ * This is just a hint though, it's always possible to provide any srcSize.
+ *
+ * When a frame is fully decoded, @return will be 0 (no more data expected).
+ * When provided with more bytes than necessary to decode a frame,
+ * LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
+ *
+ * If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ * After a decompression error, the `dctx` context is not resumable.
+ * Use LZ4F_resetDecompressionContext() to return to clean state.
+ *
+ * After a frame is fully decoded, dctx can be used again to decompress another frame.
+ */
+LZ4FLIB_API size_t LZ4F_decompress(LZ4F_dctx* dctx,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const LZ4F_decompressOptions_t* dOptPtr);
+
+
+/*! LZ4F_resetDecompressionContext() : added in v1.8.0
+ * In case of an error, the context is left in "undefined" state.
+ * In which case, it's necessary to reset it, before re-using it.
+ * This method can also be used to abruptly stop any unfinished decompression,
+ * and start a new one using same context resources. */
+LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx); /* always successful */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4F_H_09782039843 */
+
+#if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843)
+#define LZ4F_H_STATIC_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* These declarations are not stable and may change in the future.
+ * They are therefore only safe to depend on
+ * when the caller is statically linked against the library.
+ * To access their declarations, define LZ4F_STATIC_LINKING_ONLY.
+ *
+ * By default, these symbols aren't published into shared/dynamic libraries.
+ * You can override this behavior and force them to be published
+ * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS.
+ * Use at your own risk.
+ */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+# define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+# define LZ4FLIB_STATIC_API
+#endif
+
+
+/* --- Error List --- */
+#define LZ4F_LIST_ERRORS(ITEM) \
+ ITEM(OK_NoError) \
+ ITEM(ERROR_GENERIC) \
+ ITEM(ERROR_maxBlockSize_invalid) \
+ ITEM(ERROR_blockMode_invalid) \
+ ITEM(ERROR_contentChecksumFlag_invalid) \
+ ITEM(ERROR_compressionLevel_invalid) \
+ ITEM(ERROR_headerVersion_wrong) \
+ ITEM(ERROR_blockChecksum_invalid) \
+ ITEM(ERROR_reservedFlag_set) \
+ ITEM(ERROR_allocation_failed) \
+ ITEM(ERROR_srcSize_tooLarge) \
+ ITEM(ERROR_dstMaxSize_tooSmall) \
+ ITEM(ERROR_frameHeader_incomplete) \
+ ITEM(ERROR_frameType_unknown) \
+ ITEM(ERROR_frameSize_wrong) \
+ ITEM(ERROR_srcPtr_wrong) \
+ ITEM(ERROR_decompressionFailed) \
+ ITEM(ERROR_headerChecksum_invalid) \
+ ITEM(ERROR_contentChecksum_invalid) \
+ ITEM(ERROR_frameDecoding_alreadyStarted) \
+ ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+
+/* enum list is exposed, to handle specific errors */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM)
+ _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes;
+
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(unsigned);
+
+/**********************************
+ * Bulk processing dictionary API
+ *********************************/
+
+/* A Dictionary is useful for the compression of small messages (KB range).
+ * It dramatically improves compression efficiency.
+ *
+ * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
+ * Best results are generally achieved by using Zstandard's Dictionary Builder
+ * to generate a high-quality dictionary from a set of samples.
+ *
+ * Loading a dictionary has a cost, since it involves construction of tables.
+ * The Bulk processing dictionary API makes it possible to share this cost
+ * over an arbitrary number of compression jobs, even concurrently,
+ * markedly improving compression latency for these cases.
+ *
+ * The same dictionary will have to be used on the decompression side
+ * for decoding to be successful.
+ * To help identify the correct dictionary at decoding stage,
+ * the frame header allows optional embedding of a dictID field.
+ */
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4F_createCDict() :
+ * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
+ * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ * cctx must point to a context created by LZ4F_createCompressionContext().
+ * If cdict==NULL, compress without a dictionary.
+ * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ * If this condition is not respected, function will fail (@return an errorCode).
+ * The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ * but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ * or an error code if it fails (can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+ LZ4F_cctx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ * Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ * however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ * or an error code (which can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+ LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ * Same as LZ4F_decompress(), using a predefined dictionary.
+ * Dictionary is used "in place", without any preprocessing.
+ * It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+ LZ4F_dctx* dctxPtr,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const void* dict, size_t dictSize,
+ const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */
diff --git a/libbutl/lz4hc.c b/libbutl/lz4hc.c
new file mode 100644
index 0000000..77c9f43
--- /dev/null
+++ b/libbutl/lz4hc.c
@@ -0,0 +1,1615 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Copyright (C) 2011-2017, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+
+
+/* *************************************
+* Tuning Parameter
+***************************************/
+
+/*! HEAPMODE :
+ * Select how default compression function will allocate workplace memory,
+ * in stack (0:fastest), or in heap (1:requires malloc()).
+ * Since workplace is rather large, heap mode is recommended.
+ */
+#ifndef LZ4HC_HEAPMODE
+# define LZ4HC_HEAPMODE 1
+#endif
+
+
+/*=== Dependency ===*/
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+
+
+/*=== Common definitions ===*/
+#if defined(__GNUC__)
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+#if defined (__clang__)
+# pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#define LZ4_COMMONDEFS_ONLY
+#ifndef LZ4_SRC_INCLUDED
+#include "lz4.c" /* LZ4_count, constants, mem */
+#endif
+
+
+/*=== Enums ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
+
+
+/*=== Constants ===*/
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM (1<<12)
+
+
+/*=== Macros ===*/
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* note : the selected argument is evaluated twice */
+#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) /* note : the selected argument is evaluated twice */
+#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) /* multiply by a constant, then drop the low (MINMATCH*8 - LZ4HC_HASH_LOG) bits */
+#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent; expects a variable named `chainTable` in scope */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster : the index is simply narrowed by the (U16) cast */
+/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
+#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } /* HASH_FUNCTION applied to the value LZ4_read32() returns for ptr */
+
+
+/**************************************
+* HC Compression
+**************************************/
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) /* re-initializes both lookup tables of hc4 */
+{
+ MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); /* whole hash table, value 0 (MEM_INIT is defined outside this chunk; presumably memset-like) */
+ MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); /* whole chain table, value 0xFF */
+}
+
+static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) /* re-bases hc4 so that new input begins at `start` */
+{
+ uptrval startingOffset = (uptrval)(hc4->end - hc4->base); /* index reached by the previous usage of hc4 */
+ if (startingOffset > 1 GB) { /* index grew too large : drop table content and restart numbering */
+ LZ4HC_clearTables(hc4);
+ startingOffset = 0;
+ }
+ startingOffset += 64 KB; /* leave a 64 KB gap between previous indexes and the new ones */
+ hc4->nextToUpdate = (U32) startingOffset; /* first index still to be inserted into the tables */
+ hc4->base = start - startingOffset; /* so that (start - base) == startingOffset */
+ hc4->end = start;
+ hc4->dictBase = start - startingOffset; /* same value as base */
+ hc4->dictLimit = (U32) startingOffset; /* dictLimit == lowLimit == index of `start` */
+ hc4->lowLimit = (U32) startingOffset;
+}
+
+
+/* Update chains up to ip (excluded) : inserts every position in [hc4->nextToUpdate, ip) into the hash and chain tables */
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+{
+ U16* const chainTable = hc4->chainTable;
+ U32* const hashTable = hc4->hashTable;
+ const BYTE* const base = hc4->base;
+ U32 const target = (U32)(ip - base); /* index of ip, first index NOT inserted */
+ U32 idx = hc4->nextToUpdate;
+
+ while (idx < target) {
+ U32 const h = LZ4HC_hashPtr(base+idx);
+ size_t delta = idx - hashTable[h]; /* distance to the previous position stored for the same hash */
+ if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; /* clamp before the narrowing (U16) cast below */
+ DELTANEXTU16(chainTable, idx) = (U16)delta; /* chain entry : how far back the previous candidate is */
+ hashTable[h] = idx; /* idx becomes the most recent position for this hash */
+ idx++;
+ }
+
+ hc4->nextToUpdate = target;
+}
+
+/** LZ4HC_countBack() : counts equal bytes located just before ip and match, never going below iMin (for ip) nor mMin (for match).
+ * @return : value <= 0, whose magnitude is the nb of common bytes before ip/match (0 when there is none) */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+ const BYTE* const iMin, const BYTE* const mMin)
+{
+ int back = 0;
+ int const min = (int)MAX(iMin - ip, mMin - match); /* most negative step allowed by both lower bounds */
+ assert(min <= 0);
+ assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+ assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+ while ( (back > min)
+ && (ip[back-1] == match[back-1]) ) /* compare the byte preceding the current position on both sides */
+ back--;
+ return back;
+}
+
+#if defined(_MSC_VER) /* LZ4HC_rotl32(x,r) : rotate the 32-bit value x left by r bits */
+# define LZ4HC_rotl32(x,r) _rotl(x,r)
+#else /* portable fallback : requires 0 < r < 32, since r==0 would shift by 32 */
+# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) /* NOTE(review): x and r are not parenthesized, pass only simple or cast expressions */
+#endif
+
+
+static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) /* rotates pattern left by (rotate modulo sizeof(pattern)) bytes */
+{
+ size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; /* byte count reduced modulo sizeof(pattern), then converted to bits */
+ if (bitsToRotate == 0) return pattern; /* nothing to rotate; also keeps r==0 away from LZ4HC_rotl32() */
+ return LZ4HC_rotl32(pattern, (int)bitsToRotate);
+}
+
+/* LZ4HC_countPattern() : counts how many bytes, starting at ip and stopping before iEnd, keep repeating pattern32.
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+ const BYTE* const iStart = ip;
+ reg_t const pattern = (sizeof(pattern)==8) ?
+ (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; /* 8-byte reg_t : pattern32 duplicated into the upper half */
+
+ while (likely(ip < iEnd-(sizeof(pattern)-1))) { /* fast path : one reg_t-sized word per iteration */
+ reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+ if (!diff) { ip+=sizeof(pattern); continue; } /* full word matches */
+ ip += LZ4_NbCommonBytes(diff); /* partial match : add the nb of bytes reported as common, then stop */
+ return (unsigned)(ip - iStart);
+ }
+
+ if (LZ4_isLittleEndian()) { /* tail : fewer than sizeof(pattern) bytes left, compare byte by byte */
+ reg_t patternByte = pattern;
+ while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+ ip++; patternByte >>= 8; /* next expected byte = next lowest byte of pattern */
+ }
+ } else { /* big endian */
+ U32 bitOffset = (sizeof(pattern)*8) - 8; /* start from the most significant byte of pattern */
+ while (ip < iEnd) {
+ BYTE const byte = (BYTE)(pattern >> bitOffset);
+ if (*ip != byte) break;
+ ip ++; bitOffset -= 8;
+ }
+ }
+
+ return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() : counts how many bytes located before ip, without going below iLow, keep repeating pattern.
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianness */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+ const BYTE* const iStart = ip;
+
+ while (likely(ip >= iLow+4)) { /* fast path : step back 4 bytes at a time */
+ if (LZ4_read32(ip-4) != pattern) break;
+ ip -= 4;
+ }
+ { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */
+ while (likely(ip>iLow)) { /* remaining bytes : compare against pattern's bytes, from its last in-memory byte downward */
+ if (ip[-1] != *bytePtr) break;
+ ip--; bytePtr--;
+ } }
+ return (unsigned)(iStart - ip);
+}
+
+/* LZ4HC_protectDictEnd() :
+ * Checks if the match is in the last 3 bytes of the dictionary, so reading the
+ * 4 byte MINMATCH would overflow.
+ * @returns true if the match index is okay.
+ */
+static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+{
+ return ((U32)((dictLimit - 1) - matchIndex) >= 3); /* unsigned wrap-around : false only for matchIndex in [dictLimit-3, dictLimit-1] */
+}
+
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; /* progress of the repeated-pattern test in LZ4HC_InsertAndGetWiderMatch() */
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; /* passed as favorDecSpeed : a non-zero value makes the search ignore candidates closer than 8 bytes */
+
+LZ4_FORCE_INLINE int /* @return : best match length known after the search, never smaller than the `longest` received */
+LZ4HC_InsertAndGetWiderMatch (
+ LZ4HC_CCtx_internal* hc4, /* search context; its tables are updated up to ip by LZ4HC_Insert() */
+ const BYTE* const ip, /* position for which a match is searched */
+ const BYTE* const iLowLimit, /* lowest position the match start may be extended back to */
+ const BYTE* const iHighLimit, /* forward comparisons stop at this position */
+ int longest, /* length to beat; only strictly longer matches are recorded */
+ const BYTE** matchpos, /* out : match position, written only when a longer match is found */
+ const BYTE** startpos, /* out : start of the matched segment in the input (ip, moved back by the backward extension) */
+ const int maxNbAttempts, /* budget of candidates to test */
+ const int patternAnalysis, /* non-zero : enable the repeated-pattern shortcut */
+ const int chainSwap, /* non-zero : allow jumping to a more promising chain; requires ip == iLowLimit (see assert) */
+ const dictCtx_directive dict, /* usingDictCtxHc : also search hc4->dictCtx once the main search is over */
+ const HCfavor_e favorDecSpeed) /* non-zero : ignore candidates closer than 8 bytes */
+{
+ U16* const chainTable = hc4->chainTable;
+ U32* const HashTable = hc4->hashTable;
+ const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+ const BYTE* const base = hc4->base;
+ const U32 dictLimit = hc4->dictLimit;
+ const BYTE* const lowPrefixPtr = base + dictLimit;
+ const U32 ipIndex = (U32)(ip - base);
+ const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
+ const BYTE* const dictBase = hc4->dictBase;
+ int const lookBackLength = (int)(ip-iLowLimit);
+ int nbAttempts = maxNbAttempts;
+ U32 matchChainPos = 0;
+ U32 const pattern = LZ4_read32(ip);
+ U32 matchIndex;
+ repeat_state_e repeat = rep_untested;
+ size_t srcPatternLength = 0;
+
+ DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
+ /* First Match */
+ LZ4HC_Insert(hc4, ip);
+ matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+ DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+ matchIndex, lowestMatchIndex);
+
+ while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
+ int matchLength=0;
+ nbAttempts--;
+ assert(matchIndex < ipIndex);
+ if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+ /* do nothing */
+ } else if (matchIndex >= dictLimit) { /* within current Prefix */
+ const BYTE* const matchPtr = base + matchIndex;
+ assert(matchPtr >= lowPrefixPtr);
+ assert(matchPtr < ip);
+ assert(longest >= 1);
+ if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { /* cheap pre-filter : 2 bytes at offset (longest-1) from the lowest possible start */
+ if (LZ4_read32(matchPtr) == pattern) {
+ int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+ matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+ matchLength -= back; /* back is <= 0 : this adds the backward extension */
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = matchPtr + back;
+ *startpos = ip + back;
+ } } }
+ } else { /* lowestMatchIndex <= matchIndex < dictLimit */
+ const BYTE* const matchPtr = dictBase + matchIndex;
+ if (LZ4_read32(matchPtr) == pattern) {
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
+ int back = 0;
+ const BYTE* vLimit = ip + (dictLimit - matchIndex); /* input position at which the candidate reaches index dictLimit */
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) /* matched up to index dictLimit : continue the comparison in the prefix */
+ matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+ matchLength -= back;
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip + back;
+ } } }
+
+ if (chainSwap && matchLength==longest) { /* better match => select a better chain */
+ assert(lookBackLength==0); /* search forward only */
+ if (matchIndex + (U32)longest <= ipIndex) {
+ int const kTrigger = 4;
+ U32 distanceToNextMatch = 1;
+ int const end = longest - MINMATCH + 1;
+ int step = 1;
+ int accel = 1 << kTrigger;
+ int pos;
+ for (pos = 0; pos < end; pos += step) { /* look for the position inside the match whose chain entry jumps the farthest */
+ U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
+ step = (accel++ >> kTrigger); /* step grows by 1 after every (1<<kTrigger) iterations without improvement */
+ if (candidateDist > distanceToNextMatch) {
+ distanceToNextMatch = candidateDist;
+ matchChainPos = (U32)pos;
+ accel = 1 << kTrigger;
+ }
+ }
+ if (distanceToNextMatch > 1) {
+ if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
+ matchIndex -= distanceToNextMatch;
+ continue;
+ } } }
+
+ { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+ if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+ U32 const matchCandidateIdx = matchIndex-1;
+ /* may be a repeated pattern */
+ if (repeat == rep_untested) { /* the test is performed once per call, its result is cached in `repeat` */
+ if ( ((pattern & 0xFFFF) == (pattern >> 16))
+ & ((pattern & 0xFF) == (pattern >> 24)) ) { /* both conditions together hold only when the 4 bytes of pattern are identical */
+ repeat = rep_confirmed;
+ srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+ } else {
+ repeat = rep_not;
+ } }
+ if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
+ && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
+ const int extDict = matchCandidateIdx < dictLimit;
+ const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
+ if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
+ const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
+ size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
+ if (extDict && matchPtr + forwardPatternLength == iLimit) { /* pattern runs up to index dictLimit : keep counting in the prefix */
+ U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
+ forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
+ }
+ { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
+ size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+ size_t currentSegmentLength;
+ if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) { /* pattern reaches the start of the prefix : keep counting backward below dictLimit */
+ U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+ backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
+ }
+ /* Limit backLength not go further than lowestMatchIndex */
+ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+ assert(matchCandidateIdx - backLength >= lowestMatchIndex);
+ currentSegmentLength = backLength + forwardPatternLength;
+ /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+ U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
+ if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
+ matchIndex = newMatchIndex;
+ else {
+ /* Can only happen if started in the prefix */
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
+ }
+ } else {
+ U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+ if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
+ } else {
+ matchIndex = newMatchIndex;
+ if (lookBackLength==0) { /* no back possible */
+ size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+ if ((size_t)longest < maxML) {
+ assert(base + matchIndex != ip);
+ if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break; /* candidate is farther than LZ4_DISTANCE_MAX : stop searching */
+ assert(maxML < 2 GB);
+ longest = (int)maxML;
+ *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip;
+ }
+ { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+ if (distToNextPattern > matchIndex) break; /* avoid overflow */
+ matchIndex -= distToNextPattern;
+ } } } } }
+ continue;
+ } }
+ } } /* PA optimization */
+
+ /* follow current chain */
+ matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);
+
+ } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) */
+
+ if ( dict == usingDictCtxHc
+ && nbAttempts > 0
+ && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { /* attempts remain and the window is not filled : continue inside the attached dictCtx */
+ size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
+ U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+ assert(dictEndOffset <= 1 GB);
+ matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; /* dictCtx index translated into hc4's index space (dictCtx end maps to lowestMatchIndex) */
+ while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+ const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
+
+ if (LZ4_read32(matchPtr) == pattern) {
+ int mlt;
+ int back = 0;
+ const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); /* input position at which the candidate reaches the end of dictCtx content */
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+ mlt -= back;
+ if (mlt > longest) {
+ longest = mlt;
+ *matchpos = base + matchIndex + back; /* virtual position expressed in hc4's index space */
+ *startpos = ip + back;
+ } }
+
+ { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); /* follow dictCtx's own chain, moving both indexes together */
+ dictMatchIndex -= nextOffset;
+ matchIndex -= nextOffset;
+ } } }
+
+ return longest;
+}
+
+LZ4_FORCE_INLINE
+int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
+                                 const BYTE* const ip, const BYTE* const iLimit,
+                                 const BYTE** matchpos,
+                                 const int maxNbAttempts,
+                                 const int patternAnalysis,
+                                 const dictCtx_directive dict)
+{
+    /* Forward-only variant of LZ4HC_InsertAndGetWiderMatch() :
+     * the low limit passed below is ip itself, so the search cannot extend a match
+     * backward and the start position it reports is of no interest to the caller. */
+    const BYTE* ignoredStart = ip;
+    int const minLen = MINMATCH-1;
+    int const noChainSwap = 0;
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip /*iLowLimit*/, iLimit, minLen,
+                                        matchpos, &ignoredStart,
+                                        maxNbAttempts, patternAnalysis, noChainSwap,
+                                        dict, favorCompressionRatio);
+}
+
+/* LZ4HC_encodeSequence() :
+ * Writes one sequence at *_op : token, literal run [*_anchor, *_ip),
+ * 2-byte little-endian offset (*_ip - match), then the match length.
+ * On success, *_ip is advanced by matchLength and *_anchor is set to the new *_ip.
+ * When `limit` is non-zero, the output position is checked against `oend` before writing.
+ * Note : *_op has already been advanced past the token byte when a check fails,
+ * so a caller that needs to rewind must save the output pointer before calling.
+ * @return : 0 if ok,
+ * 1 if buffer issue detected */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
+ const BYTE** _ip,
+ BYTE** _op,
+ const BYTE** _anchor,
+ int matchLength,
+ const BYTE* const match,
+ limitedOutput_directive limit,
+ BYTE* oend)
+{
+#define ip (*_ip)
+#define op (*_op)
+#define anchor (*_anchor)
+
+ size_t length;
+ BYTE* const token = op++; /* reserve the token byte; it is filled in once both lengths are known */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+ static const BYTE* start = NULL;
+ static U32 totalCost = 0;
+ U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+ U32 const ll = (U32)(ip - anchor);
+ U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+ U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+ U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+ if (start==NULL) start = anchor; /* only works for single segment */
+ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+ DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
+ pos,
+ (U32)(ip - anchor), matchLength, (U32)(ip-match),
+ cost, totalCost);
+ totalCost += cost;
+#endif
+
+ /* Encode Literal length */
+ length = (size_t)(ip - anchor);
+ LZ4_STATIC_ASSERT(notLimited == 0); /* lets `limit` be used directly as a boolean below */
+ /* Check output limit */
+ if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+ DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+ (int)length, (int)(oend - op));
+ return 1;
+ }
+ if (length >= RUN_MASK) { /* literal length does not fit in the token : emit extra length bytes */
+ size_t len = length - RUN_MASK;
+ *token = (RUN_MASK << ML_BITS);
+ for(; len >= 255 ; len -= 255) *op++ = 255;
+ *op++ = (BYTE)len;
+ } else {
+ *token = (BYTE)(length << ML_BITS);
+ }
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op + length);
+ op += length;
+
+ /* Encode Offset */
+ assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+ /* Encode MatchLength */
+ assert(matchLength >= MINMATCH);
+ length = (size_t)matchLength - MINMATCH; /* stored match length is relative to MINMATCH */
+ if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+ DEBUGLOG(6, "Not enough room to write match length");
+ return 1; /* Check output limit */
+ }
+ if (length >= ML_MASK) { /* match length does not fit in the token : emit extra length bytes */
+ *token += ML_MASK;
+ length -= ML_MASK;
+ for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } /* two 255 bytes per iteration */
+ if (length >= 255) { length -= 255; *op++ = 255; }
+ *op++ = (BYTE)length;
+ } else {
+ *token += (BYTE)(length);
+ }
+
+ /* Prepare next loop */
+ ip += matchLength;
+ anchor = ip;
+
+ return 0;
+}
+#undef ip
+#undef op
+#undef anchor
+
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( /* hash-chain compressor (strategy `lz4hc` of the level table).
+ * On entry, *srcSizePtr is the input size; on successful return it holds the number of
+ * source bytes actually consumed (can be less than the input size when limit==fillOutput).
+ * `maxNbAttempts` bounds the match search; pattern analysis is enabled when it exceeds 128.
+ * @return : number of bytes written into `dest`, or 0 if compression failed. */
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const source,
+ char* const dest,
+ int* srcSizePtr,
+ int const maxOutputSize,
+ int maxNbAttempts,
+ const limitedOutput_directive limit,
+ const dictCtx_directive dict
+ )
+{
+ const int inputSize = *srcSizePtr;
+ const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* optr = (BYTE*) dest; /* output position saved before each sequence; used to rewind in _dest_overflow */
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + maxOutputSize;
+
+ int ml0, ml, ml2, ml3;
+ const BYTE* start0;
+ const BYTE* ref0;
+ const BYTE* ref = NULL;
+ const BYTE* start2 = NULL;
+ const BYTE* ref2 = NULL;
+ const BYTE* start3 = NULL;
+ const BYTE* ref3 = NULL;
+
+ /* init */
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support LZ4 format restriction */
+ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+ if (ml<MINMATCH) { ip++; continue; } /* no usable match at this position : try the next one */
+
+ /* saved, in case we would skip too much */
+ start0 = ip; ref0 = ref; ml0 = ml;
+
+_Search2:
+ if (ip+ml <= mflimit) {
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml2 = ml;
+ }
+
+ if (ml2 == ml) { /* No better match => encode ML1 */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ continue;
+ }
+
+ if (start0 < ip) { /* first match was skipped at least once */
+ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */
+ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */
+ } }
+
+ /* Here, start0==ip */
+ if ((start2 - ip) < 3) { /* First Match too small : removed */
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+_Search3:
+ /* At this stage, we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1) */
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+ if (start2 + ml2 <= mflimit) {
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml3 = ml2;
+ }
+
+ if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ /* Now, encode 2 sequences */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start2;
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
+ ml = ml2; /* _dest_overflow works on (ip, ml, ref) : make them describe the failed sequence */
+ ref = ref2;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
+ if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+ if (start2 < ip+ml) {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH) {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches;
+ * let's write the first one ML1.
+ * ip & ref are known; Now decide ml.
+ */
+ if (start2 < ip+ml) {
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ } else {
+ ml = (int)(start2 - ip);
+ }
+ }
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+
+ /* ML2 becomes ML1 */
+ ip = start2; ref = ref2; ml = ml2;
+
+ /* ML3 becomes ML2 */
+ start2 = start3; ref2 = ref3; ml2 = ml3;
+
+ /* let's find a new ML3 */
+ goto _Search3;
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+ size_t const totalSize = 1 + llAdd + lastRunSize;
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) return 0; /* not enough room for the last literals : compression fails */
+ /* adapt lastRunSize to fill 'dest' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source); /* report how much of the source was consumed */
+ return (int) (((char*)op)-dest);
+
+_dest_overflow:
+ if (limit == fillOutput) {
+ /* Assumption : ip, anchor, ml and ref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255;
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing");
+ op = optr; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ml >= 0);
+ if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); /* sizes were validated above, hence notLimited */
+ } }
+ goto _last_literals;
+ }
+ /* compression failed */
+ return 0;
+}
+
+
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+ const char* const source, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ int const nbSearches, size_t sufficient_len,
+ const limitedOutput_directive limit, int const fullUpdate,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed); /* forward declaration : the definition comes later in this file, after its first use in LZ4HC_compress_generic_internal() */
+
+
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( /* Selects search parameters from the compression level
+ * and dispatches to the hash-chain or the optimal parser.
+ * `cLevel` < 1 is replaced by LZ4HC_CLEVEL_DEFAULT, and values above LZ4HC_CLEVEL_MAX are clamped.
+ * ctx->end is advanced by *srcSizePtr before compressing.
+ * @return : result of the selected compressor; a result <= 0 also marks the context as dirty. */
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ const limitedOutput_directive limit,
+ const dictCtx_directive dict
+ )
+{
+ typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+ typedef struct {
+ lz4hc_strat_e strat;
+ int nbSearches; /* passed as maxNbAttempts (hash chain) or nbSearches (optimal) */
+ U32 targetLength; /* passed as sufficient_len to the optimal parser; not read for lz4hc */
+ } cParams_t;
+ static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { /* indexed by compression level */
+ { lz4hc, 2, 16 }, /* 0, unused */
+ { lz4hc, 2, 16 }, /* 1, unused */
+ { lz4hc, 2, 16 }, /* 2, unused */
+ { lz4hc, 4, 16 }, /* 3 */
+ { lz4hc, 8, 16 }, /* 4 */
+ { lz4hc, 16, 16 }, /* 5 */
+ { lz4hc, 32, 16 }, /* 6 */
+ { lz4hc, 64, 16 }, /* 7 */
+ { lz4hc, 128, 16 }, /* 8 */
+ { lz4hc, 256, 16 }, /* 9 */
+ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
+ { lz4opt, 512,128 }, /*11 */
+ { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
+ };
+
+ DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+ ctx, src, *srcSizePtr, limit);
+
+ if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
+ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
+
+ ctx->end += *srcSizePtr;
+ if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
+ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+ { cParams_t const cParam = clTable[cLevel];
+ HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+ int result;
+
+ if (cParam.strat == lz4hc) {
+ result = LZ4HC_compress_hashChain(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, limit, dict);
+ } else {
+ assert(cParam.strat == lz4opt);
+ result = LZ4HC_compress_optimal(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, cParam.targetLength, limit,
+ cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
+ dict, favor);
+ }
+ if (result <= 0) ctx->dirty = 1; /* a dirty context gets a full re-init in LZ4_resetStreamHC_fast() */
+ return result;
+ }
+}
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); /* forward declaration : defined further down, needed by LZ4HC_compress_generic_dictCtx() */
+
+static int /* compresses without an attached dictionary context : ctx->dictCtx must be NULL (asserted) */
+LZ4HC_compress_generic_noDictCtx (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ limitedOutput_directive limit
+ )
+{
+ assert(ctx->dictCtx == NULL);
+ return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); /* constant directive passed to the force-inlined generic body */
+}
+
+/* LZ4HC_compress_generic_dictCtx() :
+ * Compression entry point used when a dictionary context is attached (ctx->dictCtx != NULL).
+ * Picks one of three routes, based on how much data the working context
+ * already indexes past its lowLimit :
+ * - 64 KB or more : detach the dictionary context and compress without it;
+ * - nothing yet, and input larger than 4 KB : copy the dictionary context into the
+ *   working context and reference its content as an external dictionary;
+ * - otherwise : search the attached dictionary context directly (usingDictCtxHc). */
+static int
+LZ4HC_compress_generic_dictCtx (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
+{
+    size_t const endIndex = (size_t)(ctx->end - ctx->base);
+    size_t const position = endIndex - ctx->lowLimit;
+    assert(ctx->dictCtx != NULL);
+
+    if (position >= 64 KB) {
+        ctx->dictCtx = NULL;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+
+    if ((position == 0) && (*srcSizePtr > 4 KB)) {
+        memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
+        LZ4HC_setExternalDict(ctx, (const BYTE *)src);
+        ctx->compressionLevel = (short)cLevel;   /* the copy above overwrote the working level */
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+
+    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
+}
+
+/* LZ4HC_compress_generic() :
+ * Common entry point : routes to the dictCtx-aware variant
+ * when a dictionary context is attached, to the plain variant otherwise. */
+static int
+LZ4HC_compress_generic (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
+{
+    if (ctx->dictCtx != NULL)
+        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+
+    return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+}
+
+
+int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } /* size, in bytes, of an LZ4_streamHC_t state */
+
+static size_t LZ4_streamHC_t_alignment(void) /* alignment value passed to LZ4_isAligned() when validating state buffers */
+{
+#if LZ4_ALIGN_TEST
+ typedef struct { char c; LZ4_streamHC_t t; } t_a; /* the padding inserted after `c` gives the alignment of `t` */
+ return sizeof(t_a) - sizeof(LZ4_streamHC_t);
+#else
+ return 1; /* effectively disabled */
+#endif
+}
+
+/* LZ4_compress_HC_extStateHC_fastReset() :
+ * One-shot compression using a caller-provided state, reset with the fast method.
+ * `state` is presumed correctly initialized,
+ * in which case its size and alignment have already been validated.
+ * @return : compressed size, or 0 on failure (including a misaligned `state`) */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    LZ4_streamHC_t* const stream = (LZ4_streamHC_t*)state;
+    LZ4HC_CCtx_internal* const ctx = &stream->internal_donotuse;
+
+    if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
+
+    LZ4_resetStreamHC_fast(stream, compressionLevel);
+    LZ4HC_init_internal (ctx, (const BYTE*)src);
+
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {
+        /* dst can hold the worst case : output checks are not needed */
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited);
+    }
+    return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
+}
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) /* fully initializes `state`, then compresses with it */
+{
+ LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+ if (ctx==NULL) return 0; /* init failure */
+ return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) /* one-shot compression with an internally managed state :
+ * heap-allocated when LZ4HC_HEAPMODE==1, on the stack otherwise.
+ * @return : value returned by LZ4_compress_HC_extStateHC() */
+{
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+#else
+ LZ4_streamHC_t state;
+ LZ4_streamHC_t* const statePtr = &state;
+#endif
+ int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); /* a NULL statePtr is rejected by LZ4_initStreamHC(), giving 0 */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ FREEMEM(statePtr);
+#endif
+ return cSize;
+}
+
+/* LZ4_compress_HC_destSize() :
+ * Compresses in fillOutput mode, targeting an output of at most `targetDestSize` bytes.
+ * *sourceSizePtr is read as the input size and passed by pointer to the compressor,
+ * which updates it.
+ * state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+ LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+ if (ctx==NULL) return 0; /* init failure */
+ LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source);
+ LZ4_setCompressionLevel(ctx, cLevel);
+ return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);
+}
+
+
+
+/**************************************
+* Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void) /* allocates a zeroed state set to the default compression level; returns NULL if allocation fails */
+{
+ LZ4_streamHC_t* const state =
+ (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+ if (state == NULL) return NULL;
+ LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
+ return state;
+}
+
+/* LZ4_freeStreamHC() :
+ * Releases a state with FREEMEM(). A NULL pointer is accepted.
+ * @return : always 0 */
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+    if (LZ4_streamHCPtr != NULL) {
+        FREEMEM(LZ4_streamHCPtr);
+    }
+    return 0;
+}
+
+
+/* LZ4_initStreamHC() :
+ * Turns a caller-provided buffer into a ready-to-use LZ4_streamHC_t :
+ * the internal state is zeroed and the compression level set to the default.
+ * @return : `buffer` typed as LZ4_streamHC_t*,
+ *           or NULL if buffer is NULL, too small, or fails the alignment check */
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
+{
+    LZ4_streamHC_t* const stream = (LZ4_streamHC_t*)buffer;
+    /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+
+    /* reject unusable buffers (checks evaluated in this order) */
+    if ( (buffer == NULL)
+      || (size < sizeof(LZ4_streamHC_t))
+      || !LZ4_isAligned(buffer, LZ4_streamHC_t_alignment()) )
+        return NULL;
+
+    /* start from an all-zero internal state */
+    MEM_INIT(&stream->internal_donotuse, 0, sizeof(stream->internal_donotuse));
+    LZ4_setCompressionLevel(stream, LZ4HC_CLEVEL_DEFAULT);
+    return stream;
+}
+
+/* just a stub :
+ * performs a full LZ4_initStreamHC(), then applies the requested compression level */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+/* LZ4_resetStreamHC_fast() :
+ * Prepares a state for a new, unrelated compression job.
+ * A state flagged dirty gets a full re-initialization;
+ * otherwise only base and dictCtx are cleared, while the distance (end - base) is kept.
+ * The requested compression level is applied in both cases. */
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4HC_CCtx_internal* const hc = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    if (!hc->dirty) {
+        /* preserve end - base : can trigger clearTable's threshold */
+        hc->end -= (uptrval)hc->base;
+        hc->base = NULL;
+        hc->dictCtx = NULL;
+    } else {
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+    }
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+/* LZ4_setCompressionLevel() :
+ * Stores the compression level into the state, after normalizing it :
+ * values < 1 select LZ4HC_CLEVEL_DEFAULT, values > LZ4HC_CLEVEL_MAX are capped. */
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    int level;
+    DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    level = (compressionLevel < 1) ? LZ4HC_CLEVEL_DEFAULT : compressionLevel;
+    if (level > LZ4HC_CLEVEL_MAX) level = LZ4HC_CLEVEL_MAX;
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)level;
+}
+
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) /* stores `favor` as a 0/1 flag; it is read by LZ4HC_compress_generic_internal() and forwarded to the optimal parser only */
+{
+ LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0);
+}
+
+/* LZ4_loadDictHC() :
+ * Re-initializes the state (keeping its compression level) and indexes `dictionary`.
+ * Only the last 64 KB of the dictionary are used when it is larger than that.
+ * @return : number of dictionary bytes retained (dictSize, capped to 64 KB).
+ * LZ4_streamHCPtr is presumed properly initialized */
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* dictionary, int dictSize)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
+ assert(LZ4_streamHCPtr != NULL);
+ if (dictSize > 64 KB) { /* keep only the tail of the dictionary */
+ dictionary += (size_t)dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ /* need a full initialization, there are bad side-effects when using resetFast() */
+ { int const cLevel = ctxPtr->compressionLevel; /* saved, since the init below resets it to the default */
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+ }
+ LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
+ ctxPtr->end = (const BYTE*)dictionary + dictSize;
+ if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* skipped for dictionaries shorter than 4 bytes */
+ return dictSize;
+}
+
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { /* references (does not copy) the dictionary stream's internal context; NULL detaches */
+ working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL;
+}
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) /* turns the current prefix into the external dictionary
+ * and restarts the prefix at `newBlock`, keeping indexes continuous */
+{
+ DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+ if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+
+ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+ ctxPtr->dictBase = ctxPtr->base;
+ ctxPtr->base = newBlock - ctxPtr->dictLimit; /* so that base + dictLimit == newBlock */
+ ctxPtr->end = newBlock;
+ ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
+
+ /* cannot reference an extDict and a dictCtx at the same time */
+ ctxPtr->dictCtx = NULL;
+}
+
+static int /* streaming compression of one block : updates the context's view of history, then compresses
+ * with the level stored in the context. @return : value returned by LZ4HC_compress_generic() */
+LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ limitedOutput_directive limit)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+ LZ4_streamHCPtr, src, *srcSizePtr, limit);
+ assert(ctxPtr != NULL);
+ /* auto-init if forgotten */
+ if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
+
+ /* Check overflow : past 2 GB of indexed data, restart from the last (up to 64 KB) bytes, reloaded as a dictionary */
+ if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
+ size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+ }
+
+ /* Check if blocks follow each other */
+ if ((const BYTE*)src != ctxPtr->end)
+ LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); /* non-contiguous input : previous data becomes the external dictionary */
+
+ /* Check overlapping input/dictionary space */
+ { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
+ const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+ const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+ if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
+ if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+ ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); /* exclude the overlapped part from the dictionary */
+ if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; /* fewer than 4 bytes left : drop the dictionary entirely */
+ } }
+
+ return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
+}
+
+/* LZ4_compress_HC_continue() :
+ * Streaming compression of one block.
+ * Output checks are enabled only when dst is smaller than the worst-case bound. */
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited);
+    }
+    return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
+}
+
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) /* streaming variant in fillOutput mode; srcSizePtr is passed through to the compressor, which updates it */
+{
+ return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput);
+}
+
+
+
+/* LZ4_saveDictHC :
+ * save history content
+ * into a user-provided buffer
+ * which is then used to continue compression
+ * The requested dictSize is capped to 64 KB and to the current prefix size,
+ * and forced to 0 when below 4.
+ * The context is rebased so that its end index is unchanged
+ * while `end` now points just past the saved bytes in safeBuffer.
+ * @return : number of bytes saved into safeBuffer
+ */
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+ LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+ int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+ DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+ assert(prefixSize >= 0);
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ if (dictSize < 4) dictSize = 0;
+ if (dictSize > prefixSize) dictSize = prefixSize;
+ if (safeBuffer == NULL) assert(dictSize == 0);
+ if (dictSize > 0)
+ memmove(safeBuffer, streamPtr->end - dictSize, dictSize); /* memmove : source and destination are allowed to overlap */
+ { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+ streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+ streamPtr->base = streamPtr->end - endIndex; /* keeps (end - base) == endIndex */
+ streamPtr->dictLimit = endIndex - (U32)dictSize;
+ streamPtr->lowLimit = endIndex - (U32)dictSize; /* same value as dictLimit */
+ if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+ streamPtr->nextToUpdate = streamPtr->dictLimit;
+ }
+ return dictSize;
+}
+
+
+/***************************************************
+* Deprecated Functions
+***************************************************/
+
+/* These functions currently generate deprecation warnings */
+
+/* Wrappers for deprecated compression functions :
+ * each one forwards to LZ4_compress_HC(), LZ4_compress_HC_extStateHC() or LZ4_compress_HC_continue().
+ * Variants without a destination size pass LZ4_compressBound(srcSize) as capacity.
+ * Variants without a level pass 0, which is below 1 and therefore selects LZ4HC_CLEVEL_DEFAULT. */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } /* returns the LZ4_STREAMHCSIZE constant */
+
+/* LZ4_resetStreamStateHC() :
+ * fully initializes `state`, then starts a new stream at `inputBuffer`.
+ * state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, !=0 if error */
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+ LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+ if (hc4 == NULL) return 1; /* init failed */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return 0;
+}
+
+void* LZ4_createHC (const char* inputBuffer) /* allocates a state with LZ4_createStreamHC() and starts a stream at `inputBuffer`; NULL if allocation fails */
+{
+ LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
+ if (hc4 == NULL) return NULL; /* not enough memory */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return hc4;
+}
+
+/* LZ4_freeHC() :
+ * Releases a state with FREEMEM(). A NULL pointer is accepted.
+ * @return : always 0 */
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    if (LZ4HC_Data != NULL) {
+        FREEMEM(LZ4HC_Data);
+    }
+    return 0;
+}
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) /* calls LZ4HC_compress_generic() directly, without the continuity / overlap handling of LZ4_compressHC_continue_generic() */
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); /* dstCapacity is passed as 0 : with notLimited, the hash-chain path performs no output bound check */
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) /* same direct call as LZ4_compressHC2_continue(), with output limited to dstCapacity */
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data) /* returns base + lowLimit as computed before the state is fast-reset */
+{
+ LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data;
+ const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit; /* must be read first : the reset below may clear base */
+ LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
+ /* avoid const char * -> char * conversion warning :( */
+ return (char *)(uptrval)bufferStart;
+}
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
+ * ===============================================*/
+typedef struct { /* one cell of the `opt` table used by LZ4HC_compress_optimal() */
+ int price; /* NOTE(review): presumably a cost in bytes, like the LZ4HC_*Price() helpers -- confirm in LZ4HC_compress_optimal() */
+ int off; /* NOTE(review): presumably a match offset -- confirm */
+ int mlen; /* NOTE(review): presumably a match length -- confirm */
+ int litlen; /* NOTE(review): presumably a literal run length -- confirm */
+} LZ4HC_optimal_t;
+
+/* LZ4HC_literalsPrice() :
+ * price, in bytes, of a run of `litlen` literals :
+ * the literals themselves, plus the extra length bytes
+ * required once litlen reaches RUN_MASK */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+    int const runMask = (int)RUN_MASK;
+    assert(litlen >= 0);
+    if (litlen < runMask) return litlen;   /* no extra length byte */
+    return litlen + 1 + ((litlen - runMask) / 255);
+}
+
+
+/* LZ4HC_sequencePrice() :
+ * price, in bytes, of a full sequence : token, 16-bit offset, literals,
+ * plus the extra match-length bytes required once mlen reaches ML_MASK+MINMATCH.
+ * requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+    int const longMatch = (int)(ML_MASK+MINMATCH);
+    int extraMlBytes = 0;
+    assert(litlen >= 0);
+    assert(mlen >= MINMATCH);
+
+    if (mlen >= longMatch)
+        extraMlBytes = 1 + ((mlen - longMatch) / 255);
+
+    return (1 + 2) /* token + 16-bit offset */
+         + LZ4HC_literalsPrice(litlen)
+         + extraMlBytes;
+}
+
+
+typedef struct { /* result of LZ4HC_FindLongerMatch(); { 0, 0 } when nothing longer than minLen was found */
+ int off; /* distance between the searched position and the match (ip - matchPtr) */
+ int len; /* match length */
+} LZ4HC_match_t;
+
+/* LZ4HC_FindLongerMatch() :
+ * Searches at `ip` for a match strictly longer than `minLen`,
+ * with pattern analysis and chain swap enabled.
+ * @return : the match found as { offset, length }, or { 0, 0 } if none is longer than minLen.
+ * When favorDecSpeed is set, lengths in ]18, 36] are shortened to 18. */
+LZ4_FORCE_INLINE LZ4HC_match_t
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+                      const BYTE* ip, const BYTE* const iHighLimit,
+                      int minLen, int nbSearches,
+                      const dictCtx_directive dict,
+                      const HCfavor_e favorDecSpeed)
+{
+    LZ4HC_match_t found = { 0 , 0 };
+    const BYTE* ref = NULL;
+    /* note : LZ4HC_InsertAndGetWiderMatch() can move the start of a match (*startpos),
+     * but not here : the low limit passed is ip itself,
+     * so the search is not allowed to extend before ip */
+    int len = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen,
+                                           &ref, &ip,
+                                           nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/,
+                                           dict, favorDecSpeed);
+    if (len > minLen) {
+        if (favorDecSpeed) {
+            if ((len>18) & (len<=36)) len = 18;   /* favor shortcut */
+        }
+        found.len = len;
+        found.off = (int)(ip-ref);
+    }
+    return found;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
+ const char* const source,
+ char* dst,
+ int* srcSizePtr, /* in : bytes available in source; out : bytes actually consumed */
+ int dstCapacity,
+ int const nbSearches,
+ size_t sufficient_len, /* a match longer than this is encoded immediately, without price analysis */
+ const limitedOutput_directive limit,
+ int const fullUpdate, /* non-zero : search with minimal minLen; zero : only accept matches reaching past last_match_pos */
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{ /* returns the number of bytes written into dst; 0 if opt cannot be allocated or if output overflows in limitedOutput mode */
+ int retval = 0;
+#define TRAILING_LITERALS 3 /* extra opt[] slots priced as literals after the last match position */
+#ifdef LZ4HC_HEAPMODE
+ LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
+#else
+ LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
+#endif
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip; /* start of the pending (not yet emitted) literal run */
+ const BYTE* const iend = ip + *srcSizePtr;
+ const BYTE* const mflimit = iend - MFLIMIT; /* no match search starts past this point */
+ const BYTE* const matchlimit = iend - LASTLITERALS; /* upper bound passed to the match finder */
+ BYTE* op = (BYTE*) dst;
+ BYTE* opSaved = (BYTE*) dst; /* output position before the last encoding attempt, restored on overflow */
+ BYTE* oend = op + dstCapacity;
+ int ovml = MINMATCH; /* overflow - last sequence */
+ const BYTE* ovref = NULL; /* overflow - match position of the last sequence */
+
+ /* init */
+#ifdef LZ4HC_HEAPMODE
+ if (opt == NULL) goto _return_label; /* allocation failed : returns 0 */
+#endif
+ DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support LZ4 format restriction */
+ if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; /* so that any match not encoded immediately fits in opt[] */
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ int const llen = (int)(ip - anchor); /* literals pending before ip */
+ int best_mlen, best_off;
+ int cur, last_match_pos = 0;
+
+ LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ if (firstMatch.len==0) { ip++; continue; } /* no match : this byte joins the pending literal run (anchor unchanged) */
+
+ if ((size_t)firstMatch.len > sufficient_len) {
+ /* good enough solution : immediate encoding */
+ int const firstML = firstMatch.len;
+ const BYTE* const matchPos = ip - firstMatch.off;
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = firstML;
+ ovref = matchPos;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ /* set prices for first positions (literals) */
+ { int rPos;
+ for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+ int const cost = LZ4HC_literalsPrice(llen + rPos);
+ opt[rPos].mlen = 1;
+ opt[rPos].off = 0;
+ opt[rPos].litlen = llen + rPos;
+ opt[rPos].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ rPos, cost, opt[rPos].litlen);
+ } }
+ /* set prices using initial match */
+ { int mlen = MINMATCH;
+ int const matchML = firstMatch.len; /* necessarily <= sufficient_len < LZ4_OPT_NUM */
+ int const offset = firstMatch.off;
+ assert(matchML < LZ4_OPT_NUM);
+ for ( ; mlen <= matchML ; mlen++) {
+ int const cost = LZ4HC_sequencePrice(llen, mlen);
+ opt[mlen].mlen = mlen;
+ opt[mlen].off = offset;
+ opt[mlen].litlen = llen;
+ opt[mlen].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+ mlen, cost, mlen);
+ } }
+ last_match_pos = firstMatch.len;
+ { int addLit; /* price the TRAILING_LITERALS positions following the match as literals */
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+
+ /* check further positions */
+ for (cur = 1; cur < last_match_pos; cur++) {
+ const BYTE* const curPtr = ip + cur;
+ LZ4HC_match_t newMatch;
+
+ if (curPtr > mflimit) break; /* same bound as the main loop */
+ DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+ cur, opt[cur].price, opt[cur+1].price, cur+1);
+ if (fullUpdate) {
+ /* not useful to search here if next position has same (or lower) cost */
+ if ( (opt[cur+1].price <= opt[cur].price)
+ /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+ && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+ continue;
+ } else {
+ /* not useful to search here if next position has same (or lower) cost */
+ if (opt[cur+1].price <= opt[cur].price) continue;
+ }
+
+ DEBUGLOG(7, "search at rPos:%u", cur);
+ if (fullUpdate)
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ else
+ /* only accept matches reaching past last_match_pos; slightly faster, but misses a few bytes */
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+ if (!newMatch.len) continue;
+
+ if ( ((size_t)newMatch.len > sufficient_len)
+ || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+ /* immediate encoding */
+ best_mlen = newMatch.len;
+ best_off = newMatch.off;
+ last_match_pos = cur + 1; /* so that the encode loop (rPos < last_match_pos) still visits position cur */
+ goto encode;
+ }
+
+ /* before match : set price with literals at beginning */
+ { int const baseLitlen = opt[cur].litlen;
+ int litlen;
+ for (litlen = 1; litlen < MINMATCH; litlen++) {
+ int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+ int const pos = cur + litlen;
+ if (price < opt[pos].price) {
+ opt[pos].mlen = 1; /* literal */
+ opt[pos].off = 0;
+ opt[pos].litlen = baseLitlen+litlen;
+ opt[pos].price = price;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+ pos, price, opt[pos].litlen);
+ } } }
+
+ /* set prices using match at position = cur */
+ { int const matchML = newMatch.len;
+ int ml = MINMATCH;
+
+ assert(cur + newMatch.len < LZ4_OPT_NUM);
+ for ( ; ml <= matchML ; ml++) {
+ int const pos = cur + ml;
+ int const offset = newMatch.off;
+ int price;
+ int ll;
+ DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+ pos, last_match_pos);
+ if (opt[cur].mlen == 1) { /* cur is reached through literals */
+ ll = opt[cur].litlen;
+ price = ((cur > ll) ? opt[cur - ll].price : 0) /* price up to the start of the literal run */
+ + LZ4HC_sequencePrice(ll, ml);
+ } else { /* cur is reached through a match : no pending literals */
+ ll = 0;
+ price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+ }
+
+ assert((U32)favorDecSpeed <= 1);
+ if (pos > last_match_pos+TRAILING_LITERALS /* position not priced yet */
+ || price <= opt[pos].price - (int)favorDecSpeed) {
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+ pos, price, ml);
+ assert(pos < LZ4_OPT_NUM);
+ if ( (ml == matchML) /* last pos of last match */
+ && (last_match_pos < pos) )
+ last_match_pos = pos;
+ opt[pos].mlen = ml;
+ opt[pos].off = offset;
+ opt[pos].litlen = ll;
+ opt[pos].price = price;
+ } } }
+ /* complete following positions with literals */
+ { int addLit;
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+ } /* for (cur = 1; cur < last_match_pos; cur++) */
+
+ assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
+ best_mlen = opt[last_match_pos].mlen;
+ best_off = opt[last_match_pos].off;
+ cur = last_match_pos - best_mlen;
+
+encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+ assert(cur < LZ4_OPT_NUM);
+ assert(last_match_pos >= 1); /* == 1 when only one candidate */
+ DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+ { int candidate_pos = cur; /* walk backwards, rewriting opt[] so each visited start position holds the sequence beginning there */
+ int selected_matchLength = best_mlen;
+ int selected_offset = best_off;
+ while (1) { /* from end to beginning */
+ int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
+ int const next_offset = opt[candidate_pos].off;
+ DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+ opt[candidate_pos].mlen = selected_matchLength;
+ opt[candidate_pos].off = selected_offset;
+ selected_matchLength = next_matchLength;
+ selected_offset = next_offset;
+ if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+ assert(next_matchLength > 0); /* can be 1, means literal */
+ candidate_pos -= next_matchLength;
+ } }
+
+ /* encode all recorded sequences in order */
+ { int rPos = 0; /* relative position (to ip) */
+ while (rPos < last_match_pos) {
+ int const ml = opt[rPos].mlen;
+ int const offset = opt[rPos].off;
+ if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
+ rPos += ml;
+ assert(ml >= MINMATCH);
+ assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = ml;
+ ovref = ip - offset;
+ goto _dest_overflow;
+ } } }
+ } /* while (ip <= mflimit) */
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; /* extra length bytes for the literal run */
+ size_t const totalSize = 1 + llAdd + lastRunSize; /* token + length bytes + literals */
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) { /* Check output limit */
+ retval = 0;
+ goto _return_label;
+ }
+ /* adapt lastRunSize to fill 'dst' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source); /* bytes of source consumed */
+ retval = (int) ((char*)op-dst); /* bytes written into dst */
+ goto _return_label;
+
+_dest_overflow:
+if (limit == fillOutput) { /* other modes fall through to _return_label with retval == 0 */
+ /* Assumption : ip, anchor, ovml and ovref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255; /* NOTE(review): matches the llAdd formula only if RUN_MASK == 15 (defined elsewhere) -- confirm */
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
+ op = opSaved; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ovml >= 0);
+ if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; /* shorten the match to what still fits */
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
+ DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
+ DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+ DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
+ } }
+ goto _last_literals;
+}
+_return_label:
+#ifdef LZ4HC_HEAPMODE
+ FREEMEM(opt);
+#endif
+ return retval;
+}
diff --git a/libbutl/lz4hc.h b/libbutl/lz4hc.h
new file mode 100644
index 0000000..3d441fb
--- /dev/null
+++ b/libbutl/lz4hc.h
@@ -0,0 +1,413 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Header File
+ Copyright (C) 2011-2017, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef LZ4_HC_H_19834876238432
+#define LZ4_HC_H_19834876238432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
+#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
+
+
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN 3
+#define LZ4HC_CLEVEL_DEFAULT 9
+#define LZ4HC_CLEVEL_OPT_MIN 10
+#define LZ4HC_CLEVEL_MAX 12
+
+
+/*-************************************
+ * Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ * or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+
+/* Note :
+ * Decompression functions are provided within "lz4.h" (BSD license)
+ */
+
+
+/*! LZ4_compress_HC_extStateHC() :
+ * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
+ */
+LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ * Will compress as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize,
+ int compressionLevel);
+
+
+/*-************************************
+ * Streaming Compression
+ * Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
+
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ * These functions create and release memory for LZ4 HC streaming state.
+ * Newly created states are automatically initialized.
+ * The same state can be used multiple times consecutively,
+ * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+
+/*
+ These functions compress data in successive blocks of any size,
+ using previous blocks as dictionary, to improve compression ratio.
+ One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+ There is an exception for ring buffers, which can be smaller than 64 KB.
+ Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+ Before starting compression, state must be allocated and properly initialized.
+ LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+ Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+ or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+ LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+ which is automatically the case when state is created using LZ4_createStreamHC().
+
+ After reset, a first "fictional block" can be designated as initial dictionary,
+ using LZ4_loadDictHC() (Optional).
+
+ Invoke LZ4_compress_HC_continue() to compress each successive block.
+ The number of blocks is unlimited.
+ Previous input blocks, including initial dictionary when present,
+ must remain accessible and unmodified during compression.
+
+ It's allowed to update compression level anytime between blocks,
+ using LZ4_setCompressionLevel() (experimental).
+
+ 'dst' buffer should be sized to handle worst case scenarios
+ (see LZ4_compressBound(), it ensures compression success).
+ In case of failure, the API does not guarantee recovery,
+ so the state _must_ be reset.
+ To ensure compression success
+ whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+ consider using LZ4_compress_HC_continue_destSize().
+
+ Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+ it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+ Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+ After completing a streaming compression,
+ it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+ just by resetting it, using LZ4_resetStreamHC_fast().
+*/
+
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */
+LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+ const char* src, char* dst,
+ int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ * Similar to LZ4_compress_HC_continue(),
+ * but will read as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
+ * Note that this function may not consume the entire input.
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+
+
+/*^**********************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***********************************************/
+
+/*-******************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
+{
+ LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE]; /* (1 << LZ4HC_HASH_LOG) entries */
+ LZ4_u16 chainTable[LZ4HC_MAXD]; /* (1 << LZ4HC_DICTIONARY_LOGSIZE) entries */
+ const LZ4_byte* end; /* next block here to continue on current prefix */
+ const LZ4_byte* base; /* All indexes are relative to this position */
+ const LZ4_byte* dictBase; /* alternate base for extDict */
+ LZ4_u32 dictLimit; /* below that point, need extDict */
+ LZ4_u32 lowLimit; /* below that point, no more dict */
+ LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
+ short compressionLevel; /* compression level stored in the state */
+ LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
+ otherwise, favor compression ratio */
+ LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
+ const LZ4HC_CCtx_internal* dictCtx; /* presumably the dictionary stream attached via LZ4_attach_HC_dictionary() -- confirm in lz4hc.c */
+};
+
+
+/* Do not use these definitions directly !
+ * Declare or allocate an LZ4_streamHC_t instead.
+ */
+#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */
+#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
+union LZ4_streamHC_u {
+ void* table[LZ4_STREAMHCSIZE_VOIDP]; /* LZ4_STREAMHCSIZE / sizeof(void*) pointers : gives the union its static size */
+ LZ4HC_CCtx_internal internal_donotuse; /* actual state; do not access directly */
+}; /* previously typedef'd to LZ4_streamHC_t */
+
+/* LZ4_streamHC_t :
+ * This structure allows static allocation of LZ4 HC streaming state.
+ * This can be used to allocate statically, on stack, or as part of a larger structure.
+ *
+ * Such state **must** be initialized using LZ4_initStreamHC() before first use.
+ *
+ * Note that invoking LZ4_initStreamHC() is not required when
+ * the state was created using LZ4_createStreamHC() (which is recommended).
+ * Using the normal builder, a newly created state is automatically initialized.
+ *
+ * Static allocation shall only be used in combination with static linking.
+ */
+
+/* LZ4_initStreamHC() : v1.9.0+
+ * Required before first use of a statically allocated LZ4_streamHC_t.
+ * Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size);
+
+
+/*-************************************
+* Deprecated Functions
+**************************************/
+/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
+
+/* deprecated compression functions */
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
+ * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
+ * which is now the recommended function to start a new stream of blocks,
+ * but cannot be used to initialize a memory segment containing arbitrary garbage data.
+ *
+ * It is recommended to switch to LZ4_initStreamHC().
+ * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
+ */
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!! STATIC LINKING ONLY !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
+#include "lz4.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ * It's possible to change compression level
+ * between successive invocations of LZ4_compress_HC_continue*()
+ * for dynamic adaptation.
+ */
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ * Opt. Parser will favor decompression speed over compression ratio.
+ * Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
+ */
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
+ * When an LZ4_streamHC_t is known to be in an internally coherent state,
+ * it can often be prepared for a new compression with almost no work, only
+ * sometimes falling back to the full, expensive reset that is always required
+ * when the stream is in an indeterminate state (i.e., the reset performed by
+ * LZ4_resetStreamHC()).
+ *
+ * LZ4_streamHCs are guaranteed to be in a valid state when:
+ * - returned from LZ4_createStreamHC()
+ * - reset by LZ4_resetStreamHC()
+ * - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ * - the stream was in a valid state and was then used in any compression call
+ * that returned success
+ * - the stream was in an indeterminate state and was used in a compression
+ * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ * returned success
+ *
+ * Note:
+ * A stream that was last used in a compression call that returned an error
+ * may be passed to this function. However, it will be fully reset, which will
+ * clear any existing history and settings from the context.
+ */
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ * A variant of LZ4_compress_HC_extStateHC().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ * "correctly initialized"). From a high level, the difference is that this
+ * function initializes the provided state with a call to
+ * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ * call to LZ4_resetStreamHC().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+ void* state,
+ const char* src, char* dst,
+ int srcSize, int dstCapacity,
+ int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ * This is an experimental API that allows for the efficient use of a
+ * static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionary stream pointer may be NULL, in which
+ * case any existing dictionary stream is unset.
+ *
+ * A dictionary should only be attached to a stream without any history (i.e.,
+ * a stream that has just been reset).
+ *
+ * The dictionary will remain attached to the working stream only for the
+ * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ * dictionary context association from the working stream. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the lifetime of the stream session.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
+ LZ4_streamHC_t *working_stream,
+ const LZ4_streamHC_t *dictionary_stream);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4_HC_SLO_098092834 */
+#endif /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 9514bbd..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -1,39 +1,10 @@
// file : libbutl/manifest-parser.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-parser.mxx>
-#endif
+#include <libbutl/manifest-parser.hxx>
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#include <vector>
-#include <cstdint>
-#include <utility>
-#include <stdexcept>
-
-#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#endif
-
-#endif
+#include <cassert>
using namespace std;
@@ -177,41 +148,136 @@ namespace butl
{
using iterator = string::const_iterator;
- auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+ // Parse the value differently depending on whether it is multi-line or
+ // not.
+ //
+ if (v.find ('\n') == string::npos) // Single-line.
+ {
+ auto space = [] (char c) {return c == ' ' || c == '\t';};
- iterator i (v.begin ());
- iterator e (v.end ());
+ iterator i (v.begin ());
+ iterator e (v.end ());
- string r;
- size_t n (0);
- for (char c; i != e && (c = *i) != ';'; ++i)
- {
- // Unescape ';' character.
+ string r;
+ size_t n (0);
+ for (char c; i != e && (c = *i) != ';'; ++i)
+ {
+ // Unescape ';' and '\' characters.
+ //
+ if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+ c = *++i;
+
+ r += c;
+
+ if (!space (c))
+ n = r.size ();
+ }
+
+ // Strip the value trailing spaces.
//
- if (c == '\\' && i + 1 != e && *(i + 1) == ';')
- c = *++i;
+ if (r.size () != n)
+ r.resize (n);
- r += c;
+ // Find beginning of a comment (i).
+ //
+ if (i != e)
+ {
+ // Skip spaces.
+ //
+ for (++i; i != e && space (*i); ++i);
+ }
- if (!space (c))
- n = r.size ();
+ return make_pair (move (r), string (i, e));
}
+ else // Multi-line.
+ {
+ string r;
+ string c;
- // Strip the value trailing spaces.
- //
- if (r.size () != n)
- r.resize (n);
+ // Parse the value lines until the comment separator is encountered or
+ // the end of the value is reached. Add these lines to the resulting
+ // value, unescaping them if required.
+ //
+ // Note that we only need to unescape lines which have the '\+;' form.
+ //
+ auto i (v.begin ());
+ auto e (v.end ());
- // Find beginning of a comment (i).
- //
- if (i != e)
- {
- // Skip spaces.
+ while (i != e)
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then this is either the comment separator or
+ // an escape sequence.
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ {
+ // If ';' is the first (and thus the only) character on the line,
+ // then this is the comment separator and we bail out from this
+ // loop. Note that in this case we need to trim the trailing newline
+ // (but only one) from the resulting value since it is considered as
+ // a part of the separator.
+ //
+ if (nb == i)
+ {
+ if (!r.empty ())
+ {
+ assert (r.back () == '\n');
+ r.pop_back ();
+ }
+
+ next ();
+ break;
+ }
+ //
+ // Otherwise, this is an escape sequence, so unescape it. For that
+ // just take the rightmost half of the string:
+ //
+ // \; -> ;
+ // \\; -> \;
+ // \\\; -> \;
+ // \\\\; -> \\;
+ // \\\\\; -> \\;
+ //
+ else
+ i += (le - i) / 2;
+ }
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ next ();
+ }
+
+ // If we haven't reached the end of the value then it means we've
+ // encountered the comment separator. In this case save the remaining
+ // value part as a comment.
//
- for (++i; i != e && space (*i); ++i);
- }
+ if (i != e)
+ c = string (i, e);
- return make_pair (move (r), string (i, e));
+ return make_pair (move (r), move (c));
+ }
}
void manifest_parser::
@@ -251,7 +317,8 @@ namespace butl
string& v (r.value);
string::size_type n (0); // Size of last non-space character (simple mode).
- // Detect the multi-line mode introductor.
+ // Detect the old-fashioned multi-line mode introducer (like in
+ // 'foo:\<newline>').
//
bool ml (false);
if (c == '\\')
@@ -266,11 +333,46 @@ namespace butl
ml = true;
}
else if (eos (p))
+ {
+ c = p; // Set to EOF.
ml = true;
+ }
else
unget (c);
}
+ // Detect the new-fashioned multi-line mode introducer (like in
+ // 'foo:<newline>\<newline>').
+ //
+ if (!ml && c == '\n')
+ {
+ get ();
+ xchar p1 (peek ());
+
+ if (p1 == '\\')
+ {
+ get ();
+ xchar p2 (peek ());
+
+ if (p2 == '\n')
+ {
+ get (); // Newline is not part of the value so skip it.
+ c = peek ();
+ ml = true;
+ }
+ else if (eos (p2))
+ {
+ c = p2; // Set to EOF.
+ ml = true;
+ }
+ else
+ unget (p1); // Unget '\\'. Note: '\n' will be ungot below.
+ }
+
+ if (!ml)
+ unget (c); // Unget '\n'.
+ }
+
// Multi-line value starts from the line that follows the name.
//
if (ml)
@@ -281,7 +383,7 @@ namespace butl
// The nl flag signals that the preceding character was a "special
// newline", that is, a newline that was part of the milti-line mode
- // introductor or an escape sequence.
+ // introducer or an escape sequence.
//
for (bool nl (ml); !eos (c); c = peek ())
{
@@ -299,7 +401,7 @@ namespace butl
//
// The first block handles the special sequence that starts with
// a special newline. In multi-line mode, this is an "immediate
- // termination" where we "use" the newline from the introductor.
+ // termination" where we "use" the newline from the introducer.
// Note also that in the simple mode the special sequence can
// only start with a special (i.e., escaped) newline.
//
@@ -472,11 +574,21 @@ namespace butl
static inline string
format (const string& n, uint64_t l, uint64_t c, const string& d)
{
- ostringstream os;
+ using std::to_string;
+
+ string r; // Result has the form [<n>:]<l>:<c>: error: <d>
if (!n.empty ())
- os << n << ':';
- os << l << ':' << c << ": error: " << d;
- return os.str ();
+ {
+ r += n;
+ r += ':';
+ }
+
+ r += to_string (l);
+ r += ':';
+ r += to_string (c);
+ r += ": error: ";
+ r += d;
+ return r;
}
manifest_parsing::
diff --git a/libbutl/manifest-parser.mxx b/libbutl/manifest-parser.hxx
index 77addff..601fb2d 100644
--- a/libbutl/manifest-parser.mxx
+++ b/libbutl/manifest-parser.hxx
@@ -1,13 +1,8 @@
-// file : libbutl/manifest-parser.mxx -*- C++ -*-
+// file : libbutl/manifest-parser.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <iosfwd>
@@ -15,30 +10,15 @@
#include <utility> // pair, move()
#include <stdexcept> // runtime_error
#include <functional>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utf8;
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/char-scanner.mxx>
-#include <libbutl/manifest-types.mxx>
-#endif
+
+#include <libbutl/utf8.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/char-scanner.hxx>
+#include <libbutl/manifest-types.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error
{
@@ -57,7 +37,7 @@ LIBBUTL_MODEXPORT namespace butl
};
class LIBBUTL_SYMEXPORT manifest_parser:
- protected char_scanner<utf8_validator>
+ protected char_scanner<utf8_validator, 2>
{
public:
// The filter, if specified, is called by next() prior to returning the
@@ -103,7 +83,7 @@ LIBBUTL_MODEXPORT namespace butl
split_comment (const std::string&);
private:
- using base = char_scanner<utf8_validator>;
+ using base = char_scanner<utf8_validator, 2>;
void
parse_next (manifest_name_value&);
diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx
index e38d5f4..1232e9c 100644
--- a/libbutl/manifest-rewriter.cxx
+++ b/libbutl/manifest-rewriter.cxx
@@ -1,41 +1,15 @@
// file : libbutl/manifest-rewriter.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-rewriter.mxx>
-#endif
+#include <libbutl/manifest-rewriter.hxx>
-#include <cassert>
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
+#include <cassert>
#include <cstdint> // uint64_t
#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_rewriter;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.manifest_types;
-#endif
-
-import butl.utility; // utf8_length()
-import butl.manifest_serializer;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/manifest-serializer.mxx>
-#endif
+
+#include <libbutl/utility.hxx> // utf8_length()
+#include <libbutl/manifest-serializer.hxx>
using namespace std;
@@ -64,7 +38,7 @@ namespace butl
// Temporary move the descriptor into the stream.
//
ifdstream is (move (fd));
- fdbuf& buf (static_cast<fdbuf&> (*is.rdbuf ()));
+ fdstreambuf& buf (static_cast<fdstreambuf&> (*is.rdbuf ()));
// Read suffix.
//
@@ -99,8 +73,6 @@ namespace butl
if (!nv.value.empty ())
{
- os << ' ';
-
manifest_serializer s (os, path_.string (), long_lines_);
// Note that the name can be surrounded with the ASCII whitespace
@@ -112,7 +84,7 @@ namespace butl
//
s.write_value (nv.value,
static_cast<size_t> (nv.colon_pos - nv.start_pos) -
- (nv.name.size () - utf8_length (nv.name)) + 2);
+ (nv.name.size () - utf8_length (nv.name)) + 1);
}
os << suffix;
@@ -144,15 +116,13 @@ namespace butl
if (!nv.value.empty ())
{
- os << ' ';
-
// Note that the name can be surrounded with the ASCII whitespace
// characters and the start_pos refers to the first character in the
// line.
//
s.write_value (nv.value,
static_cast<size_t> (nv.colon_pos - nv.start_pos) -
- (nv.name.size () - n) + 2);
+ (nv.name.size () - n) + 1);
}
os << suffix;
diff --git a/libbutl/manifest-rewriter.mxx b/libbutl/manifest-rewriter.hxx
index 907c990..02a533a 100644
--- a/libbutl/manifest-rewriter.mxx
+++ b/libbutl/manifest-rewriter.hxx
@@ -1,33 +1,15 @@
-// file : libbutl/manifest-rewriter.mxx -*- C++ -*-
+// file : libbutl/manifest-rewriter.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_rewriter;
-#ifdef __cpp_lib_modules_ts
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.manifest_types;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/manifest-types.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/manifest-types.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Rewrite a hand-written manifest file preserving formatting, comments,
// etc., of the unaffected parts. The general workflow is as follows:
diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx
index 6a26a15..26699e0 100644
--- a/libbutl/manifest-serializer.cxx
+++ b/libbutl/manifest-serializer.cxx
@@ -1,41 +1,13 @@
// file : libbutl/manifest-serializer.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-serializer.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <stdexcept>
+#include <libbutl/manifest-serializer.hxx>
#include <ostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_serializer;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.manifest_types;
-#endif
+#include <cassert>
-import butl.utf8;
-import butl.utility;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utf8.hxx>
+#include <libbutl/utility.hxx>
using namespace std;
@@ -95,10 +67,7 @@ namespace butl
os_ << ':';
if (!v.empty ())
- {
- os_ << ' ';
- write_value (v, l + 2);
- }
+ write_value (v, l + 1);
os_ << endl;
break;
@@ -132,22 +101,89 @@ namespace butl
merge_comment (const string& value, const string& comment)
{
string r;
- for (char c: value)
+
+ // Merge the value and comment differently depending on whether any of
+ // them is multi-line or not.
+ //
+ if (value.find ('\n') == string::npos && // Single-line.
+ comment.find ('\n') == string::npos)
{
- // Escape ';' character.
- //
- if (c == ';')
- r += '\\';
+ for (char c: value)
+ {
+ // Escape ';' and '\' characters.
+ //
+ if (c == ';' || c == '\\')
+ r += '\\';
- r += c;
- }
+ r += c;
+ }
- // Add the comment.
- //
- if (!comment.empty ())
+ // Add the comment.
+ //
+ if (!comment.empty ())
+ {
+ r += "; ";
+ r += comment;
+ }
+ }
+ else // Multi-line.
{
- r += "; ";
- r += comment;
+ // Parse the value lines and add them to the resulting value, escaping
+ // them if required.
+ //
+ // Note that we only need to escape lines which have the '\*;' form.
+ //
+ for (auto i (value.begin ()), e (value.end ()); i != e; )
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then we need to escape the line characters.
+ // Note that we only escape ';' if it is the only character on the
+ // line. Otherwise, we only escape backslashes doubling the number of
+ // them from the left:
+ //
+ // ; -> \;
+ // \; -> \\;
+ // \\; -> \\\\;
+ // \\\; -> \\\\\\;
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ r.append (nb == i ? 1 : nb - i, '\\');
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ i = (le != e ? le + 1 : e);
+ }
+
+ // Append the comment, if present.
+ //
+ if (!comment.empty ())
+ {
+ if (!r.empty ())
+ r += '\n';
+
+ r += ";\n";
+ r += comment;
+ }
}
return r;
@@ -301,6 +337,8 @@ namespace butl
void manifest_serializer::
write_value (const string& v, size_t cl)
{
+ assert (!v.empty ());
+
// Consider both \r and \n characters as line separators, and the
// \r\n characters sequence as a single line separator.
//
@@ -319,11 +357,17 @@ namespace butl
// readability, still allowing the user to easily copy the value which
// seems to be the main reason for using the flag.
//
- if (cl > 39 || nl () != string::npos ||
- v.front () == ' ' || v.front () == '\t' ||
- v.back () == ' ' || v.back () == '\t')
+ if (cl + 1 > 39 || // '+ 1' for the space after the colon.
+ nl () != string::npos ||
+ v.front () == ' ' ||
+ v.front () == '\t' ||
+ v.back () == ' ' ||
+ v.back () == '\t')
{
- os_ << "\\" << endl; // Multi-line mode introductor.
+ if (multiline_v2_)
+ os_ << endl;
+
+ os_ << "\\" << endl; // Multi-line mode introducer.
// Chunk the value into fragments separated by newlines.
//
@@ -346,7 +390,10 @@ namespace butl
os_ << endl << "\\"; // Multi-line mode terminator.
}
else
- write_value (v.c_str (), v.size (), cl);
+ {
+ os_ << ' ';
+ write_value (v.c_str (), v.size (), cl + 1);
+ }
}
// manifest_serialization
diff --git a/libbutl/manifest-serializer.mxx b/libbutl/manifest-serializer.hxx
index b73c255..2159901 100644
--- a/libbutl/manifest-serializer.mxx
+++ b/libbutl/manifest-serializer.hxx
@@ -1,37 +1,20 @@
-// file : libbutl/manifest-serializer.mxx -*- C++ -*-
+// file : libbutl/manifest-serializer.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <iosfwd>
#include <cstddef> // size_t
#include <stdexcept> // runtime_error
#include <functional>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.manifest_serializer;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.manifest_types;
-#else
-#include <libbutl/manifest-types.mxx>
-#endif
+#include <libbutl/manifest-types.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
class LIBBUTL_SYMEXPORT manifest_serialization: public std::runtime_error
{
@@ -62,14 +45,19 @@ LIBBUTL_MODEXPORT namespace butl
// Unless long_lines is true, break lines in values (including multi-line)
// so that their length does not exceed 78 codepoints (including '\n').
//
+ // Note that the multiline_v2 flag is temporary and should not be used
+ // except by the implementation for testing.
+ //
manifest_serializer (std::ostream& os,
const std::string& name,
bool long_lines = false,
- std::function<filter_function> filter = {})
+ std::function<filter_function> filter = {},
+ bool multiline_v2 = false) // If true, write_value() emits a newline before the backslash introducer.
: os_ (os),
name_ (name),
long_lines_ (long_lines),
- filter_ (std::move (filter))
+ filter_ (std::move (filter)),
+ multiline_v2_ (multiline_v2)
{
}
@@ -113,10 +101,12 @@ LIBBUTL_MODEXPORT namespace butl
size_t
write_name (const std::string&);
- // Write a value assuming the current line already has the specified
- // codepoint offset. If the resulting line length would be too large then
- // the multi-line representation will be used. It is assumed that the
- // name, followed by the colon, is already written.
+ // Write a non-empty value assuming the current line already has the
+ // specified codepoint offset. If the resulting line length would be too
+ // large then the multi-line representation will be used. For the
+ // single-line representation the space character is written before the
+ // value. It is assumed that the name, followed by the colon, is already
+ // written.
//
void
write_value (const std::string&, std::size_t offset);
@@ -138,6 +128,7 @@ LIBBUTL_MODEXPORT namespace butl
const std::string name_;
bool long_lines_;
const std::function<filter_function> filter_;
+ bool multiline_v2_;
};
// Serialize a manifest to a stream adding the leading format version pair
diff --git a/libbutl/manifest-types.mxx b/libbutl/manifest-types.hxx
index 93f6fc6..23318f0 100644
--- a/libbutl/manifest-types.mxx
+++ b/libbutl/manifest-types.hxx
@@ -1,30 +1,14 @@
-// file : libbutl/manifest-types.mxx -*- C++ -*-
+// file : libbutl/manifest-types.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#include <cstdint> // uint64_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_types;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#else
-#endif
+#include <cstdint> // uint64_t
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
class manifest_name_value
{
diff --git a/libbutl/mingw-condition_variable.hxx b/libbutl/mingw-condition_variable.hxx
new file mode 100644
index 0000000..965f533
--- /dev/null
+++ b/libbutl/mingw-condition_variable.hxx
@@ -0,0 +1,275 @@
+/**
+* std::condition_variable implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
+#define LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+# error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+# error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <condition_variable> // Use std::cv_status, if available.
+
+#include <cassert>
+#include <chrono>
+#include <system_error>
+
+#include <synchapi.h>
+
+#include <libbutl/mingw-mutex.hxx>
+#include <libbutl/mingw-shared_mutex.hxx>
+
+namespace mingw_stdthread
+{
+#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
+ enum class cv_status { no_timeout, timeout };
+#else
+ using std::cv_status;
+#endif
+
+ // Condition variable for mutex built directly on the native Win32
+ // CONDITION_VARIABLE (all waits go through SleepConditionVariableSRW()).
+ class condition_variable
+ {
+ static constexpr DWORD kInfinite = 0xffffffffl; // Timeout passed by the untimed wait().
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+ CONDITION_VARIABLE cvariable_ = CONDITION_VARIABLE_INIT;
+#pragma GCC diagnostic pop
+
+ friend class condition_variable_any; // Calls the private wait_unique()/wait_impl().
+
+ bool wait_unique (mutex * pmutex, DWORD time) // Sleep on cvariable_ with *pmutex in non-shared mode.
+ {
+ BOOL success = SleepConditionVariableSRW(native_handle(),
+ pmutex->native_handle(),
+ time,
+// CONDITION_VARIABLE_LOCKMODE_SHARED has a value not specified by
+// Microsoft's Dev Center, but is known to be (convertible to) a ULONG. To
+// ensure that the value passed to this function is not equal to Microsoft's
+// constant, we can either use a static_assert, or simply generate an
+// appropriate value.
+ !CONDITION_VARIABLE_LOCKMODE_SHARED);
+ return success; // Result of SleepConditionVariableSRW() converted to bool.
+ }
+ bool wait_impl (unique_lock<mutex> & lock, DWORD time) // Detach the mutex from lock, wait, then re-adopt it.
+ {
+ mutex * pmutex = lock.release();
+ bool success = wait_unique(pmutex, time);
+ lock = unique_lock<mutex>(*pmutex, adopt_lock);
+ return success;
+ }
+public:
+ using native_handle_type = PCONDITION_VARIABLE;
+ native_handle_type native_handle ()
+ {
+ return &cvariable_;
+ }
+
+ condition_variable () = default;
+ ~condition_variable () = default;
+
+ condition_variable (const condition_variable &) = delete;
+ condition_variable & operator= (const condition_variable &) = delete;
+
+ void notify_one () noexcept
+ {
+ WakeConditionVariable(&cvariable_);
+ }
+
+ void notify_all () noexcept
+ {
+ WakeAllConditionVariable(&cvariable_);
+ }
+
+ void wait (unique_lock<mutex> & lock)
+ {
+ wait_impl(lock, kInfinite);
+ }
+
+ template<class Predicate>
+ void wait (unique_lock<mutex> & lock, Predicate pred)
+ {
+ while (!pred()) // Re-check the predicate after every wakeup.
+ wait(lock);
+ }
+
+ template <class Rep, class Period>
+ cv_status wait_for(unique_lock<mutex>& lock,
+ const std::chrono::duration<Rep, Period>& rel_time)
+ {
+ using namespace std::chrono;
+ auto timeout = duration_cast<milliseconds>(rel_time).count();
+ DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1); // Negative -> 0; >= kInfinite -> kInfinite - 1.
+ bool result = wait_impl(lock, waittime) || (timeout >= kInfinite); // A clamped wait is never reported as a timeout.
+ return result ? cv_status::no_timeout : cv_status::timeout;
+ }
+
+ template <class Rep, class Period, class Predicate>
+ bool wait_for(unique_lock<mutex>& lock,
+ const std::chrono::duration<Rep, Period>& rel_time,
+ Predicate pred)
+ {
+#if __cplusplus >= 201703L
+ using steady_duration = typename std::chrono::steady_clock::duration;
+ return wait_until(lock,
+ std::chrono::steady_clock::now() +
+ std::chrono::ceil<steady_duration> (rel_time), // Convert rel_time to the clock's duration type.
+ std::move(pred));
+#else
+ return wait_until(lock,
+ std::chrono::steady_clock::now() + rel_time,
+ std::move(pred));
+#endif
+ }
+ template <class Clock, class Duration>
+ cv_status wait_until (unique_lock<mutex>& lock,
+ const std::chrono::time_point<Clock,Duration>& abs_time)
+ {
+ return wait_for(lock, abs_time - Clock::now()); // Turn the deadline into a relative wait.
+ }
+ template <class Clock, class Duration, class Predicate>
+ bool wait_until (unique_lock<mutex>& lock,
+ const std::chrono::time_point<Clock, Duration>& abs_time,
+ Predicate pred)
+ {
+ while (!pred())
+ {
+ if (wait_until(lock, abs_time) == cv_status::timeout)
+ {
+ return pred(); // Timed out: report the predicate's final state.
+ }
+ }
+ return true;
+ }
+ };
+
+ class condition_variable_any // Condition variable usable with any lock type L; wraps a condition_variable.
+ {
+ static constexpr DWORD kInfinite = 0xffffffffl; // Timeout passed by the untimed wait().
+
+ condition_variable internal_cv_ {}; // Performs all waiting and notification.
+ mutex internal_mutex_ {}; // Used only by the generic wait_impl() overload.
+
+ template<class L>
+ bool wait_impl (L & lock, DWORD time) // Generic lock: wait on internal_mutex_ while the caller's lock is released.
+ {
+ unique_lock<decltype(internal_mutex_)> internal_lock(internal_mutex_);
+ lock.unlock();
+ bool success = internal_cv_.wait_impl(internal_lock, time);
+ lock.lock(); // Reacquire the caller's lock before returning.
+ return success;
+ }
+ // If the lock happens to be called on a native Windows mutex, skip any
+ // extra contention.
+ inline bool wait_impl (unique_lock<mutex> & lock, DWORD time)
+ {
+ return internal_cv_.wait_impl(lock, time);
+ }
+ bool wait_impl (unique_lock<shared_mutex> & lock, DWORD time) // unique_lock on a shared_mutex: wait on that mutex via wait_unique().
+ {
+ shared_mutex * pmutex = lock.release();
+ bool success = internal_cv_.wait_unique(pmutex, time);
+ lock = unique_lock<shared_mutex>(*pmutex, adopt_lock);
+ return success;
+ }
+ bool wait_impl (shared_lock<shared_mutex> & lock, DWORD time) // shared_lock: wait in CONDITION_VARIABLE_LOCKMODE_SHARED mode.
+ {
+ shared_mutex * pmutex = lock.release();
+ BOOL success = SleepConditionVariableSRW(native_handle(),
+ pmutex->native_handle(), time,
+ CONDITION_VARIABLE_LOCKMODE_SHARED);
+ lock = shared_lock<shared_mutex>(*pmutex, adopt_lock);
+ return success;
+ }
+ public:
+ using native_handle_type = typename condition_variable::native_handle_type;
+
+ native_handle_type native_handle ()
+ {
+ return internal_cv_.native_handle();
+ }
+
+ void notify_one () noexcept
+ {
+ internal_cv_.notify_one();
+ }
+
+ void notify_all () noexcept
+ {
+ internal_cv_.notify_all();
+ }
+
+ condition_variable_any () = default;
+ ~condition_variable_any () = default;
+
+ template<class L>
+ void wait (L & lock)
+ {
+ wait_impl(lock, kInfinite);
+ }
+
+ template<class L, class Predicate>
+ void wait (L & lock, Predicate pred)
+ {
+ while (!pred()) // Re-check the predicate after every wakeup.
+ wait(lock);
+ }
+
+ template <class L, class Rep, class Period>
+ cv_status wait_for(L& lock, const std::chrono::duration<Rep,Period>& period)
+ {
+ using namespace std::chrono;
+ auto timeout = duration_cast<milliseconds>(period).count();
+ DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1); // Negative -> 0; >= kInfinite -> kInfinite - 1.
+ bool result = wait_impl(lock, waittime) || (timeout >= kInfinite); // A clamped wait is never reported as a timeout.
+ return result ? cv_status::no_timeout : cv_status::timeout;
+ }
+
+ template <class L, class Rep, class Period, class Predicate>
+ bool wait_for(L& lock, const std::chrono::duration<Rep, Period>& period,
+ Predicate pred)
+ {
+ return wait_until(lock, std::chrono::steady_clock::now() + period, // Deadline measured on steady_clock.
+ std::move(pred));
+ }
+ template <class L, class Clock, class Duration>
+ cv_status wait_until (L& lock,
+ const std::chrono::time_point<Clock,Duration>& abs_time)
+ {
+ return wait_for(lock, abs_time - Clock::now()); // Turn the deadline into a relative wait.
+ }
+ template <class L, class Clock, class Duration, class Predicate>
+ bool wait_until (L& lock,
+ const std::chrono::time_point<Clock, Duration>& abs_time,
+ Predicate pred)
+ {
+ while (!pred())
+ {
+ if (wait_until(lock, abs_time) == cv_status::timeout)
+ {
+ return pred(); // Timed out: report the predicate's final state.
+ }
+ }
+ return true;
+ }
+ };
+}
+
+#endif // LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
diff --git a/libbutl/mingw-invoke.hxx b/libbutl/mingw-invoke.hxx
new file mode 100644
index 0000000..65810e7
--- /dev/null
+++ b/libbutl/mingw-invoke.hxx
@@ -0,0 +1,109 @@
+/**
+* Lightweight std::invoke() implementation for C++11 and C++14
+*
+* Copyright (c) 2018-2019 by Nathaniel J. McClatchey, San Jose, CA, United States
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_INVOKE_HXX
+#define LIBBUTL_MINGW_INVOKE_HXX
+
+#include <type_traits> // For std::result_of, etc.
+#include <utility> // For std::forward
+#include <functional> // For std::reference_wrapper
+
+namespace mingw_stdthread
+{
+ namespace detail
+ {
+ // For compatibility, implement std::invoke for C++11 and C++14.
+ //
+ template<bool PMemFunc, bool PMemData> // Specialized below for <true, false> and <false, true>.
+ struct Invoker // Primary template: f is called directly as f(args...).
+ {
+ template<class F, class... Args>
+ inline static typename std::result_of<F(Args...)>::type invoke (F&& f, Args&&... args)
+ {
+ return std::forward<F>(f)(std::forward<Args>(args)...);
+ }
+ };
+ template<bool> // The Invoker specializations pass is_base_of<T, decay<T1>> here.
+ struct InvokerHelper;
+
+ template<>
+ struct InvokerHelper<false> // t1 is not a T (or derived) object: unwrap it to reach the object.
+ {
+ template<class T1>
+ inline static auto get (T1&& t1) -> decltype(*std::forward<T1>(t1))
+ {
+ return *std::forward<T1>(t1); // Dereference through operator*.
+ }
+
+ template<class T1>
+ inline static auto get (const std::reference_wrapper<T1>& t1) -> decltype(t1.get())
+ {
+ return t1.get(); // Unwrap std::reference_wrapper.
+ }
+ };
+
+ template<>
+ struct InvokerHelper<true> // t1 is itself a T (or derived) object: pass it through unchanged.
+ {
+ template<class T1>
+ inline static auto get (T1&& t1) -> decltype(std::forward<T1>(t1))
+ {
+ return std::forward<T1>(t1);
+ }
+ };
+
+ template<>
+ struct Invoker<true, false> // f is a pointer to member function: calls (obj.*f)(args...).
+ {
+ template<class T, class F, class T1, class... Args>
+ inline static auto invoke (F T::* f, T1&& t1, Args&&... args) -> \
+ decltype((InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...))
+ {
+ return (InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...); // obj is t1 resolved by InvokerHelper.
+ }
+ };
+
+ template<>
+ struct Invoker<false, true> // f is a pointer to data member: returns obj.*f.
+ {
+ template<class T, class F, class T1, class... Args>
+ inline static auto invoke (F T::* f, T1&& t1, Args&&... args) -> \
+ decltype(InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f)
+ {
+ return InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f; // args is not used here.
+ }
+ };
+
+ template<class F, class... Args>
+ struct InvokeResult // Picks the Invoker specialization from F's type and forwards the call to it.
+ {
+ typedef Invoker<std::is_member_function_pointer<typename std::remove_reference<F>::type>::value,
+ std::is_member_object_pointer<typename std::remove_reference<F>::type>::value &&
+ (sizeof...(Args) == 1)> invoker; // The data-member case requires exactly one argument (the object).
+ inline static auto invoke (F&& f, Args&&... args) -> decltype(invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...))
+ {
+ return invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...);
+ }
+ };
+
+ template<class F, class...Args> // Entry point: dispatches the call through InvokeResult<F, Args...>.
+ auto invoke (F&& f, Args&&... args) -> decltype(InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...))
+ {
+ return InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...);
+ }
+ }
+}
+
+#endif // LIBBUTL_MINGW_INVOKE_HXX
diff --git a/libbutl/mingw-mutex.hxx b/libbutl/mingw-mutex.hxx
new file mode 100644
index 0000000..d297786
--- /dev/null
+++ b/libbutl/mingw-mutex.hxx
@@ -0,0 +1,210 @@
+/**
+* std::mutex et al implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_MUTEX_HXX
+#define LIBBUTL_MINGW_MUTEX_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+# error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+# error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <chrono>
+#include <system_error>
+#include <atomic>
+
+#include <mutex>
+
+#include <synchapi.h> // For InitializeCriticalSection, etc.
+#include <errhandlingapi.h> // For GetLastError
+#include <handleapi.h>
+
+namespace mingw_stdthread
+{
+ // To make this namespace equivalent to the thread-related subset of std,
+ // pull in the classes and class templates supplied by std but not by this
+ // implementation.
+ //
+ using std::lock_guard;
+ using std::unique_lock;
+ using std::adopt_lock_t;
+ using std::defer_lock_t;
+ using std::try_to_lock_t;
+ using std::adopt_lock;
+ using std::defer_lock;
+ using std::try_to_lock;
+
+ // Mutex implemented in terms of a Win32 CRITICAL_SECTION that is
+ // initialized on construction and deleted on destruction.
+ //
+ class recursive_mutex
+ {
+ public:
+   using native_handle_type = LPCRITICAL_SECTION;
+
+   recursive_mutex () noexcept: mHandle ()
+   {
+     InitializeCriticalSection (&mHandle);
+   }
+
+   ~recursive_mutex () noexcept
+   {
+     DeleteCriticalSection (&mHandle);
+   }
+
+   // Non-copyable.
+   //
+   recursive_mutex (const recursive_mutex&) = delete;
+   recursive_mutex& operator= (const recursive_mutex&) = delete;
+
+   void lock ()
+   {
+     EnterCriticalSection (&mHandle);
+   }
+
+   // Return true if the critical section has been entered.
+   //
+   bool try_lock ()
+   {
+     const BOOL entered = TryEnterCriticalSection (&mHandle);
+     return entered != 0;
+   }
+
+   void unlock ()
+   {
+     LeaveCriticalSection (&mHandle);
+   }
+
+   native_handle_type native_handle ()
+   {
+     return &mHandle;
+   }
+
+ private:
+   CRITICAL_SECTION mHandle;
+ };
+
+ // Slim Reader-Writer (SRW)-based implementation that requires Windows 7.
+ //
+ class mutex
+ {
+ public:
+   using native_handle_type = PSRWLOCK;
+
+   // SRWLOCK_INIT expands to a zero-initializer which triggers the warning
+   // suppressed here.
+   //
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+   constexpr mutex () noexcept: mHandle (SRWLOCK_INIT) {}
+#pragma GCC diagnostic pop
+
+   // Non-copyable.
+   //
+   mutex (const mutex&) = delete;
+   mutex& operator= (const mutex&) = delete;
+
+   native_handle_type native_handle ()
+   {
+     return &mHandle;
+   }
+
+   void lock ()
+   {
+     AcquireSRWLockExclusive (&mHandle);
+   }
+
+   // TryAcquireSRW functions are a Windows 7 feature.
+   //
+   bool try_lock ()
+   {
+     return TryAcquireSRWLockExclusive (&mHandle) != 0;
+   }
+
+   void unlock ()
+   {
+     ReleaseSRWLockExclusive (&mHandle);
+   }
+
+ protected:
+   // Accessible to derived classes which add the shared locking mode.
+   //
+   SRWLOCK mHandle;
+ };
+
+ // Recursive mutex with timed acquisition implemented in terms of a Win32
+ // mutex object (CreateMutex/WaitForSingleObject/ReleaseMutex).
+ //
+ class recursive_timed_mutex
+ {
+   static constexpr DWORD kWaitAbandoned = 0x00000080l;
+   static constexpr DWORD kWaitObject0 = 0x00000000l;
+   static constexpr DWORD kInfinite = 0xffffffffl;
+
+   // Wait for the mutex for up to ms milliseconds. Return true if ownership
+   // has been obtained (including the case where the previous owner thread
+   // has terminated without releasing the mutex).
+   //
+   inline bool try_lock_internal (DWORD ms) noexcept
+   {
+     DWORD ret = WaitForSingleObject(mHandle, ms);
+
+     /*
+       @@ TODO
+#ifndef NDEBUG
+     if (ret == kWaitAbandoned)
+     {
+       using namespace std;
+       fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
+       terminate();
+     }
+#endif
+     */
+
+     return (ret == kWaitObject0) || (ret == kWaitAbandoned);
+   }
+ protected:
+   HANDLE mHandle;
+ public:
+   typedef HANDLE native_handle_type;
+   native_handle_type native_handle() const {return mHandle;}
+   recursive_timed_mutex(const recursive_timed_mutex&) = delete;
+   recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;
+
+   // Create the underlying (unnamed, initially unowned) mutex object. Throw
+   // std::system_error if the object cannot be created rather than leaving
+   // a null handle behind that would make every subsequent operation fail.
+   //
+   recursive_timed_mutex(): mHandle(CreateMutex(NULL, FALSE, NULL))
+   {
+     if (mHandle == nullptr)
+       throw std::system_error(GetLastError(), std::system_category());
+   }
+   ~recursive_timed_mutex()
+   {
+     CloseHandle(mHandle);
+   }
+
+   // Block until ownership is obtained. Throw std::system_error if the wait
+   // fails.
+   //
+   void lock()
+   {
+     DWORD ret = WaitForSingleObject(mHandle, kInfinite);
+
+     /*
+       @@ TODO
+
+// If (ret == WAIT_ABANDONED), then the thread that held ownership was
+// terminated. Behavior is undefined, but Windows will pass ownership to this
+// thread.
+#ifndef NDEBUG
+     if (ret == kWaitAbandoned)
+     {
+       using namespace std;
+       fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
+       terminate();
+     }
+#endif
+     */
+
+     if ((ret != kWaitObject0) && (ret != kWaitAbandoned))
+     {
+       throw std::system_error(GetLastError(), std::system_category());
+     }
+   }
+
+   // Release one level of ownership. Throw std::system_error on failure
+   // (for example, if the calling thread does not own the mutex).
+   //
+   void unlock()
+   {
+     if (!ReleaseMutex(mHandle))
+       throw std::system_error(GetLastError(), std::system_category());
+   }
+
+   // Attempt to obtain ownership without blocking.
+   //
+   bool try_lock()
+   {
+     return try_lock_internal(0);
+   }
+
+   // Attempt to obtain ownership, blocking for up to dur.
+   //
+   // The duration is rounded up to whole milliseconds so that we never wait
+   // for less than requested. A zero or negative duration still results in
+   // a single non-blocking attempt (as if by try_lock()) instead of failing
+   // without trying.
+   //
+   template <class Rep, class Period>
+   bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
+   {
+     using namespace std::chrono;
+
+     milliseconds ms = duration_cast<milliseconds>(dur);
+     if (ms < dur) // duration_cast truncates towards zero.
+       ++ms;
+
+     auto timeout = ms.count();
+
+     if (timeout <= 0)
+       return try_lock_internal(0);
+
+     // Wait in steps since a single wait is limited to the DWORD range (and
+     // the maximum value means "wait forever").
+     //
+     while (timeout > 0)
+     {
+       constexpr auto kMaxStep = static_cast<decltype(timeout)>(kInfinite-1);
+       auto step = (timeout < kMaxStep) ? timeout : kMaxStep;
+       if (try_lock_internal(static_cast<DWORD>(step)))
+         return true;
+       timeout -= step;
+     }
+     return false;
+   }
+
+   // Attempt to obtain ownership, blocking until timeout_time (as measured
+   // by Clock) at the latest.
+   //
+   template <class Clock, class Duration>
+   bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
+   {
+     return try_lock_for(timeout_time - Clock::now());
+   }
+ };
+
+ typedef recursive_timed_mutex timed_mutex;
+}
+
+#endif // LIBBUTL_MINGW_MUTEX_HXX
diff --git a/libbutl/mingw-shared_mutex.hxx b/libbutl/mingw-shared_mutex.hxx
new file mode 100644
index 0000000..aacbaf8
--- /dev/null
+++ b/libbutl/mingw-shared_mutex.hxx
@@ -0,0 +1,124 @@
+/**
+* std::shared_mutex et al implementation for MinGW-w64
+*
+* Copyright (c) 2017 by Nathaniel J. McClatchey, Athens OH, United States
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_SHARED_MUTEX_HXX
+#define LIBBUTL_MINGW_SHARED_MUTEX_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+# error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+# error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <cassert>
+// For descriptive errors.
+#include <system_error>
+// For timing in shared_timed_mutex.
+#include <chrono>
+#include <limits>
+
+#include <shared_mutex> // shared_lock
+
+// For defer_lock_t, adopt_lock_t, and try_to_lock_t
+#include <libbutl/mingw-mutex.hxx>
+
+#include <synchapi.h>
+
+namespace mingw_stdthread
+{
+ using std::shared_lock;
+
+ class condition_variable_any;
+
+ // Slim Reader-Writer (SRW)-based implementation that requires Windows 7.
+ //
+ class shared_mutex : mutex
+ {
+   friend class condition_variable_any;
+
+ public:
+   // Exclusive mode: re-export the (privately inherited) mutex interface.
+   //
+   using mutex::native_handle_type;
+   using mutex::native_handle;
+   using mutex::lock;
+   using mutex::try_lock;
+   using mutex::unlock;
+
+   // Shared mode.
+   //
+   void lock_shared ()
+   {
+     AcquireSRWLockShared (&mHandle);
+   }
+
+   bool try_lock_shared ()
+   {
+     const auto acquired = TryAcquireSRWLockShared (&mHandle);
+     return acquired != 0;
+   }
+
+   void unlock_shared ()
+   {
+     ReleaseSRWLockShared (&mHandle);
+   }
+ };
+
+ // shared_mutex with timed acquisition.
+ //
+ // The timed functions are implemented by polling the corresponding try_*()
+ // function until it succeeds or the deadline passes. The lock is always
+ // attempted at least once, even if the deadline is already in the past.
+ //
+ class shared_timed_mutex : shared_mutex
+ {
+   typedef shared_mutex Base;
+ public:
+   using Base::lock;
+   using Base::try_lock;
+   using Base::unlock;
+   using Base::lock_shared;
+   using Base::try_lock_shared;
+   using Base::unlock_shared;
+
+   // Try to obtain exclusive ownership until cutoff. The deadline is checked
+   // against the clock it was expressed in (Clock) so that time points of
+   // any clock, not only steady_clock, can be used.
+   //
+   template< class Clock, class Duration >
+   bool try_lock_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
+   {
+     do
+     {
+       if (try_lock())
+         return true;
+     }
+     while (Clock::now() < cutoff);
+     return false;
+   }
+
+   // Try to obtain exclusive ownership for up to rel_time (measured with
+   // steady_clock).
+   //
+   template< class Rep, class Period >
+   bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
+   {
+     return try_lock_until(std::chrono::steady_clock::now() + rel_time);
+   }
+
+   // Try to obtain shared ownership until cutoff (see try_lock_until() for
+   // the clock semantics).
+   //
+   template< class Clock, class Duration >
+   bool try_lock_shared_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
+   {
+     do
+     {
+       if (try_lock_shared())
+         return true;
+     }
+     while (Clock::now() < cutoff);
+     return false;
+   }
+
+   // Try to obtain shared ownership for up to rel_time (measured with
+   // steady_clock).
+   //
+   template< class Rep, class Period >
+   bool try_lock_shared_for (const std::chrono::duration<Rep,Period>& rel_time)
+   {
+     return try_lock_shared_until(std::chrono::steady_clock::now() + rel_time);
+   }
+ };
+}
+
+#endif // LIBBUTL_MINGW_SHARED_MUTEX_HXX
diff --git a/libbutl/mingw-thread.hxx b/libbutl/mingw-thread.hxx
new file mode 100644
index 0000000..66f98aa
--- /dev/null
+++ b/libbutl/mingw-thread.hxx
@@ -0,0 +1,330 @@
+/**
+* std::thread implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_THREAD_HXX
+#define LIBBUTL_MINGW_THREAD_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+# error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+# error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <cstddef> // For std::size_t
+#include <cerrno> // Detect error type.
+#include <exception> // For std::terminate
+#include <system_error> // For std::system_error
+#include <functional> // For std::hash, std::invoke (C++17)
+#include <tuple> // For std::tuple
+#include <chrono> // For sleep timing.
+#include <memory> // For std::unique_ptr
+#include <iosfwd> // Stream output for thread ids.
+#include <utility> // For std::swap, std::forward
+
+#include <synchapi.h> // For WaitForSingleObject
+#include <handleapi.h> // For CloseHandle, etc.
+#include <sysinfoapi.h> // For GetNativeSystemInfo
+#include <processthreadsapi.h> // For GetCurrentThreadId
+
+#include <process.h> // For _beginthreadex
+
+#if __cplusplus < 201703L
+# include <libbutl/mingw-invoke.hxx>
+#endif
+
+namespace mingw_stdthread
+{
+ // @@ I think we can get rid of this in C++14.
+ //
+ namespace detail
+ {
+ // Compile-time sequence of indexes.
+ //
+ template<std::size_t...>
+ struct IntSeq {};
+
+ // Generate IntSeq<0, 1, ..., N-1, S...> as the nested type.
+ //
+ // The leading 0..N-1 part is produced with the C++14
+ // std::make_index_sequence instead of a chain of N recursive
+ // instantiations; the resulting type is exactly the same.
+ //
+ template<std::size_t N, std::size_t... S>
+ struct GenIntSeq
+ {
+ private:
+   // Declaration only: used in an unevaluated context to splice the
+   // generated indexes in front of S.
+   //
+   template<std::size_t... I>
+   static IntSeq<I..., S...> prepend (std::index_sequence<I...>);
+
+ public:
+   typedef decltype(prepend (std::make_index_sequence<N> ())) type;
+ };
+
+// The index sequence is received as a template argument (and matched with a
+// partial specialization) so that unpacking the arguments does not depend on
+// compiler optimization.
+ template<class Func, class T, typename... Args>
+ class ThreadFuncCall;
+
+// This cannot be a local class of the function that uses it since local
+// classes are not allowed to have member templates.
+ template<class Func, std::size_t... S, typename... Args>
+ class ThreadFuncCall<Func, detail::IntSeq<S...>, Args...>
+ {
+   static_assert(sizeof...(S) == sizeof...(Args), "Args must match.");
+
+   // Both the callable and the arguments are stored decayed (by value).
+   //
+   using Callable = typename std::decay<Func>::type;
+   using Tuple = std::tuple<typename std::decay<Args>::type...>;
+
+   Callable mFunc;
+   Tuple mArgs;
+
+ public:
+   ThreadFuncCall(Func&& aFunc, Args&&... aArgs)
+     : mFunc(std::forward<Func>(aFunc)),
+       mArgs(std::forward<Args>(aArgs)...)
+   {
+   }
+
+   // Invoke the stored callable with the stored arguments, all of them
+   // passed as rvalues (so this is a one-shot operation).
+   //
+   void callFunc()
+   {
+#if __cplusplus < 201703L
+     detail::invoke(std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
+#else
+     std::invoke (std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
+#endif
+   }
+ };
+
+ // Allow construction of threads without exposing implementation.
+ class ThreadIdTool;
+ }
+
+ // std::thread substitute implemented in terms of _beginthreadex() and
+ // Win32 thread handles.
+ //
+ // The object owns the thread handle (mHandle) and caches the thread id
+ // (mThreadId). A null handle means "not joinable".
+ //
+ class thread
+ {
+ public:
+ // Thread identifier: a thin wrapper around the Win32 thread id with 0
+ // used for "no thread". Only thread, std::hash<id>, and
+ // detail::ThreadIdTool can create one from a raw id.
+ //
+ class id
+ {
+ DWORD mId = 0;
+ friend class thread;
+ friend class std::hash<id>;
+ friend class detail::ThreadIdTool;
+ explicit id(DWORD aId) noexcept : mId(aId){}
+ public:
+ id () noexcept = default;
+ // Comparisons are performed on the raw id values.
+ //
+ friend bool operator==(id x, id y) noexcept {return x.mId == y.mId; }
+ friend bool operator!=(id x, id y) noexcept {return x.mId != y.mId; }
+ friend bool operator< (id x, id y) noexcept {return x.mId < y.mId; }
+ friend bool operator<=(id x, id y) noexcept {return x.mId <= y.mId; }
+ friend bool operator> (id x, id y) noexcept {return x.mId > y.mId; }
+ friend bool operator>=(id x, id y) noexcept {return x.mId >= y.mId; }
+
+ // Print the raw id or a placeholder string for the "no thread" id.
+ //
+ template<class _CharT, class _Traits>
+ friend std::basic_ostream<_CharT, _Traits>&
+ operator<<(std::basic_ostream<_CharT, _Traits>& __out, id __id)
+ {
+ if (__id.mId == 0)
+ {
+ return __out << "<invalid std::thread::id>";
+ }
+ else
+ {
+ return __out << __id.mId;
+ }
+ }
+ };
+ private:
+ static constexpr HANDLE kInvalidHandle = nullptr;
+ static constexpr DWORD kInfinite = 0xffffffffl;
+ HANDLE mHandle;
+ id mThreadId;
+
+ // Thread entry point: take ownership of the heap-allocated call object
+ // passed by the constructor, run it, and free it on the way out.
+ //
+ template <class Call>
+ static unsigned __stdcall threadfunc(void* arg)
+ {
+ std::unique_ptr<Call> call(static_cast<Call*>(arg));
+ call->callFunc();
+ return 0;
+ }
+
+ // Return the processor count as reported by GetNativeSystemInfo().
+ //
+ static unsigned int _hardware_concurrency_helper() noexcept
+ {
+ SYSTEM_INFO sysinfo;
+ ::GetNativeSystemInfo(&sysinfo);
+ return sysinfo.dwNumberOfProcessors;
+ }
+ public:
+ typedef HANDLE native_handle_type;
+ id get_id() const noexcept {return mThreadId;}
+ native_handle_type native_handle() const {return mHandle;}
+ // Create a non-joinable object that does not represent any thread.
+ //
+ thread(): mHandle(kInvalidHandle), mThreadId(){}
+
+ // Take over the thread from other, leaving it non-joinable.
+ //
+ thread(thread&& other) noexcept
+ :mHandle(other.mHandle), mThreadId(other.mThreadId)
+ {
+ other.mHandle = kInvalidHandle;
+ other.mThreadId = id{};
+ }
+
+ thread(const thread &other) = delete;
+
+ // Start a new thread that invokes func with args.
+ //
+ // The callable and the arguments are packaged into a heap-allocated call
+ // object whose ownership passes to the new thread (see threadfunc()). If
+ // the thread cannot be started, the call object is freed here and
+ // std::system_error with the errno value is thrown.
+ //
+ template<class Func, typename... Args>
+ explicit thread(Func&& func, Args&&... args) : mHandle(), mThreadId()
+ {
+ // Instead of INVALID_HANDLE_VALUE, _beginthreadex returns 0.
+
+ using ArgSequence = typename detail::GenIntSeq<sizeof...(Args)>::type;
+ using Call = detail::ThreadFuncCall<Func, ArgSequence, Args...>;
+ auto call = new Call(std::forward<Func>(func), std::forward<Args>(args)...);
+ unsigned int id_receiver;
+ auto int_handle = _beginthreadex(NULL, 0, threadfunc<Call>,
+ static_cast<LPVOID>(call), 0, &id_receiver);
+ if (int_handle == 0)
+ {
+ mHandle = kInvalidHandle;
+ // Save errno before the delete below gets a chance to change it.
+ int errnum = errno;
+ delete call;
+ // Note: Should only throw EINVAL, EAGAIN, EACCES
+ throw std::system_error(errnum, std::generic_category());
+ } else {
+ mThreadId.mId = id_receiver;
+ mHandle = reinterpret_cast<HANDLE>(int_handle);
+ }
+ }
+
+ bool joinable() const {return mHandle != kInvalidHandle;}
+
+ // Wait for the thread to finish, then close the handle and reset the
+ // object to the non-joinable state.
+ //
+ // Throw std::system_error with resource_deadlock_would_occur if called
+ // from the thread itself and with no_such_process if the object is not
+ // joinable.
+ //
+ // NOTE(review): joinable() tests the same condition as the preceding
+ // handle check so the invalid_argument branch appears to be unreachable.
+ //
+ // Note: Due to lack of synchronization, this function has a race
+ // condition if called concurrently, which leads to undefined
+ // behavior. The same applies to all other member functions of this
+ // class, but this one is mentioned explicitly.
+ void join()
+ {
+ using namespace std;
+ if (get_id() == id(GetCurrentThreadId()))
+ throw system_error(make_error_code(errc::resource_deadlock_would_occur));
+ if (mHandle == kInvalidHandle)
+ throw system_error(make_error_code(errc::no_such_process));
+ if (!joinable())
+ throw system_error(make_error_code(errc::invalid_argument));
+ WaitForSingleObject(mHandle, kInfinite);
+ CloseHandle(mHandle);
+ mHandle = kInvalidHandle;
+ mThreadId = id{};
+ }
+
+ // Destroying a thread that is still joinable terminates the program.
+ //
+ ~thread()
+ {
+ if (joinable())
+ {
+ // @@ TODO
+ /*
+#ifndef NDEBUG
+ std::printf("Error: Must join() or detach() a thread before \
+destroying it.\n");
+#endif
+ */
+ std::terminate();
+ }
+ }
+ thread& operator=(const thread&) = delete;
+ // Take over the thread from other. Assigning to a thread that is still
+ // joinable terminates the program. Since this object is not joinable at
+ // the point of the swap, other ends up non-joinable.
+ //
+ thread& operator=(thread&& other) noexcept
+ {
+ if (joinable())
+ {
+ // @@ TODO
+ /*
+#ifndef NDEBUG
+ std::printf("Error: Must join() or detach() a thread before \
+moving another thread to it.\n");
+#endif
+ */
+ std::terminate();
+ }
+ swap(other);
+ return *this;
+ }
+ // Exchange the handles and the ids of the two objects.
+ //
+ void swap(thread& other) noexcept
+ {
+ std::swap(mHandle, other.mHandle);
+ std::swap(mThreadId.mId, other.mThreadId.mId);
+ }
+
+ // Return the number of processors. Note that with the static specifier
+ // commented out the value is queried anew on every call.
+ //
+ static unsigned int hardware_concurrency() noexcept
+ {
+ // @@ TODO: this seems like a bad idea.
+ //
+ /*static*/ unsigned int cached = _hardware_concurrency_helper();
+ return cached;
+ }
+
+ // Give up the thread: close the handle without waiting and reset the
+ // object to the non-joinable state. Throw std::system_error with
+ // invalid_argument if the object is not joinable.
+ //
+ void detach()
+ {
+ if (!joinable())
+ {
+ using namespace std;
+ throw system_error(make_error_code(errc::invalid_argument));
+ }
+ if (mHandle != kInvalidHandle)
+ {
+ CloseHandle(mHandle);
+ mHandle = kInvalidHandle;
+ }
+ mThreadId = id{};
+ }
+ };
+
+ namespace detail
+ {
+ // Friend of thread::id that exposes its private constructor to the rest
+ // of the implementation.
+ //
+ class ThreadIdTool
+ {
+ public:
+   // Wrap a raw Win32 thread id.
+   //
+   static thread::id make_id (DWORD base_id) noexcept
+   {
+     thread::id wrapped (base_id);
+     return wrapped;
+   }
+ };
+ }
+
+ // Operations on the calling thread.
+ //
+ namespace this_thread
+ {
+   // Return the id of the calling thread.
+   //
+   inline thread::id get_id() noexcept
+   {
+     return detail::ThreadIdTool::make_id(GetCurrentThreadId());
+   }
+
+   // Give up the remainder of the time slice.
+   //
+   inline void yield() noexcept {Sleep(0);}
+
+   // Block the calling thread for at least sleep_duration. Zero and
+   // negative durations return immediately.
+   //
+   // The duration is rounded up to whole milliseconds (the resolution of
+   // Sleep()) so that sub-millisecond requests are not truncated to no
+   // sleep at all.
+   //
+   template< class Rep, class Period >
+   void sleep_for( const std::chrono::duration<Rep,Period>& sleep_duration)
+   {
+     static constexpr DWORD kInfinite = 0xffffffffl;
+     using namespace std::chrono;
+     using rep = milliseconds::rep;
+
+     milliseconds whole = duration_cast<milliseconds>(sleep_duration);
+     if (whole < sleep_duration) // duration_cast truncates towards zero.
+       ++whole;
+
+     rep ms = whole.count();
+
+     // Sleep in steps since a single call is limited to the DWORD range
+     // (and the maximum value means "sleep forever").
+     //
+     while (ms > 0)
+     {
+       constexpr rep kMaxRep = static_cast<rep>(kInfinite - 1);
+       auto sleepTime = (ms < kMaxRep) ? ms : kMaxRep;
+       Sleep(static_cast<DWORD>(sleepTime));
+       ms -= sleepTime;
+     }
+   }
+
+   // Block the calling thread until sleep_time (as measured by Clock).
+   //
+   template <class Clock, class Duration>
+   void sleep_until(const std::chrono::time_point<Clock,Duration>& sleep_time)
+   {
+     sleep_for(sleep_time-Clock::now());
+   }
+ }
+}
+
+namespace std
+{
+ // Specialize hash for this implementation's thread::id, even if the
+ // std::thread::id already has a hash.
+ template<>
+ struct hash<mingw_stdthread::thread::id>
+ {
+   using argument_type = mingw_stdthread::thread::id;
+   using result_type = size_t;
+
+   // The raw thread id is used as the hash value.
+   //
+   result_type operator() (const argument_type & i) const noexcept
+   {
+     return static_cast<result_type> (i.mId);
+   }
+ };
+}
+
+#endif // LIBBUTL_MINGW_THREAD_HXX
diff --git a/libbutl/move-only-function.hxx b/libbutl/move-only-function.hxx
new file mode 100644
index 0000000..e5cfe51
--- /dev/null
+++ b/libbutl/move-only-function.hxx
@@ -0,0 +1,177 @@
+// file : libbutl/move-only-function.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <utility>
+#include <functional>
+#include <type_traits>
+
+namespace butl
+{
+ // This is a move-only std::function version which is implemented in terms
+ // of std::function. It is similar to C++23 std::move_only_function but
+ // still provides target() (but not target_type()).
+ //
+ template <typename>
+ class move_only_function_ex;
+
+ // Alias butl::move_only_function to std::move_only_function if available
+ // and to move_only_function_ex otherwise.
+ //
+#ifdef __cpp_lib_move_only_function
+ using std::move_only_function;
+#else
+ template <typename F>
+ using move_only_function = move_only_function_ex<F>;
+#endif
+
+ template <typename R, typename... A>
+ class move_only_function_ex<R (A...)>
+ {
+ public:
+   using result_type = R;
+
+   // Construct an empty function.
+   //
+   move_only_function_ex () = default;
+   move_only_function_ex (std::nullptr_t) noexcept {}
+
+   // Move-only.
+   //
+   move_only_function_ex (move_only_function_ex&&) = default;
+   move_only_function_ex& operator= (move_only_function_ex&&) = default;
+
+   move_only_function_ex (const move_only_function_ex&) = delete;
+   move_only_function_ex& operator= (const move_only_function_ex&) = delete;
+
+   // Construct from (assign) a callable. A null function pointer, null
+   // member function pointer, or an empty move_only_function_ex results in
+   // an empty function.
+   //
+   // Note: according to the spec we should also disable these if F is not
+   // callable, but that is not easy to do in C++14. Maybe we should do
+   // something for C++17 and later (without this the diagnostics is quite
+   // hairy).
+   //
+   template <typename F>
+   move_only_function_ex (F&& f, typename std::enable_if<!std::is_same<typename std::remove_reference<F>::type, move_only_function_ex>::value>::type* = nullptr)
+   {
+     if (null (f))
+       return;
+
+     f_ = wrapper<typename std::decay<F>::type> (std::forward<F> (f));
+   }
+
+   template <typename F>
+   typename std::enable_if<!std::is_same<typename std::remove_reference<F>::type, move_only_function_ex>::value, move_only_function_ex>::type&
+   operator= (F&& f)
+   {
+     // Construct the new value first, then swap it in.
+     //
+     move_only_function_ex tmp (std::forward<F> (f));
+     swap (tmp);
+     return *this;
+   }
+
+   move_only_function_ex&
+   operator= (std::nullptr_t) noexcept
+   {
+     f_ = nullptr;
+     return *this;
+   }
+
+   void swap (move_only_function_ex& f) noexcept
+   {
+     f_.swap (f.f_);
+   }
+
+   // Call the target forwarding the arguments.
+   //
+   R operator() (A... args) const
+   {
+     return f_ (std::forward<A> (args)...);
+   }
+
+   // Return true if there is a target.
+   //
+   explicit operator bool () const noexcept
+   {
+     return static_cast<bool> (f_);
+   }
+
+   // Return the pointer to the stored callable if it is of type T and
+   // nullptr otherwise.
+   //
+   template <typename T>
+   T* target() noexcept
+   {
+     wrapper<T>* w (f_.template target<wrapper<T>> ());
+
+     if (w == nullptr)
+       return nullptr;
+
+     return &w->f;
+   }
+
+   template <typename T>
+   const T* target() const noexcept
+   {
+     const wrapper<T>* w (f_.template target<wrapper<T>> ());
+
+     if (w == nullptr)
+       return nullptr;
+
+     return &w->f;
+   }
+
+ private:
+   // The std::function target: holds the actual callable and provides the
+   // copy operations required by std::function's interface (which are not
+   // expected to ever be called).
+   //
+   template <typename F>
+   struct wrapper
+   {
+     struct empty {};
+
+     // Keep the callable in a union so that its construction and
+     // destruction are performed explicitly by the functions below.
+     //
+     union
+     {
+       F f;
+       empty e;
+     };
+
+     explicit wrapper (const F& f_): f (f_) {}
+     explicit wrapper (F&& f_): f (std::move (f_)) {}
+
+     wrapper (wrapper&& w)
+       noexcept (std::is_nothrow_move_constructible<F>::value)
+       : f (std::move (w.f)) {}
+
+     wrapper& operator= (wrapper&&) = delete; // Shouldn't be needed.
+
+     // These shouldn't be called.
+     //
+     wrapper (const wrapper&) {}
+     wrapper& operator= (const wrapper&) {return *this;}
+
+     ~wrapper () {f.~F ();}
+
+     R operator() (A... args)
+     {
+       return f (std::forward<A> (args)...);
+     }
+
+     R operator() (A... args) const
+     {
+       return f (std::forward<A> (args)...);
+     }
+   };
+
+   // Return true if the callable is a null function pointer, a null member
+   // function pointer, or an empty move_only_function_ex. Anything else is
+   // never considered null.
+   //
+   template <typename F>
+   static bool null (const F&) {return false;}
+
+   template <typename R1, typename... A1>
+   static bool null (R1 (*p) (A1...)) {return p == nullptr;}
+
+   template <typename R1, typename... A1>
+   static bool null (const move_only_function_ex<R1 (A1...)>& f) {return !f;}
+
+   template <typename R1, typename C, typename... A1>
+   static bool null (R1 (C::*p) (A1...)) {return p == nullptr;}
+
+   template <typename R1, typename C, typename... A1>
+   static bool null (R1 (C::*p) (A1...) const) {return p == nullptr;}
+
+   std::function<R (A...)> f_;
+ };
+
+ // Return true if the function is empty.
+ //
+ template <typename R, typename... A>
+ inline bool
+ operator== (const move_only_function_ex<R (A...)>& f, std::nullptr_t) noexcept
+ {
+   const bool has_target (static_cast<bool> (f));
+   return !has_target;
+ }
+
+ // Return true if the function is empty.
+ //
+ template <typename R, typename... A>
+ inline bool
+ operator== (std::nullptr_t, const move_only_function_ex<R (A...)>& f) noexcept
+ {
+   const bool has_target (static_cast<bool> (f));
+   return !has_target;
+ }
+
+ // Return true if the function has a target.
+ //
+ template <typename R, typename... A>
+ inline bool
+ operator!= (const move_only_function_ex<R (A...)>& f, std::nullptr_t) noexcept
+ {
+   return f ? true : false;
+ }
+
+ // Return true if the function has a target.
+ //
+ template <typename R, typename... A>
+ inline bool
+ operator!= (std::nullptr_t, const move_only_function_ex<R (A...)>& f) noexcept
+ {
+   return f ? true : false;
+ }
+}
diff --git a/libbutl/multi-index.mxx b/libbutl/multi-index.hxx
index d51bdfc..a6754cd 100644
--- a/libbutl/multi-index.mxx
+++ b/libbutl/multi-index.hxx
@@ -1,29 +1,14 @@
-// file : libbutl/multi-index.mxx -*- C++ -*-
+// file : libbutl/multi-index.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <utility> // declval()
#include <functional> // hash
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.multi_index;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Google the "Emulating Boost.MultiIndex with Standard Containers" blog
// post for details.
@@ -57,7 +42,7 @@ LIBBUTL_MODEXPORT namespace butl
};
}
-LIBBUTL_MODEXPORT namespace std
+namespace std
{
template <typename T>
struct hash<butl::map_key<T>>: hash<T>
diff --git a/libbutl/openssl.cxx b/libbutl/openssl.cxx
index 8741b35..f9df2e7 100644
--- a/libbutl/openssl.cxx
+++ b/libbutl/openssl.cxx
@@ -1,35 +1,10 @@
// file : libbutl/openssl.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/openssl.mxx>
-#endif
+#include <libbutl/openssl.hxx>
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
#include <utility> // move()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.openssl;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
-
-#endif
using namespace std;
diff --git a/libbutl/openssl.mxx b/libbutl/openssl.hxx
index 6a0907e..b340f5c 100644
--- a/libbutl/openssl.mxx
+++ b/libbutl/openssl.hxx
@@ -1,41 +1,21 @@
-// file : libbutl/openssl.mxx -*- C++ -*-
+// file : libbutl/openssl.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <type_traits>
-#include <cstddef> // size_t
-#include <utility> // move(), forward()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.openssl;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process; //@@ MOD TODO: should we re-export?
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
+#include <libbutl/semantic-version.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Perform a crypto operation using the openssl(1) program. Throw
// process_error and io_error (both derive from system_error) in case of
@@ -100,6 +80,23 @@ LIBBUTL_MODEXPORT namespace butl
// department (that were apparently fixed in 1.0.2). To work around these
// bugs pass user-supplied options first.
//
+ // The program name and version as parsed from the `openssl version`
+ // command output.
+ //
+ struct openssl_info
+ {
+ // Note that the program name can be used by the caller to properly
+ // interpret the version.
+ //
+ // The name/version examples:
+ //
+ // OpenSSL 3.0.0
+ // OpenSSL 1.1.1l
+ // LibreSSL 2.8.3
+ //
+ // The `l` component above ends up in semantic_version::build.
+ //
+ std::string name;
+ semantic_version version;
+ };
+
class LIBBUTL_SYMEXPORT openssl: public process
{
public:
@@ -133,6 +130,22 @@ LIBBUTL_MODEXPORT namespace butl
const std::string& command,
A&&... options);
+ // Run `openssl version` command and try to parse and return the
+ // information it prints to stdout. Return nullopt if the process hasn't
+ // terminated successfully or stdout parsing has failed. Throw
+ // process_error and io_error in case of errors.
+ //
+ template <typename E>
+ static optional<openssl_info>
+ info (E&& err, const process_env&);
+
+ template <typename C,
+ typename E>
+ static optional<openssl_info>
+ info (const C&,
+ E&& err,
+ const process_env&);
+
private:
template <typename T>
struct is_other
diff --git a/libbutl/openssl.ixx b/libbutl/openssl.ixx
index c685b65..db2fbcd 100644
--- a/libbutl/openssl.ixx
+++ b/libbutl/openssl.ixx
@@ -1,7 +1,10 @@
// file : libbutl/openssl.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // forward()
+
+namespace butl
{
template <typename I,
typename O,
@@ -23,4 +26,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
std::forward<A> (options)...)
{
}
+
+ // Same as the overload below but without a command callback (a no-op
+ // callback is passed instead).
+ //
+ template <typename E>
+ inline optional<openssl_info> openssl::
+ info (E&& err, const process_env& env)
+ {
+   auto noop ([] (const char* [], std::size_t) {});
+   return info (noop, std::forward<E> (err), env);
+ }
}
diff --git a/libbutl/openssl.txx b/libbutl/openssl.txx
index 3a2c579..f55432d 100644
--- a/libbutl/openssl.txx
+++ b/libbutl/openssl.txx
@@ -1,7 +1,10 @@
// file : libbutl/openssl.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // forward()
+
+namespace butl
{
template <typename I>
typename std::enable_if<openssl::is_other<I>::value, I>::type openssl::
@@ -47,4 +50,67 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// Note: leaving this scope closes any open ends of the pipes in io_data.
}
+
+ template <typename C,
+           typename E>
+ optional<openssl_info> openssl::
+ info (const C& cmdc, E&& err, const process_env& env)
+ {
+   using namespace std;
+
+   // Start `openssl version` with its stdout redirected to a pipe that we
+   // read in the text mode.
+   //
+   openssl os (cmdc,
+               nullfd, fdstream_mode::text, forward<E> (err),
+               env,
+               "version");
+
+   // Only the first line of the output is of interest. Give up if there
+   // isn't one.
+   //
+   string line;
+   if (!getline (os.in, line))
+     return nullopt;
+
+   os.in.close ();
+
+   // Also give up if the command hasn't terminated successfully.
+   //
+   if (!os.wait ())
+     return nullopt;
+
+   // Parse the version string.
+   //
+   // Note that there is some variety in the version representations:
+   //
+   // OpenSSL 3.0.0 7 sep 2021 (Library: OpenSSL 3.0.0 7 sep 2021)
+   // OpenSSL 1.1.1l FIPS 24 Aug 2021
+   // LibreSSL 2.8.3
+   //
+   // Only the first two space-separated components are considered (the
+   // program name and the version). The line is assumed to have no leading
+   // spaces and the two components to be separated by a single space.
+   //
+   const size_t name_end (line.find (' '));
+
+   // No version or an empty program name.
+   //
+   if (name_end == string::npos || name_end == 0)
+     return nullopt;
+
+   string name (line, 0, name_end);
+
+   // The version spans up to the next space or, if there is none, to the
+   // end of the line.
+   //
+   const size_t ver_beg (name_end + 1);
+   const size_t ver_end (line.find (' ', ver_beg));
+   const size_t ver_len (ver_end != string::npos
+                         ? ver_end - ver_beg
+                         : string::npos);
+
+   optional<semantic_version> ver (
+     parse_semantic_version (string (line, ver_beg, ver_len),
+                             semantic_version::allow_build,
+                             "" /* build_separators */));
+
+   if (!ver)
+     return nullopt;
+
+   return openssl_info {move (name), move (*ver)};
+ }
}
diff --git a/libbutl/optional.mxx b/libbutl/optional.hxx
index d32e14b..f22189b 100644
--- a/libbutl/optional.mxx
+++ b/libbutl/optional.hxx
@@ -1,11 +1,7 @@
-// file : libbutl/optional.mxx -*- C++ -*-
+// file : libbutl/optional.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
// Note: the Clang check must come before GCC since it also defines __GNUC__.
//
@@ -54,7 +50,6 @@
# endif
#endif
-#ifndef __cpp_lib_modules_ts
#ifdef LIBBUTL_STD_OPTIONAL
# include <optional>
#else
@@ -62,31 +57,19 @@
# include <functional> // hash
# include <type_traits> // is_*
#endif
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.optional;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
#ifdef LIBBUTL_STD_OPTIONAL
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
- template <typename T>
- using optional = std::optional<T>;
-
+ using std::optional;
using std::nullopt_t;
using std::nullopt;
}
#else
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Simple optional class template while waiting for std::optional.
//
@@ -125,10 +108,16 @@ LIBBUTL_MODEXPORT namespace butl
#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \
(!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__))
constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
- constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+ constexpr optional_data (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+ : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
#else
optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
- optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+ optional_data (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+ : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
#endif
optional_data& operator= (nullopt_t);
@@ -136,7 +125,11 @@ LIBBUTL_MODEXPORT namespace butl
optional_data& operator= (T&&);
optional_data& operator= (const optional_data&);
- optional_data& operator= (optional_data&&);
+
+ optional_data& operator= (optional_data&&)
+ noexcept (std::is_nothrow_move_constructible<T>::value &&
+ std::is_nothrow_move_assignable<T>::value &&
+ std::is_nothrow_destructible<T>::value);
~optional_data ();
};
@@ -168,10 +161,16 @@ LIBBUTL_MODEXPORT namespace butl
#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \
(!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__))
constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
- constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+ constexpr optional_data (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+ : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
#else
optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
- optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+ optional_data (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+ : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
#endif
optional_data& operator= (nullopt_t);
@@ -179,7 +178,12 @@ LIBBUTL_MODEXPORT namespace butl
optional_data& operator= (T&&);
optional_data& operator= (const optional_data&);
- optional_data& operator= (optional_data&&);
+
+ // Note: it is trivially destructible and thus is no-throw destructible.
+ //
+ optional_data& operator= (optional_data&&)
+ noexcept (std::is_nothrow_move_constructible<T>::value &&
+ std::is_nothrow_move_assignable<T>::value);
};
template <typename T,
@@ -306,6 +310,8 @@ LIBBUTL_MODEXPORT namespace butl
explicit operator bool () const {return this->v_;}
};
+ // optional ? optional
+ //
template <typename T>
inline auto
operator== (const optional<T>& x, const optional<T>& y)
@@ -335,6 +341,131 @@ LIBBUTL_MODEXPORT namespace butl
{
return y < x;
}
+
+ // optional ? nullopt
+ // nullopt ? optional
+ //
+  template <typename T>
+  inline auto
+  operator== (const optional<T>& x, nullopt_t)
+  {
+    // Equal to nullopt only if no value is present.
+    //
+    return !static_cast<bool> (x);
+  }
+
+  template <typename T>
+  inline auto
+  operator== (nullopt_t, const optional<T>& y)
+  {
+    return !static_cast<bool> (y);
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const optional<T>& x, nullopt_t)
+  {
+    // Differs from nullopt exactly when a value is present.
+    //
+    return static_cast<bool> (x);
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (nullopt_t, const optional<T>& y)
+  {
+    return static_cast<bool> (y);
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const optional<T>&, nullopt_t)
+  {
+    // Nothing orders before nullopt.
+    //
+    return false;
+  }
+
+  template <typename T>
+  inline auto
+  operator< (nullopt_t, const optional<T>& y)
+  {
+    // nullopt orders before any optional that contains a value.
+    //
+    return static_cast<bool> (y);
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const optional<T>& x, nullopt_t)
+  {
+    // Same as nullopt < x.
+    //
+    return static_cast<bool> (x);
+  }
+
+  template <typename T>
+  inline auto
+  operator> (nullopt_t, const optional<T>&)
+  {
+    // Same as y < nullopt, which never holds.
+    //
+    return false;
+  }
+
+ // optional ? T
+ // T ? optional
+ //
+  template <typename T>
+  inline auto
+  operator== (const optional<T>& x, const T& y)
+  {
+    // An absent value never equals a T.
+    //
+    return x && *x == y;
+  }
+
+  template <typename T>
+  inline auto
+  operator== (const T& x, const optional<T>& y)
+  {
+    return y && x == *y;
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const optional<T>& x, const T& y)
+  {
+    const auto eq (x == y);
+    return !eq;
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const T& x, const optional<T>& y)
+  {
+    const auto eq (x == y);
+    return !eq;
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const optional<T>& x, const T& y)
+  {
+    // An absent value orders before any T.
+    //
+    return !x || *x < y;
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const T& x, const optional<T>& y)
+  {
+    // No T orders before an absent value.
+    //
+    return y && x < *y;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const optional<T>& x, const T& y)
+  {
+    // Expressed via T < optional with the operands swapped.
+    //
+    return y < x;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const T& x, const optional<T>& y)
+  {
+    // Expressed via optional < T with the operands swapped.
+    //
+    return y < x;
+  }
}
namespace std
diff --git a/libbutl/optional.ixx b/libbutl/optional.ixx
index e2b552f..fdd0ac5 100644
--- a/libbutl/optional.ixx
+++ b/libbutl/optional.ixx
@@ -77,6 +77,9 @@ namespace butl
template <typename T>
inline optional_data<T, false>& optional_data<T, false>::
operator= (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value &&
+ std::is_nothrow_move_assignable<T>::value &&
+ std::is_nothrow_destructible<T>::value)
{
if (o.v_)
{
@@ -171,6 +174,8 @@ namespace butl
template <typename T>
inline optional_data<T, true>& optional_data<T, true>::
operator= (optional_data&& o)
+ noexcept (std::is_nothrow_move_constructible<T>::value &&
+ std::is_nothrow_move_assignable<T>::value)
{
if (o.v_)
{
diff --git a/libbutl/pager.cxx b/libbutl/pager.cxx
index 44aa83e..e647948 100644
--- a/libbutl/pager.cxx
+++ b/libbutl/pager.cxx
@@ -1,9 +1,7 @@
// file : libbutl/pager.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/pager.mxx>
-#endif
+#include <libbutl/pager.hxx>
#include <errno.h> // E*
@@ -14,46 +12,20 @@
# include <libbutl/win32-utility.hxx>
#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
-#include <iostream>
-
+#include <cstddef> // size_t
#include <cstring> // strchr()
#include <utility> // move()
+
#ifndef _WIN32
# include <chrono>
# include <thread> // this_thread::sleep_for()
#endif
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.pager;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.fdstream;
-#endif
-#ifndef _WIN32
-import std.threading;
-#endif
-
-import butl.utility; // operator<<(ostream, exception), throw_generic_error()
-import butl.optional;
-import butl.fdstream; // fdclose()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
using namespace std;
diff --git a/libbutl/pager.mxx b/libbutl/pager.hxx
index a1f640f..12a6670 100644
--- a/libbutl/pager.mxx
+++ b/libbutl/pager.hxx
@@ -1,36 +1,18 @@
-// file : libbutl/pager.mxx -*- C++ -*-
+// file : libbutl/pager.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <iostream>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.pager;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.fdstream;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Try to run the output through a pager program, such as more or less (no
// pun intended, less is used by default). If the default pager program is
diff --git a/libbutl/path-io.mxx b/libbutl/path-io.hxx
index 6b6dbcf..a60527d 100644
--- a/libbutl/path-io.mxx
+++ b/libbutl/path-io.hxx
@@ -1,34 +1,16 @@
-// file : libbutl/path-io.mxx -*- C++ -*-
+// file : libbutl/path-io.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ostream>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.path_io;
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (should not be needed).
-import std.io;
-#endif
-import butl.path;
-#else
-#include <libbutl/path.mxx>
-#endif
+#include <libbutl/path.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// This is the default path IO implementation. It is separate to allow
// custom implementations. For example, we may want to print paths as
diff --git a/libbutl/path-map.mxx b/libbutl/path-map.hxx
index daaf0a4..e3d776a 100644
--- a/libbutl/path-map.mxx
+++ b/libbutl/path-map.hxx
@@ -1,33 +1,16 @@
-// file : libbutl/path-map.mxx -*- C++ -*-
+// file : libbutl/path-map.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <algorithm> // min()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.path_map;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.prefix_map;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/prefix-map.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/prefix-map.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// prefix_map for filesystem paths
//
@@ -142,4 +125,12 @@ LIBBUTL_MODEXPORT namespace butl
template <typename T>
using dir_path_map =
prefix_map<dir_path, T, dir_path::traits_type::directory_separator>;
+
+ template <typename T>
+ using path_multimap =
+ prefix_multimap<path, T, path::traits_type::directory_separator>;
+
+ template <typename T>
+ using dir_path_multimap =
+ prefix_multimap<dir_path, T, dir_path::traits_type::directory_separator>;
}
diff --git a/libbutl/path-pattern.cxx b/libbutl/path-pattern.cxx
index cea5aa7..ed36eb5 100644
--- a/libbutl/path-pattern.cxx
+++ b/libbutl/path-pattern.cxx
@@ -1,41 +1,14 @@
// file : libbutl/path-pattern.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/path-pattern.mxx>
-#endif
+#include <libbutl/path-pattern.hxx>
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint>
-#include <cstddef>
#include <iterator> // reverse_iterator
-
#include <algorithm> // find()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.path_pattern;
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-#endif
-
-import butl.utility; // lcase()[_WIN32]
-import butl.filesystem; // path_search()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/utility.hxx> // lcase()[_WIN32]
+#include <libbutl/filesystem.hxx> // path_search()
using namespace std;
diff --git a/libbutl/path-pattern.mxx b/libbutl/path-pattern.hxx
index 6d9684a..f6e01be 100644
--- a/libbutl/path-pattern.mxx
+++ b/libbutl/path-pattern.hxx
@@ -1,37 +1,20 @@
-// file : libbutl/path-pattern.mxx -*- C++ -*-
+// file : libbutl/path-pattern.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
+#include <cassert>
#include <cstdint> // uint16_t
#include <cstddef> // ptrdiff_t, size_t
#include <iterator> // input_iterator_tag
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.path_pattern;
-
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Wildcard pattern match (aka glob).
//
diff --git a/libbutl/path-pattern.ixx b/libbutl/path-pattern.ixx
index 71f125c..6fee31e 100644
--- a/libbutl/path-pattern.ixx
+++ b/libbutl/path-pattern.ixx
@@ -3,6 +3,32 @@
namespace butl
{
+ // path_match_flags
+ //
+  inline path_match_flags operator& (path_match_flags x, path_match_flags y)
+  {
+    // x is a by-value copy so it can be updated in place.
+    //
+    x &= y;
+    return x;
+  }
+
+  inline path_match_flags operator| (path_match_flags x, path_match_flags y)
+  {
+    x |= y;
+    return x;
+  }
+
+  inline path_match_flags operator&= (path_match_flags& x, path_match_flags y)
+  {
+    // Combine the underlying 16-bit representations and store the result
+    // back into x.
+    //
+    const auto bits (static_cast<std::uint16_t> (x) &
+                     static_cast<std::uint16_t> (y));
+
+    x = static_cast<path_match_flags> (bits);
+    return x;
+  }
+
+  inline path_match_flags operator|= (path_match_flags& x, path_match_flags y)
+  {
+    const auto bits (static_cast<std::uint16_t> (x) |
+                     static_cast<std::uint16_t> (y));
+
+    x = static_cast<path_match_flags> (bits);
+    return x;
+  }
+
// path_pattern_iterator
//
inline path_pattern_iterator::
diff --git a/libbutl/path.cxx b/libbutl/path.cxx
index 3b04730..bd66f13 100644
--- a/libbutl/path.cxx
+++ b/libbutl/path.cxx
@@ -1,9 +1,7 @@
// file : libbutl/path.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/path.mxx>
-#endif
+#include <libbutl/path.hxx>
#ifdef _WIN32
# include <libbutl/win32-utility.hxx>
@@ -25,32 +23,11 @@
#endif
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <utility>
-
#include <atomic>
#include <cstring> // strcpy()
-#endif
-
-#ifdef __cpp_modules_ts
-module butl.path;
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-import butl.utility; // throw_*_error()
-import butl.process; // process::current_id()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/utility.hxx> // throw_*_error()
+#include <libbutl/process.hxx> // process::current_id()
#include <libbutl/export.hxx>
@@ -207,8 +184,8 @@ namespace butl
using std::to_string;
return prefix
- + "-" + to_string (process::current_id ())
- + "-" + to_string (temp_name_count++);
+ + '-' + to_string (process::current_id ())
+ + '-' + to_string (temp_name_count++);
}
template <>
diff --git a/libbutl/path.mxx b/libbutl/path.hxx
index 12479ce..b10022a 100644
--- a/libbutl/path.mxx
+++ b/libbutl/path.hxx
@@ -1,13 +1,8 @@
-// file : libbutl/path.mxx -*- C++ -*-
+// file : libbutl/path.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-#include <cassert>
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <ostream>
#include <cstddef> // ptrdiff_t
@@ -21,31 +16,17 @@
#ifdef _WIN32
#include <algorithm> // replace()
#endif
-#endif
-// Other includes.
+#include <libbutl/optional.hxx>
+#include <libbutl/small-vector.hxx>
-#ifdef __cpp_modules_ts
-export module butl.path;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.optional;
-import butl.small_vector;
#ifdef _WIN32
-import butl.utility;
-#endif
-#else
-#include <libbutl/optional.mxx>
-#include <libbutl/small-vector.mxx>
-#ifdef _WIN32
-#include <libbutl/utility.mxx> // *case*()
-#endif
+#include <libbutl/utility.hxx> // *case*()
#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Wish list/ideas for improvements.
//
@@ -78,7 +59,7 @@ LIBBUTL_MODEXPORT namespace butl
string_type path;
explicit
- invalid_basic_path (const string_type& p): path (p) {}
+ invalid_basic_path (string_type p): path (std::move (p)) {}
explicit
invalid_basic_path (const C* p): path (p) {}
invalid_basic_path (const C* p, size_type n): path (p, n) {}
@@ -392,6 +373,22 @@ LIBBUTL_MODEXPORT namespace butl
: (p = rfind_separator (s, n - 1)) == nullptr ? s : ++p;
}
+ // Return true if sb is a sub-path of sp (i.e., sp is a prefix). Expects
+ // both paths to be normalized. Note that this function returns true if
+ // the paths are equal. Empty path is considered a prefix of any path.
+ //
+ static bool
+ sub (const C* sb, size_type nb,
+ const C* sp, size_type np);
+
+ // Return true if sp is a super-path of sb (i.e., sb is a suffix). Expects
+ // both paths to be normalized. Note that this function returns true if
+ // the paths are equal. Empty path is considered a suffix of any path.
+ //
+ static bool
+ sup (const C* sp, size_type np,
+ const C* sb, size_type nb);
+
static int
compare (string_type const& l,
string_type const& r,
@@ -615,18 +612,18 @@ LIBBUTL_MODEXPORT namespace butl
// Constructors.
//
- path_data ()
+ path_data () noexcept
: tsep_ (0) {}
- path_data (string_type&& p, difference_type ts)
+ path_data (string_type&& p, difference_type ts) noexcept
: path_ (std::move (p)), tsep_ (path_.empty () ? 0 : ts) {}
explicit
- path_data (string_type&& p)
+ path_data (string_type&& p) noexcept
: path_ (std::move (p)) { _init (); }
void
- _init ()
+ _init () noexcept
{
size_type n (path_.size ()), i;
@@ -654,7 +651,8 @@ LIBBUTL_MODEXPORT namespace butl
using path_data<C>::path_data;
base_type () = default;
- base_type (path_data<C>&& d): path_data<C> (std::move (d)) {}
+ base_type (path_data<C>&& d) noexcept
+ : path_data<C> (std::move (d)) {}
};
using dir_type = basic_path<C, dir_path_kind<C>>;
@@ -952,6 +950,12 @@ LIBBUTL_MODEXPORT namespace butl
basic_path
relative (basic_path) const;
+ // As above but return nullopt rather than throw if a relative path cannot
+ // be derived.
+ //
+ optional<basic_path>
+ try_relative (basic_path) const;
+
// Iteration over path components.
//
// Note that for an absolute POSIX path the first component is empty,
@@ -1275,7 +1279,8 @@ LIBBUTL_MODEXPORT namespace butl
// Direct initialization without init()/cast().
//
explicit
- basic_path (data_type&& d): base_type (std::move (d)) {}
+ basic_path (data_type&& d) noexcept
+ : base_type (std::move (d)) {}
using base_type::_size;
using base_type::_init;
@@ -1474,9 +1479,9 @@ LIBBUTL_MODEXPORT namespace butl
basic_path_name (): // Create empty/NULL path name.
base (nullptr, &name) {}
- basic_path_name (basic_path_name&&);
+ basic_path_name (basic_path_name&&) noexcept;
basic_path_name (const basic_path_name&);
- basic_path_name& operator= (basic_path_name&&);
+ basic_path_name& operator= (basic_path_name&&) noexcept;
basic_path_name& operator= (const basic_path_name&);
};
@@ -1503,14 +1508,14 @@ LIBBUTL_MODEXPORT namespace butl
basic_path_name_value (): base (&path) {} // Create empty/NULL path name.
- basic_path_name_value (basic_path_name_value&&);
+ basic_path_name_value (basic_path_name_value&&) noexcept;
basic_path_name_value (const basic_path_name_value&);
- basic_path_name_value& operator= (basic_path_name_value&&);
+ basic_path_name_value& operator= (basic_path_name_value&&) noexcept;
basic_path_name_value& operator= (const basic_path_name_value&);
};
}
-LIBBUTL_MODEXPORT namespace std
+namespace std
{
template <typename C, typename K>
struct hash<butl::basic_path<C, K>>: hash<basic_string<C>>
diff --git a/libbutl/path.ixx b/libbutl/path.ixx
index 9c96cfc..b2fdb6f 100644
--- a/libbutl/path.ixx
+++ b/libbutl/path.ixx
@@ -1,7 +1,7 @@
// file : libbutl/path.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
// path_abnormality
//
@@ -117,6 +117,45 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return r;
}
+ template <typename C>
+ inline bool path_traits<C>::
+ sub (const C* s, size_type n,
+ const C* ps, size_type pn)
+ {
+ // Compare using the full string representations (including the trailing
+ // slash in "/"). An empty prefix (ps, pn) matches any path.
+ //
+ if (pn == 0)
+ return true;
+
+ // The final (parenthesized) condition guards against the /foo-bar vs /foo case.
+ //
+ return n >= pn &&
+ compare (s, pn, ps, pn) == 0 &&
+ (is_separator (ps[pn - 1]) || // ps ends with a separator
+ n == pn || // s == ps
+ is_separator (s[pn])); // next char in s is a separator
+ }
+
+ template <typename C>
+ inline bool path_traits<C>::
+ sup (const C* s, size_type n,
+ const C* ps, size_type pn)
+ {
+ // Compare using the full string representations (including the trailing
+ // slash in "/"). An empty suffix (ps, pn) matches any path.
+ //
+ if (pn == 0)
+ return true;
+
+ // The final (parenthesized) condition guards against the /foo-bar vs bar case.
+ //
+ return n >= pn &&
+ compare (s + n - pn, pn, ps, pn) == 0 &&
+ (n == pn || // s == ps
+ is_separator (s[n - pn - 1])); // Previous char in s is a separator.
+ }
+
#ifdef _WIN32
template <>
inline char path_traits<char>::
@@ -230,52 +269,16 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
inline bool basic_path<C, K>::
sub (const basic_path& p) const
{
- // The thinking here is that we can use the full string representations
- // (including the trailing slash in "/").
- //
- const string_type& ps (p.path_);
- size_type pn (ps.size ());
-
- if (pn == 0)
- return true;
-
- const string_type& s (this->path_);
- size_type n (s.size ());
-
- // The second condition guards against the /foo-bar vs /foo case.
- //
- return n >= pn &&
- traits_type::compare (s.c_str (), pn, ps.c_str (), pn) == 0 &&
- (traits_type::is_separator (ps.back ()) || // p ends with a separator
- n == pn || // *this == p
- traits_type::is_separator (s[pn])); // next char is a separator
+ return traits_type::sub (this->path_.c_str (), this->path_.size (),
+ p.path_.c_str (), p.path_.size ());
}
template <typename C, typename K>
inline bool basic_path<C, K>::
sup (const basic_path& p) const
{
- // The thinking here is that we can use the full string representations
- // (including the trailing slash in "/").
- //
- const string_type& ps (p.path_);
- size_type pn (ps.size ());
-
- if (pn == 0)
- return true;
-
- const string_type& s (this->path_);
- size_type n (s.size ());
-
- // The second condition guards against the /foo-bar vs bar case.
- //
- return n >= pn &&
- traits_type::compare (s.c_str () + n - pn, pn, ps.c_str (), pn) == 0 &&
- (n == pn || // *this == p
- //
- // Previous char is a separator.
- //
- traits_type::is_separator (s[n - pn - 1]));
+ return traits_type::sup (this->path_.c_str (), this->path_.size (),
+ p.path_.c_str (), p.path_.size ());
}
template <typename C, typename K>
@@ -779,7 +782,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
//
template <typename P>
inline basic_path_name<P>::
- basic_path_name (basic_path_name&& p)
+ basic_path_name (basic_path_name&& p) noexcept
: basic_path_name (p.path, std::move (p.name))
{
}
@@ -793,7 +796,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
template <typename P>
inline basic_path_name<P>& basic_path_name<P>::
- operator= (basic_path_name&& p)
+ operator= (basic_path_name&& p) noexcept
{
if (this != &p)
{
@@ -821,7 +824,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
//
template <typename P>
inline basic_path_name_value<P>::
- basic_path_name_value (basic_path_name_value&& p)
+ basic_path_name_value (basic_path_name_value&& p) noexcept
: basic_path_name_value (std::move (p.path), std::move (p.name))
{
}
@@ -835,7 +838,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
template <typename P>
inline basic_path_name_value<P>& basic_path_name_value<P>::
- operator= (basic_path_name_value&& p)
+ operator= (basic_path_name_value&& p) noexcept
{
if (this != &p)
{
diff --git a/libbutl/path.txx b/libbutl/path.txx
index 45b62bd..60e0f1a 100644
--- a/libbutl/path.txx
+++ b/libbutl/path.txx
@@ -1,7 +1,7 @@
// file : libbutl/path.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
template <typename C, typename K>
basic_path<C, K> basic_path<C, K>::
@@ -103,8 +103,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
#endif
template <typename C, typename K>
- basic_path<C, K> basic_path<C, K>::
- relative (basic_path<C, K> d) const
+ optional<basic_path<C, K>> basic_path<C, K>::
+ try_relative (basic_path<C, K> d) const
{
dir_type r;
@@ -118,12 +118,22 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// Roots of the paths do not match.
//
if (d.root ())
- throw invalid_basic_path<C> (this->path_);
+ return nullopt;
}
return r / leaf (d);
}
+  template <typename C, typename K>
+  basic_path<C, K> basic_path<C, K>::
+  relative (basic_path<C, K> d) const
+  {
+    // Delegate to try_relative() and translate the absent result into the
+    // invalid_basic_path exception.
+    //
+    optional<basic_path<C, K>> r (try_relative (std::move (d)));
+
+    if (!r)
+      throw invalid_basic_path<C> (this->path_);
+
+    return std::move (*r);
+  }
+
#ifdef _WIN32
// Find the actual spelling of a name in the specified dir. If the name is
// found, append it to the result and return true. Otherwise, return false.
diff --git a/libbutl/prefix-map.mxx b/libbutl/prefix-map.hxx
index 75931da..0895d96 100644
--- a/libbutl/prefix-map.mxx
+++ b/libbutl/prefix-map.hxx
@@ -1,31 +1,16 @@
-// file : libbutl/prefix-map.mxx -*- C++ -*-
+// file : libbutl/prefix-map.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <map>
#include <string>
#include <utility> // move()
#include <algorithm> // min()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.prefix_map;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// A map of hierarchical "paths", e.g., 'foo.bar' or 'foo/bar' with the
// ability to retrieve a range of entries that have a specific prefix as
@@ -149,6 +134,37 @@ LIBBUTL_MODEXPORT namespace butl
const_iterator
find_sup (const key_type&) const;
+
+ // As above but additionally evaluate a predicate on each matching entry
+ // returning the one for which it returns true.
+ //
+ template <typename P>
+ iterator
+ find_sup_if (const key_type&, P);
+
+ template <typename P>
+ const_iterator
+ find_sup_if (const key_type&, P) const;
+ };
+
+ template <typename M>
+ struct prefix_multimap_common: prefix_map_common<M>
+ {
+ typedef M map_type;
+ typedef typename map_type::key_type key_type;
+ typedef typename map_type::iterator iterator;
+ typedef typename map_type::const_iterator const_iterator;
+
+ using prefix_map_common<M>::prefix_map_common;
+
+ // Find the most qualified entries that are super-prefixes of the
+ // specified prefix.
+ //
+ std::pair<iterator, iterator>
+ sup_range (const key_type&);
+
+ std::pair<const_iterator, const_iterator>
+ sup_range (const key_type&) const;
};
template <typename M, typename prefix_map_common<M>::delimiter_type D>
@@ -161,6 +177,16 @@ LIBBUTL_MODEXPORT namespace butl
: prefix_map_common<M> (std::move (i), D) {}
};
+ template <typename M, typename prefix_map_common<M>::delimiter_type D>
+ struct prefix_multimap_impl: prefix_multimap_common<M>
+ {
+ typedef typename prefix_multimap_common<M>::value_type value_type;
+
+ prefix_multimap_impl (): prefix_multimap_common<M> (D) {}
+ prefix_multimap_impl (std::initializer_list<value_type> i)
+ : prefix_multimap_common<M> (std::move (i), D) {}
+ };
+
template <typename K,
typename T,
typename compare_prefix<K>::delimiter_type D>
@@ -170,7 +196,7 @@ LIBBUTL_MODEXPORT namespace butl
typename T,
typename compare_prefix<K>::delimiter_type D>
using prefix_multimap =
- prefix_map_impl<std::multimap<K, T, compare_prefix<K>>, D>;
+ prefix_multimap_impl<std::multimap<K, T, compare_prefix<K>>, D>;
}
#include <libbutl/prefix-map.txx>
diff --git a/libbutl/prefix-map.txx b/libbutl/prefix-map.txx
index e9a99c9..80664bf 100644
--- a/libbutl/prefix-map.txx
+++ b/libbutl/prefix-map.txx
@@ -1,7 +1,7 @@
// file : libbutl/prefix-map.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
template <typename M>
auto prefix_map_common<M>::
@@ -127,4 +127,128 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return i;
#endif
}
+
+  // Find the entry for k itself or, failing that, for one of its prefixes,
+  // for which the predicate returns true. Return end() if there is no such
+  // entry.
+  //
+  template <typename M>
+  template <typename P>
+  auto prefix_map_common<M>::
+  find_sup_if (const key_type& k, P pred) -> iterator
+  {
+#if 0
+    const auto& c (this->key_comp ());
+
+    for (auto i (this->upper_bound (k)), b (this->begin ()); i != b; )
+    {
+      --i;
+      if (c.prefix (i->first, k) && pred (*i))
+        return i;
+    }
+
+    return this->end ();
+#else
+    const auto e (this->end ());
+
+    // First look for the exact match before making any copies.
+    //
+    auto i (this->find (k));
+
+    if (i != e && pred (*i))
+      return i;
+
+    const auto& c (this->key_comp ());
+
+    // Note: prefix() is expected to reduce p to its next (shorter) prefix
+    // and to return false once there are none left.
+    //
+    for (key_type p (k); c.prefix (p); )
+    {
+      i = this->find (p);
+      if (i != e && pred (*i))
+        return i;
+    }
+
+    // Nothing matched. Make sure we don't return the last entry that was
+    // found but rejected by the predicate.
+    //
+    return e;
+#endif
+  }
+
+  // Find the entry for k itself or, failing that, for one of its prefixes,
+  // for which the predicate returns true. Return end() if there is no such
+  // entry.
+  //
+  template <typename M>
+  template <typename P>
+  auto prefix_map_common<M>::
+  find_sup_if (const key_type& k, P pred) const -> const_iterator
+  {
+#if 0
+    const auto& c (this->key_comp ());
+
+    for (auto i (this->upper_bound (k)), b (this->begin ()); i != b; )
+    {
+      --i;
+      if (c.prefix (i->first, k) && pred (*i))
+        return i;
+    }
+
+    return this->end ();
+#else
+    const auto e (this->end ());
+
+    // First look for the exact match before making any copies.
+    //
+    auto i (this->find (k));
+
+    if (i != e && pred (*i))
+      return i;
+
+    const auto& c (this->key_comp ());
+
+    // Note: prefix() is expected to reduce p to its next (shorter) prefix
+    // and to return false once there are none left.
+    //
+    for (key_type p (k); c.prefix (p); )
+    {
+      i = this->find (p);
+      if (i != e && pred (*i))
+        return i;
+    }
+
+    // Nothing matched. Make sure we don't return the last entry that was
+    // found but rejected by the predicate.
+    //
+    return e;
+#endif
+  }
+
+  template <typename M>
+  auto prefix_multimap_common<M>::
+  sup_range (const key_type& k) -> std::pair<iterator, iterator>
+  {
+#if 0
+    // TODO (see above).
+#else
+    // Try the key itself first so that no copy is made if it has entries.
+    //
+    auto r (this->equal_range (k));
+
+    if (r.first != r.second)
+      return r;
+
+    // Otherwise keep asking the comparator for the next prefix of a copy of
+    // the key until one has entries or there are no more prefixes.
+    //
+    const auto& c (this->key_comp ());
+
+    key_type p (k);
+    while (c.prefix (p))
+    {
+      r = this->equal_range (p);
+      if (r.first != r.second)
+        break;
+    }
+
+    return r;
+#endif
+  }
+
+  template <typename M>
+  auto prefix_multimap_common<M>::
+  sup_range (const key_type& k) const -> std::pair<const_iterator, const_iterator>
+  {
+#if 0
+    // TODO (see above).
+#else
+    // Try the key itself first so that no copy is made if it has entries.
+    //
+    auto r (this->equal_range (k));
+
+    if (r.first != r.second)
+      return r;
+
+    // Otherwise keep asking the comparator for the next prefix of a copy of
+    // the key until one has entries or there are no more prefixes.
+    //
+    const auto& c (this->key_comp ());
+
+    key_type p (k);
+    while (c.prefix (p))
+    {
+      r = this->equal_range (p);
+      if (r.first != r.second)
+        break;
+    }
+
+    return r;
+#endif
+  }
}
diff --git a/libbutl/process-details.hxx b/libbutl/process-details.hxx
index cf7624d..10d5241 100644
--- a/libbutl/process-details.hxx
+++ b/libbutl/process-details.hxx
@@ -3,17 +3,25 @@
#pragma once
-#include <libbutl/ft/shared_mutex.hxx>
+#ifdef LIBBUTL_MINGW_STDTHREAD
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (dummy std.threading).
-import std.threading;
-#else
-#include <mutex>
-#if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex)
-# include <shared_mutex>
-#endif
-#endif
+# include <libbutl/mingw-shared_mutex.hxx>
+
+namespace butl
+{
+ using shared_mutex = mingw_stdthread::shared_mutex;
+ using ulock = mingw_stdthread::unique_lock<shared_mutex>;
+ using slock = mingw_stdthread::shared_lock<shared_mutex>;
+}
+
+#else // LIBBUTL_MINGW_STDTHREAD
+
+# include <libbutl/ft/shared_mutex.hxx>
+
+# include <mutex>
+# if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex)
+# include <shared_mutex>
+# endif
namespace butl
{
@@ -41,7 +49,11 @@ namespace butl
using ulock = std::unique_lock<shared_mutex>;
using slock = ulock;
#endif
+}
+#endif // LIBBUTL_MINGW_STDTHREAD
+namespace butl
+{
// Mutex that is acquired to make a sequence of operations atomic in regards
// to child process spawning. Must be acquired for exclusive access for child
// process startup, and for shared access otherwise. Defined in process.cxx.
diff --git a/libbutl/process-io.cxx b/libbutl/process-io.cxx
index c29bbc0..0be3a77 100644
--- a/libbutl/process-io.cxx
+++ b/libbutl/process-io.cxx
@@ -1,36 +1,11 @@
// file : libbutl/process-io.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/process-io.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <ostream>
+#include <libbutl/process-io.hxx>
#include <cstring> // strchr()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.process_io;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-#endif
-import butl.path-io;
-#else
-#include <libbutl/path-io.mxx>
-#endif
+#include <libbutl/path-io.hxx>
using namespace std;
diff --git a/libbutl/process-io.mxx b/libbutl/process-io.hxx
index d07a212..29d6d8b 100644
--- a/libbutl/process-io.mxx
+++ b/libbutl/process-io.hxx
@@ -1,32 +1,15 @@
-// file : libbutl/process-io.mxx -*- C++ -*-
+// file : libbutl/process-io.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <ostream>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.process_io;
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (should not be needed).
-import std.io;
-#endif
-import butl.process;
-#else
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
inline std::ostream&
operator<< (std::ostream& o, const process_path& p)
diff --git a/libbutl/process-run.cxx b/libbutl/process-run.cxx
index c26c20d..b044ea1 100644
--- a/libbutl/process-run.cxx
+++ b/libbutl/process-run.cxx
@@ -1,35 +1,12 @@
// file : libbutl/process-run.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <cstdlib> // exit()
#include <iostream> // cerr
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.process;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-#endif
-import butl.utility; // operator<<(ostream,exception)
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // operator<<(ostream,exception)
using namespace std;
@@ -47,7 +24,7 @@ namespace butl
try
{
return process (pp, cmd,
- in, out, err,
+ move (in), move (out), move (err),
cwd != nullptr ? cwd->string ().c_str () : nullptr,
envvars);
}
diff --git a/libbutl/process-run.txx b/libbutl/process-run.txx
index aa1e381..6c903a8 100644
--- a/libbutl/process-run.txx
+++ b/libbutl/process-run.txx
@@ -1,7 +1,9 @@
// file : libbutl/process-run.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility> // forward(), index_sequence
+
+namespace butl
{
template <typename V>
void process_env::
@@ -85,21 +87,21 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// valid file descriptor.
//
inline process::pipe
- process_stdin (const process::pipe& v)
+ process_stdin (process::pipe v)
{
assert (v.in >= 0);
return v;
}
inline process::pipe
- process_stdout (const process::pipe& v)
+ process_stdout (process::pipe v)
{
assert (v.out >= 0);
return v;
}
inline process::pipe
- process_stderr (const process::pipe& v)
+ process_stderr (process::pipe v)
{
assert (v.out >= 0);
return v;
@@ -129,13 +131,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
typename... A,
typename std::size_t... index>
process
- process_start (std::index_sequence<index...>,
- const C& cmdc,
- I&& in,
- O&& out,
- E&& err,
- const process_env& env,
- A&&... args)
+ process_start_impl (std::index_sequence<index...>,
+ const C& cmdc,
+ I&& in,
+ O&& out,
+ E&& err,
+ const process_env& env,
+ A&&... args)
{
// Map stdin/stdout/stderr arguments to their integer values, as expected
// by the process constructor.
@@ -168,7 +170,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return process_start (env.cwd,
*env.path, cmd.data (),
env.vars,
- in_i, out_i, err_i);
+ std::move (in_i),
+ std::move (out_i),
+ std::move (err_i));
}
template <typename C,
@@ -184,13 +188,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
const process_env& env,
A&&... args)
{
- return process_start (std::index_sequence_for<A...> (),
- cmdc,
- std::forward<I> (in),
- std::forward<O> (out),
- std::forward<E> (err),
- env,
- std::forward<A> (args)...);
+ return process_start_impl (std::index_sequence_for<A...> (),
+ cmdc,
+ std::forward<I> (in),
+ std::forward<O> (out),
+ std::forward<E> (err),
+ env,
+ std::forward<A> (args)...);
}
template <typename I,
@@ -255,4 +259,45 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
env,
std::forward<A> (args)...);
}
+
+ template <typename C,
+ typename... A,
+ typename std::size_t... index>
+ void
+ process_print_impl (std::index_sequence<index...>,
+ const C& cmdc,
+ const process_env& env,
+ A&&... args)
+ {
+ // Construct the command line array.
+ //
+ const std::size_t args_size (sizeof... (args));
+
+ small_vector<const char*, args_size + 2> cmd;
+
+ assert (env.path != nullptr);
+ cmd.push_back (env.path->recall_string ());
+
+ std::string storage[args_size != 0 ? args_size : 1];
+
+ const char* dummy[] = {
+ nullptr, process_args_as_wrapper (cmd, args, storage[index])... };
+
+ cmd.push_back (dummy[0]); // NULL (and get rid of unused warning).
+
+ cmdc (cmd.data (), cmd.size ());
+ }
+
+ template <typename C,
+ typename... A>
+ inline void
+ process_print_callback (const C& cmdc,
+ const process_env& env,
+ A&&... args)
+ {
+ process_print_impl (std::index_sequence_for<A...> (),
+ cmdc,
+ env,
+ std::forward<A> (args)...);
+ }
}
diff --git a/libbutl/process.cxx b/libbutl/process.cxx
index 0695493..e416807 100644
--- a/libbutl/process.cxx
+++ b/libbutl/process.cxx
@@ -1,9 +1,7 @@
// file : libbutl/process.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
#include <errno.h>
@@ -49,6 +47,14 @@
# elif defined(__NetBSD__) && __NetBSD__ >= 6
# define LIBBUTL_POSIX_SPAWN
//
+// On OpenBSD posix_spawn() appeared in 5.2 (see the man page for details).
+//
+# elif defined(__OpenBSD__)
+# include <sys/param.h> // OpenBSD (yyyymm)
+# if OpenBSD >= 201211 // 5.2 released on 1 Nov 2012.
+# define LIBBUTL_POSIX_SPAWN
+# endif
+//
// posix_spawn() appeared in Version 3 of the Single UNIX Specification that
// was implemented in MacOS 10.5 (see the man page for details).
//
@@ -87,29 +93,20 @@
# endif // _MSC_VER
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <chrono>
-#include <cstdint>
-#include <cstddef>
-#include <system_error>
-
#include <ios> // ios_base::failure
-#include <cstring> // strlen(), strchr(), strncmp()
+#include <memory> // unique_ptr
+#include <cstring> // strlen(), strchr(), strpbrk(), strncmp()
#include <utility> // move()
#include <ostream>
+#include <cassert>
#ifndef _WIN32
-#include <thread> // this_thread::sleep_for()
+# include <thread> // this_thread::sleep_for()
#else
-#include <map>
-#include <ratio> // milli
-#include <cstdlib> // __argv[]
-#include <algorithm> // find()
-#endif
+# include <map>
+# include <ratio> // milli
+# include <cstdlib> // __argv[]
+# include <algorithm> // find()
#endif
#include <libbutl/process-details.hxx>
@@ -119,32 +116,8 @@ namespace butl
shared_mutex process_spawn_mutex; // Out of module purview.
}
-#ifdef __cpp_modules_ts
-module butl.process;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading; // Clang wants it in purview (see process-details.hxx).
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.vector_view;
-import butl.small_vector;
-#endif
-
-#ifndef _WIN32
-import std.threading;
-#endif
-
-import butl.utility; // icasecmp()
-import butl.fdstream; // fdopen_null()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx> // icasecmp()
+#include <libbutl/fdstream.hxx> // fdopen_null()
using namespace std;
@@ -217,7 +190,7 @@ namespace butl
}
void process::
- print (ostream& o, const char* const args[], size_t n)
+ print (ostream& o, const char* const* args, size_t n)
{
size_t m (0);
const char* const* p (args);
@@ -253,6 +226,35 @@ namespace butl
} while (*p != nullptr);
}
+#if defined(LIBBUTL_POSIX_SPAWN) || defined(_WIN32)
+ // Return true if the NULL-terminated variable list contains an (un)set of
+ // the specified variable. The NULL list argument denotes an empty list.
+ //
+ // Note that on Windows variable names are case-insensitive.
+ //
+ static inline bool
+ contains_envvar (const char* const* vs, const char* v, size_t n)
+ {
+ if (vs != nullptr)
+ {
+ // Note that we don't expect the number of variables to (un)set to be
+ // large, so the linear search is OK.
+ //
+ while (const char* v1 = *vs++)
+ {
+#ifdef _WIN32
+ if (icasecmp (v1, v, n) == 0 && (v1[n] == '=' || v1[n] == '\0'))
+#else
+ if (strncmp (v1, v, n) == 0 && (v1[n] == '=' || v1[n] == '\0'))
+#endif
+ return true;
+ }
+ }
+
+ return false;
+ }
+#endif
+
#ifndef _WIN32
static process_path
@@ -381,10 +383,10 @@ namespace butl
}
process::
- process (const process_path& pp, const char* args[],
+ process (const process_path& pp, const char* const* args,
pipe pin, pipe pout, pipe perr,
const char* cwd,
- const char* const* envvars)
+ const char* const* evars)
{
int in (pin.in);
int out (pout.out);
@@ -452,6 +454,8 @@ namespace butl
else if (err == -2)
in_efd.out = open_null ();
+ const char* const* tevars (thread_env ());
+
// The posix_spawn()-based implementation.
//
#ifdef LIBBUTL_POSIX_SPAWN
@@ -540,47 +544,45 @@ namespace butl
fail (r);
#endif
- // Set/unset environment variables if requested.
+ // Set/unset the child process environment variables if requested.
//
- small_vector<const char*, 8> new_env;
+ vector<const char*> new_env;
- if (envvars != nullptr)
+ if (tevars != nullptr || evars != nullptr)
{
- for (const char* const* env (environ); *env != nullptr; ++env)
+ // Copy the non-overridden process environment variables into the
+ // child's environment.
+ //
+ for (const char* const* ev (environ); *ev != nullptr; ++ev)
{
- // Lookup the existing variable among those that are requested to be
- // (un)set. If not present, than add it to the child process
- // environment.
- //
- // Note that on POSIX variable names are case-sensitive.
- //
- // Alse note that we don't expect the number of variables to (un)set
- // to be large, so the linear search is OK.
- //
- const char* cv (*env);
- const char* eq (strchr (cv, '='));
- size_t n (eq != nullptr ? eq - cv : strlen (cv));
-
- const char* const* ev (envvars);
- for (; *ev != nullptr; ++ev)
- {
- const char* v (*ev);
- if (strncmp (cv, v, n) == 0 && (v[n] == '=' || v[n] == '\0'))
- break;
- }
+ const char* v (*ev);
+ const char* e (strchr (v, '='));
+ size_t n (e != nullptr ? e - v : strlen (v));
- if (*ev == nullptr)
- new_env.push_back (cv);
+ if (!contains_envvar (tevars, v, n) &&
+ !contains_envvar (evars, v, n))
+ new_env.push_back (v);
}
- // Copy the environment variables that are requested to be set.
+ // Copy non-overridden variable assignments into the child's
+ // environment.
//
- for (const char* const* ev (envvars); *ev != nullptr; ++ev)
+ auto set_vars = [&new_env] (const char* const* vs,
+ const char* const* ovs = nullptr)
{
- const char* v (*ev);
- if (strchr (v, '=') != nullptr)
- new_env.push_back (v);
- }
+ if (vs != nullptr)
+ {
+ while (const char* v = *vs++)
+ {
+ const char* e (strchr (v, '='));
+ if (e != nullptr && !contains_envvar (ovs, v, e - v))
+ new_env.push_back (v);
+ }
+ }
+ };
+
+ set_vars (tevars, evars);
+ set_vars (evars);
new_env.push_back (nullptr);
}
@@ -598,9 +600,9 @@ namespace butl
&fa,
nullptr /* attrp */,
const_cast<char* const*> (&args[0]),
- envvars != nullptr
- ? const_cast<char* const*> (new_env.data ())
- : environ);
+ new_env.empty ()
+ ? environ
+ : const_cast<char* const*> (new_env.data ()));
if (r != 0)
fail (r);
} // Release the lock in parent.
@@ -641,6 +643,10 @@ namespace butl
{
// Child.
//
+ // NOTE: make sure not to call anything that may acquire a mutex that
+ // could be already acquired in another thread, most notably
+ // malloc(). @@ What about exceptions (all the fail() calls)?
+
// Duplicate the user-supplied (fd > -1) or the created pipe descriptor
// to the standard stream descriptor (read end for STDIN_FILENO, write
// end otherwise). Close the pipe afterwards.
@@ -688,27 +694,38 @@ namespace butl
if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0)
fail (true /* child */);
- // Set/unset environment variables if requested.
+ // Set/unset environment variables.
//
- if (envvars != nullptr)
+ auto set_vars = [] (const char* const* vs)
{
- while (const char* ev = *envvars++)
+ if (vs != nullptr)
{
- const char* v (strchr (ev, '='));
-
- try
+ while (const char* v = *vs++)
{
- if (v != nullptr)
- setenv (string (ev, v - ev), v + 1);
- else
- unsetenv (ev);
- }
- catch (const system_error& e)
- {
- throw process_child_error (e.code ().value ());
+ const char* e (strchr (v, '='));
+
+ try
+ {
+ // @@ TODO: redo without allocation (PATH_MAX?) Maybe
+ // also using C API to avoid exceptions.
+ //
+ if (e != nullptr)
+ setenv (string (v, e - v), e + 1);
+ else
+ unsetenv (v);
+ }
+ catch (const system_error& e)
+ {
+ // @@ Should we assume this cannot throw?
+ //
+ throw process_child_error (e.code ().value ());
+ }
}
}
- }
+ };
+
+ set_vars (tevars);
+ set_vars (evars);
// Try to re-exec after the "text file busy" failure for 450ms.
//
@@ -741,6 +758,13 @@ namespace butl
{
if (handle != 0)
{
+ // First close any open pipe ends for good measure but ignore any
+ // errors.
+ //
+ out_fd.reset ();
+ in_ofd.reset ();
+ in_efd.reset ();
+
int es;
int r (waitpid (handle, &es, 0));
handle = 0; // We have tried.
@@ -805,13 +829,15 @@ namespace butl
void process::
kill ()
{
- if (handle != 0)
- {
- if (::kill (handle, SIGKILL) == -1)
- throw process_error (errno);
+ if (handle != 0 && ::kill (handle, SIGKILL) == -1)
+ throw process_error (errno);
+ }
- wait ();
- }
+ void process::
+ term ()
+ {
+ if (handle != 0 && ::kill (handle, SIGTERM) == -1)
+ throw process_error (errno);
}
process::id_type process::
@@ -820,6 +846,12 @@ namespace butl
return getpid ();
}
+ process::handle_type process::
+ current_handle ()
+ {
+ return getpid ();
+ }
+
// process_exit
//
process_exit::
@@ -1272,13 +1304,30 @@ namespace butl
};
const char* process::
- quote_argument (const char* a, string& s)
+ quote_argument (const char* a, string& s, bool bat)
{
- // On Windows we need to protect values with spaces using quotes.
- // Since there could be actual quotes in the value, we need to
- // escape them.
+ // On Windows we need to protect values with spaces using quotes. Since
+ // there could be actual quotes in the value, we need to escape them.
//
- bool q (*a == '\0' || strchr (a, ' ') != nullptr);
+ // For batch files we also protect equal (`=`), comma (`,`) and semicolon
+ // (`;`) since otherwise an argument containing any of these will be split
+ // into several as if they were spaces (that is, the parts will appear in
+ // %1 %2, etc., instead of all in %1). This of course could break some
+ // batch files that rely on this semantics (for example, to automatically
+ // handle --foo=bar as --foo bar) but overall seeing a single argument
+ // (albeit quoted) is closer to the behavior of real executables. So we do
+ // this by default and if it becomes a problem we can invent a flag
+ // (probably in process_env) to disable this quoting (and while at it we
+ // may add a flag to disable all quoting since the user may need to quote
+ // some arguments but not others).
+ //
+ // While `()` and `[]` are not special characters, some "subsystems"
+ // (e.g., Cygwin/MSYS2) try to interpret them in certain contexts (e.g.,
+ // relative paths). So we quote them as well (over-quoting seems to be
+ // harmless according to the "Parsing C Command-Line Arguments" MSDN
+ // article).
+ //
+ bool q (*a == '\0' || strpbrk (a, bat ? " =,;" : " ()[]") != nullptr);
if (!q && strchr (a, '"') == nullptr)
return a;
@@ -1289,8 +1338,8 @@ namespace butl
s += '"';
// Note that backslashes don't need escaping, unless they immediately
- // precede the double quote (see `Parsing C Command-Line Arguments` MSDN
- // article for more details). For example:
+ // precede the double quote (see "Parsing C Command-Line Arguments" MSDN
+ // article for details). For example:
//
// -DPATH="C:\\foo\\" -> -DPATH=\"C:\\foo\\\\\"
// -DPATH=C:\foo bar\ -> "-DPATH=C:\foo bar\\"
@@ -1329,10 +1378,10 @@ namespace butl
static map<string, bool> detect_msys_cache_;
process::
- process (const process_path& pp, const char* args[],
+ process (const process_path& pp, const char* const* args,
pipe pin, pipe pout, pipe perr,
const char* cwd,
- const char* const* envvars)
+ const char* const* evars)
{
int in (pin.in);
int out (pout.out);
@@ -1354,7 +1403,9 @@ namespace butl
//
vector<char> new_env;
- if (envvars != nullptr)
+ const char* const* tevars (thread_env ());
+
+ if (tevars != nullptr || evars != nullptr)
{
// The environment block contains the variables in the following format:
//
@@ -1363,7 +1414,7 @@ namespace butl
// Note the trailing NULL character that follows the last variable
// (null-terminated) string.
//
- unique_ptr<char, void (*)(char*)> cvars (
+ unique_ptr<char, void (*)(char*)> pevars (
GetEnvironmentStringsA (),
[] (char* p)
{
@@ -1374,50 +1425,45 @@ namespace butl
assert (false);
});
- if (cvars.get () == nullptr)
+ if (pevars.get () == nullptr)
fail ();
- const char* cv (cvars.get ());
-
- // Copy the current environment variables.
+ // Copy the non-overridden process environment variables into the
+ // child's environment.
//
- while (*cv != '\0')
+ for (const char* v (pevars.get ()); *v != '\0'; )
{
- // Lookup the existing variable among those that are requested to be
- // (un)set. If not present, than copy it to the new block.
- //
- // Note that on Windows variable names are case-insensitive.
- //
- // Alse note that we don't expect the number of variables to (un)set
- // to be large, so the linear search is OK.
- //
- size_t n (strlen (cv) + 1); // Includes NULL character.
+ size_t n (strlen (v) + 1); // Includes NULL character.
- const char* eq (strchr (cv, '='));
- size_t nn (eq != nullptr ? eq - cv : n - 1);
- const char* const* ev (envvars);
+ const char* e (strchr (v, '='));
+ size_t nn (e != nullptr ? e - v : n - 1);
- for (; *ev != nullptr; ++ev)
- {
- const char* v (*ev);
- if (icasecmp (cv, v, nn) == 0 && (v[nn] == '=' || v[nn] == '\0'))
- break;
- }
+ if (!contains_envvar (tevars, v, nn) &&
+ !contains_envvar (evars, v, nn))
+ new_env.insert (new_env.end (), v, v + n);
- if (*ev == nullptr)
- new_env.insert (new_env.end (), cv, cv + n);
-
- cv += n;
+ v += n;
}
- // Copy the environment variables that are requested to be set.
+ // Copy non-overridden variable assignments into the child's
+ // environment.
//
- for (const char* const* ev (envvars); *ev != nullptr; ++ev)
+ auto set_vars = [&new_env] (const char* const* vs,
+ const char* const* ovs = nullptr)
{
- const char* v (*ev);
- if (strchr (v, '=') != nullptr)
- new_env.insert (new_env.end (), v, v + strlen (v) + 1);
- }
+ if (vs != nullptr)
+ {
+ while (const char* v = *vs++)
+ {
+ const char* e (strchr (v, '='));
+ if (e != nullptr && !contains_envvar (ovs, v, e - v))
+ new_env.insert (new_env.end (), v, v + strlen (v) + 1);
+ }
+ }
+ };
+
+ set_vars (tevars, evars);
+ set_vars (evars);
new_env.push_back ('\0'); // Terminate the new environment block.
}
@@ -1514,12 +1560,12 @@ namespace butl
//
string cmd_line;
{
- auto append = [&cmd_line, buf = string ()] (const char* a) mutable
+ auto append = [&batch, &cmd_line, buf = string ()] (const char* a) mutable
{
if (!cmd_line.empty ())
cmd_line += ' ';
- cmd_line += quote_argument (a, buf);
+ cmd_line += quote_argument (a, buf, batch.has_value ());
};
if (batch)
@@ -1761,7 +1807,6 @@ namespace butl
using namespace chrono;
-
// Retry for about 1 hour.
//
system_clock::duration timeout (1h);
@@ -1774,7 +1819,7 @@ namespace butl
0, // Primary thread security attributes.
true, // Inherit handles.
0, // Creation flags.
- envvars != nullptr ? new_env.data () : nullptr,
+ new_env.empty () ? nullptr : new_env.data (),
cwd != nullptr && *cwd != '\0' ? cwd : nullptr,
&si,
&pi))
@@ -1847,7 +1892,7 @@ namespace butl
return PeekNamedPipe (h, &c, 1, &n, nullptr, nullptr) && n == 1;
};
- // Hidden by butl::duration that is introduced via fdstream.mxx.
+ // Hidden by butl::duration that is introduced via fdstream.hxx.
//
using milli_duration = chrono::duration<DWORD, milli>;
@@ -1928,6 +1973,10 @@ namespace butl
{
if (handle != 0)
{
+ out_fd.reset ();
+ in_ofd.reset ();
+ in_efd.reset ();
+
DWORD es;
DWORD e (NO_ERROR);
if (WaitForSingleObject (handle, INFINITE) != WAIT_OBJECT_0 ||
@@ -1958,9 +2007,16 @@ namespace butl
optional<bool> process::
try_wait ()
{
+ return timed_wait (chrono::milliseconds (0));
+ }
+
+ template <>
+ optional<bool> process::
+ timed_wait (const chrono::milliseconds& t)
+ {
if (handle != 0)
{
- DWORD r (WaitForSingleObject (handle, 0));
+ DWORD r (WaitForSingleObject (handle, static_cast<DWORD> (t.count ())));
if (r == WAIT_TIMEOUT)
return nullopt;
@@ -1982,17 +2038,33 @@ namespace butl
return exit ? static_cast<bool> (*exit) : optional<bool> ();
}
- template <>
- optional<bool> process::
- timed_wait (const chrono::milliseconds&)
+ void process::
+ kill ()
{
- throw process_error (ENOTSUP);
+ // Note that TerminateProcess() requires an exit code the process will be
+ // terminated with. We could probably craft a custom exit code that will
+ // be treated by the normal() function as an abnormal termination.
+ // However, let's keep it simple and reuse the existing (semantically
+ // close) error code.
+ //
+ if (handle != 0 && !TerminateProcess (handle, DBG_TERMINATE_PROCESS))
+ {
+ DWORD e (GetLastError ());
+ if (e != ERROR_ACCESS_DENIED)
+ throw process_error (error_msg (e));
+
+ // Handle the case when the process has already terminated or is still
+ // exiting (potentially after being killed).
+ //
+ if (!try_wait ())
+ throw process_error (error_msg (e), EPERM);
+ }
}
void process::
- kill ()
+ term ()
{
- throw process_error (ENOTSUP);
+ kill ();
}
process::id_type process::
@@ -2012,6 +2084,15 @@ namespace butl
return GetCurrentProcessId ();
}
+ process::handle_type process::
+ current_handle ()
+ {
+ // Note that the returned handle is a pseudo handle (-1) that does not
+ // need to be closed.
+ //
+ return GetCurrentProcess ();
+ }
+
// process_exit
//
process_exit::
@@ -2023,7 +2104,7 @@ namespace butl
// [ 0, 16) - program exit code or exception code
// [16, 29) - facility
// [29, 30) - flag indicating if the status value is customer-defined
- // [30, 31) - severity (00 -success, 01 - informational, 10 - warning,
+ // [30, 31] - severity (00 - success, 01 - informational, 10 - warning,
// 11 - error)
//
: status (c)
@@ -2077,6 +2158,7 @@ namespace butl
{
case STATUS_ACCESS_VIOLATION: return "access violation";
case STATUS_DLL_INIT_FAILED: return "DLL initialization failed";
+ case STATUS_DLL_NOT_FOUND: return "unable to find DLL";
case STATUS_INTEGER_DIVIDE_BY_ZERO: return "integer divided by zero";
// If a VC-compiled program exits with the STATUS_STACK_BUFFER_OVERRUN
@@ -2094,6 +2176,10 @@ namespace butl
case STATUS_STACK_BUFFER_OVERRUN: return "aborted";
case STATUS_STACK_OVERFLOW: return "stack overflow";
+ // Presumably the kill() function was called for the process.
+ //
+ case DBG_TERMINATE_PROCESS: return "killed";
+
default:
{
string desc ("unknown error 0x");
diff --git a/libbutl/process.mxx b/libbutl/process.hxx
index 54abdec..bbb7c89 100644
--- a/libbutl/process.mxx
+++ b/libbutl/process.hxx
@@ -1,17 +1,12 @@
-// file : libbutl/process.mxx -*- C++ -*-
+// file : libbutl/process.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
#ifndef _WIN32
# include <sys/types.h> // pid_t
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <chrono>
@@ -20,33 +15,15 @@
#include <cstdint> // uint32_t
#include <system_error>
-#include <utility> // move(), forward(), index_sequence
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.process;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.optional;
-import butl.fdstream; // auto_fd, fdpipe
-import butl.vector_view;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/vector-view.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx> // auto_fd, fdpipe
+#include <libbutl/vector-view.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
struct process_error: std::system_error
{
@@ -140,8 +117,8 @@ LIBBUTL_MODEXPORT namespace butl
// Moveable-only type.
//
- process_path (process_path&&);
- process_path& operator= (process_path&&);
+ process_path (process_path&&) noexcept;
+ process_path& operator= (process_path&&) noexcept;
process_path (const process_path&) = delete;
process_path& operator= (const process_path&) = delete;
@@ -191,6 +168,26 @@ LIBBUTL_MODEXPORT namespace butl
bool
normal () const;
+ // C/C++ don't apply constraints on program exit code other than it being
+ // of type int.
+ //
+ // POSIX specifies that only the least significant 8 bits shall be
+ // available from wait() and waitpid(); the full value shall be available
+ // from waitid() (read more at _Exit, _exit Open Group spec).
+ //
+ // While the Linux man page for waitid() doesn't mention any deviations
+ // from the standard, the FreeBSD implementation (as of version 11.0) only
+ // returns 8 bits like the other wait*() calls.
+ //
+ // Windows supports 32-bit exit codes.
+ //
+ // Note that in shells some exit values can have special meaning so using
+ // them can be a source of confusion. For bash, values in the [126, 255]
+ // range are such special ones (see Appendix E, "Exit Codes With Special
+ // Meanings" in the Advanced Bash-Scripting Guide).
+ //
+ // So [0, 125] appears to be the usable exit code range.
+ //
code_type
code () const;
@@ -272,7 +269,30 @@ LIBBUTL_MODEXPORT namespace butl
// the parent. So you should do this yourself, if required. For example,
// to redirect the child process stdout to stderr, you can do:
//
- // process p (..., 0, 2);
+ // process pr (..., 0, 2);
+ //
+ // Note also that the somewhat roundabout setup with -1 as a redirect
+ // "instruction" and out_fd/in_ofd/in_efd data members for the result
+ // helps to make sure the stream instances are destroyed before the
+ // process instance. For example:
+ //
+ // process pr (..., 0, -1, 2);
+ // ifdstream is (move (pr.in_ofd));
+ //
+ // This is important in case an exception is thrown where we want to make
+ // sure all our pipe ends are closed before we wait for the process exit
+ // (which happens in the process destructor).
+ //
+ // And speaking of the destruction order, another thing to keep in mind is
+ // that only one stream can use the skip mode (fdstream_mode::skip;
+ // because skipping is performed in the blocking mode) and the stream that
+ // skips should come first so that all other streams are destroyed/closed
+ // before it (failing that, we may end up in a deadlock). For example:
+ //
+ // process pr (..., -1, -1, -1);
+ // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); // Must be first.
+ // ifdstream es (move (pr.in_efd));
+ // ofdstream os (move (pr.out_fd));
//
// The cwd argument allows to change the current working directory of the
// child process. NULL and empty arguments are ignored.
@@ -290,39 +310,104 @@ LIBBUTL_MODEXPORT namespace butl
// Note that the versions without the the process_path argument may
// temporarily change args[0] (see path_search() for details).
//
- process (const char* [],
+ process (const char**,
+ int in = 0, int out = 1, int err = 2,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const char* const*,
int in = 0, int out = 1, int err = 2,
const char* cwd = nullptr,
const char* const* envvars = nullptr);
- process (const process_path&, const char* [],
+ process (std::vector<const char*>&,
+ int in = 0, int out = 1, int err = 2,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const std::vector<const char*>&,
int in = 0, int out = 1, int err = 2,
const char* cwd = nullptr,
const char* const* envvars = nullptr);
// If the descriptors are pipes that you have created, then you should use
- // this constructor instead to communicate this information.
+ // this constructor instead to communicate this information (the parent
+ // end may need to be "probed" on Windows).
//
// For generality, if the "other" end of the pipe is -1, then assume this
// is not a pipe.
//
struct pipe
{
- int in = -1;
- int out = -1;
-
pipe () = default;
pipe (int i, int o): in (i), out (o) {}
explicit
pipe (const fdpipe& p): in (p.in.get ()), out (p.out.get ()) {}
+
+ // Transfer ownership to one end of the pipe.
+ //
+ pipe (auto_fd i, int o): in (i.release ()), out (o), own_in (true) {}
+ pipe (int i, auto_fd o): in (i), out (o.release ()), own_out (true) {}
+
+ // Moveable-only type.
+ //
+ pipe (pipe&&) noexcept;
+ pipe& operator= (pipe&&) noexcept;
+
+ pipe (const pipe&) = delete;
+ pipe& operator= (const pipe&) = delete;
+
+ ~pipe ();
+
+ public:
+ int in = -1;
+ int out = -1;
+
+ bool own_in = false;
+ bool own_out = false;
};
- process (const process_path&, const char* [],
+ process (const char**,
pipe in, pipe out, pipe err,
const char* cwd = nullptr,
const char* const* envvars = nullptr);
+ process (const char**,
+ int in, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const char* const*,
+ pipe in, pipe out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const char* const*,
+ int in, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (std::vector<const char*>&,
+ pipe in, pipe out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (std::vector<const char*>&,
+ int in, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const std::vector<const char*>&,
+ pipe in, pipe out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const std::vector<const char*>&,
+ int in, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
// The "piping" constructor, for example:
//
// process lhs (..., 0, -1); // Redirect stdout to a pipe.
@@ -331,16 +416,36 @@ LIBBUTL_MODEXPORT namespace butl
// rhs.wait (); // Wait for last first.
// lhs.wait ();
//
- process (const char* [],
+ process (const char**,
process&, int out = 1, int err = 2,
const char* cwd = nullptr,
const char* const* envvars = nullptr);
- process (const process_path&, const char* [],
+ process (const process_path&, const char* const*,
process&, int out = 1, int err = 2,
const char* cwd = nullptr,
const char* const* envvars = nullptr);
+ process (const char**,
+ process&, pipe out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const char**,
+ process&, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const char* const*,
+ process&, pipe out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
+ process (const process_path&, const char* const*,
+ process&, int out, pipe err,
+ const char* cwd = nullptr,
+ const char* const* envvars = nullptr);
+
// Wait for the process to terminate. Return true if the process
// terminated normally and with the zero exit code. Unless ignore_error
// is true, throw process_error if anything goes wrong. This function can
@@ -360,32 +465,42 @@ LIBBUTL_MODEXPORT namespace butl
// duration. Return the same result as wait() if the process has
// terminated in this timeframe and nullopt otherwise.
//
- // Note: not yet implemented on Windows.
- //
template <typename R, typename P>
optional<bool>
timed_wait (const std::chrono::duration<R, P>&);
- // Terminate the process.
+ // Note that the destructor will wait for the process but will ignore
+ // any errors and the exit status.
+ //
+ ~process () { if (handle != 0) wait (true); }
+
+ // Process termination.
+ //
+
+ // Send SIGKILL to the process on POSIX and call TerminateProcess() with
+ // DBG_TERMINATE_PROCESS exit code on Windows. Noop for an already
+ // terminated process.
//
- // On POSIX send SIGKILL to the process and wait until it terminates. The
- // process exit information is available after the call returns. Noop for
- // an already terminated process.
+ // Note that if the process is killed, it terminates as if it has called
+ // abort() (functions registered with atexit() are not called, etc).
//
- // Note: not yet implemented on Windows.
+ // Also note that on Windows calling this function for a terminating
+ // process results in the EPERM process_error exception.
//
void
kill ();
- // Note that the destructor will wait for the process but will ignore
- // any errors and the exit status.
+ // Send SIGTERM to the process on POSIX and call kill() on Windows (where
+ // there is no general way to terminate a console process gracefully).
+ // Noop for an already terminated process.
//
- ~process () {if (handle != 0) wait (true);}
+ void
+ term ();
// Moveable-only type.
//
- process (process&&);
- process& operator= (process&&);
+ process (process&&) noexcept;
+ process& operator= (process&&) noexcept (false); // Note: calls wait().
process (const process&) = delete;
process& operator= (const process&) = delete;
@@ -407,7 +522,7 @@ LIBBUTL_MODEXPORT namespace butl
//
// ... // E.g., print args[0].
//
- // process p (pp, args);
+ // process pr (pp, args);
//
// You can also specify the fallback directory which will be tried last.
// This, for example, can be used to implement the Windows "search in the
@@ -491,15 +606,17 @@ LIBBUTL_MODEXPORT namespace butl
// nameN arg arg ... nullptr nullptr
//
static void
- print (std::ostream&, const char* const args[], size_t n = 0);
+ print (std::ostream&, const char* const* args, size_t n = 0);
- // Quote and escape the specified command line argument. Return the
- // original string if neither is necessary and a pointer to the provided
- // buffer string containing the escaped version otherwise.
+ // Quote and escape the specified command line argument. If batch is true
+ // then also quote the equal (`=`), comma (`,`) and semicolon (`;`)
+ // characters which are treated as argument separators in batch files.
+ // Return the original string if neither is necessary and a pointer to the
+ // provided buffer string containing the escaped version otherwise.
//
#ifdef _WIN32
static const char*
- quote_argument (const char*, std::string& buffer);
+ quote_argument (const char*, std::string& buffer, bool batch);
#endif
public:
@@ -512,13 +629,16 @@ LIBBUTL_MODEXPORT namespace butl
public:
handle_type handle;
+ static handle_type
+ current_handle ();
+
// Absence means that the exit information is not (yet) known. This can be
// because you haven't called wait() yet or because wait() failed.
//
optional<process_exit> exit;
- // Use the following file descriptors to communicate with the new process's
- // standard streams.
+ // Use the following file descriptors to communicate with the new
+ // process's standard streams (if redirected to pipes; see above).
//
auto_fd out_fd; // Write to it to send to stdin.
auto_fd in_ofd; // Read from it to receive from stdout.
@@ -632,8 +752,8 @@ LIBBUTL_MODEXPORT namespace butl
// Moveable-only type.
//
- process_env (process_env&&);
- process_env& operator= (process_env&&);
+ process_env (process_env&&) noexcept;
+ process_env& operator= (process_env&&) noexcept;
process_env (const process_env&) = delete;
process_env& operator= (const process_env&) = delete;
@@ -669,7 +789,7 @@ LIBBUTL_MODEXPORT namespace butl
// command line or similar. It should be callable with the following
// signature:
//
- // void (const char*[], std::size_t)
+ // void (const char* const*, std::size_t)
//
template <typename C,
typename I,
@@ -710,6 +830,15 @@ LIBBUTL_MODEXPORT namespace butl
const process_env&,
A&&... args);
+ // Call the callback without actually running/starting anything.
+ //
+ template <typename C,
+ typename... A>
+ void
+ process_print_callback (const C&,
+ const process_env&,
+ A&&... args);
+
// Conversion of types to their C string representations. Can be overloaded
// (including via ADL) for custom types. The default implementation calls
// to_string() which covers all the numeric values via std::to_string () and
diff --git a/libbutl/process.ixx b/libbutl/process.ixx
index eba6be5..e4db474 100644
--- a/libbutl/process.ixx
+++ b/libbutl/process.ixx
@@ -1,6 +1,9 @@
// file : libbutl/process.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
+#include <cassert>
+#include <utility> // move()
+
namespace butl
{
// process_path
@@ -32,7 +35,7 @@ namespace butl
args0_ (nullptr) {}
inline process_path::
- process_path (process_path&& p)
+ process_path (process_path&& p) noexcept
: effect (std::move (p.effect)),
args0_ (p.args0_)
{
@@ -45,7 +48,7 @@ namespace butl
}
inline process_path& process_path::
- operator= (process_path&& p)
+ operator= (process_path&& p) noexcept
{
if (this != &p)
{
@@ -95,8 +98,13 @@ namespace butl
{
if (!effect.empty ())
{
+ bool init (initial != recall.string ().c_str ());
+
recall = std::move (effect);
effect.clear ();
+
+ if (!init)
+ initial = recall.string ().c_str ();
}
}
@@ -116,6 +124,42 @@ namespace butl
}
#endif
+ // process::pipe
+ //
+ // Move constructor: copy both descriptors and both ownership flags from p
+ // and then reset p's descriptors to -1 so that p no longer refers to them.
+ //
+ inline process::pipe::
+ pipe (pipe&& p) noexcept
+ : in (p.in), out (p.out), own_in (p.own_in), own_out (p.own_out)
+ {
+ p.in = p.out = -1;
+ }
+
+ // Move assignment: close the descriptor this object currently owns (if
+ // any), then take over p's descriptors and ownership flags, resetting p's
+ // descriptors to -1. Self-assignment is a noop.
+ //
+ inline process::pipe& process::pipe::
+ operator= (pipe&& p) noexcept
+ {
+ if (this != &p)
+ {
+ // Pick the owned end: in if own_in is set, otherwise out if own_out is
+ // set, otherwise nothing (-1).
+ //
+ int d (own_in ? in : own_out ? out : -1);
+ if (d != -1)
+ fdclose (d);
+
+ in = p.in;
+ out = p.out;
+ own_in = p.own_in;
+ own_out = p.own_out;
+
+ p.in = p.out = -1;
+ }
+ return *this;
+ }
+
+ // Close the owned descriptor, if any: in if own_in is set, otherwise out if
+ // own_out is set. A descriptor value of -1 (for example, after being moved
+ // from) is not closed.
+ //
+ inline process::pipe::
+ ~pipe ()
+ {
+ int d (own_in ? in : own_out ? out : -1);
+ if (d != -1)
+ fdclose (d);
+ }
+
// process
//
#ifndef _WIN32
@@ -170,21 +214,16 @@ namespace butl
inline process::
process (optional<process_exit> e)
- : handle (0),
- exit (std::move (e)),
- out_fd (-1),
- in_ofd (-1),
- in_efd (-1)
+ : handle (0), exit (std::move (e))
{
}
inline process::
- process (const process_path& pp, const char* args[],
+ process (const process_path& pp, const char* const* args,
int in, int out, int err,
const char* cwd,
const char* const* envvars)
- : process (pp,
- args,
+ : process (pp, args,
pipe (in, -1), pipe (-1, out), pipe (-1, err),
cwd,
envvars)
@@ -192,32 +231,187 @@ namespace butl
}
inline process::
- process (const char* args[],
+ process (const char** args,
int in, int out, int err,
const char* cwd,
const char* const* envvars)
- : process (path_search (args[0]), args, in, out, err, cwd, envvars) {}
+ : process (path_search (args[0]), args, in, out, err, cwd, envvars)
+ {
+ }
+
+ // Vector-of-arguments version with int descriptors: pass args.data () and
+ // pipe (in, -1), pipe (-1, out), pipe (-1, err) built from the int
+ // arguments to the (process_path, const char* const*, pipe, pipe, pipe)
+ // constructor.
+ //
+ inline process::
+ process (const process_path& pp, const std::vector<const char*>& args,
+ int in, int out, int err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args.data (),
+ pipe (in, -1), pipe (-1, out), pipe (-1, err),
+ cwd,
+ envvars)
+ {
+ }
+
+ // As above but without the process path: obtain it by calling
+ // path_search (args[0]) and delegate to the int-descriptor constructor.
+ //
+ inline process::
+ process (std::vector<const char*>& args,
+ int in, int out, int err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args.data (),
+ in, out, err,
+ cwd,
+ envvars)
+ {
+ }
+
+ // Argument array version taking all three streams as pipes: obtain the
+ // process path by calling path_search (args[0]) and move the pipes into the
+ // delegated-to constructor.
+ //
+ inline process::
+ process (const char** args,
+ pipe in, pipe out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args,
+ std::move (in), std::move (out), std::move (err),
+ cwd, envvars)
+ {
+ }
+
+ // Mixed version: in and out are int descriptors (passed on as pipe (in, -1)
+ // and pipe (-1, out)) while err is a pipe that is moved. The process path
+ // is obtained by calling path_search (args[0]).
+ //
+ inline process::
+ process (const char** args,
+ int in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args,
+ pipe (in, -1), pipe (-1, out), std::move (err),
+ cwd, envvars)
+ {
+ }
+
+ // Mixed version with an explicit process path: in and out are int
+ // descriptors (passed on as pipe (in, -1) and pipe (-1, out)) while err is
+ // a pipe that is moved.
+ //
+ inline process::
+ process (const process_path& pp, const char* const* args,
+ int in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args,
+ pipe (in, -1), pipe (-1, out), std::move (err),
+ cwd,
+ envvars)
+ {
+ }
+
+ // Vector-of-arguments version taking all three streams as pipes: obtain the
+ // process path by calling path_search (args[0]), pass args.data (), and
+ // move the pipes into the delegated-to constructor.
+ //
+ inline process::
+ process (std::vector<const char*>& args,
+ pipe in, pipe out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args.data (),
+ std::move (in), std::move (out), std::move (err),
+ cwd,
+ envvars)
+ {
+ }
+
+ // Vector-of-arguments mixed version: in and out are int descriptors (passed
+ // on as pipe (in, -1) and pipe (-1, out)) while err is a pipe that is
+ // moved. The process path is obtained by calling path_search (args[0]).
+ //
+ inline process::
+ process (std::vector<const char*>& args,
+ int in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args.data (),
+ pipe (in, -1), pipe (-1, out), std::move (err),
+ cwd,
+ envvars)
+ {
+ }
+
+ // Vector-of-arguments version with an explicit process path and all three
+ // streams as pipes: pass args.data () and move the pipes into the
+ // delegated-to constructor.
+ //
+ inline process::
+ process (const process_path& pp, const std::vector<const char*>& args,
+ pipe in, pipe out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args.data (),
+ std::move (in), std::move (out), std::move (err),
+ cwd,
+ envvars)
+ {
+ }
inline process::
- process (const process_path& pp, const char* args[],
+ process (const process_path& pp, const std::vector<const char*>& args,
+ int in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args.data (),
+ pipe (in, -1), pipe (-1, out), std::move (err),
+ cwd,
+ envvars)
+ {
+ }
+
+ // The "piping" constructor with an explicit process path and out/err given
+ // as pipes: the new process's stdin pipe is built from the in process's
+ // in_ofd descriptor, which is moved out of in.
+ //
+ // The immediately-invoked lambda is used so that the assertion can be
+ // checked inside the delegating constructor's argument list, before the
+ // descriptor is moved.
+ //
+ inline process::
+ process (const process_path& pp, const char* const* args,
+ process& in, pipe out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args,
+ [&in] ()
+ {
+ assert (in.in_ofd != nullfd); // Should be a pipe.
+ return process::pipe (std::move (in.in_ofd), -1);
+ } (),
+ std::move (out), std::move (err),
+ cwd, envvars)
+ {
+ }
+
+ inline process::
+ process (const process_path& pp, const char* const* args,
process& in, int out, int err,
const char* cwd,
const char* const* envvars)
- : process (pp, args, in.in_ofd.get (), out, err, cwd, envvars)
+ : process (pp, args, in, pipe (-1, out), pipe (-1, err), cwd, envvars)
{
- assert (in.in_ofd.get () != -1); // Should be a pipe.
- in.in_ofd.reset (); // Close it on our side.
}
inline process::
- process (const char* args[],
+ process (const char** args,
process& in, int out, int err,
const char* cwd,
const char* const* envvars)
- : process (path_search (args[0]), args, in, out, err, cwd, envvars) {}
+ : process (path_search (args[0]), args, in, out, err, cwd, envvars)
+ {
+ }
+
+ // Piping version without the process path and with out/err given as pipes:
+ // obtain the path by calling path_search (args[0]) and move the pipes into
+ // the delegated-to piping constructor.
+ //
+ inline process::
+ process (const char** args,
+ process& in, pipe out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args,
+ in, std::move (out), std::move (err),
+ cwd, envvars)
+ {
+ }
+
+ // Piping mixed version without the process path: out is an int descriptor
+ // (passed on as pipe (-1, out)) while err is a pipe that is moved. The path
+ // is obtained by calling path_search (args[0]).
+ //
+ inline process::
+ process (const char** args,
+ process& in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (path_search (args[0]), args,
+ in, pipe (-1, out), std::move (err),
+ cwd, envvars)
+ {
+ }
+
+ // Piping mixed version with an explicit process path: out is an int
+ // descriptor (passed on as pipe (-1, out)) while err is a pipe that is
+ // moved.
+ //
+ inline process::
+ process (const process_path& pp, const char* const* args,
+ process& in, int out, pipe err,
+ const char* cwd,
+ const char* const* envvars)
+ : process (pp, args, in, pipe (-1, out), std::move (err), cwd, envvars)
+ {
+ }
inline process::
- process (process&& p)
+ process (process&& p) noexcept
: handle (p.handle),
exit (std::move (p.exit)),
out_fd (std::move (p.out_fd)),
@@ -228,7 +422,7 @@ namespace butl
}
inline process& process::
- operator= (process&& p)
+ operator= (process&& p) noexcept (false)
{
if (this != &p)
{
@@ -251,7 +445,7 @@ namespace butl
// specialization.
//
template <>
- optional<bool> process::
+ LIBBUTL_SYMEXPORT optional<bool> process::
timed_wait (const std::chrono::milliseconds&);
template <typename R, typename P>
@@ -265,13 +459,13 @@ namespace butl
// process_env
//
inline process_env::
- process_env (process_env&& e)
+ process_env (process_env&& e) noexcept
{
*this = std::move (e);
}
inline process_env& process_env::
- operator= (process_env&& e)
+ operator= (process_env&& e) noexcept
{
if (this != &e)
{
diff --git a/libbutl/project-name.cxx b/libbutl/project-name.cxx
index 7a14b49..a7ed8a8 100644
--- a/libbutl/project-name.cxx
+++ b/libbutl/project-name.cxx
@@ -1,38 +1,16 @@
// file : libbutl/project-name.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/project-name.mxx>
-#endif
+#include <libbutl/project-name.hxx>
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <utility> // move()
#include <algorithm> // find()
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.project_name;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility;
-#endif
-
-import butl.path; // path::traits
-import butl.utility; // alpha(), alnum()
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#endif
+
+#include <libbutl/path.hxx> // path::traits
+#include <libbutl/utility.hxx> // alpha(), alnum()
using namespace std;
diff --git a/libbutl/project-name.mxx b/libbutl/project-name.hxx
index 1117e28..6e1f925 100644
--- a/libbutl/project-name.mxx
+++ b/libbutl/project-name.hxx
@@ -1,34 +1,17 @@
-// file : libbutl/project-name.mxx -*- C++ -*-
+// file : libbutl/project-name.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <utility> // move()
#include <ostream>
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.project_name;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // icasecmp(), sanitize_identifier()
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // icasecmp(), sanitize_identifier()
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Build system project name.
//
diff --git a/libbutl/prompt.cxx b/libbutl/prompt.cxx
index 1c0820a..154522c 100644
--- a/libbutl/prompt.cxx
+++ b/libbutl/prompt.cxx
@@ -1,33 +1,11 @@
// file : libbutl/prompt.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/prompt.mxx>
-#endif
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
+#include <libbutl/prompt.hxx>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.prompt;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-import butl.diagnostics;
-#else
-#include <libbutl/diagnostics.mxx> // diag_stream
-#endif
+#include <libbutl/diagnostics.hxx> // diag_stream
using namespace std;
@@ -66,8 +44,8 @@ namespace butl
if (!e)
a = def;
}
- } while (a != "y" && a != "n");
+ } while (a != "y" && a != "Y" && a != "n" && a != "N");
- return a == "y";
+ return a == "y" || a == "Y";
}
}
diff --git a/libbutl/prompt.mxx b/libbutl/prompt.hxx
index 2489b2f..2a07708 100644
--- a/libbutl/prompt.mxx
+++ b/libbutl/prompt.hxx
@@ -1,28 +1,13 @@
-// file : libbutl/prompt.mxx -*- C++ -*-
+// file : libbutl/prompt.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.prompt;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// The Y/N prompt. The def argument, if specified, should be either 'y' or
// 'n'. It is used as the default answer, in case the user just hits enter.
@@ -30,6 +15,10 @@ LIBBUTL_MODEXPORT namespace butl
// Write the prompt to diag_stream. Throw ios_base::failure if no answer
// could be extracted from stdin (for example, because it was closed).
//
+ // Note that the implementation accepts both lower and upper case y/n as
+ // valid answers (apparently the capitalized default answer confuses some
+ // users into answering with capital letters).
+ //
LIBBUTL_SYMEXPORT bool
yn_prompt (const std::string&, char def = '\0');
}
diff --git a/libbutl/regex.cxx b/libbutl/regex.cxx
index 83e296c..34536f2 100644
--- a/libbutl/regex.cxx
+++ b/libbutl/regex.cxx
@@ -1,42 +1,17 @@
// file : libbutl/regex.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/regex.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <regex>
-#include <string>
+#include <libbutl/regex.hxx>
#include <ostream>
#include <sstream>
#include <stdexcept> // runtime_error
+
#if defined(_MSC_VER) && _MSC_VER < 2000
# include <cstring> // strstr()
#endif
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-module butl.regex;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.regex;
-#endif
-#endif
-
-import butl.utility; // operator<<(ostream, exception)
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
namespace std
{
diff --git a/libbutl/regex.mxx b/libbutl/regex.hxx
index 84b024f..9b31075 100644
--- a/libbutl/regex.mxx
+++ b/libbutl/regex.hxx
@@ -1,22 +1,13 @@
-// file : libbutl/regex.mxx -*- C++ -*-
+// file : libbutl/regex.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
-#ifndef __cpp_lib_modules_ts
#include <regex>
#include <iosfwd>
#include <string>
#include <utility> // pair
-
-#include <locale>
#include <cstddef> // size_t
-#include <utility> // move(), make_pair()
-#endif
#if defined(__clang__)
# if __has_include(<__config>)
@@ -24,20 +15,9 @@
# endif
#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.regex;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.regex; // @@ MOD TODO should probably be re-exported.
-#endif
-#endif
-
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// The regex semantics for the following functions is like that of
// std::regex_replace() extended the standard ECMA-262 substitution escape
@@ -93,9 +73,54 @@ LIBBUTL_MODEXPORT namespace butl
regex_replace_match (const std::basic_string<C>&,
const std::basic_regex<C>&,
const std::basic_string<C>& fmt);
+
+ // As above but using match_results.
+ //
+ template <typename C>
+ std::basic_string<C>
+ regex_replace_match_results (
+ const std::match_results<typename std::basic_string<C>::const_iterator>&,
+ const std::basic_string<C>& fmt);
+
+ template <typename C>
+ std::basic_string<C>
+ regex_replace_match_results (
+ const std::match_results<typename std::basic_string<C>::const_iterator>&,
+ const C* fmt, std::size_t fmt_n);
+
+ // Parse the '/<regex>/<format>/' replacement string into the regex/format
+ // pair. Another character can be used as the delimiter instead of '/'. Throw
+ // std::invalid_argument or std::regex_error on parsing error.
+ //
+ // Note: escaping of the delimiter character is not (yet) supported.
+ //
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const std::basic_string<C>&,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C*,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C*, size_t,
+ std::regex_constants::syntax_option_type =
+ std::regex_constants::ECMAScript);
+
+ // As above but return the regex as a string instead of compiling it and do
+ // not fail if there is text after the last delimiter, instead returning in
+ // end the position of that text (one past the trailing delimiter).
+ //
+ template <typename C>
+ std::pair<std::basic_string<C>, std::basic_string<C>>
+ regex_replace_parse (const C*, size_t, size_t& end);
}
-LIBBUTL_MODEXPORT namespace std
+namespace std
{
// Print regex error description but only if it is meaningful (this is also
// why we have to print leading colon).
diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx
index dec15d1..08962cf 100644
--- a/libbutl/regex.ixx
+++ b/libbutl/regex.ixx
@@ -1,7 +1,9 @@
// file : libbutl/regex.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility> // move(), make_pair()
+
+namespace butl
{
template <typename C>
inline std::pair<std::basic_string<C>, bool>
@@ -21,4 +23,30 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return make_pair (move (r), match);
}
+
+ // String version: forward the string's data and size to the
+ // (const C*, size_t, syntax_option_type) overload.
+ //
+ template <typename C>
+ inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const std::basic_string<C>& s,
+ std::regex_constants::syntax_option_type f)
+ {
+ return regex_replace_parse (s.c_str (), s.size (), f);
+ }
+
+ // C-string version: calculate the length of s using the string's character
+ // traits and forward to the (const C*, size_t, syntax_option_type)
+ // overload.
+ //
+ template <typename C>
+ inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s,
+ std::regex_constants::syntax_option_type f)
+ {
+ return regex_replace_parse (
+ s, std::basic_string<C>::traits_type::length (s), f);
+ }
+
+ // String format version: forward the format's data and size to the
+ // (match_results, const C*, size_t) overload.
+ //
+ template <typename C>
+ inline std::basic_string<C>
+ regex_replace_match_results (
+ const std::match_results<typename std::basic_string<C>::const_iterator>& m,
+ const std::basic_string<C>& fmt)
+ {
+ return regex_replace_match_results (m, fmt.c_str (), fmt.size ());
+ }
}
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index b785708..214d949 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -1,15 +1,16 @@
// file : libbutl/regex.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <locale>
+#include <stdexcept> // invalid_argument
+
+namespace butl
{
- // Replace the regex match results using the format string.
- //
template <typename C>
std::basic_string<C>
regex_replace_match_results (
const std::match_results<typename std::basic_string<C>::const_iterator>& m,
- const std::basic_string<C>& fmt)
+ const C* fmt, std::size_t n)
{
using namespace std;
@@ -60,7 +61,6 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
}
};
- size_t n (fmt.size ());
for (size_t i (0); i < n; ++i)
{
C c (fmt[i]);
@@ -278,4 +278,71 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
return match;
}
+
+ // Parse the replacement string of length n into the regex/format pair by
+ // first splitting it into strings with the (const C*, size_t, size_t&)
+ // overload and then compiling the regex part with the syntax flags f.
+ // Throw std::invalid_argument if there is text after the trailing
+ // delimiter (in addition to whatever the splitting overload throws).
+ //
+ template <typename C>
+ std::pair<std::basic_regex<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s, size_t n,
+ std::regex_constants::syntax_option_type f)
+ {
+ using namespace std;
+
+ using string_type = basic_string<C>;
+
+ // Position one past the trailing delimiter as set by the call below.
+ //
+ size_t e;
+ pair<string_type, string_type> r (regex_replace_parse (s, n, e));
+
+ if (e != n)
+ throw invalid_argument ("junk after trailing delimiter");
+
+ return make_pair (basic_regex<C> (r.first, f), move (r.second));
+ }
+
+ // Split the '<delim><regex><delim><format><delim>' string of length n into
+ // the regex and format substrings without compiling the regex. The first
+ // character of the string is used as the delimiter. On success set e to the
+ // position one past the trailing delimiter (so e != n means that there is
+ // text after it). Throw std::invalid_argument if the string is empty, a
+ // delimiter is missing, or the regex part is empty.
+ //
+ template <typename C>
+ std::pair<std::basic_string<C>, std::basic_string<C>>
+ regex_replace_parse (const C* s, size_t n, size_t& e)
+ {
+ using namespace std;
+
+ using string_type = basic_string<C>;
+
+ if (n == 0)
+ throw invalid_argument ("no leading delimiter");
+
+ const C* b (s); // Save the beginning of the string.
+
+ // Note: keep the delimiter in the string's character type so that it is
+ // not narrowed (and thus potentially mismatched by the searches below) for
+ // C other than char.
+ //
+ C delim (s[0]);
+
+ // Position to the regex first character and find the regex-terminating
+ // delimiter.
+ //
+ --n;
+ ++s;
+
+ const C* p (string_type::traits_type::find (s, n, delim));
+
+ if (p == nullptr)
+ throw invalid_argument ("no delimiter after regex");
+
+ // Empty regex matches nothing, so not of much use.
+ //
+ if (p == s)
+ throw invalid_argument ("empty regex");
+
+ // Save the regex.
+ //
+ string_type re (s, p - s);
+
+ // Position to the format first character and find the trailing delimiter.
+ // Here we skip the regex (p - s characters) plus its delimiter.
+ //
+ n -= p - s + 1;
+ s = p + 1;
+
+ p = string_type::traits_type::find (s, n, delim);
+
+ if (p == nullptr)
+ throw invalid_argument ("no delimiter after replacement");
+
+ e = p - b + 1;
+ return make_pair (move (re), string_type (s, p - s));
+ }
}
diff --git a/libbutl/semantic-version.cxx b/libbutl/semantic-version.cxx
index eaf709d..9e0a1ef 100644
--- a/libbutl/semantic-version.cxx
+++ b/libbutl/semantic-version.cxx
@@ -1,39 +1,12 @@
// file : libbutl/semantic-version.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/semantic-version.hxx>
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#include <ostream>
-
#include <cstring> // strchr()
-#include <cstdlib> // strtoull()
#include <utility> // move()
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.semantic_version;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#endif
-#else
-#endif
using namespace std;
@@ -80,9 +53,9 @@ namespace butl
}
semantic_version::
- semantic_version (const std::string& s, size_t p, const char* bs)
+ semantic_version (const std::string& s, size_t p, flags fs, const char* bs)
{
- semantic_version_result r (parse_semantic_version_impl (s, p, bs));
+ semantic_version_result r (parse_semantic_version_impl (s, p, fs, bs));
if (r.version)
*this = move (*r.version);
@@ -98,8 +71,27 @@ namespace butl
uint64_t min = 0, uint64_t max = uint64_t (~0));
semantic_version_result
- parse_semantic_version_impl (const string& s, size_t p, const char* bs)
+ parse_semantic_version_impl (const string& s, size_t p,
+ semantic_version::flags fs,
+ const char* bs)
{
+ bool allow_build ((fs & semantic_version::allow_build) != 0);
+
+ // If build separators are specified, then the allow_build flag must be
+ // specified explicitly.
+ //
+ assert (bs == nullptr || allow_build);
+
+ if (allow_build && bs == nullptr)
+ bs = "-+";
+
+ bool require_minor ((fs & semantic_version::allow_omit_minor) == 0);
+
+ if (!require_minor)
+ fs |= semantic_version::allow_omit_patch;
+
+ bool require_patch ((fs & semantic_version::allow_omit_patch) == 0);
+
auto bail = [] (string m)
{
return semantic_version_result {nullopt, move (m)};
@@ -110,31 +102,47 @@ namespace butl
if (!parse_uint64 (s, p, r.major))
return bail ("invalid major version");
- if (s[p] != '.')
- return bail ("'.' expected after major version");
-
- if (!parse_uint64 (s, ++p, r.minor))
- return bail ("invalid minor version");
-
- if (s[p] == '.')
+ if (s[p] == '.') // Is there a minor version?
{
- // Treat it as build if failed to parse as patch (e.g., 1.2.alpha).
+ // Try to parse the minor version and treat it as build on failure
+ // (e.g., 1.alpha).
//
- if (!parse_uint64 (s, ++p, r.patch))
+ if (parse_uint64 (s, ++p, r.minor))
+ {
+ if (s[p] == '.') // Is there a patch version?
+ {
+ // Try to parse the patch version and treat it as build on failure
+ // (e.g., 1.2.alpha).
+ //
+ if (parse_uint64 (s, ++p, r.patch))
+ ;
+ else
+ {
+ if (require_patch)
+ return bail ("invalid patch version");
+
+ --p;
+ // Fall through.
+ }
+ }
+ else if (require_patch)
+ return bail ("'.' expected after minor version");
+ }
+ else
{
- //if (require_patch)
- // return bail ("invalid patch version");
+ if (require_minor)
+ return bail ("invalid minor version");
--p;
// Fall through.
}
}
- //else if (require_patch)
- // return bail ("'.' expected after minor version");
+ else if (require_minor)
+ return bail ("'.' expected after major version");
if (char c = s[p])
{
- if (bs == nullptr || (*bs != '\0' && strchr (bs, c) == nullptr))
+ if (!allow_build || (*bs != '\0' && strchr (bs, c) == nullptr))
return bail ("junk after version");
r.build.assign (s, p, string::npos);
diff --git a/libbutl/semantic-version.mxx b/libbutl/semantic-version.hxx
index 566d192..4eba38a 100644
--- a/libbutl/semantic-version.mxx
+++ b/libbutl/semantic-version.hxx
@@ -1,32 +1,15 @@
-// file : libbutl/semantic-version.mxx -*- C++ -*-
+// file : libbutl/semantic-version.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstddef> // size_t
#include <cstdint> // uint*_t
#include <utility> // move()
#include <ostream>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.semantic_version;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#else
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
@@ -40,19 +23,13 @@ import butl.optional;
# undef minor
#endif
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Semantic or semantic-like version.
//
- // <major>.<minor>[.<patch>][<build>]
+ // <major>[.<minor>[.<patch>]][<build>]
//
- // If the patch component is absent, then it defaults to 0.
- //
- // @@ Currently there is no way to enforce the three-component version.
- // Supporting this will require changing allow_build to a bit-wise
- // flag. See parse_semantic_version_impl() for some sketched code.
- // We may also want to pass these flags to string() to not print
- // 0 patch.
+ // If the minor and patch components are absent, then they default to 0.
//
// By default, a version containing the <build> component is considered
// valid only if separated from <patch> with '-' (semver pre-release) or '+'
@@ -80,23 +57,36 @@ LIBBUTL_MODEXPORT namespace butl
std::uint64_t patch,
std::string build = "");
- // The build_separators argument can be NULL (no build component allowed),
- // empty (any build component allowed), or a string of characters to allow
- // as separators. When allow_build is true build_separators defaults to
- // "-+".
+ // If the allow_build flag is specified, then build_separators argument
+ // can be a string of characters to allow as separators, empty (any build
+ // component allowed), or NULL (defaults to "-+").
//
- explicit
- semantic_version (const std::string&, bool allow_build = true);
+ // Note: allow_omit_minor implies allow_omit_patch.
+ //
+ enum flags
+ {
+ none = 0, // Exact <major>.<minor>.<patch> form.
+ allow_omit_minor = 0x01, // Allow <major> form.
+ allow_omit_patch = 0x02, // Allow <major>.<minor> form.
+ allow_build = 0x04, // Allow <major>.<minor>.<patch>-<build> form.
+ };
- semantic_version (const std::string&, const char* build_separators);
+ explicit
+ semantic_version (const std::string&,
+ flags = none,
+ const char* build_separators = nullptr);
// As above but parse from the specified position until the end of the
// string.
//
- semantic_version (const std::string&, std::size_t pos, bool = true);
-
- semantic_version (const std::string&, std::size_t pos, const char*);
+ semantic_version (const std::string&,
+ std::size_t pos,
+ flags = none,
+ const char* = nullptr);
+ // @@ We may also want to pass allow_* flags not to print 0 minor/patch or
+ // maybe invent ignore_* flags.
+ //
std::string
string (bool ignore_build = false) const;
@@ -133,16 +123,15 @@ LIBBUTL_MODEXPORT namespace butl
// Try to parse a string as a semantic version returning nullopt if invalid.
//
optional<semantic_version>
- parse_semantic_version (const std::string&, bool allow_build = true);
-
- optional<semantic_version>
- parse_semantic_version (const std::string&, const char* build_separators);
-
- optional<semantic_version>
- parse_semantic_version (const std::string&, std::size_t pos, bool = true);
+ parse_semantic_version (const std::string&,
+ semantic_version::flags = semantic_version::none,
+ const char* build_separators = nullptr);
optional<semantic_version>
- parse_semantic_version (const std::string&, std::size_t pos, const char*);
+ parse_semantic_version (const std::string&,
+ std::size_t pos,
+ semantic_version::flags = semantic_version::none,
+ const char* = nullptr);
// NOTE: comparison operators take the build component into account.
//
@@ -187,6 +176,18 @@ LIBBUTL_MODEXPORT namespace butl
{
return o << x.string ();
}
+
+ semantic_version::flags
+ operator& (semantic_version::flags, semantic_version::flags);
+
+ semantic_version::flags
+ operator| (semantic_version::flags, semantic_version::flags);
+
+ semantic_version::flags
+ operator&= (semantic_version::flags&, semantic_version::flags);
+
+ semantic_version::flags
+ operator|= (semantic_version::flags&, semantic_version::flags);
}
#include <libbutl/semantic-version.ixx>
diff --git a/libbutl/semantic-version.ixx b/libbutl/semantic-version.ixx
index 6bf7584..8de1554 100644
--- a/libbutl/semantic-version.ixx
+++ b/libbutl/semantic-version.ixx
@@ -15,23 +15,9 @@ namespace butl
{
}
- // Note: the order is important to MinGW GCC (DLL linkage).
- //
inline semantic_version::
- semantic_version (const std::string& s, std::size_t p, bool ab)
- : semantic_version (s, p, ab ? "-+" : nullptr)
- {
- }
-
- inline semantic_version::
- semantic_version (const std::string& s, const char* bs)
- : semantic_version (s, 0, bs)
- {
- }
-
- inline semantic_version::
- semantic_version (const std::string& s, bool ab)
- : semantic_version (s, ab ? "-+" : nullptr)
+ semantic_version (const std::string& s, flags fs, const char* bs)
+ : semantic_version (s, 0, fs, bs)
{
}
@@ -42,29 +28,53 @@ namespace butl
};
LIBBUTL_SYMEXPORT semantic_version_result
- parse_semantic_version_impl (const std::string&, std::size_t, const char*);
+ parse_semantic_version_impl (const std::string&,
+ std::size_t,
+ semantic_version::flags,
+ const char*);
inline optional<semantic_version>
- parse_semantic_version (const std::string& s, bool ab)
+ parse_semantic_version (const std::string& s,
+ semantic_version::flags fs,
+ const char* bs)
{
- return parse_semantic_version (s, ab ? "-+" : nullptr);
+ return parse_semantic_version_impl (s, 0, fs, bs).version;
}
inline optional<semantic_version>
- parse_semantic_version (const std::string& s, const char* bs)
+ parse_semantic_version (const std::string& s,
+ std::size_t p,
+ semantic_version::flags fs,
+ const char* bs)
{
- return parse_semantic_version_impl (s, 0, bs).version;
+ return parse_semantic_version_impl (s, p, fs, bs).version;
}
- inline optional<semantic_version>
- parse_semantic_version (const std::string& s, std::size_t p, bool ab)
+ inline semantic_version::flags
+ operator&= (semantic_version::flags& x, semantic_version::flags y)
{
- return parse_semantic_version (s, p, ab ? "-+" : nullptr);
+ return x = static_cast<semantic_version::flags> (
+ static_cast<std::uint16_t> (x) &
+ static_cast<std::uint16_t> (y));
}
- inline optional<semantic_version>
- parse_semantic_version (const std::string& s, std::size_t p, const char* bs)
+ inline semantic_version::flags
+ operator|= (semantic_version::flags& x, semantic_version::flags y)
+ {
+ return x = static_cast<semantic_version::flags> (
+ static_cast<std::uint16_t> (x) |
+ static_cast<std::uint16_t> (y));
+ }
+
+ inline semantic_version::flags
+ operator& (semantic_version::flags x, semantic_version::flags y)
+ {
+ return x &= y;
+ }
+
+ inline semantic_version::flags
+ operator| (semantic_version::flags x, semantic_version::flags y)
{
- return parse_semantic_version_impl (s, p, bs).version;
+ return x |= y;
}
}
diff --git a/libbutl/sendmail.cxx b/libbutl/sendmail.cxx
index 1038cf4..5fec1a6 100644
--- a/libbutl/sendmail.cxx
+++ b/libbutl/sendmail.cxx
@@ -1,32 +1,7 @@
// file : libbutl/sendmail.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/sendmail.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sendmail;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
-
-#endif
+#include <libbutl/sendmail.hxx>
using namespace std;
diff --git a/libbutl/sendmail.mxx b/libbutl/sendmail.hxx
index 0d5b239..97a4d82 100644
--- a/libbutl/sendmail.mxx
+++ b/libbutl/sendmail.hxx
@@ -1,38 +1,17 @@
-// file : libbutl/sendmail.mxx -*- C++ -*-
+// file : libbutl/sendmail.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#include <cstddef> // size_t
-#include <utility> // move(), forward()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sendmail;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Send email using the sendmail(1) program.
//
diff --git a/libbutl/sendmail.ixx b/libbutl/sendmail.ixx
index 105c1af..35b5c47 100644
--- a/libbutl/sendmail.ixx
+++ b/libbutl/sendmail.ixx
@@ -1,7 +1,10 @@
// file : libbutl/sendmail.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // move(), forward()
+
+namespace butl
{
template <typename E, typename... O>
inline sendmail::
diff --git a/libbutl/sha1.c b/libbutl/sha1.c
index 37e862e..98fce5e 100644
--- a/libbutl/sha1.c
+++ b/libbutl/sha1.c
@@ -121,11 +121,17 @@ main ()
#include <string.h>
+/* Assume if bzero/bcopy are defined as macros, then they do what we need. */
+
/* void bzero(void *s, size_t n); */
-#define bzero(s, n) memset((s), 0, (n))
+#ifndef bzero
+# define bzero(s, n) memset((s), 0, (n))
+#endif
/* void bcopy(const void *s1, void *s2, size_t n); */
-#define bcopy(s1, s2, n) memmove((s2), (s1), (n))
+#ifndef bcopy
+# define bcopy(s1, s2, n) memmove((s2), (s1), (n))
+#endif
/* The rest is the unmodified (except for adjusting function declarations and
adding a few explicit casts to make compilable in C++ without warnings)
diff --git a/libbutl/sha1.cxx b/libbutl/sha1.cxx
index 6a5e9db..e546922 100644
--- a/libbutl/sha1.cxx
+++ b/libbutl/sha1.cxx
@@ -1,9 +1,7 @@
// file : libbutl/sha1.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/sha1.mxx>
-#endif
+#include <libbutl/sha1.hxx>
// C interface for sha1c.
//
@@ -42,29 +40,9 @@ extern "C"
#define SHA1_Final(x, y) sha1_result((y), (char(&)[20])(x))
#include <cassert>
+#include <istream>
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sha1;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-import butl.fdstream;
-#else
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/bufstreambuf.hxx>
using namespace std;
@@ -91,12 +69,12 @@ namespace butl
}
void sha1::
- append (ifdstream& is)
+ append (istream& is)
{
- fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
assert (buf != nullptr);
- while (is.peek () != ifdstream::traits_type::eof () && is.good ())
+ while (is.peek () != istream::traits_type::eof () && is.good ())
{
size_t n (buf->egptr () - buf->gptr ());
append (buf->gptr (), n);
diff --git a/libbutl/sha1.mxx b/libbutl/sha1.hxx
index 07c469c..62710f4 100644
--- a/libbutl/sha1.mxx
+++ b/libbutl/sha1.hxx
@@ -1,34 +1,18 @@
-// file : libbutl/sha1.mxx -*- C++ -*-
+// file : libbutl/sha1.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
+#include <iosfwd> // istream
#include <string>
-#include <cstddef> // size_t
+#include <cstddef> // size_t
#include <cstdint>
-#include <cstring> // strlen()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sha1;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
+#include <cstring> // strlen()
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
- class ifdstream;
-
// SHA1 checksum calculator.
//
// For a single chunk of data a sum can be obtained in one line, for
@@ -67,11 +51,14 @@ LIBBUTL_MODEXPORT namespace butl
// Append stream.
//
+ // Note that currently the stream is expected to be bufstreambuf-based
+ // (e.g., ifdstream).
+ //
void
- append (ifdstream&);
+ append (std::istream&);
explicit
- sha1 (ifdstream& i): sha1 () {append (i);}
+ sha1 (std::istream& i): sha1 () {append (i);}
// Check if any data has been hashed.
//
diff --git a/libbutl/sha256.cxx b/libbutl/sha256.cxx
index 2528693..95987ec 100644
--- a/libbutl/sha256.cxx
+++ b/libbutl/sha256.cxx
@@ -1,9 +1,7 @@
// file : libbutl/sha256.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/sha256.mxx>
-#endif
+#include <libbutl/sha256.hxx>
// C interface for sha256c.
//
@@ -26,39 +24,13 @@ extern "C"
#include "sha256c.c"
}
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-
#include <cctype> // isxdigit()
+#include <cassert>
+#include <istream>
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sha256;
-
-// Only imports additional to interface.
-#ifdef __cpp_lib_modules_ts
-import std.io;
-#endif
-
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-import butl.utility; // *case()
-import butl.fdstream;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx> // *case()
+#include <libbutl/bufstreambuf.hxx>
using namespace std;
@@ -85,12 +57,12 @@ namespace butl
}
void sha256::
- append (ifdstream& is)
+ append (istream& is)
{
- fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
assert (buf != nullptr);
- while (is.peek () != ifdstream::traits_type::eof () && is.good ())
+ while (is.peek () != istream::traits_type::eof () && is.good ())
{
size_t n (buf->egptr () - buf->gptr ());
append (buf->gptr (), n);
diff --git a/libbutl/sha256.mxx b/libbutl/sha256.hxx
index 9bc0971..566068f 100644
--- a/libbutl/sha256.mxx
+++ b/libbutl/sha256.hxx
@@ -1,35 +1,19 @@
-// file : libbutl/sha256.mxx -*- C++ -*-
+// file : libbutl/sha256.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
+#include <iosfwd> // istream
#include <cstddef> // size_t
#include <cstdint>
#include <cstring> // strlen(), memcpy()
#include <type_traits> // enable_if, is_integral
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sha256;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
- class ifdstream;
-
// SHA256 checksum calculator.
//
// For a single chunk of data a sum can be obtained in one line, for
@@ -101,11 +85,14 @@ LIBBUTL_MODEXPORT namespace butl
// Append stream.
//
+ // Note that currently the stream is expected to be bufstreambuf-based
+ // (e.g., ifdstream).
+ //
void
- append (ifdstream&);
+ append (std::istream&);
explicit
- sha256 (ifdstream& i): sha256 () {append (i);}
+ sha256 (std::istream& i): sha256 () {append (i);}
// Check if any data has been hashed.
//
diff --git a/libbutl/small-allocator.mxx b/libbutl/small-allocator.hxx
index 5ef74be..429ba41 100644
--- a/libbutl/small-allocator.mxx
+++ b/libbutl/small-allocator.hxx
@@ -1,30 +1,16 @@
-// file : libbutl/small-allocator.mxx -*- C++ -*-
+// file : libbutl/small-allocator.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <cstddef> // size_t
#include <utility> // move()
#include <type_traits> // true_type, is_same
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_allocator;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Implementation of the allocator (and its buffer) for small containers.
//
diff --git a/libbutl/small-forward-list.mxx b/libbutl/small-forward-list.hxx
index 6aa4986..8d1cf68 100644
--- a/libbutl/small-forward-list.mxx
+++ b/libbutl/small-forward-list.hxx
@@ -1,31 +1,18 @@
-// file : libbutl/small-forward-list.mxx -*- C++ -*-
+// file : libbutl/small-forward-list.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#ifndef __cpp_lib_modules_ts
#include <cstddef> // size_t
#include <utility> // move()
+#include <type_traits> // is_nothrow_move_constructible
#include <forward_list>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_forward_list;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Issues and limitations.
//
@@ -115,14 +102,20 @@ LIBBUTL_MODEXPORT namespace butl
return *this;
}
+ // See small_vector for the move-constructor/assignment noexcept
+ // expressions reasoning.
+ //
small_forward_list (small_forward_list&& v)
+#if !defined(_MSC_VER) || _MSC_VER > 1900
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+#endif
: base_type (allocator_type (this))
{
*this = std::move (v); // Delegate to operator=(&&).
}
small_forward_list&
- operator= (small_forward_list&& v)
+ operator= (small_forward_list&& v) noexcept (false)
{
// VC14's implementation of operator=(&&) swaps pointers without regard
// for allocator (fixed in 15).
diff --git a/libbutl/small-list.mxx b/libbutl/small-list.hxx
index ff62192..7cb51fd 100644
--- a/libbutl/small-list.mxx
+++ b/libbutl/small-list.hxx
@@ -1,31 +1,18 @@
-// file : libbutl/small-list.mxx -*- C++ -*-
+// file : libbutl/small-list.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#ifndef __cpp_lib_modules_ts
#include <list>
#include <cstddef> // size_t
#include <utility> // move()
-#endif
+#include <type_traits> // is_nothrow_move_constructible
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_list;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Issues and limitations.
//
@@ -117,14 +104,20 @@ LIBBUTL_MODEXPORT namespace butl
return *this;
}
+ // See small_vector for the move-constructor/assignment noexcept
+ // expressions reasoning.
+ //
small_list (small_list&& v)
+#if !defined(__GLIBCXX__) && (!defined(_MSC_VER) || _MSC_VER > 1900)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
+#endif
: base_type (allocator_type (this))
{
*this = std::move (v); // Delegate to operator=(&&).
}
small_list&
- operator= (small_list&& v)
+ operator= (small_list&& v) noexcept (false)
{
// libstdc++'s implementation prior to GCC 6 is broken (calls swap()).
// Since there is no easy way to determine this library's version, for
@@ -136,7 +129,7 @@ LIBBUTL_MODEXPORT namespace butl
#if defined(__GLIBCXX__) || (defined(_MSC_VER) && _MSC_VER <= 1900)
this->clear ();
for (T& x: v)
- this->push_back (std::move (x));
+ this->push_back (std::move (x)); // Note: can throw bad_alloc.
v.clear ();
#else
// Note: propagate_on_container_move_assignment = false
diff --git a/libbutl/small-vector-odb.hxx b/libbutl/small-vector-odb.hxx
index af9d96c..289ca38 100644
--- a/libbutl/small-vector-odb.hxx
+++ b/libbutl/small-vector-odb.hxx
@@ -5,7 +5,7 @@
#include <odb/pre.hxx>
-#include <libbutl/small-vector.mxx>
+#include <libbutl/small-vector.hxx>
#include <odb/container-traits.hxx>
diff --git a/libbutl/small-vector.mxx b/libbutl/small-vector.hxx
index 2a92182..44a3ef5 100644
--- a/libbutl/small-vector.mxx
+++ b/libbutl/small-vector.hxx
@@ -1,31 +1,18 @@
-// file : libbutl/small-vector.mxx -*- C++ -*-
+// file : libbutl/small-vector.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <cstddef> // size_t
#include <utility> // move()
-#endif
-
-// Other includes.
+#include <type_traits> // is_nothrow_move_constructible
-#ifdef __cpp_modules_ts
-export module butl.small_vector;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Issues and limitations.
//
@@ -38,6 +25,9 @@ LIBBUTL_MODEXPORT namespace butl
//
// - swap() is deleted (see notes below).
//
+ // - In contrast to std::vector, the references, pointers, and iterators
+ // referring to elements are invalidated after moving from it.
+ //
template <typename T, std::size_t N>
class small_vector: private small_allocator_buffer<T, N>,
public std::vector<T, small_allocator<T, N>>
@@ -118,17 +108,46 @@ LIBBUTL_MODEXPORT namespace butl
return *this;
}
+ // Note that while the move constructor is implemented via the move
+ // assignment, it will not throw if the value type is no-throw move
+ // constructible.
+ //
+ // Specifically, if v.size() > N then allocators evaluate as equal and the
+ // buffer ownership is transferred. Otherwise, the allocators do not
+ // evaluate as equal and the individual elements are move-constructed in
+ // the preallocated buffer.
+ //
+ // Also note that this constructor ends up calling
+ // base_type::operator=(base_type&&) whose noexcept expression evaluates
+ // to false (propagate_on_container_move_assignment and is_always_equal
+ // are false for small_allocator; see std::vector documentation for
+ // details). We, however, assume that the noexcept expression we use here
+ // is strict enough for all "sane" std::vector implementations since
+ // small_allocator never throws directly.
+ //
small_vector (small_vector&& v)
+ noexcept (std::is_nothrow_move_constructible<T>::value)
: base_type (allocator_type (this))
{
if (v.size () <= N)
reserve ();
*this = std::move (v); // Delegate to operator=(&&).
+
+ // Note that in contrast to the move assignment operator, the
+ // constructor must clear the other vector.
+ //
+ v.clear ();
}
+ // Note that when size() <= N and v.size() > N, then allocators of this
+ // and other containers do not evaluate as equal. Thus, the memory for the
+ // new elements is allocated on the heap and so std::bad_alloc can be
+ // thrown. @@ TODO: maybe we could re-implement this case in terms of
+ // swap()?
+ //
small_vector&
- operator= (small_vector&& v)
+ operator= (small_vector&& v) noexcept (false)
{
// VC's implementation of operator=(&&) (both 14 and 15) frees the
// memory and then reallocated with capacity equal to v.size(). This is
diff --git a/libbutl/standard-version.cxx b/libbutl/standard-version.cxx
index a9f5eb8..36f4830 100644
--- a/libbutl/standard-version.cxx
+++ b/libbutl/standard-version.cxx
@@ -1,41 +1,14 @@
// file : libbutl/standard-version.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/standard-version.mxx>
-#endif
+#include <libbutl/standard-version.hxx>
#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint>
-#include <cstddef>
-#include <ostream>
-
#include <cstdlib> // strtoull()
#include <utility> // move()
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.standard_version;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#endif
-import butl.utility;
-#else
-#include <libbutl/utility.mxx> // alnum()
-#endif
+#include <libbutl/utility.hxx> // alnum()
using namespace std;
@@ -60,6 +33,7 @@ namespace butl
const char* b (s.c_str () + p);
char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
uint64_t v (strtoull (b, &e, 10)); // Can't throw.
if (errno == ERANGE || b == e || v < min || v > max)
diff --git a/libbutl/standard-version.mxx b/libbutl/standard-version.hxx
index b86e3a9..e973352 100644
--- a/libbutl/standard-version.mxx
+++ b/libbutl/standard-version.hxx
@@ -1,31 +1,14 @@
-// file : libbutl/standard-version.mxx -*- C++ -*-
+// file : libbutl/standard-version.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-
-// C includes.
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstdint> // uint*_t
#include <cstddef> // size_t
#include <ostream>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.standard_version;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#else
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
@@ -39,7 +22,7 @@ import butl.optional;
# undef minor
#endif
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// The build2 "standard version" (normal, earliest, and stub):
//
@@ -221,7 +204,7 @@ LIBBUTL_MODEXPORT namespace butl
// Create empty version.
//
- standard_version () {} // = default; @@ MOD VC
+ standard_version () = default;
};
// Try to parse a string as a standard version returning nullopt if invalid.
diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx
index 5d5ec47..af5c1b3 100644
--- a/libbutl/string-parser.cxx
+++ b/libbutl/string-parser.cxx
@@ -1,33 +1,7 @@
// file : libbutl/string-parser.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/string-parser.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <utility> // move()
-#include <stdexcept>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.string_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-#endif
+#include <libbutl/string-parser.hxx>
using namespace std;
@@ -40,7 +14,7 @@ namespace butl
inline static bool
space (char c) noexcept
{
- return c == ' ' || c == '\t';
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
vector<pair<string, size_t>>
diff --git a/libbutl/string-parser.mxx b/libbutl/string-parser.hxx
index 4ff1590..9fc20c0 100644
--- a/libbutl/string-parser.mxx
+++ b/libbutl/string-parser.hxx
@@ -1,32 +1,17 @@
-// file : libbutl/string-parser.mxx -*- C++ -*-
+// file : libbutl/string-parser.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <cstddef> // size_t
#include <utility> // pair
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.string_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
namespace string_parser
{
diff --git a/libbutl/string-table.mxx b/libbutl/string-table.hxx
index 78c6cd6..010fb01 100644
--- a/libbutl/string-table.mxx
+++ b/libbutl/string-table.hxx
@@ -1,36 +1,18 @@
-// file : libbutl/string-table.mxx -*- C++ -*-
+// file : libbutl/string-table.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
+#include <cassert>
#include <unordered_map>
-#include <limits> // numeric_limits
-#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.string_table;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.multi_index;
-#else
-#include <libbutl/multi-index.mxx>
-#endif
+#include <libbutl/multi-index.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// A pool of strings and, optionally, other accompanying data in which each
// entry is assigned an individual index (or id) of type I (e.g., uint8_t,
diff --git a/libbutl/string-table.txx b/libbutl/string-table.txx
index 4db0a6b..8416b48 100644
--- a/libbutl/string-table.txx
+++ b/libbutl/string-table.txx
@@ -1,6 +1,9 @@
// file : libbutl/string-table.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
+#include <limits> // numeric_limits
+#include <cstddef> // size_t
+
namespace butl
{
template <typename I, typename D>
diff --git a/libbutl/tab-parser.cxx b/libbutl/tab-parser.cxx
index cca2792..d7e5a14 100644
--- a/libbutl/tab-parser.cxx
+++ b/libbutl/tab-parser.cxx
@@ -1,39 +1,12 @@
// file : libbutl/tab-parser.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/tab-parser.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstdint>
-#include <stdexcept>
+#include <libbutl/tab-parser.hxx>
#include <istream>
#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.tab_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-import butl.string_parser;
-#else
-#include <libbutl/string-parser.mxx>
-#endif
+#include <libbutl/string-parser.hxx>
using namespace std;
diff --git a/libbutl/tab-parser.mxx b/libbutl/tab-parser.hxx
index a7f7e01..2dc612b 100644
--- a/libbutl/tab-parser.mxx
+++ b/libbutl/tab-parser.hxx
@@ -1,33 +1,17 @@
-// file : libbutl/tab-parser.mxx -*- C++ -*-
+// file : libbutl/tab-parser.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <iosfwd>
#include <string>
#include <vector>
#include <cstdint> // uint64_t
#include <stdexcept> // runtime_error
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.tab_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
class LIBBUTL_SYMEXPORT tab_parsing: public std::runtime_error
{
diff --git a/libbutl/target-triplet.cxx b/libbutl/target-triplet.cxx
index 17337b3..e28f119 100644
--- a/libbutl/target-triplet.cxx
+++ b/libbutl/target-triplet.cxx
@@ -1,33 +1,9 @@
// file : libbutl/target-triplet.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/target-triplet.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <ostream>
+#include <libbutl/target-triplet.hxx>
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.target_triplet;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
using namespace std;
@@ -48,7 +24,12 @@ namespace butl
if (f == 0 || f == string::npos)
bad ("missing cpu");
- cpu.assign (s, 0, f);
+ // Canonicalize CPU.
+ //
+ if (s.compare (0, f, "arm64") == 0)
+ cpu = "aarch64";
+ else
+ cpu.assign (s, 0, f);
// If we have something in between, then the first component after CPU is
// VENDOR. Unless it is a first component of two-component system, as in
@@ -107,6 +88,13 @@ namespace butl
if (system.front () == '-' || system.back () == '-')
bad ("invalid os/kernel/abi");
+ // Canonicalize SYSTEM.
+ //
+ if (system == "linux")
+ system = "linux-gnu"; // Per config.sub.
+ else if (system == "windows-gnu" && vendor == "w64") // Clang's innovation.
+ system = "mingw32";
+
// Extract VERSION for some recognized systems.
//
string::size_type v (0);
@@ -124,6 +112,14 @@ namespace butl
version.assign (system, v, string::npos);
system.resize (system.size () - version.size ());
}
+ else if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+ {
+ // Handle iosNN[-...].
+ //
+ string::size_type p (system.find ('-'));
+ version.assign (system, 3, p == string::npos ? p : p - 3);
+ system.erase (3, version.size ());
+ }
// Determine class for some recognized systems.
//
@@ -131,6 +127,8 @@ namespace butl
class_ = "linux";
else if (vendor == "apple" && system == "darwin")
class_ = "macos";
+ else if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+ class_ = "ios";
else if (system == "freebsd" ||
system == "openbsd" ||
system == "netbsd")
@@ -162,7 +160,10 @@ namespace butl
if (!version.empty ())
{
- r += version;
+ if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+ r.insert (r.size () - system.size () + 3, version);
+ else
+ r += version;
}
return r;
@@ -186,7 +187,10 @@ namespace butl
if (!version.empty ())
{
- r += version;
+ if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+ r.insert (r.size () - system.size () + 3, version);
+ else
+ r += version;
}
return r;
diff --git a/libbutl/target-triplet.mxx b/libbutl/target-triplet.hxx
index 41c0cb5..bfb2c00 100644
--- a/libbutl/target-triplet.mxx
+++ b/libbutl/target-triplet.hxx
@@ -1,30 +1,14 @@
-// file : libbutl/target-triplet.mxx -*- C++ -*-
+// file : libbutl/target-triplet.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <ostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.target_triplet;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS
// form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus
@@ -38,6 +22,7 @@ LIBBUTL_MODEXPORT namespace butl
// This one is reasonably straightforward. Note that we always expect at
// least two components with the first being the CPU. In other words, we
// don't try to guess what just 'mingw32' might mean like config.sub does.
+ // Note that we canonicalize arm64 to aarch64 similar to config.sub.
//
// VENDOR
//
@@ -90,12 +75,19 @@ LIBBUTL_MODEXPORT namespace butl
// arm-softfloat-linux-gnu arm softfloat linux-gnu
// i686-pc-mingw32 i686 mingw32
// i686-w64-mingw32 i686 w64 mingw32
+ // i686-w64-windows-gnu i686 w64 mingw32
// i686-lfs-linux-gnu i686 lfs linux-gnu
// x86_64-unknown-linux-gnu x86_64 linux-gnu
+ // x86_64-redhat-linux x86_64 redhat linux-gnu
// x86_64-linux-gnux32 x86_64 linux-gnux32
// x86_64-microsoft-win32-msvc14.0 x86_64 microsoft win32-msvc 14.0
// x86_64-pc-windows-msvc x86_64 windows-msvc
// x86_64-pc-windows-msvc19.11.25547 x86_64 windows-msvc 19.11.25547
+ // wasm32-unknown-emscripten wasm32 emscripten
+ // arm64-apple-darwin20.1.0 aarch64 apple darwin 20.1.0
+ // arm64-apple-ios14.4 aarch64 apple ios 14.4
+ // arm64-apple-ios14.4-simulator aarch64 apple ios-simulator 14.4
+ // x86_64-apple-ios14.4-macabi x86_64 apple ios-macabi 14.4
//
// Similar to version splitting, for certain commonly-used targets we also
// derive the "target class" which can be used as a shorthand, more
@@ -107,6 +99,9 @@ LIBBUTL_MODEXPORT namespace butl
// macos *-apple-darwin*
// bsd *-*-(freebsd|openbsd|netbsd)*
// windows *-*-win32-* | *-*-windows-* | *-*-mingw32
+ // ios *-apple-ios*
+ //
+ // NOTE: see also os_release if adding anything new here.
//
// References:
//
@@ -157,7 +152,7 @@ LIBBUTL_MODEXPORT namespace butl
explicit
target_triplet (const std::string&);
- target_triplet () {} // = default; @@ MOD VC
+ target_triplet () = default;
};
inline bool
diff --git a/libbutl/timestamp.cxx b/libbutl/timestamp.cxx
index 9be2a82..260fbef 100644
--- a/libbutl/timestamp.cxx
+++ b/libbutl/timestamp.cxx
@@ -1,9 +1,7 @@
// file : libbutl/timestamp.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/timestamp.mxx>
-#endif
+#include <libbutl/timestamp.hxx>
#include <time.h> // localtime_{r,s}(), gmtime_{r,s}(), strptime(), timegm()
#include <errno.h> // EINVAL
@@ -25,22 +23,18 @@
#ifdef __GLIBCXX__
extern "C"
{
-#include "strptime.c"
+# include "strptime.c"
}
#else
-#include <locale.h> // LC_ALL
+# include <locale.h> // LC_ALL
#endif
#endif
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <chrono>
-
-#include <ctime> // tm, time_t, mktime(), strftime()[__GLIBCXX__]
+#include <ctime> // tm, time_t, mktime(), strftime()[libstdc++]
#include <cstdlib> // strtoull()
-#include <sstream>
+#include <sstream> // ostringstream, stringstream[VC]
#include <iomanip> // put_time(), setw(), dec, right
-#include <cstring> // strlen(), memcpy()
+#include <cstring> // strlen(), memcpy(), strchr()[VC]
#include <ostream>
#include <utility> // pair, make_pair()
#include <stdexcept> // runtime_error
@@ -49,30 +43,14 @@ extern "C"
//
#ifdef _WIN32
#ifndef __GLIBCXX__
-#include <locale>
-#include <clocale>
-#include <iomanip>
-#endif
+# include <ios>
+# include <locale>
+# include <clocale>
+# include <iomanip>
#endif
#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.timestamp;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import butl.utility;
-#else
-#include <libbutl/utility.mxx> // throw_generic_error()
-#endif
+#include <libbutl/utility.hxx> // throw_generic_error()
using namespace std;
@@ -180,24 +158,85 @@ strptime (const char* input, const char* format, tm* time)
{
// VC std::get_time()-based implementation.
//
- istringstream is (input);
+ // Note that the major difference in semantics of strptime() and
+ // std::get_time() is that the former always fails if the format string is
+ // not fully processed, while the latter can succeed in such a case,
+ // specifically if the end of the stream is reached after a conversion
+ // specifier was successfully applied. See this post for some background:
+ //
+ // https://stackoverflow.com/questions/67060956/what-is-the-correct-behavior-of-stdget-time-for-short-input
+ //
+ // The consequence of this fact is that there is no easy way to detect if
+ // the format was fully processed when the end of input is reached. It seems
+ // that the only way to resolve this ambiguity is to append some end marker
+ // to both the input and format and re-parse. We can dedicate some character
+ // that is unlikely to be used in the time format/input (for example '\x1')
+ // to serve as an end marker.
+ //
+ // Alternatively, we can abandon the use of std::get_time() altogether and,
+ // for example, use a FreeBSD-based strptime() implementation. This feels a
+ // bit too radical at the moment, though.
+ //
+ const char em ('\x1');
+
+ if (strchr (input, em) != nullptr || strchr (format, em) != nullptr)
+ return nullptr;
+
+ stringstream ss (input); // Input/output stream.
// The original strptime() function behaves according to the process' C
// locale (set with std::setlocale()), which can differ from the process C++
// locale (set with std::locale::global()).
//
- is.imbue (locale (setlocale (LC_ALL, nullptr)));
+ ss.imbue (locale (setlocale (LC_ALL, nullptr)));
- if (!(is >> get_time (time, format)))
+ // Bail out on the parsing error.
+ //
+ if (!(ss >> get_time (time, format)))
return nullptr;
- else
- // tellg() behaves as UnformattedInputFunction, so returns failure status
- // if eofbit is set.
- //
- return const_cast<char*> (
- input + (is.eof ()
- ? strlen (input)
- : static_cast<size_t> (is.tellg ())));
+
+ // If the end of input is not reached then the format string is fully
+ // processed.
+ //
+ if (!ss.eof ())
+ return const_cast<char*> (input + static_cast<size_t> (ss.tellg ()));
+
+ // Since eof is reached, we cannot say if the format string was fully
+ // processed or not. For example:
+ //
+ // %b %Y - format
+ // Feb 2016 - eofbit is set with a format fully processed
+ // Feb - eofbit is set with a format partially processed
+ //
+ // So append the end marker character to both input and format and re-parse.
+ //
+ ss.clear (); // Clear eof.
+ ss.seekp (0, ios_base::end); // Position to the end for writing.
+ ss.put (em); // Append the end marker.
+ ss.seekg (0); // Rewind for reading.
+
+ string fm (format);
+ fm += em; // Append the end marker.
+
+ // Fail if the input is "shorter" than the format. For example:
+ //
+ // %b %Y\x1 - format
+ // Feb\x1 - stream
+ //
+ // Note that we can reuse the time object for re-parsing, since on success
+ // its fields will be overwritten with the same values.
+ //
+ if (!(ss >> get_time (time, fm.c_str ())))
+ return nullptr;
+
+ // We would fail earlier otherwise.
+ //
+ assert (ss.eof () || ss.get () == stringstream::traits_type::eof ());
+
+ // Return the end of input: the whole input has been consumed, and tellg()
+ // (which behaves as UnformattedInputFunction) fails once eofbit is set.
+ //
+ return const_cast<char*> (input + strlen (input));
}
#endif
diff --git a/libbutl/timestamp.mxx b/libbutl/timestamp.hxx
index 141e13d..2714a0d 100644
--- a/libbutl/timestamp.mxx
+++ b/libbutl/timestamp.hxx
@@ -1,34 +1,15 @@
-// file : libbutl/timestamp.mxx -*- C++ -*-
+// file : libbutl/timestamp.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <iosfwd>
#include <string>
#include <chrono>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.timestamp;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-//@@ MOD TODO: should't we re-export chrono (for somparison operator, etc)?
-// or ADL should kick in?
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// On all three main platforms that we target (GNU/Linux, Windows (both
// VC++ and GCC/MinGW64), and MacOS X) with recent C++ runtimes,
@@ -61,21 +42,12 @@ LIBBUTL_MODEXPORT namespace butl
// unreal and all of them are less than any non-special value (strictly
// speaking unreal is no greater (older) than any real value).
//
-#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym.
- inline const timestamp::rep timestamp_unknown_rep = -1;
- inline const timestamp timestamp_unknown = timestamp (duration (-1));
- inline const timestamp::rep timestamp_nonexistent_rep = 0;
- inline const timestamp timestamp_nonexistent = timestamp (duration (0));
- inline const timestamp::rep timestamp_unreal_rep = 1;
- inline const timestamp timestamp_unreal = timestamp (duration (1));
-#else
const timestamp::rep timestamp_unknown_rep = -1;
const timestamp timestamp_unknown = timestamp (duration (-1));
const timestamp::rep timestamp_nonexistent_rep = 0;
const timestamp timestamp_nonexistent = timestamp (duration (0));
const timestamp::rep timestamp_unreal_rep = 1;
const timestamp timestamp_unreal = timestamp (duration (1));
-#endif
// Print human-readable representation of the timestamp.
//
diff --git a/libbutl/unicode.cxx b/libbutl/unicode.cxx
index 4219846..294bb3f 100644
--- a/libbutl/unicode.cxx
+++ b/libbutl/unicode.cxx
@@ -1,32 +1,11 @@
// file : libbutl/unicode.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/unicode.mxx>
-#endif
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <ostream>
-#include <cstdint>
+#include <libbutl/unicode.hxx>
#include <cstddef> // size_t
#include <utility> // pair
#include <algorithm> // lower_bound()
-#endif
-
-#ifdef __cpp_modules_ts
-module butl.unicode;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
using namespace std;
diff --git a/libbutl/unicode.mxx b/libbutl/unicode.hxx
index b846476..8d99d0e 100644
--- a/libbutl/unicode.mxx
+++ b/libbutl/unicode.hxx
@@ -1,31 +1,15 @@
-// file : libbutl/unicode.mxx -*- C++ -*-
+// file : libbutl/unicode.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <ostream>
#include <cstdint> // uint16_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.unicode;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Note that the Unicode Standard requires the surrogates ([D800 DFFF]) to
// only be used in the context of the UTF-16 character encoding form. Thus,
diff --git a/libbutl/url.mxx b/libbutl/url.hxx
index 713bc3e..5721cfd 100644
--- a/libbutl/url.mxx
+++ b/libbutl/url.hxx
@@ -1,50 +1,23 @@
-// file : libbutl/url.mxx -*- C++ -*-
+// file : libbutl/url.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
+#include <cassert>
+#include <cstddef> // size_t
#include <cstdint> // uint*_t
#include <utility> // move()
#include <ostream>
#include <iterator> // back_inserter
-#include <cstddef> // size_t
-#include <stdexcept> // invalid_argument
-#include <algorithm> // find(), find_if()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.url;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility;
-import butl.optional;
-
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// RFC3986 Uniform Resource Locator (URL).
//
diff --git a/libbutl/url.ixx b/libbutl/url.ixx
index b823ee7..19d54c7 100644
--- a/libbutl/url.ixx
+++ b/libbutl/url.ixx
@@ -1,7 +1,7 @@
// file : libbutl/url.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
{
// url_traits
//
diff --git a/libbutl/url.txx b/libbutl/url.txx
index 0951e80..b2caa37 100644
--- a/libbutl/url.txx
+++ b/libbutl/url.txx
@@ -1,7 +1,12 @@
// file : libbutl/url.txx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <stdexcept> // invalid_argument
+#include <algorithm> // find(), find_if()
+
+#include <libbutl/small-vector.hxx>
+
+namespace butl
{
// Convenience functions.
//
diff --git a/libbutl/utf8.mxx b/libbutl/utf8.hxx
index 15e8ded..697f77a 100644
--- a/libbutl/utf8.mxx
+++ b/libbutl/utf8.hxx
@@ -1,33 +1,17 @@
-// file : libbutl/utf8.mxx -*- C++ -*-
+// file : libbutl/utf8.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstdint> // uint8_t
#include <utility> // pair
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.utf8;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.unicode;
-#else
-#include <libbutl/unicode.mxx>
-#endif
+#include <libbutl/unicode.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Here and below we will refer to bytes that encode a single Unicode
// codepoint as "UTF-8 byte sequence" ("UTF-8 sequence" or "byte sequence"
diff --git a/libbutl/utf8.ixx b/libbutl/utf8.ixx
index 3d2e092..10624f8 100644
--- a/libbutl/utf8.ixx
+++ b/libbutl/utf8.ixx
@@ -116,7 +116,7 @@ namespace butl
{
if (b < 0xFE)
{
- *what = b < 0xFC ? "5" : "6";
+ *what = b < 0xFC ? '5' : '6';
*what += "-byte length UTF-8 sequence";
}
else
diff --git a/libbutl/utility.cxx b/libbutl/utility.cxx
index bbeafd2..b03a8f8 100644
--- a/libbutl/utility.cxx
+++ b/libbutl/utility.cxx
@@ -1,44 +1,23 @@
// file : libbutl/utility.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx>
#ifdef _WIN32
#include <libbutl/win32-utility.hxx>
#endif
-#include <stdlib.h> // setenv(), unsetenv(), _putenv()
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <utility>
+#include <stdlib.h> // getenv(), setenv(), unsetenv(), _putenv()
+#include <cstring> // strncmp(), strlen()
#include <ostream>
#include <type_traits> // enable_if, is_base_of
#include <system_error>
-#endif
#include <libbutl/ft/lang.hxx>
#include <libbutl/ft/exception.hxx>
-#ifdef __cpp_modules_ts
-module butl.utility;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import butl.utf8;
-#else
-#include <libbutl/utf8.mxx>
-#endif
+#include <libbutl/utf8.hxx>
namespace butl
{
@@ -192,13 +171,42 @@ namespace butl
for (; i != n && ws (l[i]); ++i) ;
for (; n != i && ws (l[n - 1]); --n) ;
- if (i != 0)
+ if (n != l.size ()) l.resize (n);
+ if (i != 0) l.erase (0, i);
+
+ return l;
+ }
+
+ string&
+ trim_left (string& l)
+ {
+ auto ws = [] (char c )
{
- string s (l, i, n - i);
- l.swap (s);
- }
- else if (n != l.size ())
- l.resize (n);
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+ };
+
+ size_t i (0), n (l.size ());
+
+ for (; i != n && ws (l[i]); ++i) ;
+
+ if (i != 0) l.erase (0, i);
+
+ return l;
+ }
+
+ string&
+ trim_right (string& l)
+ {
+ auto ws = [] (char c )
+ {
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+ };
+
+ size_t i (0), n (l.size ());
+
+ for (; n != i && ws (l[n - 1]); --n) ;
+
+ if (n != l.size ()) l.resize (n);
return l;
}
@@ -332,6 +340,55 @@ namespace butl
s.resize (d - s.begin ());
}
+#ifdef __cpp_thread_local
+ thread_local
+#else
+ __thread
+#endif
+ const char* const* thread_env_ = nullptr;
+
+#ifdef _WIN32
+ const char* const*
+ thread_env () {return thread_env_;}
+
+ void
+ thread_env (const char* const* v) {thread_env_ = v;}
+#endif
+
+ optional<std::string>
+ getenv (const char* name)
+ {
+ if (const char* const* vs = thread_env_)
+ {
+ size_t n (strlen (name));
+
+ for (; *vs != nullptr; ++vs)
+ {
+ const char* v (*vs);
+
+ // Note that on Windows variable names are case-insensitive.
+ //
+#ifdef _WIN32
+ if (icasecmp (name, v, n) == 0)
+#else
+ if (strncmp (name, v, n) == 0)
+#endif
+ {
+ switch (v[n])
+ {
+ case '=': return string (v + n + 1);
+ case '\0': return nullopt;
+ }
+ }
+ }
+ }
+
+ if (const char* r = ::getenv (name))
+ return std::string (r);
+
+ return nullopt;
+ }
+
void
setenv (const string& name, const string& value)
{
diff --git a/libbutl/utility.mxx b/libbutl/utility.hxx
index 78c9355..779a0aa 100644
--- a/libbutl/utility.mxx
+++ b/libbutl/utility.hxx
@@ -1,9 +1,7 @@
-// file : libbutl/utility.mxx -*- C++ -*-
+// file : libbutl/utility.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
#ifndef _WIN32
# include <strings.h> // strcasecmp(), strncasecmp()
@@ -11,7 +9,6 @@
# include <string.h> // _stricmp(), _strnicmp()
#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iosfwd> // ostream
#include <istream>
@@ -20,29 +17,17 @@
#include <cstring> // strcmp(), strlen()
#include <exception> // exception, uncaught_exception[s]()
//#include <functional> // hash
-#endif
#include <libbutl/ft/lang.hxx> // thread_local
#include <libbutl/ft/exception.hxx> // uncaught_exceptions
-#ifdef __cpp_modules_ts
-export module butl.utility;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utf8;
-import butl.unicode;
-import butl.optional;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/unicode.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/utf8.hxx>
+#include <libbutl/unicode.hxx>
+#include <libbutl/optional.hxx>
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// Throw std::system_error with generic_category or system_category,
// respectively.
@@ -86,16 +71,23 @@ LIBBUTL_MODEXPORT namespace butl
// http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02)
//
char ucase (char);
- std::string ucase (const char*, std::size_t = std::string::npos);
-
- std::string ucase (const std::string&);
- std::string& ucase (std::string&);
+ std::string ucase (const char*, std::size_t n = std::string::npos);
+ std::string ucase (const std::string&,
+ std::size_t p = 0,
+ std::size_t n = std::string::npos);
+ std::string& ucase (std::string&,
+ std::size_t p = 0,
+ std::size_t n = std::string::npos);
void ucase (char*, std::size_t);
char lcase (char);
- std::string lcase (const char*, std::size_t = std::string::npos);
- std::string lcase (const std::string&);
- std::string& lcase (std::string&);
+ std::string lcase (const char*, std::size_t n = std::string::npos);
+ std::string lcase (const std::string&,
+ std::size_t p = 0,
+ std::size_t n = std::string::npos);
+ std::string& lcase (std::string&,
+ std::size_t p = 0,
+ std::size_t n = std::string::npos);
void lcase (char*, std::size_t);
// Compare ASCII characters/strings ignoring case. Behave as if characters
@@ -140,11 +132,13 @@ LIBBUTL_MODEXPORT namespace butl
bool digit (char);
bool alnum (char);
bool xdigit (char);
+ bool wspace (char);
bool alpha (wchar_t);
bool digit (wchar_t);
bool alnum (wchar_t);
bool xdigit (wchar_t);
+ bool wspace (wchar_t);
// Basic string utilities.
//
@@ -154,13 +148,31 @@ LIBBUTL_MODEXPORT namespace butl
LIBBUTL_SYMEXPORT std::string&
trim (std::string&);
+ LIBBUTL_SYMEXPORT std::string&
+ trim_left (std::string&);
+
+ LIBBUTL_SYMEXPORT std::string&
+ trim_right (std::string&);
+
inline std::string
trim (std::string&& s)
{
return move (trim (s));
}
- // Find the beginning and end poistions of the next word. Return the size
+ inline std::string
+ trim_left (std::string&& s)
+ {
+ return move (trim_left (s));
+ }
+
+ inline std::string
+ trim_right (std::string&& s)
+ {
+ return move (trim_right (s));
+ }
+
+ // Find the beginning and end positions of the next word. Return the size
// of the word or 0 and set b = e = n if there are no more words. For
// example:
//
@@ -178,6 +190,24 @@ LIBBUTL_MODEXPORT namespace butl
//
// The second version examines up to the n'th character in the string.
//
+ // The third version, instead of skipping consecutive delimiters, treats
+ // them as separating empty words. The additional m variable contains an
+ // unspecified internal state and should be initialized to 0. Note that in
+ // this case you should use the (b == n) condition to detect the end. Note
+ // also that a leading delimiter is considered as separating an empty word
+ // from the rest and the trailing delimiter is considered as separating the
+ // rest from an empty word. For example, this is how to parse lines while
+ // observing blanks:
+ //
+ // for (size_t b (0), e (0), m (0), n (s.size ());
+ // next_word (s, n, b, e, m, '\n', '\r'), b != n; )
+ // {
+ // string l (s, b, e - b);
+ // }
+ //
+ // For string "\na\n" this code will observe the {"", "a", ""} words. And
+ // for just "\n" it will observe the {"", ""} words.
+ //
std::size_t
next_word (const std::string&, std::size_t& b, std::size_t& e,
char d1 = ' ', char d2 = '\0');
@@ -186,6 +216,11 @@ LIBBUTL_MODEXPORT namespace butl
next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e,
char d1 = ' ', char d2 = '\0');
+ std::size_t
+ next_word (const std::string&, std::size_t n,
+ std::size_t& b, std::size_t& e, std::size_t& m,
+ char d1 = ' ', char d2 = '\0');
+
// Sanitize a string to only contain characters valid in an identifier
// (ASCII alphanumeric plus `_`) replacing all others with `_`.
//
@@ -259,17 +294,80 @@ LIBBUTL_MODEXPORT namespace butl
// Environment variables.
//
- optional<std::string>
- getenv (const std::string&);
+ // Our getenv() wrapper (as well as the relevant process startup functions)
+ // have a notion of a "thread environment", that is, thread-specific
+ // environment variables. However, unlike the process environment (in the
+ // form of the environ array), the thread environment is specified as a set
+ // of overrides over the process environment (sets and unsets), the same as
+ // for the process startup.
+ //
+ extern
+#ifdef __cpp_thread_local
+ thread_local
+#else
+ __thread
+#endif
+ const char* const* thread_env_;
+
+ // On Windows one cannot export a thread-local variable so we have to
+ // use wrapper functions.
+ //
+#ifdef _WIN32
+ LIBBUTL_SYMEXPORT const char* const*
+ thread_env ();
+
+ LIBBUTL_SYMEXPORT void
+ thread_env (const char* const*);
+#else
+ const char* const*
+ thread_env ();
+
+ void
+ thread_env (const char* const*);
+#endif
+
+ struct auto_thread_env
+ {
+ optional<const char* const*> prev_env;
+
+ auto_thread_env () = default;
+
+ explicit
+ auto_thread_env (const char* const*);
+
+ // Move-to-empty-only type.
+ //
+ auto_thread_env (auto_thread_env&&) noexcept;
+ auto_thread_env& operator= (auto_thread_env&&) noexcept;
+
+ auto_thread_env (const auto_thread_env&) = delete;
+ auto_thread_env& operator= (const auto_thread_env&) = delete;
+
+ ~auto_thread_env ();
+ };
+
+ // Get the environment variable value taking into account the current
+ // thread's overrides (thread_env).
+ //
+ LIBBUTL_SYMEXPORT optional<std::string>
+ getenv (const char*);
+
+ inline optional<std::string>
+ getenv (const std::string& n)
+ {
+ return getenv (n.c_str ());
+ }
- // Throw system_error on failure.
+ // Set the process environment variable. Best done before starting any
+ // threads (see thread_env). Throw system_error on failure.
//
- // Note that on Windows setting an empty value usets the variable.
+ // Note that on Windows setting an empty value unsets the variable.
//
LIBBUTL_SYMEXPORT void
setenv (const std::string& name, const std::string& value);
- // Throw system_error on failure.
+ // Unset the process environment variable. Best done before starting any
+ // threads (see thread_env). Throw system_error on failure.
//
LIBBUTL_SYMEXPORT void
unsetenv (const std::string&);
@@ -470,7 +568,7 @@ LIBBUTL_MODEXPORT namespace butl
#endif
}
-LIBBUTL_MODEXPORT namespace std
+namespace std
{
// Sanitize the exception description before printing. This includes:
//
diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx
index 72fbc5b..fda1ce5 100644
--- a/libbutl/utility.ixx
+++ b/libbutl/utility.ixx
@@ -1,13 +1,10 @@
// file : libbutl/utility.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_lib_modules_ts
#include <cctype> // toupper(), tolower(), is*()
#include <cwctype> // isw*()
-#include <cstdlib> // getenv()
#include <algorithm> // for_each()
#include <stdexcept> // invalid_argument
-#endif
namespace butl
{
@@ -25,12 +22,15 @@ namespace butl
}
inline std::string&
- ucase (std::string& s)
+ ucase (std::string& s, std::size_t p, std::size_t n)
{
- if (size_t n = s.size ())
+ if (n == std::string::npos)
+ n = s.size () - p;
+
+ if (n != 0)
{
s.front () = s.front (); // Force copy in CoW.
- ucase (const_cast<char*> (s.data ()), n);
+ ucase (const_cast<char*> (s.data ()) + p, n);
}
return s;
}
@@ -43,9 +43,9 @@ namespace butl
}
inline std::string
- ucase (const std::string& s)
+ ucase (const std::string& s, std::size_t p, std::size_t n)
{
- return ucase (s.c_str (), s.size ());
+ return ucase (s.c_str () + p, n != std::string::npos ? n : s.size () - p);
}
inline char
@@ -62,12 +62,15 @@ namespace butl
}
inline std::string&
- lcase (std::string& s)
+ lcase (std::string& s, std::size_t p, std::size_t n)
{
- if (size_t n = s.size ())
+ if (n == std::string::npos)
+ n = s.size () - p;
+
+ if (n != 0)
{
s.front () = s.front (); // Force copy in CoW.
- lcase (const_cast<char*> (s.data ()), n);
+ lcase (const_cast<char*> (s.data ()) + p, n);
}
return s;
}
@@ -80,9 +83,9 @@ namespace butl
}
inline std::string
- lcase (const std::string& s)
+ lcase (const std::string& s, std::size_t p, std::size_t n)
{
- return lcase (s.c_str (), s.size ());
+ return lcase (s.c_str () + p, n != std::string::npos ? n : s.size () - p);
}
inline int
@@ -140,6 +143,12 @@ namespace butl
}
inline bool
+ wspace (char c)
+ {
+ return std::isspace (c);
+ }
+
+ inline bool
alpha (wchar_t c)
{
return std::iswalpha (c);
@@ -163,6 +172,12 @@ namespace butl
return std::iswxdigit (c);
}
+ inline bool
+ wspace (wchar_t c)
+ {
+ return std::iswspace (c);
+ }
+
inline std::size_t
next_word (const std::string& s, std::size_t& b, std::size_t& e,
char d1, char d2)
@@ -170,7 +185,7 @@ namespace butl
return next_word (s, s.size (), b, e, d1, d2);
}
- inline size_t
+ inline std::size_t
next_word (const std::string& s,
std::size_t n, std::size_t& b, std::size_t& e,
char d1, char d2)
@@ -195,6 +210,66 @@ namespace butl
return e - b;
}
+ inline std::size_t
+ next_word (const std::string& s,
+ std::size_t n, std::size_t& b, std::size_t& e, std::size_t& m,
+ char d1, char d2)
+ {
+ // An empty word will necessarily be represented as b and e being the
+ // position of a delimiter. Consider these corner cases (in all three we
+ // should produce two words):
+ //
+ // \n
+ // a\n
+ // \na
+ //
+ // It feels sensible to represent an empty word as the position of the
+ // trailing delimiter except if it is the last character (the first two
+ // cases). Thus the additional m state, which, if 0 or 1 indicates the
+ // number of delimiters to skip before parsing the next word and 2 if
+ // this is a trailing delimiter for which we need to fake an empty word
+ // with the leading delimiter.
+
+ if (b != e)
+ b = e;
+
+ if (m > 1)
+ {
+ --m;
+ return 0;
+ }
+
+ // Skip the leading delimiter, if any.
+ //
+ b += m;
+
+ if (b == n)
+ {
+ e = n;
+ return 0;
+ }
+
+ // Find first trailing delimiter.
+ //
+ m = 0;
+ for (e = b; e != n; ++e)
+ {
+ if (s[e] == d1 || s[e] == d2)
+ {
+ m = 1;
+
+ // Handle the special case of the delimiter being the last character.
+ //
+ if (e + 1 == n)
+ ++m;
+
+ break;
+ }
+ }
+
+ return e - b;
+ }
+
inline std::string&
sanitize_identifier (std::string& s)
{
@@ -222,7 +297,7 @@ namespace butl
inline void
sanitize_strlit (const std::string& s, std::string& o)
{
- for (size_t i (0), j;; i = j + 1)
+ for (std::size_t i (0), j;; i = j + 1)
{
j = s.find_first_of ("\\\"\n", i);
o.append (s.c_str () + i, (j == std::string::npos ? s.size () : j) - i);
@@ -327,13 +402,58 @@ namespace butl
return utf8_length_impl (s, nullptr, ts, wl).has_value ();
}
- inline optional<std::string>
- getenv (const std::string& name)
+#ifndef _WIN32
+ inline const char* const*
+ thread_env ()
{
- if (const char* r = std::getenv (name.c_str ()))
- return std::string (r);
+ return thread_env_;
+ }
- return nullopt;
+ inline void
+ thread_env (const char* const* v)
+ {
+ thread_env_ = v;
+ }
+#endif
+
+ // auto_thread_env
+ //
+ inline auto_thread_env::
+ auto_thread_env (const char* const* new_env)
+ {
+ const char* const* cur_env (thread_env ());
+
+ if (cur_env != new_env)
+ {
+ prev_env = cur_env;
+ thread_env (new_env);
+ }
+ }
+
+ inline auto_thread_env::
+ auto_thread_env (auto_thread_env&& x) noexcept
+ : prev_env (std::move (x.prev_env))
+ {
+ x.prev_env = nullopt;
+ }
+
+ inline auto_thread_env& auto_thread_env::
+ operator= (auto_thread_env&& x) noexcept
+ {
+ if (this != &x)
+ {
+ prev_env = std::move (x.prev_env);
+ x.prev_env = nullopt;
+ }
+
+ return *this;
+ }
+
+ inline auto_thread_env::
+ ~auto_thread_env ()
+ {
+ if (prev_env)
+ thread_env (*prev_env);
}
template <typename F, typename P>
diff --git a/libbutl/uuid-linux.cxx b/libbutl/uuid-linux.cxx
index 7689088..82af2e9 100644
--- a/libbutl/uuid-linux.cxx
+++ b/libbutl/uuid-linux.cxx
@@ -13,7 +13,7 @@
#include <utility> // move()
#include <system_error>
-#include <libbutl/utility.mxx> // function_cast()
+#include <libbutl/utility.hxx> // function_cast()
using namespace std;
diff --git a/libbutl/uuid-openbsd.cxx b/libbutl/uuid-openbsd.cxx
new file mode 100644
index 0000000..b64436b
--- /dev/null
+++ b/libbutl/uuid-openbsd.cxx
@@ -0,0 +1,80 @@
+// file : libbutl/uuid-openbsd.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_BOOTSTRAP
+
+#include <libbutl/uuid.hxx>
+
+#include <uuid.h>
+
+#include <errno.h>
+
+#include <cassert>
+#include <cstring> // memcpy()
+#include <system_error>
+
+using namespace std;
+
+namespace butl
+{
+ void
+ uuid_throw_weak (); // uuid.cxx
+
+ uuid uuid_system_generator::
+ generate (bool strong)
+ {
+ // The OpenBSD uuid_*() (<uuid.h>, uuid_compare(3)) API generates version
+ // 4 UUIDs (i.e. randomly generated) at least from version 6.4. For now we
+ // will assume that only random ones are strong.
+ //
+ // Here we assume uuid_t has the same definition as in FreeBSD/NetBSD (it
+ // is defined in <sys/uuid.h>).
+ //
+ uuid_t d;
+ uint32_t s;
+ uuid_create (&d, &s);
+
+ // None of the uuid_s_* errors seem plausible for this function so let's
+ // return the generic "not supported" error code.
+ //
+ if (s != uuid_s_ok)
+ throw system_error (ENOSYS, system_category ());
+
+ uuid r;
+
+ // This is effectively just memcpy() but let's reference the member names
+ // in case anything changes on either side.
+ //
+ r.time_low = d.time_low;
+ r.time_mid = d.time_mid;
+ r.time_hiv = d.time_hi_and_version;
+ r.clock_seq_hir = d.clock_seq_hi_and_reserved;
+ r.clock_seq_low = d.clock_seq_low;
+ memcpy (r.node, d.node, 6);
+
+ assert (r.variant () == uuid_variant::dce); // Sanity check.
+
+ if (strong)
+ {
+ switch (r.version ())
+ {
+ case uuid_version::random: break;
+ default: uuid_throw_weak ();
+ }
+ }
+
+ return r;
+ }
+
+ void uuid_system_generator::
+ initialize ()
+ {
+ }
+
+ void uuid_system_generator::
+ terminate ()
+ {
+ }
+}
+
+#endif // BUILD2_BOOTSTRAP
diff --git a/libbutl/uuid.cxx b/libbutl/uuid.cxx
index 377afb7..2132808 100644
--- a/libbutl/uuid.cxx
+++ b/libbutl/uuid.cxx
@@ -5,7 +5,7 @@
#include <errno.h> // ENOTSUP
-#include <cstdio> // sprintf() scanf()
+#include <cstdio> // snprintf() sscanf()
#include <cstring> // strlen()
#include <stdexcept>
#include <system_error>
@@ -19,16 +19,17 @@ namespace butl
{
array<char, 37> r;
- sprintf (r.data (),
- (upper
- ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X"
- : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"),
- time_low,
- time_mid,
- time_hiv,
- clock_seq_hir,
- clock_seq_low,
- node[0], node[1], node[2], node[3], node[4], node[5]);
+ snprintf (r.data (),
+ 37,
+ (upper
+ ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X"
+ : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"),
+ time_low,
+ time_mid,
+ time_hiv,
+ clock_seq_hir,
+ clock_seq_low,
+ node[0], node[1], node[2], node[3], node[4], node[5]);
return r;
}
diff --git a/libbutl/uuid.hxx b/libbutl/uuid.hxx
index 2361640..862f02d 100644
--- a/libbutl/uuid.hxx
+++ b/libbutl/uuid.hxx
@@ -48,12 +48,12 @@ namespace butl
{
// Normally not accessed directly (see RFC4122 Section 4.1.2).
//
- std::uint32_t time_low = 0;
- std::uint16_t time_mid = 0;
- std::uint16_t time_hiv = 0; // hi_and_version
- std::uint8_t clock_seq_hir = 0; // hi_and_reserved
- std::uint8_t clock_seq_low = 0;
- std::uint8_t node[6] = {0, 0, 0, 0, 0, 0};
+ std::uint32_t time_low = 0;
+ std::uint16_t time_mid = 0;
+ std::uint16_t time_hiv = 0; // hi_and_version
+ std::uint8_t clock_seq_hir = 0; // hi_and_reserved
+ std::uint8_t clock_seq_low = 0;
+ std::uint8_t node[6] = {0, 0, 0, 0, 0, 0};
// System UUID generator. See the uuid_generator interface for details.
//
@@ -158,10 +158,10 @@ namespace butl
void
swap (uuid&);
- uuid (uuid&&);
+ uuid (uuid&&) noexcept;
uuid (const uuid&) = default;
- uuid& operator= (uuid&&);
+ uuid& operator= (uuid&&) noexcept;
uuid& operator= (const uuid&) = default;
};
@@ -183,7 +183,7 @@ namespace butl
~uuid_generator () = default;
// Generate a UUID. If strong is true (default), generate a strongly-
- // unique UUID. Throw std::runtime_error to report errors, including if
+ // unique UUID. Throw std::system_error to report errors, including if
// strong uniqueness cannot be guaranteed.
//
// A weak UUID is not guaranteed to be unique, neither universally nor
@@ -207,7 +207,7 @@ namespace butl
// Optional explicit initialization and termination. Note that it is not
// thread-safe and must only be performed once (normally from main())
// before/after any calls to generate(), respectively. Both functions may
- // throw std::runtime_error to report errors.
+ // throw std::system_error to report errors.
//
static void
initialize ();
diff --git a/libbutl/uuid.ixx b/libbutl/uuid.ixx
index 6744af7..6115be1 100644
--- a/libbutl/uuid.ixx
+++ b/libbutl/uuid.ixx
@@ -39,14 +39,14 @@ namespace butl
}
inline uuid::
- uuid (uuid&& u)
+ uuid (uuid&& u) noexcept
: uuid () // nil
{
swap (u);
}
inline uuid& uuid::
- operator= (uuid&& u)
+ operator= (uuid&& u) noexcept
{
if (this != &u)
{
diff --git a/libbutl/vector-view.mxx b/libbutl/vector-view.hxx
index 7924371..16ab08e 100644
--- a/libbutl/vector-view.mxx
+++ b/libbutl/vector-view.hxx
@@ -1,32 +1,17 @@
-// file : libbutl/vector-view.mxx -*- C++ -*-
+// file : libbutl/vector-view.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
#pragma once
-#endif
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <cstddef> // size_t, ptrdiff_t
#include <utility> // swap()
#include <iterator> // reverse_iterator
#include <stdexcept> // out_of_range
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.vector_view;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
#include <libbutl/export.hxx>
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
{
// In our version a const view allows the modification of the elements
// unless T is made const (the same semantics as in smart pointers).
diff --git a/libbutl/win32-utility.cxx b/libbutl/win32-utility.cxx
index 3b44d60..c69842b 100644
--- a/libbutl/win32-utility.cxx
+++ b/libbutl/win32-utility.cxx
@@ -8,16 +8,9 @@
//
#ifdef _WIN32
-#ifndef __cpp_lib_modules_ts
-#include <string>
#include <memory> // unique_ptr
-#include <libbutl/utility.mxx> // throw_system_error()
-#else
-import std.core;
-
-import butl.utility;
-#endif
+#include <libbutl/utility.hxx> // throw_system_error()
using namespace std;
diff --git a/libbutl/win32-utility.hxx b/libbutl/win32-utility.hxx
index b71eb1a..9bed647 100644
--- a/libbutl/win32-utility.hxx
+++ b/libbutl/win32-utility.hxx
@@ -31,11 +31,7 @@
# endif
#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
-#else
-import std.core;
-#endif
#include <libbutl/export.hxx>
diff --git a/libbutl/xxhash.c b/libbutl/xxhash.c
new file mode 100644
index 0000000..ff28749
--- /dev/null
+++ b/libbutl/xxhash.c
@@ -0,0 +1,1030 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * If it is, the result for a null input pointer is the same as for a zero-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
+# define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); } /* allocation hook: forwards to malloc() */
+static void XXH_free (void* p) { free(p); } /* deallocation hook: forwards to free() */
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } /* copy hook: forwards to memcpy() */
+
+#include <assert.h> /* assert */
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } /* method 2: plain dereference */
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } /* method 1: read through a packed union */
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr) /* method 0 (default): read 4 bytes in native byte order via memcpy() */
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static U32 XXH_swap32 (U32 x) /* fallback: reverse the four bytes of x with shifts and masks */
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+static int XXH_isLittleEndian(void) /* run-time probe: non-zero when the lowest-addressed byte of a U32 holding 1 is 1 */
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) /* read a 32-bit value, byte-swapping it unless endian is XXH_littleEndian */
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); /* goes through XXH_read32() */
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); /* aligned: dereferences ptr directly as U32 */
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) /* XXH_readLE32_align() with XXH_unaligned */
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr) /* read a 32-bit value; byte-swapped when XXH_CPU_LITTLE_ENDIAN is non-zero */
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* returns the XXH_VERSION_NUMBER this file was compiled with */
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 = 668265263U;
+static const U32 PRIME32_5 = 374761393U;
+
+static U32 XXH32_round(U32 seed, U32 input) /* one accumulator step: fold input into seed and return the new accumulator */
+{
+ seed += input * PRIME32_2;
+ seed = XXH_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+/* mix all bits: final xor-shift / multiply sequence applied to the 32-bit hash before it is returned */
+static U32 XXH32_avalanche(U32 h32)
+{
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+ return(h32);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+static U32 /* returns the avalanched hash after folding the trailing (len&15) bytes at ptr into h32 */
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+
+{
+ const BYTE* p = (const BYTE*)ptr; /* cursor advanced by PROCESS1 (1 byte) and PROCESS4 (4 bytes) */
+
+#define PROCESS1 \
+ h32 += (*p++) * PRIME32_5; \
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+
+#define PROCESS4 \
+ h32 += XXH_get32bits(p) * PRIME32_3; \
+ p+=4; \
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+
+ switch(len&15) /* or switch(bEnd - p); each group below consumes 4-byte words first, then single bytes */
+ {
+ case 12: PROCESS4;
+ /* fallthrough */
+ case 8: PROCESS4;
+ /* fallthrough */
+ case 4: PROCESS4;
+ return XXH32_avalanche(h32);
+
+ case 13: PROCESS4;
+ /* fallthrough */
+ case 9: PROCESS4;
+ /* fallthrough */
+ case 5: PROCESS4;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 14: PROCESS4;
+ /* fallthrough */
+ case 10: PROCESS4;
+ /* fallthrough */
+ case 6: PROCESS4;
+ PROCESS1;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 15: PROCESS4;
+ /* fallthrough */
+ case 11: PROCESS4;
+ /* fallthrough */
+ case 7: PROCESS4;
+ /* fallthrough */
+ case 3: PROCESS1;
+ /* fallthrough */
+ case 2: PROCESS1;
+ /* fallthrough */
+ case 1: PROCESS1;
+ /* fallthrough */
+ case 0: return XXH32_avalanche(h32);
+ }
+ assert(0); /* every value of len&15 (0..15) has a case above */
+ return h32; /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32 /* one-shot XXH32 of len bytes at input for a fixed endian/align combination */
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) { /* NULL input is hashed as an empty input */
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)16;
+ }
+#endif
+
+ if (len>=16) { /* bulk phase: four accumulators, 16 bytes per iteration */
+ const BYTE* const limit = bEnd - 15;
+ U32 v1 = seed + PRIME32_1 + PRIME32_2;
+ U32 v2 = seed + PRIME32_2;
+ U32 v3 = seed + 0;
+ U32 v4 = seed - PRIME32_1;
+
+ do {
+ v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+ } while (p < limit); /* i.e. while at least 16 bytes remain before bEnd */
+
+ h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
+ + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5; /* short input: no accumulators are used */
+ }
+
+ h32 += (U32)len; /* length is mixed in truncated to 32 bits */
+
+ return XXH32_finalize(h32, p, len&15, endian, align); /* p now points at the remaining tail bytes */
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) /* one-shot 32-bit hash; selects the endian/alignment variant of XXH32_endian_align() */
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH32_state_t state;
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, input, len);
+ return XXH32_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) { /* aligned fast path is only considered when this macro is non-zero */
+ if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) /* XXH_FORCE_NATIVE_FORMAT skips byte swapping on big-endian CPUs */
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+
+/*====== Hash streaming ======*/
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) /* returns the XXH_malloc() result as-is; the state is not initialised here */
+{
+ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) /* releases statePtr with XXH_free(); always returns XXH_OK */
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) /* byte-wise copy of the whole state structure */
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) /* (re)initialise *statePtr for a new hash with the given seed; always returns XXH_OK */
+{
+ XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state)); /* zeroes the counters and the temporary buffer */
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode /* XXH_OK on success; for NULL input, XXH_OK or XXH_ERROR depending on XXH_ACCEPT_NULL_INPUT_POINTER */
+XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ { const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ state->total_len_32 += (unsigned)len; /* running length, kept in unsigned (wraps on overflow) */
+ state->large_len |= (len>=16) | (state->total_len_32>=16); /* set once 16 or more bytes have been seen */
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (unsigned)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); /* top the buffer up to a full 16-byte stripe */
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+ }
+ p += 16-state->memsize; /* skip the input bytes that were just consumed through the buffer */
+ state->memsize = 0;
+ }
+
+ if (p <= bEnd-16) { /* at least one full 16-byte stripe left in the input */
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
+
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) { /* keep the remaining bytes (fewer than 16) for the next update or the digest */
+ XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+ }
+
+ return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) /* feed len bytes into the streaming state; dispatches on detected endianness */
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+FORCE_INLINE U32 /* computes the hash from the streaming state; the state is only read (const) */
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+ U32 h32;
+
+ if (state->large_len) { /* flag maintained by XXH32_update_endian() */
+ h32 = XXH_rotl32(state->v1, 1)
+ + XXH_rotl32(state->v2, 7)
+ + XXH_rotl32(state->v3, 12)
+ + XXH_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); /* fold in the bytes still buffered in mem32 */
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) /* returns the 32-bit hash of the data fed so far; dispatches on detected endianness */
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of hash result into and from its canonical format.
+* This way, hash values can be written into a file or buffer, remaining comparable across different systems.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) /* store hash into *dst in big-endian byte order */
+{
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); /* swap so the bytes copied below are big-endian */
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) /* inverse of XXH32_canonicalFromHash(): big-endian read of *src */
+{
+ return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bit hash functions
+*********************************************************************/
+
+/*====== Memory access ======*/
+
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t U64;
+# else
+ /* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+ typedef unsigned long long U64;
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } /* method 2: plain dereference */
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } /* method 1: read through a packed union */
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr) /* method 0 (default): read 8 bytes in native byte order via memcpy() */
+{
+ U64 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+# define XXH_swap64 __builtin_bswap64
+#else
+static U64 XXH_swap64 (U64 x) /* fallback: reverse the eight bytes of x with shifts and masks */
+{
+ return ((x << 56) & 0xff00000000000000ULL) |
+ ((x << 40) & 0x00ff000000000000ULL) |
+ ((x << 24) & 0x0000ff0000000000ULL) |
+ ((x << 8) & 0x000000ff00000000ULL) |
+ ((x >> 8) & 0x00000000ff000000ULL) |
+ ((x >> 24) & 0x0000000000ff0000ULL) |
+ ((x >> 40) & 0x000000000000ff00ULL) |
+ ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) /* read a 64-bit value, byte-swapping it unless endian is XXH_littleEndian */
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); /* goes through XXH_read64() */
+ else
+ return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); /* aligned: dereferences ptr directly as U64 */
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) /* XXH_readLE64_align() with XXH_unaligned */
+{
+ return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr) /* read a 64-bit value; byte-swapped when XXH_CPU_LITTLE_ENDIAN is non-zero */
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*====== xxh64 ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input) /* one accumulator step: fold input into acc and return the new accumulator */
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val) /* merge one lane accumulator (val) into the combined hash (acc) */
+{
+ val = XXH64_round(0, val); /* scramble val on its own before mixing it in */
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+static U64 XXH64_avalanche(U64 h64) /* final xor-shift / multiply sequence applied to the 64-bit hash before it is returned */
+{
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+ return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+static U64 /* returns the avalanched hash after folding the trailing (len&31) bytes at ptr into h64 */
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)ptr; /* cursor advanced by PROCESS1_64 (1 byte), PROCESS4_64 (4) and PROCESS8_64 (8) */
+
+#define PROCESS1_64 \
+ h64 ^= (*p++) * PRIME64_5; \
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64 \
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+ p+=4; \
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 { \
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+ p+=8; \
+ h64 ^= k1; \
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+
+ switch(len&31) { /* only the low 5 bits of len are used; each group consumes 8-byte words, then a 4-byte word, then single bytes */
+ case 24: PROCESS8_64;
+ /* fallthrough */
+ case 16: PROCESS8_64;
+ /* fallthrough */
+ case 8: PROCESS8_64;
+ return XXH64_avalanche(h64);
+
+ case 28: PROCESS8_64;
+ /* fallthrough */
+ case 20: PROCESS8_64;
+ /* fallthrough */
+ case 12: PROCESS8_64;
+ /* fallthrough */
+ case 4: PROCESS4_64;
+ return XXH64_avalanche(h64);
+
+ case 25: PROCESS8_64;
+ /* fallthrough */
+ case 17: PROCESS8_64;
+ /* fallthrough */
+ case 9: PROCESS8_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 29: PROCESS8_64;
+ /* fallthrough */
+ case 21: PROCESS8_64;
+ /* fallthrough */
+ case 13: PROCESS8_64;
+ /* fallthrough */
+ case 5: PROCESS4_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 26: PROCESS8_64;
+ /* fallthrough */
+ case 18: PROCESS8_64;
+ /* fallthrough */
+ case 10: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 30: PROCESS8_64;
+ /* fallthrough */
+ case 22: PROCESS8_64;
+ /* fallthrough */
+ case 14: PROCESS8_64;
+ /* fallthrough */
+ case 6: PROCESS4_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 27: PROCESS8_64;
+ /* fallthrough */
+ case 19: PROCESS8_64;
+ /* fallthrough */
+ case 11: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 31: PROCESS8_64;
+ /* fallthrough */
+ case 23: PROCESS8_64;
+ /* fallthrough */
+ case 15: PROCESS8_64;
+ /* fallthrough */
+ case 7: PROCESS4_64;
+ /* fallthrough */
+ case 3: PROCESS1_64;
+ /* fallthrough */
+ case 2: PROCESS1_64;
+ /* fallthrough */
+ case 1: PROCESS1_64;
+ /* fallthrough */
+ case 0: return XXH64_avalanche(h64);
+ }
+
+ /* impossible to reach: every value of len&31 (0..31) has a case above */
+ assert(0);
+ return 0; /* unreachable, but some compilers complain without it */
+}
+
+FORCE_INLINE U64 /* one-shot XXH64 of len bytes at input for a fixed endian/align combination */
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U64 h64;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) { /* NULL input is hashed as an empty input */
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) { /* bulk phase: four accumulators, 32 bytes per iteration */
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5; /* short input: no accumulators are used */
+ }
+
+ h64 += (U64) len;
+
+ return XXH64_finalize(h64, p, len, endian, align); /* full len is passed; XXH64_finalize() masks it with 31 */
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) /* one-shot 64-bit hash; selects the endian/alignment variant of XXH64_endian_align() */
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_state_t state;
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, input, len);
+ return XXH64_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) { /* aligned fast path is only considered when this macro is non-zero */
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) /* XXH_FORCE_NATIVE_FORMAT skips byte swapping on big-endian CPUs */
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*====== Hash Streaming ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) /* returns the XXH_malloc() result as-is; the state is not initialised here */
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) /* releases statePtr with XXH_free(); always returns XXH_OK */
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) /* byte-wise copy of the whole state structure */
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) /* (re)initialise *statePtr for a new hash with the given seed; always returns XXH_OK */
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state)); /* zeroes the counters and the temporary buffer */
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+FORCE_INLINE XXH_errorcode /* XXH_OK on success; for NULL input, XXH_OK or XXH_ERROR depending on XXH_ACCEPT_NULL_INPUT_POINTER */
+XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ { const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ state->total_len += len; /* running count of all bytes fed so far */
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); /* top the buffer up to a full 32-byte stripe */
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize; /* skip the input bytes that were just consumed through the buffer */
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) { /* at least one full 32-byte stripe left in the input */
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) { /* keep the remaining bytes (fewer than 32) for the next update or the digest */
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) /* feed len bytes into the streaming state; dispatches on detected endianness */
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) /* computes the hash from the streaming state; the state is only read (const) */
+{
+ U64 h64;
+
+ if (state->total_len >= 32) { /* at least one full stripe went through the accumulators */
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->v3 /*seed*/ + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); /* XXH64_finalize() uses only total_len&31 bytes of mem64 */
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) /* returns the 64-bit hash of the data fed so far; dispatches on detected endianness */
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) /* store hash into *dst in big-endian byte order */
+{
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); /* swap so the bytes copied below are big-endian */
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) /* inverse of XXH64_canonicalFromHash(): big-endian read of *src */
+{
+ return XXH_readBE64(src);
+}
+
+#endif /* XXH_NO_LONG_LONG */
diff --git a/libbutl/xxhash.h b/libbutl/xxhash.h
new file mode 100644
index 0000000..d6bad94
--- /dev/null
+++ b/libbutl/xxhash.h
@@ -0,0 +1,328 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MurmurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+ * API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * This is useful to include xxhash functions in `static` mode
+ * in order to inline them, and remove their symbol from the public list.
+ * Inlining can offer dramatic performance improvement on small keys.
+ * Methodology :
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ * It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# ifndef XXH_STATIC_LINKING_ONLY
+# define XXH_STATIC_LINKING_ONLY
+# endif
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+ /* this version may generate warnings for unused static functions */
+# define XXH_PUBLIC_API static
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 6
+#define XXH_VERSION_RELEASE 5
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+ Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
+ The memory between input & input+length must be valid (allocated and read-accessible).
+ "seed" can be used to alter the result predictably.
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+
+/*====== Streaming ======*/
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/*
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
+
+/*====== Canonical representation ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* Default result types for XXH functions are primitive unsigned 32-bit and 64-bit integers.
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+ Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
+ "seed" can be used to alter the result predictably.
+ This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*====== Streaming ======*/
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+/*====== Canonical representation ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif /* XXH_NO_LONG_LONG */
+
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+ This section contains declarations which are not guaranteed to remain stable.
+ They may change in future versions, becoming incompatible with a different version of the library.
+ These declarations should only be used with static linking.
+ Never use them in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+
+struct XXH32_state_s { /* fixed-width layout, selected by the preceding #if when <stdint.h> is included */
+ uint32_t total_len_32;
+ uint32_t large_len;
+ uint32_t v1;
+ uint32_t v2;
+ uint32_t v3;
+ uint32_t v4;
+ uint32_t mem32[4];
+ uint32_t memsize;
+ uint32_t reserved; /* never read nor written; might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s { /* fixed-width layout, selected by the preceding #if when <stdint.h> is included */
+ uint64_t total_len; /* total input length; XXH64_digest_endian() compares it against 32 and adds it to the hash */
+ uint64_t v1; /* v1..v4 are folded together by XXH64_digest_endian() once total_len >= 32 */
+ uint64_t v2;
+ uint64_t v3; /* used as the seed by XXH64_digest_endian() while total_len < 32 */
+ uint64_t v4;
+ uint64_t mem64[4]; /* passed to XXH64_finalize() by XXH64_digest_endian() */
+ uint32_t memsize;
+ uint32_t reserved[2]; /* never read nor written; might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s { /* fallback layout built on plain `unsigned`, used when the <stdint.h> branch above is not taken */
+ unsigned total_len_32;
+ unsigned large_len;
+ unsigned v1;
+ unsigned v2;
+ unsigned v3;
+ unsigned v4;
+ unsigned mem32[4];
+ unsigned memsize;
+ unsigned reserved; /* never read nor written; might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
+struct XXH64_state_s { /* fallback layout built on `unsigned long long`, used when the <stdint.h> branch above is not taken */
+ unsigned long long total_len; /* total input length; XXH64_digest_endian() compares it against 32 and adds it to the hash */
+ unsigned long long v1; /* v1..v4 are folded together by XXH64_digest_endian() once total_len >= 32 */
+ unsigned long long v2;
+ unsigned long long v3; /* used as the seed by XXH64_digest_endian() while total_len < 32 */
+ unsigned long long v4;
+ unsigned long long mem64[4]; /* passed to XXH64_finalize() by XXH64_digest_endian() */
+ unsigned memsize;
+ unsigned reserved[2]; /* never read nor written; might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+# endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
diff --git a/manifest b/manifest
index c618b6f..7ac415a 100644
--- a/manifest
+++ b/manifest
@@ -1,9 +1,9 @@
: 1
name: libbutl
-version: 0.14.0-a.0.z
+version: 0.17.0-a.0.z
project: build2
summary: build2 utility library
-license: MIT AND BSD-3-Clause AND BSD-2-Clause ; MIT except for 5 files from the FreeBSD project.
+license: MIT AND BSD-3-Clause AND BSD-2-Clause ; MIT except for files from the FreeBSD, LZ4, and mingw-std-threads projects.
topics: build toolchain
description-file: README
changes-file: NEWS
@@ -12,7 +12,7 @@ doc-url: https://build2.org/doc.xhtml
src-url: https://git.build2.org/cgit/libbutl/tree/
email: users@build2.org
build-warning-email: builds@build2.org
-builds: all
+builds: all : &host
requires: c++14
-depends: * build2 >= 0.13.0
-depends: * bpkg >= 0.13.0
+depends: * build2 >= 0.16.0-
+depends: * bpkg >= 0.16.0-
diff --git a/tests/b-info/driver.cxx b/tests/b-info/driver.cxx
index 6a3e0a4..5691221 100644
--- a/tests/b-info/driver.cxx
+++ b/tests/b-info/driver.cxx
@@ -1,28 +1,15 @@
// file : tests/b-info/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.b;
-import butl.path;
-import butl.utility; // operator<<(ostream,exception)
-#else
-#include <libbutl/b.mxx>
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#endif
+
+#include <libbutl/b.hxx>
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream,exception)
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -62,12 +49,14 @@ try
cout.exceptions (ios::failbit | ios::badbit);
- b_project_info pi (b_info (project,
- 1 /* verb */,
- {} /* cmd_callback */,
- b,
- {} /* search_fallback */,
- {"--no-default-options"}));
+ b_project_info pi (
+ b_info (project,
+ b_info_flags::ext_mods | b_info_flags::subprojects,
+ 1 /* verb */,
+ {} /* cmd_callback */,
+ b,
+ {} /* search_fallback */,
+ {"--no-default-options"}));
cout << "project: " << pi.project << endl
<< "version: " << pi.version << endl
@@ -109,6 +98,18 @@ try
cout << *i;
}
+ cout << endl
+ << "modules: ";
+
+ for (auto b (pi.modules.begin ()), i (b);
+ i != pi.modules.end ();
+ ++i)
+ {
+ if (i != b)
+ cout << ' ';
+
+ cout << *i;
+ }
cout << endl;
return 0;
diff --git a/tests/b-info/testscript b/tests/b-info/testscript
index c5c3910..1ebf060 100644
--- a/tests/b-info/testscript
+++ b/tests/b-info/testscript
@@ -57,10 +57,11 @@ test.options += -b $recall($build.path)
url:$sp
%src_root: .+/prj/%
%out_root: .+/prj/%
- amalgamation: ../../../../
+ %amalgamation: .*%
subprojects: @subprj/
operations: update clean
meta-operations: perform configure disfigure dist info
+ modules: version config dist
EOO
$* prj/subprj >>/~"%EOO%"
@@ -74,6 +75,7 @@ test.options += -b $recall($build.path)
subprojects:$sp
operations: update clean
meta-operations: perform configure disfigure dist info
+ modules: config dist
EOO
}
diff --git a/tests/backtrace/driver.cxx b/tests/backtrace/driver.cxx
index d998942..ecfa58e 100644
--- a/tests/backtrace/driver.cxx
+++ b/tests/backtrace/driver.cxx
@@ -5,31 +5,17 @@
# include <sys/resource.h> // setrlimit()
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
#include <exception> // set_terminate(), terminate_handler
#include <system_error>
-#else
-import std.io;
-#endif
-// Other includes.
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/backtrace.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.backtrace;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/backtrace.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/base64/driver.cxx b/tests/base64/driver.cxx
index c7906f5..32d5236 100644
--- a/tests/base64/driver.cxx
+++ b/tests/base64/driver.cxx
@@ -1,29 +1,20 @@
// file : tests/base64/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <sstream>
-#endif
-// Other includes.
+#include <libbutl/base64.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.base64;
-#else
-#include <libbutl/base64.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
+// Test base64 encoding and decoding.
+//
static bool
encode (const string& i, const string& o)
{
@@ -79,9 +70,44 @@ encode (const string& i, const string& o)
return r;
}
+// Test base64url encoding only (decoding not yet implemented).
+//
+static bool
+encode_url (const string& i, const string& o)
+{
+ istringstream is (i);
+ string s (base64url_encode (is)); // Overload 1: input stream to string.
+ bool r (s == o && is.eof ()); // Output must match and the input must be fully consumed.
+
+ if (r)
+ {
+ is.seekg (0); // Rewind so the same input can be fed to the next overload.
+
+ // VC15 seekg() doesn't clear eofbit.
+ //
+#if defined(_MSC_VER) && _MSC_VER < 1920
+ is.clear ();
+#endif
+
+ assert (!is.eof ());
+
+ ostringstream os;
+ base64url_encode (os, is); // Overload 2: input stream to output stream.
+ r = os.str () == o && is.eof ();
+ }
+
+ if (r)
+ r = base64url_encode (vector<char> (i.begin (), i.end ())) == o; // Overload 3: vector<char> to string.
+
+ return r; // True only if every overload that was tried produced `o`.
+}
+
+
int
main ()
{
+ // base64
+ //
assert (encode ("", ""));
assert (encode ("B", "Qg=="));
assert (encode ("BX", "Qlg="));
@@ -91,6 +117,19 @@ main ()
assert (encode ("BXzS@#", "Qlh6U0Aj"));
assert (encode ("BXzS@#/", "Qlh6U0AjLw=="));
+ // base64url: no padding in output.
+ //
+ assert (encode_url ("", ""));
+ assert (encode_url ("B", "Qg"));
+ assert (encode_url ("BX", "Qlg"));
+ assert (encode_url ("BXz", "Qlh6"));
+ assert (encode_url ("BXzS", "Qlh6Uw"));
+ assert (encode_url ("BXzS@", "Qlh6U0A"));
+ assert (encode_url ("BXzS@#", "Qlh6U0Aj"));
+ assert (encode_url ("BXzS@#/", "Qlh6U0AjLw"));
+
+ // Multi-line input.
+ //
const char* s (
"class fdstream_base\n"
"{\n"
@@ -102,10 +141,29 @@ main ()
" fdbuf buf_;\n"
"};\n");
+ // base64
+ //
const char* r (
"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm\n"
"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK\n"
"ICBmZGJ1ZiBidWZfOwp9Owo=");
assert (encode (s, r));
+
+ // base64url: no newlines or padding in output.
+ //
+ r =
+"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm"
+"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK"
+"ICBmZGJ1ZiBidWZfOwp9Owo";
+
+ assert (encode_url (s, r));
+
+ // Test 63rd and 64th characters: `>` maps to `+` or `-`; `?` maps to `/` or
+ // `_`.
+ //
+ assert (encode (">>>>>>", "Pj4+Pj4+"));
+ assert (encode_url (">>>>>>", "Pj4-Pj4-"));
+ assert (encode ("??????", "Pz8/Pz8/"));
+ assert (encode_url ("??????", "Pz8_Pz8_"));
}
diff --git a/tests/build/root.build b/tests/build/root.build
index bb274d3..515e1c9 100644
--- a/tests/build/root.build
+++ b/tests/build/root.build
@@ -13,6 +13,16 @@ if ($cxx.target.system == 'win32-msvc')
if ($cxx.class == 'msvc')
cxx.coptions += /wd4251 /wd4275 /wd4800
+elif ($cxx.id == 'gcc')
+{
+ cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \
+-Wno-stringop-overread
+
+ if ($cxx.version.major >= 13)
+ cxx.coptions += -Wno-dangling-reference
+}
+elif ($cxx.id.type == 'clang' && $cxx.version.major >= 15)
+ cxx.coptions += -Wno-unqualified-std-cast-call
# Every exe{} in this subproject is by default a test.
#
diff --git a/tests/builtin/buildfile b/tests/builtin/buildfile
index 8341847..8d22fe4 100644
--- a/tests/builtin/buildfile
+++ b/tests/builtin/buildfile
@@ -6,3 +6,6 @@ import libs = libbutl%lib{butl}
./: exe{driver} file{cp-dir/cp-file}
exe{driver}: {hxx cxx}{*} $libs testscript{*}
+
+if ($cxx.target.class != 'windows')
+ cxx.libs += -lpthread
diff --git a/tests/builtin/driver.cxx b/tests/builtin/driver.cxx
index 9fb6d6f..bdf3fa9 100644
--- a/tests/builtin/driver.cxx
+++ b/tests/builtin/driver.cxx
@@ -1,53 +1,60 @@
// file : tests/builtin/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
+#ifdef _WIN32
+# include <libbutl/win32-utility.hxx>
+#endif
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
+#include <chrono>
#include <utility> // move()
+#include <cstdint> // uint8_t
#include <ostream>
#include <iostream>
+#ifndef _WIN32
+# include <thread> // this_thread::sleep_for()
#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // eof()
+#include <libbutl/builtin.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/timestamp.hxx> // to_stream(duration)
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility; // eof()
-import butl.builtin;
-import butl.optional;
-import butl.timestamp; // to_stream(duration)
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/builtin.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/timestamp.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
+// Disable arguments globbing that may be enabled by default for MinGW runtime
+// (see tests/wildcard/driver.cxx for details).
+//
+#ifdef __MINGW32__
+int _CRT_glob = 0;
+#endif
+
inline ostream&
operator<< (ostream& os, const path& p)
{
return os << p.representation ();
}
-// Usage: argv[0] [-d <dir>] [-o <opt>] [-c] [-i] <builtin> <builtin-args>
+// Usage: argv[0] [-d <dir>] [-o <opt>] [-c] [-i] [-t <msec>] [-s <sec>]
+// <builtin> <builtin-args>
//
// Execute the builtin and exit with its exit status.
//
-// -d <dir> use as a current working directory
-// -c use callbacks that, in particular, trace calls to stdout
-// -o <opt> additional builtin option recognized by the callback
-// -i read lines from stdin and append them to the builtin arguments
+// -d <dir> use as a current working directory
+// -c use callbacks that, in particular, trace calls to stdout
+// -o <opt> additional builtin option recognized by the callback
+// -i read lines from stdin and append them to the builtin arguments
+// -t <msec> print diag if the builtin didn't complete in <msec> milliseconds
+// -s <sec> sleep <sec> seconds prior to running the builtin
+//
+// Note that the 'roundtrip' builtin name is also recognized and results in
+// running the pseudo-builtin that just roundtrips stdin to stdout.
//
int
main (int argc, char* argv[])
@@ -62,12 +69,25 @@ main (int argc, char* argv[])
dir_path cwd;
string option;
builtin_callbacks callbacks;
+ optional<duration> timeout;
+ optional<chrono::seconds> sec;
string name;
vector<string> args;
auto flag = [] (bool v) {return v ? "true" : "false";};
+ auto num = [] (const string& s) // Parse a decimal option value; any malformed input trips an assert.
+ {
+ assert (!s.empty ()); // An empty string would otherwise pass the end-pointer check below.
+
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ uint64_t r (strtoull (s.c_str (), &e, 10)); // Can't throw.
+ assert (errno != ERANGE && e == s.c_str () + s.size ()); // No overflow and the whole string was consumed.
+ return r;
+ };
+
// Parse the driver options and arguments.
//
int i (1);
@@ -120,7 +140,23 @@ main (int argc, char* argv[])
);
}
else if (a == "-i")
+ {
in = true;
+ }
+ else if (a == "-t")
+ {
+ ++i;
+
+ assert (i != argc);
+ timeout = chrono::milliseconds (num (argv[i]));
+ }
+ else if (a == "-s")
+ {
+ ++i;
+
+ assert (i != argc);
+ sec = chrono::seconds (num (argv[i]));
+ }
else
break;
}
@@ -142,23 +178,95 @@ main (int argc, char* argv[])
args.push_back (move (s));
}
- // Execute the builtin.
- //
- const builtin_info* bi (builtins.find (name));
+ auto sleep = [&sec] () // Delay requested with the -s option, applied before the builtin runs.
+ {
+ if (sec) // Only set when -s was specified; otherwise this is a no-op.
+ {
+ // MINGW GCC 4.9 doesn't implement this_thread so use Win32 Sleep().
+ //
+#ifndef _WIN32
+ this_thread::sleep_for (*sec);
+#else
+ Sleep (static_cast<DWORD> (sec->count () * 1000)); // Sleep() takes milliseconds.
+#endif
+ }
+ };
- if (bi == nullptr)
+ auto wait = [&timeout] (builtin& b)
{
- cerr << "unknown builtin '" << name << "'" << endl;
- return 1;
- }
+ optional<uint8_t> r;
+
+ if (timeout)
+ {
+ r = b.timed_wait (*timeout);
+
+ if (!r)
+ {
+ cerr << "timeout expired" << endl;
- if (bi->function == nullptr)
+ b.wait ();
+ r = 1;
+ }
+ }
+ else
+ r = b.wait ();
+
+ assert (b.try_wait ()); // While at it, test try_wait().
+
+ return *r;
+ };
+
+ // Execute the builtin.
+ //
+ if (name != "roundtrip")
{
- cerr << "external builtin '" << name << "'" << endl;
- return 1;
+ const builtin_info* bi (builtins.find (name));
+
+ if (bi == nullptr)
+ {
+ cerr << "unknown builtin '" << name << "'" << endl;
+ return 1;
+ }
+
+ if (bi->function == nullptr)
+ {
+ cerr << "external builtin '" << name << "'" << endl;
+ return 1;
+ }
+
+ sleep ();
+
+ uint8_t r; // Storage.
+ builtin b (bi->function (r, args, nullfd, nullfd, nullfd, cwd, callbacks));
+ return wait (b);
}
+ else
+ {
+ uint8_t r; // Storage.
- uint8_t r; // Storage.
- builtin b (bi->function (r, args, nullfd, nullfd, nullfd, cwd, callbacks));
- return b.wait ();
+ auto run = [&r, &sleep] ()
+ {
+ // While at it, test that a non-copyable lambda can be used as a
+ // builtin.
+ //
+ auto_fd fd;
+
+ return pseudo_builtin (
+ r,
+ [&sleep, fd = move (fd)] () mutable noexcept
+ {
+ fd.reset ();
+
+ sleep ();
+
+ if (cin.peek () != istream::traits_type::eof ())
+ cout << cin.rdbuf ();
+
+ return 0;
+ });
+ };
+
+ builtin b (run ());
+ return wait (b);
+ }
}
diff --git a/tests/builtin/find.testscript b/tests/builtin/find.testscript
new file mode 100644
index 0000000..b09822c
--- /dev/null
+++ b/tests/builtin/find.testscript
@@ -0,0 +1,276 @@
+# file : tests/builtin/find.testscript
+# license : MIT; see accompanying LICENSE file
+
+posix = ($cxx.target.class != 'windows')
+
+test.arguments = "find"
+
+: no-paths
+:
+$* 2>"find: missing start path" == 1
+
+: no-paths-primary
+:
+$* -name foo 2>"find: unknown option '-name'" == 1
+
+: unknown-primary
+:
+$* . -foo 2>"find: unknown primary '-foo'" == 1
+
+
+: no-primary-value
+:
+$* . -name 2>"find: missing value for primary '-name'" == 1
+
+: empty-primary-value
+:
+$* . -type '' 2>"find: empty value for primary '-type'" == 1
+
+: invalid-type-primary
+:
+$* . -type foo 2>"find: invalid value 'foo' for primary '-type'" == 1
+
+: invalid-mindepth-primary
+:
+$* . -mindepth 12a 2>"find: invalid value '12a' for primary '-mindepth'" == 1
+
+: path-not-exists
+:
+{
+ mkdir d;
+ $* x d >'d' 2>"find: 'x' doesn't exists" != 0
+}
+
+: path
+:
+{
+ : relative
+ :
+ {
+ : no-cwd
+ :
+ {
+ mkdir a;
+ touch a/b;
+
+ $* a >>/EOO
+ a
+ a/b
+ EOO
+ }
+
+ : absolute-cwd
+ :
+ : When cross-testing we cannot guarantee that host absolute paths are
+ : recognized by the target process.
+ :
+ if ($test.target == $build.host)
+ {
+ test.options += -d $~/a;
+ mkdir a;
+ touch a/b;
+
+ $* b >'b'
+ }
+
+ : relative-cwd
+ :
+ if ($test.target == $build.host)
+ {
+ test.options += -d a;
+ mkdir a;
+ touch a/b;
+
+ $* b >'b'
+ }
+ }
+
+ : non-normalized
+ :
+ {
+ mkdir a;
+ touch a/b;
+
+ # Note that the path specified on the command line is used unaltered.
+ #
+ s = ($posix ? '/' : '\');
+
+ $* ./a >>"EOO"
+ ./a
+ ./a$(s)b
+ EOO
+ }
+
+ : absolute
+ :
+ {
+ mkdir a;
+ touch a/b;
+
+ $* $~/a >>/"EOO"
+ $~/a
+ $~/a/b
+ EOO
+ }
+
+ : non-existent
+ :
+ {
+ touch a b;
+
+ $* a x b >>EOO 2>"find: 'x' doesn't exists" != 0
+ a
+ b
+ EOO
+ }
+
+ : non-directory
+ :
+ {
+ touch a b c;
+
+ $* a b/ c >>EOO 2>"find: 'b' is not a directory" != 0
+ a
+ c
+ EOO
+ }
+
+ : trailing-slash
+ :
+ {
+ mkdir -p a/b;
+
+ $* a >>/"EOO";
+ a
+ a/b
+ EOO
+
+ $* a/ >>"EOO"
+ a/
+ a/b
+ EOO
+ }
+}
+
+: name-primary
+:
+{
+ : basic
+ :
+ {
+ mkdir a;
+ touch a/ab a/ba;
+
+ $* . -name 'a*' >>/EOO;
+ ./a
+ ./a/ab
+ EOO
+
+ $* . -name 'b*' >>/EOO;
+ ./a/ba
+ EOO
+
+ $* a -name 'a*' >>/EOO
+ a
+ a/ab
+ EOO
+ }
+
+ : empty
+ :
+ {
+ touch a;
+
+ $* . -name ''
+ }
+}
+
+: type-primary
+:
+{
+ : regular
+ :
+ {
+ mkdir -p a/b;
+ touch a/b/c;
+
+ $* a -type f >>/EOO
+ a/b/c
+ EOO
+ }
+
+ : directory
+ :
+ {
+ mkdir -p a/b;
+ touch a/b/c;
+
+ $* a -type d >>/EOO
+ a
+ a/b
+ EOO
+ }
+
+ : symlink
+ :
+ if $posix
+ {
+ mkdir -p a/b;
+ touch a/b/c;
+ ln -s c a/b/d;
+
+ $* a -type l >>/EOO
+ a/b/d
+ EOO
+ }
+}
+
+: mindepth-primary
+:
+{
+ mkdir -p a/b/c;
+
+ $* a -mindepth 0 >>/EOO;
+ a
+ a/b
+ a/b/c
+ EOO
+
+ $* a -mindepth 1 >>/EOO;
+ a/b
+ a/b/c
+ EOO
+
+ $* a -mindepth 2 >>/EOO;
+ a/b/c
+ EOO
+
+ $* a -mindepth 3
+}
+
+: maxdepth-primary
+:
+{
+ mkdir -p a/b/c;
+
+ $* a -maxdepth 0 >>/EOO;
+ a
+ EOO
+
+ $* a -maxdepth 1 >>/EOO;
+ a
+ a/b
+ EOO
+
+ $* a -maxdepth 2 >>/EOO;
+ a
+ a/b
+ a/b/c
+ EOO
+
+ $* a -maxdepth 3 >>/EOO
+ a
+ a/b
+ a/b/c
+ EOO
+}
diff --git a/tests/builtin/sed.testscript b/tests/builtin/sed.testscript
index ad26483..2ed3088 100644
--- a/tests/builtin/sed.testscript
+++ b/tests/builtin/sed.testscript
@@ -89,16 +89,10 @@ test.options += -c
sed: empty script
EOE
- : multiple
- :
- $* -e 's/a//' -e 's/a//' 2>>EOE != 0
- sed: multiple scripts
- EOE
-
: invalid
:
$* -e 'z' 2>>EOE != 0
- sed: only 's' command supported
+ sed: unknown command in 'z': only 's' command supported
EOE
}
@@ -156,13 +150,13 @@ test.options += -c
: none
:
$* -e 's' 2>>EOE != 0
- sed: no delimiter for 's' command
+ sed: no delimiter for 's' command in 's'
EOE
: invalid
:
$* -e 's\\' 2>>EOE != 0
- sed: invalid delimiter for 's' command
+ sed: invalid delimiter for 's' command in 's\\'
EOE
}
@@ -171,14 +165,14 @@ test.options += -c
{
: unterminated
:
- $* -e 's/foo' 2>>/EOE != 0
- sed: unterminated 's' command regex
+ $* -e 's/foo' 2>>EOE != 0
+ sed: invalid 's' command 's/foo': no delimiter after regex
EOE
: empty
:
$* -e 's///' 2>>EOE != 0
- sed: empty regex in 's' command
+ sed: invalid 's' command 's///': empty regex
EOE
: invalid
@@ -187,20 +181,20 @@ test.options += -c
: regex errors. For example '*' is parsed successfully.
:
$* -e 's/foo[/bar/' 2>>~%EOE% != 0
- %sed: invalid regex.*%
+ %sed: invalid regex 'foo\[' in 's/foo\[/bar/'.*%
EOE
}
: unterminated-replacement
:
- $* -e 's/foo/bar' 2>>/EOE != 0
- sed: unterminated 's' command replacement
+ $* -e 's/foo/bar' 2>>EOE != 0
+ sed: invalid 's' command 's/foo/bar': no delimiter after replacement
EOE
: invalid-flags
:
$* -e 's/foo/bar/a' 2>>EOE != 0
- sed: invalid 's' command flag 'a'
+ sed: invalid 's' command flag 'a' in 's/foo/bar/a'
EOE
}
@@ -314,6 +308,35 @@ test.options += -c
}
}
}
+
+ : multiple
+ :
+ {
+ $* -e 's/b/x/' -e 's/x/y/' -e 's/c/z/' <'abc' >'ayz' : replace-replacement
+
+ : new-cycle
+ :
+ $* -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO
+ abc
+ klm
+ dxe
+ EOI
+ axc
+ klm
+ dye
+ EOO
+
+ : quiet
+ :
+ $* -n -e 's/b/x/p' -e 's/x/y/p' <<EOI >>EOO
+ abc
+ klm
+ dxe
+ EOI
+ axc
+ dye
+ EOO
+ }
}
: in-place
diff --git a/tests/builtin/timeout.testscript b/tests/builtin/timeout.testscript
new file mode 100644
index 0000000..b8eddc3
--- /dev/null
+++ b/tests/builtin/timeout.testscript
@@ -0,0 +1,30 @@
+# file : tests/builtin/timeout.testscript
+# license : MIT; see accompanying LICENSE file
+
+: async-builtin
+:
+{
+ : expired
+ :
+ $* -s 5 'cat' <'test' | $* -t 1 'cat' >=f 2>'timeout expired' != 0
+
+ : not-expired
+ :
+ echo 'test' | $* -t 10000 'cat' >!
+}
+
+: pseudo-builtin
+:
+{
+ : expired
+ :
+ $* -s 5 'cat' <'test' | $* -t 1 'roundtrip' >=f 2>'timeout expired' != 0
+
+ : not-expired
+ :
+ echo 'test' | $* -t 10000 'roundtrip' >!
+}
+
+: sync-builtin
+:
+$* -t 1 'mkdir' d &d/
diff --git a/tests/command/driver.cxx b/tests/command/driver.cxx
index 0c070ab..9194c13 100644
--- a/tests/command/driver.cxx
+++ b/tests/command/driver.cxx
@@ -1,38 +1,22 @@
// file : tests/command/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios>
#include <string>
#include <vector>
#include <iostream>
#include <stdexcept> // invalid_argument
#include <system_error>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/command.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.path_io;
-import butl.process; // process::print()
-import butl.command;
-import butl.utility;
-import butl.optional;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/command.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/cpfile/driver.cxx b/tests/cpfile/driver.cxx
index c613b49..fe01bdd 100644
--- a/tests/cpfile/driver.cxx
+++ b/tests/cpfile/driver.cxx
@@ -1,29 +1,16 @@
// file : tests/cpfile/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios>
#include <string>
#include <system_error>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/curl/driver.cxx b/tests/curl/driver.cxx
index 18ed1e6..856fde3 100644
--- a/tests/curl/driver.cxx
+++ b/tests/curl/driver.cxx
@@ -1,35 +1,17 @@
// file : tests/curl/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <iostream>
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.curl;
-import butl.path;
-import butl.process;
-import butl.utility; // operator<<(ostream, exception)
-import butl.fdstream;
-
-import butl.optional; // @@ MOD Clang should not be necessary.
-import butl.small_vector; // @@ MOD Clang should not be necessary.
-#else
-#include <libbutl/curl.mxx>
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+
+#include <libbutl/curl.hxx>
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/fdstream.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -122,6 +104,26 @@ http ()
c.out.close ();
assert (!c.wait ());
}
+
+ // POST from stream without --fail.
+ //
+ {
+ curl c (p, path ("-"), nullfd, 2,
+ curl::post,
+ curl::flags::no_fail,
+ u + "/bogus");
+
+ c.out << "bogus" << endl;
+ c.out.close ();
+ assert (c.wait ());
+ }
+
+ // POST empty data.
+ //
+ {
+ curl c (p, nullfd, 1, 2, curl::post, u + "/bogus", "--verbose");
+ assert (!c.wait ());
+ }
}
int
diff --git a/tests/curl/testscript b/tests/curl/testscript
index 3da2306..d2056cd 100644
--- a/tests/curl/testscript
+++ b/tests/curl/testscript
@@ -43,14 +43,22 @@ sudo /usr/sbin/in.tftpd \
: http
:
{
- $* 'http' 2>>EOE
+ $* 'http' 2>>~%EOE%
- curl -s -S --fail --location https://build2.org/bogus
- curl: (22) The requested URL returned error: 404 Not Found
+ curl -sS --fail --location https://build2.org/bogus
+ %curl: \(22\) The requested URL returned error: 404( Not Found)?%
- curl -s -S --fail --location https://build2.org
+ curl -sS --fail --location https://build2.org
- curl -s -S --fail --location --data-binary @- https://build2.org/bogus
- curl: (22) The requested URL returned error: 404 Not Found
+ curl -sS --fail --location --data-binary @- https://build2.org/bogus
+ %curl: \(22\) The requested URL returned error: 404( Not Found)?%
+
+ curl -sS --location --data-binary @- https://build2.org/bogus
+
+ curl -sS --fail --location --data-raw "" --verbose https://build2.org/bogus
+ %.*
+ %> POST /bogus HTTP.+%
+ %.*
+ %curl: \(22\) The requested URL returned error: 404( Not Found)?%
EOE
}
diff --git a/tests/default-options/driver.cxx b/tests/default-options/driver.cxx
index 574e002..766dca8 100644
--- a/tests/default-options/driver.cxx
+++ b/tests/default-options/driver.cxx
@@ -1,61 +1,48 @@
// file : tests/default-options/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
+#include <limits>
#include <string>
#include <vector>
#include <iostream>
+#include <exception>
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.path_io;
-import butl.optional;
-import butl.fdstream;
-import butl.default_options;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx> // eof()
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/default-options.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // eof()
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/default-options.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
// Usage: argv[0] [-f <file>] [-d <start-dir>] [-s <sys-dir>] [-h <home-dir>]
-// [-x <extra-dir>] [-e] [-t] <cmd-options>
+// [-x <extra-dir>] [-a] [-e] [-t] <cmd-options>
//
// Parse default options files, merge them with the command line options, and
// print the resulting options to STDOUT one per line. Note that the options
// instance is a vector of arbitrary strings.
//
-// -f
+// -f <file>
// Default options file name. Can be specified multiple times.
//
-// -d
+// -d <start-dir>
// Directory to start the default options files search from. Can be
// specified multiple times, in which case a common start (parent)
// directory is deduced.
//
-// -s
+// -s <sys-dir>
// System directory.
//
-// -h
+// -h <home-dir>
// Home directory.
//
-// -x
+// -x <extra-dir>
// Extra directory.
//
// -a
@@ -70,6 +57,12 @@ using namespace butl;
// -t
// Trace the default options files search to STDERR.
//
+// -m <num>
+// Maximum number of arguments globally (SIZE_MAX/2 by default).
+//
+// -l <num>
+// Maximum number of arguments in the options file (1024 by default).
+//
int
main (int argc, const char* argv[])
{
@@ -78,56 +71,68 @@ main (int argc, const char* argv[])
class scanner
{
public:
- scanner (const string& f): ifs_ (f, fdopen_mode::in, ifdstream::badbit) {}
+ scanner (const string& f, const string& option, size_t pos)
+ : option_ (option), start_pos_ (pos) {load (path (f));}
bool
more ()
{
- if (peeked_)
- return true;
-
- if (!eof_)
- eof_ = ifs_.peek () == ifdstream::traits_type::eof ();
-
- return !eof_;
+ return i_ < args_.size ();
}
string
peek ()
{
assert (more ());
-
- if (peeked_)
- return *peeked_;
-
- string s;
- getline (ifs_, s);
-
- peeked_ = move (s);
- return *peeked_;
+ return args_[i_];
}
string
next ()
{
assert (more ());
+ return args_[i_++];
+ }
+
+ size_t
+ position ()
+ {
+ return start_pos_ + i_;
+ }
- string s;
- if (peeked_)
+ private:
+ void
+ load (const path& f)
+ {
+ ifdstream is (f, fdopen_mode::in, ifdstream::badbit);
+
+ for (string l; !eof (getline (is, l)); )
{
- s = move (*peeked_);
- peeked_ = nullopt;
- }
- else
- getline (ifs_, s);
+ if (option_ && *option_ == l)
+ {
+ assert (!eof (getline (is, l)));
+
+ // If the path of the file being parsed is not simple and the path
+ // of the file that needs to be loaded is relative, then complete
+ // the latter using the former as a base.
+ //
+ path p (l);
- return s;
+ if (!f.simple () && p.relative ())
+ p = f.directory () / p;
+
+ load (p);
+ }
+ else
+ args_.push_back (move (l));
+ }
}
private:
- ifdstream ifs_;
- bool eof_ = false;
- optional<string> peeked_;
+ optional<string> option_;
+ vector<string> args_;
+ size_t i_ = 0;
+ size_t start_pos_;
};
enum class unknow_mode
@@ -136,6 +141,15 @@ main (int argc, const char* argv[])
fail
};
+ class unknown_argument: public std::exception
+ {
+ public:
+ string argument;
+
+ explicit
+ unknown_argument (string a): argument (move (a)) {}
+ };
+
class options: public vector<string>
{
public:
@@ -152,7 +166,7 @@ main (int argc, const char* argv[])
switch (m)
{
case unknow_mode::stop: return r;
- case unknow_mode::fail: throw invalid_argument (a);
+ case unknow_mode::fail: throw unknown_argument (move (a));
}
}
@@ -195,6 +209,23 @@ main (int argc, const char* argv[])
vector<string> cmd_args;
bool print_entries (false);
bool trace (false);
+ size_t arg_max (numeric_limits<size_t>::max () / 2);
+ size_t arg_max_file (1024);
+
+ auto num = [] (const string& s) -> size_t
+ {
+ assert (!s.empty ());
+
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ uint64_t r (strtoull (s.c_str (), &e, 10)); // Can't throw.
+
+ assert (errno != ERANGE &&
+ e == s.c_str () + s.size () &&
+ r <= numeric_limits<size_t>::max ());
+
+ return static_cast<size_t> (r);
+ };
for (int i (1); i != argc; ++i)
{
@@ -237,6 +268,16 @@ main (int argc, const char* argv[])
{
trace = true;
}
+ else if (a == "-m")
+ {
+ assert (++i != argc);
+ arg_max = num (argv[i]);
+ }
+ else if (a == "-l")
+ {
+ assert (++i != argc);
+ arg_max_file = num (argv[i]);
+ }
else if (a.compare (0, 2, "--") == 0)
cmd_ops.push_back (move (a));
else
@@ -245,7 +286,7 @@ main (int argc, const char* argv[])
// Deduce a common start directory.
//
- fs.start = default_options_start (home_dir, dirs);
+ fs.start = default_options_start (home_dir, dirs.begin (), dirs.end ());
// Load and print the default options.
//
@@ -264,11 +305,20 @@ main (int argc, const char* argv[])
cerr << (overwrite ? "overwriting " : "loading ")
<< (remote ? "remote " : "local ") << f << endl;
},
+ "--options-file",
+ arg_max,
+ arg_max_file,
args);
}
+ catch (const unknown_argument& e)
+ {
+ cerr << "error: unexpected argument '" << e.argument << "'" << endl;
+ return 1;
+ }
catch (const invalid_argument& e)
{
- cerr << "error: unexpected argument '" << e.what () << "'" << endl;
+ cerr << "error: unable to load default options files: " << e.what ()
+ << endl;
return 1;
}
diff --git a/tests/default-options/testscript b/tests/default-options/testscript
index b168ca9..f071701 100644
--- a/tests/default-options/testscript
+++ b/tests/default-options/testscript
@@ -51,6 +51,7 @@
+mkdir -p $d/local/
+echo '--package-foo' >=$d/foo
+ +echo '--package-fox' >+$d/foo
+echo '--package-bar' >=$d/bar
+echo '--package-local-foo' >=$d/local/foo
+echo '--package-local-bar' >=$d/local/bar
@@ -78,7 +79,7 @@
%\.+/home/work/project/.build2/bar,--project-bar,true%
%\.+/home/work/project/.build2/local/foo,--project-local-foo,true%
%\.+/home/work/project/.build2/local/bar,--project-local-bar,true%
- %\.+/home/work/project/package/.build2/foo,--package-foo,true%
+ %\.+/home/work/project/package/.build2/foo,--package-foo --package-fox,true%
%\.+/home/work/project/package/.build2/bar,--package-bar,true%
%\.+/home/work/project/package/.build2/local/foo,--package-local-foo,true%
%\.+/home/work/project/package/.build2/local/bar,--package-local-bar,true%
@@ -129,6 +130,7 @@
--project-local-foo
--project-local-bar
--package-foo
+ --package-fox
--package-bar
--package-local-foo
--package-local-bar
@@ -150,6 +152,26 @@
%overwriting remote \.+/home/work/project/package/.build2/local/baz%
%overwriting remote \.+/home/work/project/package/.build2/local/foo%
EOE
+
+ : positions
+ :
+ {
+ : success
+ :
+ $* -f foo -f bar -d $start_dir -m 36 -l 2 >!
+
+ : fail-file
+ :
+ $* -f foo -f bar -d $start_dir -m 36 -l 1 2>>/~%EOE% != 0
+ %error: unable to load default options files: too many options in file .+/package/\.build2/foo%
+ EOE
+
+ : fail-globally
+ :
+ $* -f foo -f bar -d $start_dir -m 100 -l 10 2>>EOE != 0
+ error: unable to load default options files: too many options files
+ EOE
+ }
}
: args
@@ -345,3 +367,36 @@
EOO
}
}
+
+: options-file
+:
+{
+ d = work/.build2;
+ mkdir -p work/.build2;
+
+ cat <<EOI >=$d/foo;
+ --foo
+ --options-file
+ bar
+ --fox
+ EOI
+
+ cat <<EOI >=$d/bar;
+ --bar
+ --options-file
+ baz
+ --box
+ EOI
+
+ cat <<EOI >=$d/baz;
+ --baz
+ EOI
+
+ $* -d $~/work -f foo >>EOO
+ --foo
+ --bar
+ --baz
+ --box
+ --fox
+ EOO
+}
diff --git a/tests/dir-iterator/driver.cxx b/tests/dir-iterator/driver.cxx
index b73e2e2..c9f7218 100644
--- a/tests/dir-iterator/driver.cxx
+++ b/tests/dir-iterator/driver.cxx
@@ -1,30 +1,17 @@
// file : tests/dir-iterator/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <cstddef> // size_t
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.path_io;
-import butl.utility;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx> // operator<<(ostream, exception)
-#include <libbutl/filesystem.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/timestamp.hxx>
+#include <libbutl/filesystem.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -40,7 +27,7 @@ operator<< (ostream& os, entry_type e)
return os << entry_type_string[static_cast<size_t> (e)];
}
-// Usage: argv[0] [-v] [-i] <dir>
+// Usage: argv[0] [-v] [-i|-d] <dir>
//
// Iterates over a directory filesystem sub-entries, obtains their types and
// target types for symlinks.
@@ -52,6 +39,10 @@ operator<< (ostream& os, entry_type e)
// Ignore dangling symlinks, rather than fail trying to obtain the target
// type.
//
+// -d
+// Detect dangling symlinks, rather than fail trying to obtain the target
+// type.
+//
int
main (int argc, const char* argv[])
{
@@ -59,6 +50,7 @@ main (int argc, const char* argv[])
bool verbose (false);
bool ignore_dangling (false);
+ bool detect_dangling (false);
int i (1);
for (; i != argc; ++i)
@@ -69,6 +61,8 @@ main (int argc, const char* argv[])
verbose = true;
else if (v == "-i")
ignore_dangling = true;
+ else if (v == "-d")
+ detect_dangling = true;
else
break;
}
@@ -79,15 +73,42 @@ main (int argc, const char* argv[])
return 1;
}
+ assert (!ignore_dangling || !detect_dangling);
+
const char* d (argv[i]);
try
{
- for (const dir_entry& de: dir_iterator (dir_path (d), ignore_dangling))
+ for (const dir_entry& de:
+ dir_iterator (dir_path (d),
+ (ignore_dangling ? dir_iterator::ignore_dangling :
+ detect_dangling ? dir_iterator::detect_dangling :
+ dir_iterator::no_follow)))
{
+ timestamp mt (de.mtime ());
+ timestamp at (de.atime ());
+
entry_type lt (de.ltype ());
entry_type t (lt == entry_type::symlink ? de.type () : lt);
+
const path& p (de.path ());
+ path fp (de.base () / p);
+
+ entry_time et (t == entry_type::directory
+ ? dir_time (path_cast<dir_path> (fp))
+ : file_time (fp));
+
+ if (mt != timestamp_unknown)
+ assert (mt == et.modification);
+
+ if (at != timestamp_unknown)
+ assert (at == et.access); // Access time (de.atime ()) vs et.access.
+
+ if (de.mtime () != timestamp_unknown)
+ assert (de.mtime () == et.modification);
+
+ if (de.atime () != timestamp_unknown)
+ assert (de.atime () == et.access);
if (verbose)
{
diff --git a/tests/dir-iterator/testscript b/tests/dir-iterator/testscript
index 03ed164..9bc5513 100644
--- a/tests/dir-iterator/testscript
+++ b/tests/dir-iterator/testscript
@@ -7,6 +7,8 @@ test.options = -v
:
mkdir a;
touch a/b;
+sleep 1;
+echo "a" >=a/b; # Change modification time.
$* a >"reg b"
: dir
@@ -24,16 +26,16 @@ $* a >"dir b"
if ($test.target == $build.host)
{
+if ($cxx.target.class != 'windows')
- lnf = ^ln -s t wd/l &wd/l
- lnd = $lnf
+ lnf = [cmdline] ^ln -s t wd/l &wd/l
+ lnd = [cmdline] $lnf
else
echo 'yes' >=t
if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes'
- lnf = cmd /C 'mklink wd\l t' &wd/l >-
- lnd = cmd /C 'mklink /D wd\l t' &wd/l >-
+ lnf = [cmdline] cmd /C 'mklink wd\l t' &wd/l >-
+ lnd = [cmdline] cmd /C 'mklink /D wd\l t' &wd/l >-
end
- jnc = cmd /C 'mklink /J wd\l wd\t' &wd/l >-
+ jnc = [cmdline] cmd /C 'mklink /J wd\l wd\t' &wd/l >-
end
: symlink
@@ -54,6 +56,12 @@ if ($test.target == $build.host)
$* ../wd >- 2>! != 0 : keep
$* -i ../wd >'reg f': skip
+
+ : detect
+ :
+ $* -d ../wd >>~%EOO%
+ %(reg f|sym unk l)%{2}
+ EOO
}
: dir
@@ -71,6 +79,12 @@ if ($test.target == $build.host)
$* ../wd >- 2>! != 0 : keep
$* -i ../wd >'dir d': skip
+
+ : detect
+ :
+ $* -d ../wd >>~%EOO%
+ %(dir d|sym unk l)%{2}
+ EOO
}
}
@@ -89,5 +103,11 @@ if ($test.target == $build.host)
$* ../wd >- 2>! != 0 : keep
$* -i ../wd >'dir d': skip
+
+ : detect
+ :
+ $* -d ../wd >>~%EOO%
+ %(dir d|sym unk l)%{2}
+ EOO
}
}
diff --git a/tests/entry-time/driver.cxx b/tests/entry-time/driver.cxx
index 1e64b0d..c29837d 100644
--- a/tests/entry-time/driver.cxx
+++ b/tests/entry-time/driver.cxx
@@ -1,31 +1,17 @@
// file : tests/entry-time/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <chrono>
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/timestamp.hxx>
+#include <libbutl/filesystem.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.filesystem;
-
-import butl.optional; // @@ MOD Clang should not be necessary.
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/timestamp.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/fdstream/driver.cxx b/tests/fdstream/driver.cxx
index 3215e02..ec0c54e 100644
--- a/tests/fdstream/driver.cxx
+++ b/tests/fdstream/driver.cxx
@@ -5,9 +5,6 @@
# include <libbutl/win32-utility.hxx>
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#ifndef _WIN32
# include <chrono>
#endif
@@ -15,38 +12,28 @@
#include <ios>
#include <string>
#include <vector>
-#include <thread>
#include <iomanip>
#include <sstream>
#include <fstream>
#include <utility> // move()
#include <iostream>
#include <exception>
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#ifndef _WIN32
-import std.threading;
-#endif
-#endif
-import butl.path;
-import butl.process;
-import butl.fdstream;
-import butl.timestamp;
-import butl.filesystem;
+#ifndef LIBBUTL_MINGW_STDTHREAD
+# include <thread>
#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/timestamp.mxx>
-#include <libbutl/filesystem.mxx>
+# include <libbutl/mingw-thread.hxx>
#endif
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/timestamp.hxx>
+#include <libbutl/filesystem.hxx>
+
+#undef NDEBUG
+#include <cassert>
+
using namespace std;
using namespace butl;
@@ -55,7 +42,9 @@ static const string text2 ("12"); // Keep shorter than text1.
// Windows text mode write-translated form of text1.
//
+#ifdef _WIN32
static const string text3 ("ABCDEF\r\nXYZ");
+#endif
static string
from_stream (ifdstream& is)
@@ -133,6 +122,12 @@ read_time (const path& p, const T& s, size_t n)
int
main (int argc, const char* argv[])
{
+#ifndef LIBBUTL_MINGW_STDTHREAD
+ using std::thread;
+#else
+ using mingw_stdthread::thread;
+#endif
+
bool v (false);
bool child (false);
@@ -470,12 +465,12 @@ main (int argc, const char* argv[])
//
{
string s;
- for (size_t i (0); i < 100; ++i)
+ for (size_t i (0); i < 300; ++i)
s += "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n";
const char* args[] = {argv[0], "-c", nullptr};
- auto test_read = [&args, &s] ()
+ auto test_read = [&args, &s] (bool timeout)
{
try
{
@@ -491,11 +486,29 @@ main (int argc, const char* argv[])
string r;
char buf[300];
+ bool timedout (false);
while (!is.eof ())
{
- pair<size_t, size_t> nd (fdselect (rds, wds));
-
- assert (nd.first == 1 && nd.second == 0 && rds[0].ready);
+ if (timeout)
+ {
+ pair<size_t, size_t> nd (
+ fdselect (rds, wds, chrono::milliseconds (3)));
+
+ assert (((nd.first == 0 && !rds[0].ready) ||
+ (nd.first == 1 && rds[0].ready)) &&
+ nd.second == 0);
+
+ if (nd.first == 0)
+ {
+ timedout = true;
+ continue;
+ }
+ }
+ else
+ {
+ pair<size_t, size_t> nd (fdselect (rds, wds));
+ assert (nd.first == 1 && nd.second == 0 && rds[0].ready);
+ }
for (streamsize n; (n = is.readsome (buf, sizeof (buf))) != 0; )
r.append (buf, static_cast<size_t> (n));
@@ -504,6 +517,10 @@ main (int argc, const char* argv[])
is.close ();
assert (r == s);
+
+ // If timeout is used, then it most likely timed out, at least once.
+ //
+ assert (timedout == timeout);
}
catch (const ios::failure&)
{
@@ -517,7 +534,10 @@ main (int argc, const char* argv[])
vector<thread> threads;
for (size_t i (0); i < 10; ++i)
- threads.emplace_back (test_read);
+ {
+ threads.emplace_back ([&test_read] {test_read (true /* timeout */);});
+ threads.emplace_back ([&test_read] {test_read (false /* timeout */);});
+ }
// While the threads are busy, let's test the skip/non_blocking modes
// combination.
@@ -550,7 +570,85 @@ main (int argc, const char* argv[])
t.join ();
}
- // Test setting and getting position via the non-standard fdbuf interface.
+ // Test (non-blocking) reading with getline_non_blocking().
+ //
+ {
+ const string ln (
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ string s;
+ for (size_t i (0); i < 300; ++i)
+ {
+ s += ln;
+ s += '\n';
+ }
+
+ const char* args[] = {argv[0], "-c", nullptr};
+
+ auto test_read = [&args, &s, &ln] ()
+ {
+ try
+ {
+ process pr (args, -1, -1);
+ ofdstream os (move (pr.out_fd));
+
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::non_blocking,
+ ios_base::badbit);
+
+ os << s;
+ os.close ();
+
+ fdselect_set fds {is.fd ()};
+ fdselect_state& ist (fds[0]);
+
+ string r;
+ for (string l; ist.fd != nullfd; )
+ {
+ if (ist.fd != nullfd && getline_non_blocking (is, l))
+ {
+ if (eof (is))
+ ist.fd = nullfd;
+ else
+ {
+ assert (l == ln);
+
+ r += l;
+ r += '\n';
+
+ l.clear ();
+ }
+
+ continue;
+ }
+
+ ifdselect (fds);
+ }
+
+ is.close ();
+
+ assert (r == s);
+ }
+ catch (const ios::failure&)
+ {
+ assert (false);
+ }
+ catch (const process_error&)
+ {
+ assert (false);
+ }
+ };
+
+ vector<thread> threads;
+ for (size_t i (0); i < 20; ++i)
+ threads.emplace_back (test_read);
+
+ for (thread& t: threads)
+ t.join ();
+ }
+
+ // Test setting and getting position via the non-standard fdstreambuf
+ // interface.
//
// Seek for read.
//
@@ -559,7 +657,7 @@ main (int argc, const char* argv[])
ifdstream is (f);
- fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ fdstreambuf* buf (dynamic_cast<fdstreambuf*> (is.rdbuf ()));
assert (buf != nullptr);
char c;
@@ -602,7 +700,7 @@ main (int argc, const char* argv[])
{
ifdstream is (f, fdopen_mode::in | fdopen_mode::out);
- fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ fdstreambuf* buf (dynamic_cast<fdstreambuf*> (is.rdbuf ()));
assert (buf != nullptr);
// Read till the end of the fragment.
@@ -660,7 +758,7 @@ main (int argc, const char* argv[])
assert (static_cast<streamoff> (is.tellg ()) == 8);
- const fdbuf* buf (dynamic_cast<const fdbuf*> (is.rdbuf ()));
+ const fdstreambuf* buf (dynamic_cast<const fdstreambuf*> (is.rdbuf ()));
assert (buf != nullptr && buf->tellg () == 8);
assert (from_stream (is) == "89");
@@ -679,7 +777,7 @@ main (int argc, const char* argv[])
assert (static_cast<streamoff> (os.tellp ()) == 2);
- const fdbuf* buf (dynamic_cast<const fdbuf*> (os.rdbuf ()));
+ const fdstreambuf* buf (dynamic_cast<const fdstreambuf*> (os.rdbuf ()));
assert (buf != nullptr && buf->tellp () == 2);
os.close ();
@@ -732,7 +830,7 @@ main (int argc, const char* argv[])
assert (static_cast<streamoff> (is.tellg ()) == 8);
- const fdbuf* buf (dynamic_cast<const fdbuf*> (is.rdbuf ()));
+ const fdstreambuf* buf (dynamic_cast<const fdstreambuf*> (is.rdbuf ()));
assert (buf != nullptr && buf->tellp () == 8);
assert (from_stream (is) == "6789");
diff --git a/tests/host-os-release/buildfile b/tests/host-os-release/buildfile
new file mode 100644
index 0000000..cd277ff
--- /dev/null
+++ b/tests/host-os-release/buildfile
@@ -0,0 +1,6 @@
+# file : tests/host-os-release/buildfile
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs testscript
diff --git a/tests/host-os-release/driver.cxx b/tests/host-os-release/driver.cxx
new file mode 100644
index 0000000..249cbff
--- /dev/null
+++ b/tests/host-os-release/driver.cxx
@@ -0,0 +1,58 @@
+// file : tests/host-os-release/driver.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/host-os-release.hxx>
+
+#include <libbutl/path.hxx>
+
+namespace butl
+{
+ LIBBUTL_SYMEXPORT os_release
+ host_os_release_linux (path f = {});
+}
+
+#include <iostream>
+
+#undef NDEBUG
+#include <cassert>
+
+using namespace std;
+using namespace butl;
+
+int
+main (int argc, char* argv[])
+{
+ assert (argc >= 2); // <host-target-triplet>
+
+ target_triplet host (argv[1]);
+
+ os_release r;
+ if (host.class_ == "linux")
+ {
+ assert (argc == 3); // <host-target-triplet> <file-path>
+ r = host_os_release_linux (path (argv[2]));
+ }
+ else
+ {
+ assert (argc == 2);
+ if (optional<os_release> o = host_os_release (host))
+ r = move (*o);
+ else
+ {
+ cerr << "unrecognized host os " << host.string () << endl;
+ return 1;
+ }
+ }
+
+ cout << r.name_id << '\n';
+ for (auto b (r.like_ids.begin ()), i (b); i != r.like_ids.end (); ++i)
+ cout << (i != b ? "|" : "") << *i;
+ cout << '\n'
+ << r.version_id << '\n'
+ << r.variant_id << '\n'
+ << r.name << '\n'
+ << r.version_codename << '\n'
+ << r.variant << '\n';
+
+ return 0;
+}
diff --git a/tests/host-os-release/testscript b/tests/host-os-release/testscript
new file mode 100644
index 0000000..a18aa74
--- /dev/null
+++ b/tests/host-os-release/testscript
@@ -0,0 +1,223 @@
+# file : tests/host-os-release/testscript
+# license : MIT; see accompanying LICENSE file
+
+: linux
+:
+$* x86_64-linux-gnu os-release >>EOO
+ linux
+
+
+
+ Linux
+
+
+ EOO
+
+: debian-10
+:
+cat <<EOI >=os-release;
+ PRETTY_NAME="Debian GNU/Linux 10 (buster)"
+ NAME="Debian GNU/Linux"
+ VERSION_ID="10"
+ VERSION="10 (buster)"
+ VERSION_CODENAME=buster
+ ID=debian
+ HOME_URL="https://www.debian.org/"
+ SUPPORT_URL="https://www.debian.org/support"
+ BUG_REPORT_URL="https://bugs.debian.org/"
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ debian
+
+ 10
+
+ Debian GNU/Linux
+ buster
+
+ EOO
+
+: debian-testing
+:
+cat <<EOI >=os-release;
+ PRETTY_NAME="Debian GNU/Linux bookworm/sid"
+ NAME="Debian GNU/Linux"
+ ID=debian
+ HOME_URL="https://www.debian.org/"
+ SUPPORT_URL="https://www.debian.org/support"
+ BUG_REPORT_URL="https://bugs.debian.org/"
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ debian
+
+
+
+ Debian GNU/Linux
+
+
+ EOO
+
+: ubuntu-20.04
+:
+cat <<EOI >=os-release;
+ NAME="Ubuntu"
+ VERSION="20.04.1 LTS (Focal Fossa)"
+ ID=ubuntu
+ ID_LIKE=debian
+ PRETTY_NAME="Ubuntu 20.04.1 LTS"
+ VERSION_ID="20.04"
+ HOME_URL="https://www.ubuntu.com/"
+ SUPPORT_URL="https://help.ubuntu.com/"
+ BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
+ PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
+ VERSION_CODENAME=focal
+ UBUNTU_CODENAME=focal
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ ubuntu
+ debian
+ 20.04
+
+ Ubuntu
+ focal
+
+ EOO
+
+: fedora-35
+:
+cat <<EOI >=os-release;
+ NAME="Fedora Linux"
+ VERSION="35 (Workstation Edition)"
+ ID=fedora
+ VERSION_ID=35
+ VERSION_CODENAME=""
+ PLATFORM_ID="platform:f35"
+ PRETTY_NAME="Fedora Linux 35 (Workstation Edition)"
+ ANSI_COLOR="0;38;2;60;110;180"
+ LOGO=fedora-logo-icon
+ CPE_NAME="cpe:/o:fedoraproject:fedora:35"
+ HOME_URL="https://fedoraproject.org/"
+ DOCUMENTATION_URL="https://docs.fedoraproject.org/en-US/fedora/f35/system-administrators-guide/"
+ SUPPORT_URL="https://ask.fedoraproject.org/"
+ BUG_REPORT_URL="https://bugzilla.redhat.com/"
+ REDHAT_BUGZILLA_PRODUCT="Fedora"
+ REDHAT_BUGZILLA_PRODUCT_VERSION=35
+ REDHAT_SUPPORT_PRODUCT="Fedora"
+ REDHAT_SUPPORT_PRODUCT_VERSION=35
+ PRIVACY_POLICY_URL="https://fedoraproject.org/wiki/Legal:PrivacyPolicy"
+ VARIANT="Workstation Edition"
+ VARIANT_ID=workstation
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ fedora
+
+ 35
+ workstation
+ Fedora Linux
+
+ Workstation Edition
+ EOO
+
+: rhel-8.2
+:
+cat <<EOI >=os-release;
+ NAME="Red Hat Enterprise Linux"
+ VERSION="8.2 (Ootpa)"
+ ID="rhel"
+ ID_LIKE="fedora"
+ VERSION_ID="8.2"
+ PLATFORM_ID="platform:el8"
+ PRETTY_NAME="Red Hat Enterprise Linux 8.2 (Ootpa)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:redhat:enterprise_linux:8.2:GA"
+ HOME_URL="https://www.redhat.com/"
+ BUG_REPORT_URL="https://bugzilla.redhat.com/"
+
+ REDHAT_BUGZILLA_PRODUCT="Red Hat Enterprise Linux 8"
+ REDHAT_BUGZILLA_PRODUCT_VERSION=8.2
+ REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux"
+ REDHAT_SUPPORT_PRODUCT_VERSION="8.2"
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ rhel
+ fedora
+ 8.2
+
+ Red Hat Enterprise Linux
+
+
+ EOO
+
+: centos-8
+:
+cat <<EOI >=os-release;
+ NAME="CentOS Linux"
+ VERSION="8 (Core)"
+ ID="centos"
+ ID_LIKE="rhel fedora"
+ VERSION_ID="8"
+ PLATFORM_ID="platform:el8"
+ PRETTY_NAME="CentOS Linux 8 (Core)"
+ ANSI_COLOR="0;31"
+ CPE_NAME="cpe:/o:centos:centos:8"
+ HOME_URL="https://www.centos.org/"
+ BUG_REPORT_URL="https://bugs.centos.org/"
+
+ CENTOS_MANTISBT_PROJECT="CentOS-8"
+ CENTOS_MANTISBT_PROJECT_VERSION="8"
+ REDHAT_SUPPORT_PRODUCT="centos"
+ REDHAT_SUPPORT_PRODUCT_VERSION="8"
+ EOI
+$* x86_64-linux-gnu os-release >>EOO
+ centos
+ rhel|fedora
+ 8
+
+ CentOS Linux
+
+
+ EOO
+
+: macos
+:
+if ($build.host.class == 'macos')
+{
+ $* $build.host >>~/EOO/
+ macos
+
+ /[0-9]+(\.[0-9]+(\.[0-9]+)?)?/
+
+ Mac OS
+
+
+ EOO
+}
+
+: freebsd
+:
+if ($build.host.system == 'freebsd')
+{
+ $* $build.host >>~/EOO/
+ freebsd
+
+ /[0-9]+\.[0-9]+/
+
+ FreeBSD
+
+
+ EOO
+}
+
+: windows
+:
+if ($build.host.system == 'windows')
+{
+ $* $build.host >>~/EOO/
+ windows
+
+ /[0-9]+(\.[0-9]+)?/
+
+ Windows
+
+
+ EOO
+}
diff --git a/tests/link/driver.cxx b/tests/link/driver.cxx
index 231da4b..b659838 100644
--- a/tests/link/driver.cxx
+++ b/tests/link/driver.cxx
@@ -1,34 +1,19 @@
// file : tests/link/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <set>
#include <utility> // pair
#include <iostream> // cerr
#include <system_error>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.path_io;
-import butl.utility;
-import butl.fdstream;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -122,11 +107,11 @@ link_dir (const dir_path& target,
dir_path tp (target.absolute () ? target : link.directory () / target);
set<pair<entry_type, path>> te;
- for (const dir_entry& de: dir_iterator (tp, false /* ignore_dangling */))
+ for (const dir_entry& de: dir_iterator (tp, dir_iterator::no_follow))
te.emplace (de.ltype (), de.path ());
set<pair<entry_type, path>> le;
- for (const dir_entry& de: dir_iterator (link, false /* ignore_dangling */))
+ for (const dir_entry& de: dir_iterator (link, dir_iterator::no_follow))
le.emplace (de.ltype (), de.path ());
return te == le;
@@ -321,7 +306,7 @@ main (int argc, const char* argv[])
assert (pe.first && pe.second.type == entry_type::directory);
}
- for (const dir_entry& de: dir_iterator (td, false /* ignore_dangling */))
+ for (const dir_entry& de: dir_iterator (td, dir_iterator::no_follow))
{
assert (de.path () != path ("dslink") ||
(de.type () == entry_type::directory &&
@@ -383,7 +368,9 @@ main (int argc, const char* argv[])
{
mksymlink (dp / "non-existing", dp / "lnk");
assert (!dir_empty (dp));
- assert (dir_iterator (dp, true /* ignore_dangling */) == dir_iterator ());
+
+ assert (dir_iterator (dp, dir_iterator::ignore_dangling) ==
+ dir_iterator ());
}
catch (const system_error& e)
{
@@ -408,10 +395,10 @@ main (int argc, const char* argv[])
mksymlink (dp / "non-existing", dp / "lnk1", true /* dir */);
assert (!dir_empty (dp));
- assert (dir_iterator (dp, true /* ignore_dangling */) == dir_iterator ());
+ assert (dir_iterator (dp, dir_iterator::ignore_dangling) == dir_iterator ());
mksymlink (tgd, dp / "lnk2", true /* dir */);
- assert (dir_iterator (dp, true /* ignore_dangling */) != dir_iterator ());
+ assert (dir_iterator (dp, dir_iterator::ignore_dangling) != dir_iterator ());
rmdir_r (dp);
assert (dir_exists (tgd));
diff --git a/tests/lz4/buildfile b/tests/lz4/buildfile
new file mode 100644
index 0000000..1f9a244
--- /dev/null
+++ b/tests/lz4/buildfile
@@ -0,0 +1,6 @@
+# file : tests/lz4/buildfile
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs testscript file{*.lz4}
diff --git a/tests/lz4/driver.cxx b/tests/lz4/driver.cxx
new file mode 100644
index 0000000..8139c34
--- /dev/null
+++ b/tests/lz4/driver.cxx
@@ -0,0 +1,46 @@
+// file : tests/lz4/driver.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <iostream>
+#include <exception>
+
+#include <libbutl/lz4.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx> // entry_stat, path_entry()
+
+#undef NDEBUG
+#include <cassert>
+
+using namespace std;
+using namespace butl;
+
+// Usage: argv[0] (-c|-d) <input-file> <output-file>
+//
+int
+main (int argc, const char* argv[])
+try
+{
+ assert (argc == 4);
+
+ ifdstream ifs (argv[2], fdopen_mode::binary, ifdstream::badbit);
+ ofdstream ofs (argv[3], fdopen_mode::binary);
+
+ if (argv[1][1] == 'c')
+ {
+ lz4::compress (ofs, ifs,
+ 1 /* compression_level */,
+ 4 /* block_size_id (64KB) */,
+ fdstat (ifs.fd ()).size);
+ }
+ else
+ {
+ lz4::decompress (ofs, ifs);
+ }
+
+ ofs.close ();
+}
+catch (const std::exception& e)
+{
+ cerr << e.what () << endl;
+ return 1;
+}
diff --git a/tests/lz4/testscript b/tests/lz4/testscript
new file mode 100644
index 0000000..b064cff
--- /dev/null
+++ b/tests/lz4/testscript
@@ -0,0 +1,85 @@
+# file : tests/lz4/testscript
+# license : MIT; see accompanying LICENSE file
+
++touch zero
++cat <:'1' >=one
++cat <'The quick brown fox jumps over the lazy dog.' >=small
++cat <<EOI >=1kb
+The quick brown fox jumps over the lazy dog. The quick brown fox jumps over
+the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
+jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The
+quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
+lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
+jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The
+quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
+lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
+jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The
+quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
+lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
+jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The
+quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
+lazy dog. The quick brown fox jumps over th
+EOI
++cat 1kb 1kb 1kb 1kb 1kb 1kb 1kb 1kb >=8kb
++cat 8kb 8kb 8kb 8kb 8kb 8kb 8kb 8kb >=64kb
++cat 64kb 64kb 64kb 64kb 64kb 64kb 64kb 64kb >=512kb
+
+: rt-zero
+:
+$* -c ../zero zero.lz4 &zero.lz4;
+$* -d zero.lz4 zero &zero;
+diff ../zero zero
+
+: rt-one
+:
+$* -c ../one one.lz4 &one.lz4;
+$* -d one.lz4 one &one;
+diff ../one one
+
+: rt-small
+:
+$* -c ../small small.lz4 &small.lz4;
+$* -d small.lz4 small &small;
+diff ../small small
+
+: rt-1kb
+:
+$* -c ../1kb 1kb.lz4 &1kb.lz4;
+$* -d 1kb.lz4 1kb &1kb;
+diff ../1kb 1kb
+
+: rt-8kb
+:
+$* -c ../8kb 8kb.lz4 &8kb.lz4;
+$* -d 8kb.lz4 8kb &8kb;
+diff ../8kb 8kb
+
+: rt-64kb
+:
+$* -c ../64kb 64kb.lz4 &64kb.lz4;
+$* -d 64kb.lz4 64kb &64kb;
+diff ../64kb 64kb
+
+: rt-512kb
+:
+$* -c ../512kb 512kb.lz4 &512kb.lz4;
+$* -d 512kb.lz4 512kb &512kb;
+diff ../512kb 512kb
+
+: truncated-header6
+:
+$* -d $src_base/truncated-header6.lz4 out &out 2>>EOE !=0
+incomplete LZ4 frame header
+EOE
+
+: truncated-header12
+:
+$* -d $src_base/truncated-header12.lz4 out &out 2>>EOE !=0
+incomplete LZ4 frame header
+EOE
+
+: truncated-content
+:
+$* -d $src_base/truncated-content.lz4 out &out 2>>EOE !=0
+incomplete LZ4 compressed content
+EOE
diff --git a/tests/lz4/truncated-content.lz4 b/tests/lz4/truncated-content.lz4
new file mode 100644
index 0000000..2a9a39d
--- /dev/null
+++ b/tests/lz4/truncated-content.lz4
Binary files differ
diff --git a/tests/lz4/truncated-header12.lz4 b/tests/lz4/truncated-header12.lz4
new file mode 100644
index 0000000..101088e
--- /dev/null
+++ b/tests/lz4/truncated-header12.lz4
Binary files differ
diff --git a/tests/lz4/truncated-header6.lz4 b/tests/lz4/truncated-header6.lz4
new file mode 100644
index 0000000..a00e998
--- /dev/null
+++ b/tests/lz4/truncated-header6.lz4
@@ -0,0 +1 @@
+"Ml@ \ No newline at end of file
diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx
index a34f2b7..56c614a 100644
--- a/tests/manifest-parser/driver.cxx
+++ b/tests/manifest-parser/driver.cxx
@@ -1,29 +1,17 @@
// file : tests/manifest-parser/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <string>
#include <utility> // pair, move()
#include <sstream>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-import butl.manifest_parser;
-#else
-#include <libbutl/optional.mxx>
-#include <libbutl/manifest-parser.mxx>
-#endif
+
+#include <libbutl/optional.hxx>
+#include <libbutl/manifest-parser.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
@@ -176,14 +164,18 @@ namespace butl
// Manifest value splitting (into the value/comment pair).
//
+ // Single-line.
+ //
{
- auto p (manifest_parser::split_comment ("value\\; text ; comment text"));
- assert (p.first == "value; text" && p.second == "comment text");
+ auto p (manifest_parser::split_comment (
+ "\\value\\\\\\; text ; comment text"));
+
+ assert (p.first == "\\value\\; text" && p.second == "comment text");
}
{
- auto p (manifest_parser::split_comment ("value"));
- assert (p.first == "value" && p.second == "");
+ auto p (manifest_parser::split_comment ("value\\"));
+ assert (p.first == "value\\" && p.second == "");
}
{
@@ -191,6 +183,59 @@ namespace butl
assert (p.first == "" && p.second == "comment");
}
+ // Multi-line.
+ //
+ {
+ auto p (manifest_parser::split_comment ("value\n;"));
+ assert (p.first == "value" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n"));
+ assert (p.first == "value\ntext\n" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n;"));
+ assert (p.first == "value\ntext" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n;\n"));
+ assert (p.first == "value\ntext" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("\n\\\nvalue\ntext\n"
+ ";\n"
+ "\n\n comment\ntext"));
+
+ assert (p.first == "\n\\\nvalue\ntext" && p.second ==
+ "\n\n comment\ntext");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("\n;\ncomment"));
+ assert (p.first == "" && p.second == "comment");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (";\ncomment"));
+ assert (p.first == "" && p.second == "comment");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (";\n"));
+ assert (p.first == "" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (
+ "\\;\n\\\\;\n\\\\\\;\n\\\\\\\\;\n\\\\\\\\\\;"));
+
+ assert (p.first == ";\n\\;\n\\;\n\\\\;\n\\\\;" && p.second == "");
+ }
+
// UTF-8.
//
assert (test (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0",
diff --git a/tests/manifest-rewriter/driver.cxx b/tests/manifest-rewriter/driver.cxx
index ec73d81..3b1dfe9 100644
--- a/tests/manifest-rewriter/driver.cxx
+++ b/tests/manifest-rewriter/driver.cxx
@@ -1,36 +1,21 @@
// file : tests/manifest-rewriter/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <string>
#include <cstdint> // uint64_t
#include <utility> // move()
#include <iostream>
#include <exception>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.optional;
-import butl.fdstream;
-import butl.manifest_parser;
-import butl.manifest_rewriter;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/manifest-parser.mxx>
-#include <libbutl/manifest-rewriter.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/manifest-parser.hxx>
+#include <libbutl/manifest-rewriter.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
@@ -82,17 +67,26 @@ namespace butl
{{"a", "xyz"}, edit_cmd {"x", "y", "c"}, {"e", "123"}}) ==
":1\na: xyz\nc:d\nx: y\ne: 123");
- assert (edit (":1\na: b", {{"a", "xy\nz"}}) == ":1\na: \\\nxy\nz\n\\");
+ assert (edit (":1\na: b", {{"a", "xy\nz"}}) == ":1\na:\\\nxy\nz\n\\");
+
+ assert (edit (":1\na:\\\nxy\nz\n\\\nb: c", {{"a", "ab\ncd\ne"}}) ==
+ ":1\na:\\\nab\ncd\ne\n\\\nb: c");
+
+ assert (edit (":1\na: \\\nxy\nz\n\\\nb: c", {{"a", "ab\ncd\ne"}}) ==
+ ":1\na:\\\nab\ncd\ne\n\\\nb: c");
+
+ assert (edit (":1\na:\n\\\nxy\nz\n\\\nb: c", {{"a", "ab\ncd\ne"}}) ==
+ ":1\na:\\\nab\ncd\ne\n\\\nb: c");
assert (edit (":1\n", {{"a", "b", ""}}) == ":1\na: b\n");
assert (edit (":1\n abc: b",
{{"abc", "xyz"}}) ==
- ":1\n abc: \\\nxyz\n\\");
+ ":1\n abc:\\\nxyz\n\\");
assert (edit (":1\n a\xD0\xB0g : b",
{{"a\xD0\xB0g", "xyz"}}) ==
- ":1\n a\xD0\xB0g : \\\nxyz\n\\");
+ ":1\n a\xD0\xB0g :\\\nxyz\n\\");
// Test editing of manifests that contains CR characters.
//
diff --git a/tests/manifest-roundtrip/buildfile b/tests/manifest-roundtrip/buildfile
index 8056f64..7ddcc1f 100644
--- a/tests/manifest-roundtrip/buildfile
+++ b/tests/manifest-roundtrip/buildfile
@@ -3,5 +3,4 @@
import libs = libbutl%lib{butl}
-exe{driver}: {hxx cxx}{*} $libs
-exe{driver}: manifest: test.roundtrip = true
+exe{driver}: {hxx cxx}{*} $libs testscript
diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx
index 53b688e..c63a729 100644
--- a/tests/manifest-roundtrip/driver.cxx
+++ b/tests/manifest-roundtrip/driver.cxx
@@ -1,45 +1,60 @@
// file : tests/manifest-roundtrip/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // operator<<(ostream, exception)
-import butl.fdstream;
-import butl.manifest_parser;
-import butl.manifest_serializer;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/manifest-parser.mxx>
-#include <libbutl/manifest-serializer.mxx>
-#endif
+
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/fdstream.hxx>
+#include <libbutl/manifest-parser.hxx>
+#include <libbutl/manifest-serializer.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
+// Usage: argv[0] [-m] [-s]
+//
+// Round-trip a manifest reading it from stdin and printing to stdout.
+//
+// -m
+// Serialize multi-line manifest values using the v2 form.
+//
+// -s
+// Split values into the value/comment pairs and merge them back before
+// printing.
+//
int
-main ()
+main (int argc, const char* argv[])
try
{
+ bool multiline_v2 (false);
+ bool split (false);
+
+ for (int i (1); i != argc; ++i)
+ {
+ string v (argv[i]);
+
+ if (v == "-m")
+ multiline_v2 = true;
+ else if (v == "-s")
+ split = true;
+ }
+
// Read/write in binary mode.
//
stdin_fdmode (fdstream_mode::binary);
stdout_fdmode (fdstream_mode::binary);
manifest_parser p (cin, "stdin");
- manifest_serializer s (cout, "stdout");
+
+ manifest_serializer s (cout,
+ "stdout",
+ false /* long_lines */,
+ {} /* filter */,
+ multiline_v2);
for (bool eom (true), eos (false); !eos; )
{
@@ -53,6 +68,12 @@ try
else
eom = false;
+ if (split)
+ {
+ const auto& vc (manifest_parser::split_comment (nv.value));
+ nv.value = manifest_serializer::merge_comment (vc.first, vc.second);
+ }
+
s.next (nv.name, nv.value);
}
}
diff --git a/tests/manifest-roundtrip/manifest b/tests/manifest-roundtrip/manifest
deleted file mode 100644
index 23c2730..0000000
--- a/tests/manifest-roundtrip/manifest
+++ /dev/null
@@ -1,32 +0,0 @@
-: 1
-name: libbpkg
-version: 1.0.1
-summary: build2 package manager library
-license: MIT
-tags: c++, package, manager, bpkg
-description: A very very very very very very very very very very very very\
- very very very very very very very very very very very very very very very\
- very very long description.
-changes: \
-1.0.1
- - Fixed a very very very very very very very very very very very very very\
- very annoying bug.
-1.0.0
- - Firts public release
- - Lots of really cool features
-\
-url: http://www.codesynthesis.com/projects/libstudxml/
-email: build-users@codesynthesis.com; Public mailing list, posts by\
- non-members are allowed but moderated.
-package-email: boris@codesynthesis.com; Direct email to the author.
-depends: libbutl
-depends: * build2
-depends: ?* bpkg
-requires: ?* linux | windows
-requires: c++11
-:
-path: c:\windows\\
-path: \
-
-c:\windows\\
-\
diff --git a/tests/manifest-roundtrip/testscript b/tests/manifest-roundtrip/testscript
new file mode 100644
index 0000000..a228b0f
--- /dev/null
+++ b/tests/manifest-roundtrip/testscript
@@ -0,0 +1,118 @@
+# file : tests/manifest-roundtrip/testscript
+# license : MIT; see accompanying LICENSE file
+
+: basics
+:
+$* <<EOF >>EOF
+ : 1
+ name: libbpkg
+ version: 1.0.1
+ summary: build2 package manager library
+ license: MIT
+ tags: c++, package, manager, bpkg
+ description: A very very very very very very very very very very very very\
+ very very very very very very very very very very very very very very very\
+ very very long description.
+ changes:\
+ 1.0.1
+ - Fixed a very very very very very very very very very very very very very\
+ very annoying bug.
+ 1.0.0
+ - Firts public release
+ - Lots of really cool features
+ \
+ url: http://www.codesynthesis.com/projects/libstudxml/
+ email: build-users@codesynthesis.com; Public mailing list, posts by\
+ non-members are allowed but moderated.
+ package-email: boris@codesynthesis.com; Direct email to the author.
+ depends: libbutl
+ depends: * build2
+ depends: * bpkg
+ requires: * linux ? ($linux) | windows ? ($windows)
+ requires: c++11
+ :
+ path: c:\windows\\
+ path:\
+
+ c:\windows\\
+ \
+ EOF
+
+: multiline-v2
+:
+$* -m <<EOF >>EOF
+ : 1
+ name: libbpkg
+ version: 1.0.1
+ summary: build2 package manager library
+ license: MIT
+ tags: c++, package, manager, bpkg
+ description: A very very very very very very very very very very very very\
+ very very very very very very very very very very very very very very very\
+ very very long description.
+ changes:
+ \
+ 1.0.1
+ - Fixed a very very very very very very very very very very very very very\
+ very annoying bug.
+ 1.0.0
+ - Firts public release
+ - Lots of really cool features
+ \
+ url: http://www.codesynthesis.com/projects/libstudxml/
+ email: build-users@codesynthesis.com; Public mailing list, posts by\
+ non-members are allowed but moderated.
+ package-email: boris@codesynthesis.com; Direct email to the author.
+ depends: libbutl
+ depends: * build2
+ depends: * bpkg
+ requires: * linux ? ($linux) | windows ? ($windows)
+ requires: c++11
+ :
+ path: c:\windows\\
+ path:
+ \
+
+ c:\windows\\
+ \
+ EOF
+
+: split-merge-comment
+:
+$* -s <<EOF >>EOF
+ : 1
+ info:\
+ value
+ text
+ \
+ info:\
+ value
+ text
+ ;
+ comment
+ \
+ info:\
+ ;
+ comment
+ text
+ \
+ info:\
+ value
+ \;
+ \\
+ ;
+ comment
+ \
+ info:\
+ value
+ \\;
+ ;
+ comment
+ \
+ info:\
+ value
+ \\\\;
+ ;
+ comment
+ \
+ EOF
diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx
index c818b4a..a003fa4 100644
--- a/tests/manifest-serializer/driver.cxx
+++ b/tests/manifest-serializer/driver.cxx
@@ -1,27 +1,16 @@
// file : tests/manifest-serializer/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <string>
#include <utility> // pair
#include <sstream>
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/manifest-serializer.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.manifest_serializer;
-#else
-#include <libbutl/manifest-serializer.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -200,21 +189,21 @@ main ()
//
string n ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
assert (test ({{"","1"},{n,"x"},{"",""},{"",""}},
- ": 1\n" + n + ": \\\nx\n\\\n"));
+ ": 1\n" + n + ":\\\nx\n\\\n"));
assert (test ({{"","1"},{"a","\n"},{"",""},{"",""}},
- ": 1\na: \\\n\n\n\\\n"));
+ ": 1\na:\\\n\n\n\\\n"));
assert (test ({{"","1"},{"a","\n\n"},{"",""},{"",""}},
- ": 1\na: \\\n\n\n\n\\\n"));
+ ": 1\na:\\\n\n\n\n\\\n"));
assert (test ({{"","1"},{"a","\nx\n"},{"",""},{"",""}},
- ": 1\na: \\\n\nx\n\n\\\n"));
+ ": 1\na:\\\n\nx\n\n\\\n"));
assert (test ({{"","1"},{"a","x\ny\nz"},{"",""},{"",""}},
- ": 1\na: \\\nx\ny\nz\n\\\n"));
+ ": 1\na:\\\nx\ny\nz\n\\\n"));
assert (test ({{"","1"},{"a"," x"},{"",""},{"",""}},
- ": 1\na: \\\n x\n\\\n"));
+ ": 1\na:\\\n x\n\\\n"));
assert (test ({{"","1"},{"a","x "},{"",""},{"",""}},
- ": 1\na: \\\nx \n\\\n"));
+ ": 1\na:\\\nx \n\\\n"));
assert (test ({{"","1"},{"a"," x "},{"",""},{"",""}},
- ": 1\na: \\\n x \n\\\n"));
+ ": 1\na:\\\n x \n\\\n"));
// The long lines mode.
//
@@ -223,51 +212,76 @@ main ()
true /* long_lines */));
assert (test ({{"","1"},{"a", " abc\n" + l1 + "\ndef"},{"",""},{"",""}},
- ": 1\na: \\\n abc\n" + l1 + "\ndef\n\\\n",
+ ": 1\na:\\\n abc\n" + l1 + "\ndef\n\\\n",
true /* long_lines */));
assert (test ({{"","1"},{n,l1},{"",""},{"",""}},
- ": 1\n" + n + ": \\\n" + l1 + "\n\\\n",
+ ": 1\n" + n + ":\\\n" + l1 + "\n\\\n",
true /* long_lines */));
// Carriage return character.
//
assert (test ({{"","1"},{"a","x\ry"},{"",""},{"",""}},
- ": 1\na: \\\nx\ny\n\\\n"));
+ ": 1\na:\\\nx\ny\n\\\n"));
assert (test ({{"","1"},{"a","x\r"},{"",""},{"",""}},
- ": 1\na: \\\nx\n\n\\\n"));
+ ": 1\na:\\\nx\n\n\\\n"));
assert (test ({{"","1"},{"a","x\r\ny"},{"",""},{"",""}},
- ": 1\na: \\\nx\ny\n\\\n"));
+ ": 1\na:\\\nx\ny\n\\\n"));
assert (test ({{"","1"},{"a","x\r\n"},{"",""},{"",""}},
- ": 1\na: \\\nx\n\n\\\n"));
+ ": 1\na:\\\nx\n\n\\\n"));
// Extra three x's are for the leading name part ("a: ") that we
// don't have.
//
assert (test ({{"","1"},{"a","\nxxx" + l1},{"",""},{"",""}},
- ": 1\na: \\\n\nxxx" + e1 + "\n\\\n"));
+ ": 1\na:\\\n\nxxx" + e1 + "\n\\\n"));
assert (test ({{"","1"},{"a","\nxxx" + l2},{"",""},{"",""}},
- ": 1\na: \\\n\nxxx" + e2 + "\n\\\n"));
+ ": 1\na:\\\n\nxxx" + e2 + "\n\\\n"));
assert (test ({{"","1"},{"a","\nxxx" + l3},{"",""},{"",""}},
- ": 1\na: \\\n\nxxx" + e3 + "\n\\\n"));
+ ": 1\na:\\\n\nxxx" + e3 + "\n\\\n"));
assert (test ({{"","1"},{"a","\nxxx" + l4},{"",""},{"",""}},
- ": 1\na: \\\n\nxxx" + e4 + "\n\\\n"));
+ ": 1\na:\\\n\nxxx" + e4 + "\n\\\n"));
// Backslash escaping (simple and multi-line).
//
assert (test ({{"","1"},{"a","c:\\"},{"",""},{"",""}},
": 1\na: c:\\\\\n"));
assert (test ({{"","1"},{"a","c:\\\nd:\\"},{"",""},{"",""}},
- ": 1\na: \\\nc:\\\\\nd:\\\\\n\\\n"));
+ ": 1\na:\\\nc:\\\\\nd:\\\\\n\\\n"));
// Manifest value/comment merging.
//
- assert (manifest_serializer::merge_comment ("value; text", "comment") ==
- "value\\; text; comment");
+ // Single-line.
+ //
+ assert (manifest_serializer::merge_comment ("value\\; text", "comment") ==
+ "value\\\\\\; text; comment");
assert (manifest_serializer::merge_comment ("value text", "") ==
"value text");
+ // Multi-line.
+ //
+ assert (manifest_serializer::merge_comment ("value\n;\ntext", "comment") ==
+ "value\n\\;\ntext\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("value\n\\;\ntext\n",
+ "comment") ==
+ "value\n\\\\;\ntext\n\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("value\n\\\\;\ntext\n",
+ "comment") ==
+ "value\n\\\\\\\\;\ntext\n\n;\ncomment");
+
+
+ assert (manifest_serializer::merge_comment ("value\n\\\ntext", "comment") ==
+ "value\n\\\ntext\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("\\", "comment\n") ==
+ "\\\n;\ncomment\n");
+
+ assert (manifest_serializer::merge_comment ("", "comment\ntext") ==
+ ";\ncomment\ntext");
+
// Filtering.
//
assert (test ({{"","1"},{"a","abc"},{"b","bca"},{"c","cab"},{"",""},{"",""}},
diff --git a/tests/move-only-function/buildfile b/tests/move-only-function/buildfile
new file mode 100644
index 0000000..9012fd6
--- /dev/null
+++ b/tests/move-only-function/buildfile
@@ -0,0 +1,6 @@
+# file : tests/move-only-function/buildfile
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs
diff --git a/tests/move-only-function/driver.cxx b/tests/move-only-function/driver.cxx
new file mode 100644
index 0000000..b94d674
--- /dev/null
+++ b/tests/move-only-function/driver.cxx
@@ -0,0 +1,149 @@
+// file : tests/move-only-function/driver.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <memory> // unique_ptr
+#include <utility> // move()
+
+#include <libbutl/move-only-function.hxx>
+
+#undef NDEBUG
+#include <cassert>
+
+using namespace std;
+
+static int
+func (int v)
+{
+ return v + 1;
+}
+
+struct functor
+{
+ int i;
+
+ int
+ operator() (int v)
+ {
+ return v + i;
+ }
+};
+
+int
+main ()
+{
+ using butl::move_only_function_ex;
+
+ // Attempt to copy-construct or copy-assign should not compile.
+ // Also check non-callable.
+ //
+#if 0
+ {
+ using ft = move_only_function_ex<int (int)>;
+ ft f;
+ ft f2 (f);
+ ft f3; f3 = f;
+ ft f4 (123);
+ }
+#endif
+
+ // NULL.
+ //
+ {
+ using ft = move_only_function_ex<int (int)>;
+
+ ft f1;
+ assert (!f1);
+
+ ft f2 (nullptr);
+ assert (f2 == nullptr);
+
+ f1 = func;
+ assert (f1 != nullptr);
+ f1 = nullptr;
+ assert (!f1);
+
+ int (*f) (int) = nullptr;
+ f2 = f;
+ assert (!f2);
+ }
+
+ // Function.
+ //
+ {
+ using ft = move_only_function_ex<int (int)>;
+
+ ft f (func);
+
+ assert (f (1) == 2);
+
+ ft f1 (move (f));
+ assert (!f);
+ assert (f1 (1) == 2);
+
+ f = &func;
+
+ assert (f (1) == 2);
+
+ assert (f.target<int (*) (int)> () != nullptr);
+ assert (f1.target<int (*) (int)> () != nullptr);
+ }
+
+ // Functor.
+ //
+ {
+ using ft = move_only_function_ex<int (int)>;
+
+ ft f (functor {1});
+
+ assert (f (1) == 2);
+
+ ft f1 (move (f));
+ assert (!f);
+ assert (f1 (1) == 2);
+
+ f = functor {2};
+
+ assert (f (1) == 3);
+
+ assert (ft (functor {1}).target<functor> () != nullptr);
+ }
+
+ // Lambda.
+ //
+ {
+ using ft = move_only_function_ex<int (int)>;
+
+ ft f ([p = unique_ptr<int> (new int (1))] (int v)
+ {
+ return *p + v;
+ });
+
+ assert (f (1) == 2);
+
+ ft f1 (move (f));
+ assert (!f);
+ assert (f1 (1) == 2);
+
+ f = ([p = unique_ptr<int> (new int (2))] (int v)
+ {
+ return *p + v;
+ });
+
+ assert (f (1) == 3);
+ }
+
+ // Void result.
+ //
+ {
+ using ft = move_only_function_ex<void (int)>;
+
+ ft f ([] (int v)
+ {
+ assert (v == 1);
+ });
+
+ f (1);
+ ft f1 (move (f));
+ f1 (1);
+ }
+}
diff --git a/tests/mventry/driver.cxx b/tests/mventry/driver.cxx
index cb1c348..e895ad6 100644
--- a/tests/mventry/driver.cxx
+++ b/tests/mventry/driver.cxx
@@ -1,28 +1,15 @@
// file : tests/mventry/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <iostream>
#include <system_error>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/filesystem.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility; // operator<<(ostream, exception)
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/mventry/testscript b/tests/mventry/testscript
index 61ef871..f52be79 100644
--- a/tests/mventry/testscript
+++ b/tests/mventry/testscript
@@ -98,16 +98,16 @@
if ($test.target == $build.host)
{
+if ($cxx.target.class != 'windows')
- lnf = ^ln -s t l &l
- lnd = $lnf
+ lnf = [cmdline] ^ln -s t l &l
+ lnd = [cmdline] $lnf
else
echo 'yes' >=t
if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes'
- lnf = cmd /C 'mklink l t' &l >-
- lnd = cmd /C 'mklink /D l t' &l >-
+ lnf = [cmdline] cmd /C 'mklink l t' &l >-
+ lnd = [cmdline] cmd /C 'mklink /D l t' &l >-
end
- jnc = cmd /C 'mklink /J l t' &l >-
+ jnc = [cmdline] cmd /C 'mklink /J l t' &l >-
end
: symlink
diff --git a/tests/next-word/buildfile b/tests/next-word/buildfile
new file mode 100644
index 0000000..e06cd88
--- /dev/null
+++ b/tests/next-word/buildfile
@@ -0,0 +1,6 @@
+# file : tests/next-word/buildfile
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs
diff --git a/tests/next-word/driver.cxx b/tests/next-word/driver.cxx
new file mode 100644
index 0000000..4ebe1a5
--- /dev/null
+++ b/tests/next-word/driver.cxx
@@ -0,0 +1,46 @@
+// file : tests/next-word/driver.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <vector>
+#include <string>
+//#include <iostream>
+
+#include <libbutl/utility.hxx>
+
+#undef NDEBUG
+#include <cassert>
+
+using namespace std;
+using namespace butl;
+
+using strings = vector<string>;
+
+static strings
+parse_lines (const string& s)
+{
+ strings r;
+ for (size_t b (0), e (0), m (0), n (s.size ());
+ next_word (s, n, b, e, m, '\n', '\r'), b != n; )
+ {
+ //cerr << "'" << string (s, b, e - b) << "'" << endl;
+ r.push_back (string (s, b, e - b));
+ }
+ return r;
+}
+
+int
+main ()
+{
+ assert ((parse_lines("") == strings {}));
+ assert ((parse_lines("a") == strings {"a"}));
+ assert ((parse_lines("\n") == strings {"", ""}));
+ assert ((parse_lines("\n\n") == strings {"", "", ""}));
+ assert ((parse_lines("\n\n\n") == strings {"", "", "", ""}));
+ assert ((parse_lines("\na") == strings {"", "a"}));
+ assert ((parse_lines("\n\na") == strings {"", "", "a"}));
+ assert ((parse_lines("a\n") == strings {"a", ""}));
+ assert ((parse_lines("a\n\n") == strings {"a", "", ""}));
+ assert ((parse_lines("a\nb") == strings {"a", "b"}));
+ assert ((parse_lines("a\n\nb") == strings {"a", "", "b"}));
+ assert ((parse_lines("\na\nb\n") == strings {"", "a", "b", ""}));
+}
diff --git a/tests/openssl/driver.cxx b/tests/openssl/driver.cxx
index d245a3a..55f91dd 100644
--- a/tests/openssl/driver.cxx
+++ b/tests/openssl/driver.cxx
@@ -1,36 +1,18 @@
// file : tests/openssl/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <iostream>
#include <iterator>
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility; // operator<<(ostream, exception)
-import butl.openssl;
-import butl.process;
-import butl.fdstream; // nullfd
-
-import butl.optional; // @@ MOD Clang should not be necessary.
-import butl.small_vector; // @@ MOD Clang should not be necessary.
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/openssl.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/openssl.hxx>
+#include <libbutl/fdstream.hxx> // nullfd
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -41,12 +23,28 @@ int
main (int, const char* argv[])
try
{
- openssl os (nullfd, path ("-"), 2, path ("openssl"), "rand", 128);
+ using butl::optional;
+
+ // Test openssl rand command.
+ //
+ {
+ openssl os (nullfd, path ("-"), 2, path ("openssl"), "rand", 128);
+
+ vector<char> r (os.in.read_binary ());
+ os.in.close ();
+
+ assert (os.wait () && r.size () == 128);
+ }
+
+ // Test openssl info retrieval.
+ //
+ {
+ optional<openssl_info> v (openssl::info (2, path ("openssl")));
- vector<char> r (os.in.read_binary ());
- os.in.close ();
+ assert (v);
+ }
- return os.wait () && r.size () == 128 ? 0 : 1;
+ return 0;
}
catch (const system_error& e)
{
diff --git a/tests/optional/driver.cxx b/tests/optional/driver.cxx
index 5d72f08..da09cf5 100644
--- a/tests/optional/driver.cxx
+++ b/tests/optional/driver.cxx
@@ -1,23 +1,13 @@
// file : tests/optional/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <vector>
#include <utility> // move()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.optional;
-#else
-#include <libbutl/optional.mxx>
-#endif
+
+#include <libbutl/optional.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
diff --git a/tests/pager/driver.cxx b/tests/pager/driver.cxx
index ca3c3b9..c807ed0 100644
--- a/tests/pager/driver.cxx
+++ b/tests/pager/driver.cxx
@@ -1,28 +1,17 @@
// file : tests/pager/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios> // ios_base::failure
#include <vector>
#include <string>
#include <utility> // move()
#include <sstream>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.pager;
-#else
-#include <libbutl/pager.mxx>
-#endif
+
+#include <libbutl/pager.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/path-entry/driver.cxx b/tests/path-entry/driver.cxx
index 30aae92..d9ea2be 100644
--- a/tests/path-entry/driver.cxx
+++ b/tests/path-entry/driver.cxx
@@ -1,36 +1,20 @@
// file : tests/path-entry/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
#include <stdexcept> // invalid_argument
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.path-io;
-import butl.utility; // operator<<(ostream, exception)
-import butl.optional;
-import butl.timestamp;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/timestamp.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/optional.hxx>
+#include <libbutl/timestamp.hxx>
+#include <libbutl/filesystem.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/path-entry/testscript b/tests/path-entry/testscript
index 16039fa..3ac363b 100644
--- a/tests/path-entry/testscript
+++ b/tests/path-entry/testscript
@@ -57,16 +57,16 @@
if ($test.target == $build.host)
{
+if ($cxx.target.class != 'windows')
- lnf = ^ln -s t l &l
- lnd = $lnf
+ lnf = [cmdline] ^ln -s t l &l
+ lnd = [cmdline] $lnf
else
echo 'yes' >=t
if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes'
- lnf = cmd /C 'mklink l t' &l >-
- lnd = cmd /C 'mklink /D l t' &l >-
+ lnf = [cmdline] cmd /C 'mklink l t' &l >-
+ lnd = [cmdline] cmd /C 'mklink /D l t' &l >-
end
- jnc = cmd /C 'mklink /J l t' &l >-
+ jnc = [cmdline] cmd /C 'mklink /J l t' &l >-
end
: symlink
diff --git a/tests/path/driver.cxx b/tests/path/driver.cxx
index b855e34..3124c13 100644
--- a/tests/path/driver.cxx
+++ b/tests/path/driver.cxx
@@ -1,27 +1,15 @@
// file : tests/path/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <sstream>
#include <iostream>
#include <type_traits>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+//#include <libbutl/path-io.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-//import butl.path_io;
-#else
-#include <libbutl/path.mxx>
-//#include <libbutl/path-io.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/prefix-map/driver.cxx b/tests/prefix-map/driver.cxx
index e0da9ea..8ed35ea 100644
--- a/tests/prefix-map/driver.cxx
+++ b/tests/prefix-map/driver.cxx
@@ -1,24 +1,13 @@
// file : tests/prefix-map/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.prefix_map;
-#else
-#include <libbutl/prefix-map.mxx>
-#endif
+
+#include <libbutl/prefix-map.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/process-run/driver.cxx b/tests/process-run/driver.cxx
index 94b6e00..032f890 100644
--- a/tests/process-run/driver.cxx
+++ b/tests/process-run/driver.cxx
@@ -1,31 +1,16 @@
// file : tests/process-run/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.process;
-import butl.optional; // @@ MOD Clang shouldn't be needed.
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/process-term/buildfile b/tests/process-term/buildfile
new file mode 100644
index 0000000..e710179
--- /dev/null
+++ b/tests/process-term/buildfile
@@ -0,0 +1,6 @@
+# file : tests/process-term/buildfile
+# license : MIT; see accompanying LICENSE file
+
+import libs = libbutl%lib{butl}
+
+exe{driver}: {hxx cxx}{*} $libs testscript
diff --git a/tests/process-term/driver.cxx b/tests/process-term/driver.cxx
new file mode 100644
index 0000000..799757c
--- /dev/null
+++ b/tests/process-term/driver.cxx
@@ -0,0 +1,403 @@
+// file : tests/process-term/driver.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef _WIN32
+# include <time.h>
+# include <signal.h>
+# include <unistd.h>
+# include <sys/types.h>
+#else
+# include <libbutl/win32-utility.hxx>
+#endif
+
+#include <string>
+#include <cerrno> // ERANGE
+#include <utility> // move()
+#include <cstdlib> // atexit(), exit(), strtoull()
+#include <cstring> // memset()
+#include <cstdint> // uint64_t
+#include <iostream>
+#ifndef _WIN32
+# include <chrono>
+#endif
+
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+
+#undef NDEBUG
+#include <cassert>
+
+using namespace std;
+using namespace butl;
+
+void
+atexit_func () // Registered with atexit() when the -e option is specified.
+{
+ cout << "exiting"; // No trailing newline: the parent compares the child's stdout with exactly this string.
+}
+
+#ifndef _WIN32
+
+volatile sig_atomic_t term_sig = 0;
+
+static void
+term (int sig) // SIGTERM handler installed when the -t option is specified.
+{
+ term_sig = sig; // Only record the signal; the sleep loop in main() decides how to react.
+}
+#endif
+
+// Usages:
+//
+// argv[0]
+// argv[0] -s <sec> [-t (ignore|exit|default)] [-e] [-c <num>]
+//
+// In the first form run some basic process termination tests, running its
+// child in the second form.
+//
+// In the second form optionally register the SIGTERM signal handler
+// (POSIX-only) and the atexit function, then sleep for the requested number
+// of seconds and exit with the specified status.
+//
+// -s <sec>
+// Sleep for the specified timeout.
+//
+// -t (ignore|exit|default)
+// Register the SIGTERM signal handler. If the signal is received then
+// either ignore it, interrupt the sleep and exit, or call the default
+// handler.
+//
+// -e
+// Register the function with atexit() that prints the 'exiting' string to
+// stdout.
+//
+// -c <num>
+// Exit with the specified status (zero by default).
+//
+int
+main (int argc, const char* argv[]) // Parent test driver (no arguments) or sleeping child (-s ...).
+{
+ using butl::optional;
+
+ auto num = [] (const string& s) // Parse a decimal number, asserting the whole string is consumed.
+ {
+ assert (!s.empty ());
+
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ uint64_t r (strtoull (s.c_str (), &e, 10)); // Can't throw.
+ assert (errno != ERANGE && e == s.c_str () + s.size ());
+
+ return r;
+ };
+
+ int ec (0);
+ optional<uint64_t> sec;
+
+#ifndef _WIN32
+ enum class sig_action
+ {
+ ignore,
+ exit,
+ default_
+ };
+
+ optional<sig_action> term_action;
+
+ struct sigaction def_handler; // Only filled in (by sigaction() below) if -t is specified.
+#endif
+
+ for (int i (1); i != argc; ++i)
+ {
+ string o (argv[i]);
+
+ if (o == "-s")
+ {
+ assert (++i != argc); // Side effect in assert() is fine: NDEBUG is undefined before <cassert> is included.
+ sec = num (argv[i]);
+ }
+ else if (o == "-c")
+ {
+ assert (++i != argc);
+ ec = static_cast<int> (num (argv[i]));
+ }
+ else if (o == "-e")
+ {
+ assert (atexit (atexit_func) == 0);
+ }
+ else if (o == "-t")
+ {
+ assert (++i != argc);
+
+#ifndef _WIN32
+ string v (argv[i]);
+
+ if (v == "ignore")
+ term_action = sig_action::ignore;
+ else if (v == "exit")
+ term_action = sig_action::exit;
+ else if (v == "default")
+ term_action = sig_action::default_;
+ else
+ assert (false);
+
+ struct sigaction action;
+ memset (&action, 0, sizeof (action));
+ action.sa_handler = term;
+ assert (sigaction (SIGTERM, &action, &def_handler) == 0); // Previous disposition is saved in def_handler.
+#endif
+ }
+ else
+ assert (false);
+ }
+
+#ifndef _WIN32
+ auto sleep = [&term_action, &def_handler] (uint64_t sec)
+ {
+ // Wait until timeout expires or SIGTERM is received and is not ignored.
+ //
+ for (timespec tm {static_cast<time_t> (sec), 0};
+ nanosleep (&tm, &tm) == -1; ) // Per POSIX, on interruption the remaining time is written back to tm.
+ {
+ assert (term_action && errno == EINTR && term_sig == SIGTERM);
+
+ if (*term_action == sig_action::ignore)
+ continue; // Go back to sleeping for the time left in tm.
+
+ if (*term_action == sig_action::default_)
+ {
+ assert (sigaction (term_sig, &def_handler, nullptr) == 0); // Reinstall the saved disposition.
+ kill (getpid (), term_sig); // Re-send the signal to ourselves.
+ }
+
+ break;
+ }
+ };
+#else
+ auto sleep = [] (uint64_t sec)
+ {
+ Sleep (static_cast<DWORD> (sec) * 1000);
+ };
+#endif
+
+ // Child process.
+ //
+ if (sec)
+ {
+ if (*sec != 0)
+ sleep (*sec);
+
+ return ec;
+ }
+
+ // Main process.
+ //
+
+ // Return true if the child process has written the specified string to
+ // stdout, represented by the reading end of the specified pipe.
+ //
+ auto test_out = [] (fdpipe&& pipe, const char* out)
+ {
+ pipe.out.close (); // Presumably so that read_text() below can reach EOF once the child is gone.
+
+ ifdstream is (move (pipe.in));
+ bool r (is.read_text () == out);
+ is.close ();
+ return r;
+ };
+
+#ifndef _WIN32
+ // Terminate a process with the default SIGTERM handler.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60, "-e"));
+
+ sleep (3); // Give the child some time to initialize.
+ p.term ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ assert (p.exit->signal () == SIGTERM);
+ }
+
+ // Terminate a process that exits on SIGTERM. Make sure it exits normally
+ // and atexit function is called.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2,
+ argv[0], "-s", 60, "-t", "exit", "-e", "-c", 5));
+
+ sleep (3); // Give the child some time to initialize.
+ p.term ();
+
+ assert (test_out (move (pipe), "exiting"));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (p.exit->normal ());
+ assert (p.exit->code () == 5);
+ }
+
+ // Terminate a process that calls the default handler on SIGTERM.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (
+ process_start (0, pipe, 2,
+ argv[0], "-s", 60, "-t", "default", "-e", "-c", 5));
+
+ sleep (3); // Give the child some time to initialize.
+ p.term ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ assert (p.exit->signal () == SIGTERM);
+ }
+
+ // Terminate and then kill a still running process.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2,
+ argv[0], "-s", 60, "-t", "ignore", "-e"));
+
+ sleep (3); // Give the child some time to initialize.
+ p.term ();
+
+ assert (!p.timed_wait (chrono::seconds (1)));
+
+ p.kill ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ assert (p.exit->signal () == SIGKILL);
+ }
+
+ // Terminate an already terminated process.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 0, "-c", 5));
+
+ sleep (4); // Give the child some time to terminate.
+ p.term ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (p.exit->normal ());
+ assert (p.exit->code () == 5);
+ }
+
+ // Terminate a process being terminated.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60));
+
+ p.term ();
+ p.term ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ }
+
+ // Kill a process being terminated.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60));
+
+ p.term ();
+ p.kill ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ assert (p.exit->signal () == SIGTERM || p.exit->signal () == SIGKILL); // Either signal is accepted here.
+ }
+
+ // Kill a process being killed.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60));
+
+ p.kill ();
+ p.kill ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ }
+#endif
+
+ // Terminate and wait for a process.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60, "-e"));
+
+ sleep (3); // Give the child some time to initialize.
+ p.term ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ }
+
+ // Kill and wait for a process.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 60, "-e"));
+
+ sleep (3); // Give the child some time to initialize.
+ p.kill ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (!p.exit->normal ());
+ }
+
+ // Kill a terminated process.
+ //
+ {
+ fdpipe pipe (fdopen_pipe ());
+ process p (process_start (0, pipe, 2, argv[0], "-s", 0, "-c", 5));
+
+ sleep (4); // Give the child some time to terminate.
+ p.kill ();
+
+ assert (test_out (move (pipe), ""));
+
+ assert (!p.wait ());
+ assert (p.exit);
+ assert (p.exit->normal ());
+ assert (p.exit->code () == 5);
+ }
+}
diff --git a/tests/process-term/testscript b/tests/process-term/testscript
new file mode 100644
index 0000000..f61899c
--- /dev/null
+++ b/tests/process-term/testscript
@@ -0,0 +1,4 @@
+# file : tests/process-term/testscript
+# license : MIT; see accompanying LICENSE file
+
+$*
diff --git a/tests/process/driver.cxx b/tests/process/driver.cxx
index 3be4154..1ee5710 100644
--- a/tests/process/driver.cxx
+++ b/tests/process/driver.cxx
@@ -1,44 +1,29 @@
// file : tests/process/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios>
#include <string>
#include <vector>
+#include <chrono>
#include <sstream>
#include <iterator> // istreambuf_iterator, ostream_iterator
#include <algorithm> // copy()
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // setenv(), getenv()
+#include <libbutl/process.hxx>
+#include <libbutl/process-io.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/timestamp.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility; // setenv(), getenv()
-import butl.process;
-import butl.optional;
-import butl.fdstream;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/process-io.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
-static const char* envvars[] = {"ABC=1", "DEF", nullptr};
-
using cstrings = vector<const char*>;
bool
@@ -91,18 +76,16 @@ exec (const path& p,
if (bin)
args.push_back ("-b");
+ const char* evars[] = {
+ "PAR1", "PAR2=2P", "PAR6=66", "PAR7", // Override the process variables.
+ "THR1", "THR2=2T", // Override the thread variables.
+ "CHD1", // Unset a non-existing variable.
+ "CHD2=C2", // Add the new variable.
+ nullptr};
+
if (env)
- {
args.push_back ("-e");
- // Here we set the environment variables for the current process to make
- // sure that the child process will not see the variable that is requested
- // to be unset, and will see the other one unaffected.
- //
- setenv ("DEF", "2");
- setenv ("XYZ", "3");
- }
-
if (cwd != nullptr)
args.push_back (cwd);
@@ -120,7 +103,7 @@ exec (const path& p,
out ? -1 : -2,
err ? (out ? 1 : -1) : -2,
cwd,
- env ? envvars : nullptr);
+ env ? evars : nullptr);
try
{
@@ -154,18 +137,21 @@ exec (const path& p,
process pr3 (args.data (),
-1, -1, -2,
cwd,
- env ? envvars : nullptr);
+ env ? evars : nullptr);
process pr2 (args.data (),
pr, bin_mode (move (pr3.out_fd)).get (), -2,
cwd,
- env ? envvars : nullptr);
+ env ? evars : nullptr);
ifdstream is (bin_mode (move (pr3.in_ofd)));
o = is.read_binary ();
- r = pr2.wait () && r;
- r = pr3.wait () && r;
+ // While at it, make sure that the process::timed_wait() template
+ // function overloads can be properly instantiated/linked.
+ //
+ r = pr2.timed_wait (duration::max ()) && r;
+ r = pr3.timed_wait (chrono::milliseconds::max ()) && r;
}
else
{
@@ -323,12 +309,23 @@ main (int argc, const char* argv[])
if (env)
{
- // Check that the ABC variable is set, the DEF is unset and the XYZ is
- // left unchanged.
+ // Check that the variables are (un)set as expected.
//
- if (getenv ("ABC") != optional<string> ("1") ||
- getenv ("DEF") ||
- getenv ("XYZ") != optional<string> ("3"))
+ if (getenv ("PAR1") ||
+ getenv ("PAR2") != optional<string> ("2P") ||
+ getenv ("PAR3") != optional<string> ("P3") ||
+ getenv ("PAR4") ||
+ getenv ("PAR5") != optional<string> ("5P") ||
+ getenv ("PAR6") != optional<string> ("66") ||
+ getenv ("PAR7") ||
+
+ getenv ("THR1") ||
+ getenv ("THR2") != optional<string> ("2T") ||
+ getenv ("THR3") != optional<string> ("T3") ||
+ getenv ("THR4") ||
+
+ getenv ("CHD1") ||
+ getenv ("CHD2") != optional<string> ("C2"))
return 1;
}
@@ -358,6 +355,26 @@ main (int argc, const char* argv[])
return 0;
}
+ // Here we set the process and thread environment variables to make sure
+ // that the child process will not see the variables that are requested to
+ // be unset, will see the change for the variables that are requested to be set,
+ // and will see the other ones unaffected.
+ //
+ setenv ("PAR1", "P1");
+ setenv ("PAR2", "P2");
+ setenv ("PAR3", "P3");
+ setenv ("PAR4", "P4");
+ setenv ("PAR5", "P5");
+ setenv ("PAR6", "P6");
+ setenv ("PAR7", "P7");
+
+ const char* tevars[] = {
+ "THR1=T1", "THR2=T2", "THR3=T3", "THR4",
+ "PAR4", "PAR5=5P", "PAR6", "PAR7=7P", // Override the process variables.
+ nullptr};
+
+ auto_thread_env ate (tevars);
+
dir_path owd (dir_path::current_directory ());
// Test processes created as "already terminated".
diff --git a/tests/progress/driver.cxx b/tests/progress/driver.cxx
index 2a0b647..f1a257c 100644
--- a/tests/progress/driver.cxx
+++ b/tests/progress/driver.cxx
@@ -8,38 +8,19 @@
# include <io.h> //_write()
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstddef> // size_t
#include <iostream>
#ifndef _WIN32
# include <thread> // this_thread::sleep_for()
#endif
-#endif
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#ifndef _WIN32
-import std.threading;
-#endif
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.diagnostics;
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx> // fdopen_null(), stderr_fd()
+#include <libbutl/diagnostics.hxx>
-import butl.optional; // @@ MOD Clang should not be necessary.
-import butl.small_vector; // @@ MOD Clang should not be necessary.
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx> // fdopen_null(), stderr_fd()
-#include <libbutl/diagnostics.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/project-name/driver.cxx b/tests/project-name/driver.cxx
index 02b3ae3..ac1c898 100644
--- a/tests/project-name/driver.cxx
+++ b/tests/project-name/driver.cxx
@@ -1,28 +1,17 @@
// file : tests/project-name/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios> // ios::*bit
#include <string>
#include <iostream>
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // operator<<(ostream,exception), eof(), *case()
-import butl.project_name;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/project-name.mxx>
-#endif
+
+#include <libbutl/utility.hxx> // operator<<(ostream,exception), eof(),
+ // *case()
+#include <libbutl/project-name.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/regex/driver.cxx b/tests/regex/driver.cxx
index f78a100..f8363e1 100644
--- a/tests/regex/driver.cxx
+++ b/tests/regex/driver.cxx
@@ -1,33 +1,23 @@
// file : tests/regex/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
+#include <regex>
#include <string>
+#include <utility> // pair
#include <iostream>
+#include <stdexcept> // invalid_argument
#include <exception>
-#endif
-// Other includes.
+#include <libbutl/regex.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.regex; // @@ MOD TODO: shouldn't be necessary (re-export).
-#endif
-import butl.regex;
-import butl.utility; // operator<<(ostream, exception)
-#else
-#include <libbutl/regex.mxx>
-#include <libbutl/utility.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
-// Usage: argv[0] [-ffo] [-fnc] [-m] <string> <regex> <format>
+// Usage: argv[0] [-ffo] [-fnc] [-m] <string> "/<regex>/<format>/"
//
// Perform substitution of matched substrings with formatted replacement
// strings using regex_replace_*() functions. If the string matches the regex
@@ -66,11 +56,13 @@ try
break;
}
- assert (i + 3 == argc);
+ assert (i + 2 == argc);
+
+ string s (argv[i++]);
+ pair<regex, string> rf (regex_replace_parse (argv[i]));
- string s (argv[i++]);
- regex re (argv[i++]);
- string fmt (argv[i]);
+ const regex& re (rf.first);
+ const string& fmt (rf.second);
auto r (match
? regex_replace_match (s, re, fmt)
@@ -86,8 +78,13 @@ catch (const regex_error& e)
cerr << "invalid regex" << e << endl; // Print sanitized.
return 2;
}
-catch (const exception& e)
+catch (const invalid_argument& e)
{
cerr << e << endl;
return 2;
}
+catch (const exception&)
+{
+ assert (false);
+ return 2;
+}
diff --git a/tests/regex/testscript b/tests/regex/testscript
index fbee1d6..93ad4b6 100644
--- a/tests/regex/testscript
+++ b/tests/regex/testscript
@@ -4,38 +4,38 @@
: replace-search
:
{
- $* abcbd b x >axcxd : all
- $* -ffo abcbd b x >axcbd : first-only
- $* -fnc abcbd b x >xx : no-copy
+ $* abcbd /b/x/ >axcxd : all
+ $* -ffo abcbd /b/x/ >axcbd : first-only
+ $* -fnc abcbd /b/x/ >xx : no-copy
: ecma-escape
:
{
- $* xay a '$b' >'x$by' : none
- $* xay a '$' >'x$y' : none-term
- $* xay a '$$' >'x$y' : self
- $* xay a 'b$&c' >'xbacy' : match
- $* xay a 'b$`c' >'xbxcy' : match-precede
- $* xay a "b\\\$'c" >'xbycy' : match-follow
+ $* xay '/a/$b/' >'x$by' : none
+ $* xay '/a/$/' >'x$y' : none-term
+ $* xay '/a/$$/' >'x$y' : self
+ $* xay '/a/b$&c/' >'xbacy' : match
+ $* xay '/a/b$`c/' >'xbxcy' : match-precede
+ $* xay "/a/b\\\$'c/" >'xbycy' : match-follow
: capture
:
{
- $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '$1$10' >aj : matched
- $* a '(a)|(b)' '$1$2$3' >a : unmatched
+ $* abcdefghij '/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/$1$10/' >aj : matched
+ $* a '/(a)|(b)/$1$2$3/' >a : unmatched
}
}
: perl-escape
:
{
- $* xay a '\b' >'xby' : none
- $* xay a '\' >'xy' : none-term
- $* xay a '\\' >'x\y' : self
+ $* xay '/a/\b/' >'xby' : none
+ $* xay '/a/\/' >'xy' : none-term
+ $* xay '/a/\\/' >'x\y' : self
: newline
:
- $* xay a '\n' >>EOO
+ $* xay '/a/\n/' >>EOO
x
y
EOO
@@ -43,25 +43,25 @@
: capture
:
{
- $* abcdefghij '(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)' '\1\10' >aa0 : matched
- $* a '(a)|(b)' '\1\2\3' >a : unmatched
+ $* abcdefghij '/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)/\1\10/' >aa0 : matched
+ $* a '/(a)|(b)/\1\2\3/' >a : unmatched
}
: upper
:
{
- $* xay a '\U' >xy : none
- $* xay a '\Uvz' >xVZy : repl
- $* xay a '\Uv\Ez' >xVzy : end
- $* aa a 'v\Uz' >vZvZ : locality
- $* xay '(a)' '\U\1' >xAy : capt
- $* x-y '(a?)-' '\U\1z' >xZy : capt-empty
- $* xay a '\uvz' >xVzy : once
+ $* xay '/a/\U/' >xy : none
+ $* xay '/a/\Uvz/' >xVZy : repl
+ $* xay '/a/\Uv\Ez/' >xVzy : end
+ $* aa '/a/v\Uz/' >vZvZ : locality
+ $* xay '/(a)/\U\1/' >xAy : capt
+ $* x-y '/(a?)-/\U\1z/' >xZy : capt-empty
+ $* xay '/a/\uvz/' >xVzy : once
}
: lower
:
- $* xay a '\lVZ' >xvZy
+ $* xay '/a/\lVZ/' >xvZy
}
}
@@ -70,6 +70,19 @@
{
test.options += -m
- $* abc 'a(b)c' 'x\1y' >xby : match
- $* abcd 'a(b)c' 'x\1yd' == 1 : no-match
+ $* abc '/a(b)c/x\1y/' >xby : match
+ $* abcd '/a(b)c/x\1yd/' == 1 : no-match
+}
+
+: invalid-regex-fmt
+:
+{
+ test.arguments += '' # Note: we will fail before the matching.
+
+ $* '' 2> 'no leading delimiter' != 0 : no-leading-delim
+ $* '/a' 2> 'no delimiter after regex' != 0 : no-mid-delim
+ $* '//' 2> 'empty regex' != 0 : no-regex
+ $* '/a[b/c/' 2>~'/invalid regex.*/' != 0 : regex
+ $* '/a/b' 2> 'no delimiter after replacement' != 0 : no-trailing-delim
+ $* '/a/b/s' 2> 'junk after trailing delimiter' != 0 : junk
}
diff --git a/tests/semantic-version/driver.cxx b/tests/semantic-version/driver.cxx
index 032cb14..3c20a6c 100644
--- a/tests/semantic-version/driver.cxx
+++ b/tests/semantic-version/driver.cxx
@@ -1,23 +1,12 @@
// file : tests/semantic-version/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/semantic-version.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.semantic_version;
-#else
-#include <libbutl/semantic-version.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -34,7 +23,6 @@ main ()
semver v;
assert (v.major == 0 && v.minor == 0 && v.patch == 0 && v.build.empty ());
}
-
{
semver v (1, 2, 3);
assert (v.major == 1 && v.minor == 2 && v.patch == 3 && v.build.empty ());
@@ -57,17 +45,27 @@ main ()
// String representation.
//
- assert (semver ("1.2") == semver (1, 2, 0));
- assert (semver ("1.2-3") == semver (1, 2, 0, "-3"));
- assert (semver ("1.2.a1", "+-.") == semver (1, 2, 0, ".a1"));
- assert (semver ("1.2.3") == semver (1, 2, 3));
- assert (semver ("1.2.3-4") == semver (1, 2, 3, "-4"));
- assert (semver ("1.2.3+4") == semver (1, 2, 3, "+4"));
- assert (semver ("1.2.3.4", "+-.") == semver (1, 2, 3, ".4"));
- assert (semver ("1.2.3a", "") == semver (1, 2, 3, "a"));
- try {semver v ("1.2.3-4", false); assert (false);} catch (failed) {}
- try {semver v ("1.2.3.4"); assert (false);} catch (failed) {}
- try {semver v ("1.2.3a"); assert (false);} catch (failed) {}
+ assert (semver ("1", semver::allow_omit_minor) == semver (1, 0, 0));
+ assert (semver ("1-2", semver::allow_omit_minor | semver::allow_build) == semver (1, 0, 0, "-2"));
+ assert (semver ("1.2", semver::allow_omit_minor) == semver (1, 2, 0));
+ assert (semver ("1.2+a", semver::allow_omit_minor | semver::allow_build) == semver (1, 2, 0, "+a"));
+ assert (semver ("1.2", semver::allow_omit_patch) == semver (1, 2, 0));
+ assert (semver ("1.2-3", semver::allow_omit_patch | semver::allow_build) == semver (1, 2, 0, "-3"));
+ assert (semver ("1.2.a1", semver::allow_omit_patch | semver::allow_build, ".+-") == semver (1, 2, 0, ".a1"));
+ assert (semver ("1.2.3") == semver (1, 2, 3));
+ assert (semver ("1.2.3-4", semver::allow_build) == semver (1, 2, 3, "-4"));
+ assert (semver ("1.2.3+4", semver::allow_build) == semver (1, 2, 3, "+4"));
+ assert (semver ("1.2.3.4", semver::allow_build, "+-.") == semver (1, 2, 3, ".4"));
+ assert (semver ("1.2.3a", semver::allow_build, "") == semver (1, 2, 3, "a"));
+
+ try {semver v ("1"); assert (false);} catch (failed) {}
+ try {semver v ("1.x.2"); assert (false);} catch (failed) {}
+ try {semver v ("1.2"); assert (false);} catch (failed) {}
+ try {semver v ("1.2.x"); assert (false);} catch (failed) {}
+ try {semver v ("1.2.3-4"); assert (false);} catch (failed) {}
+ try {semver v ("1.2.3.4"); assert (false);} catch (failed) {}
+ try {semver v ("1.2.3a"); assert (false);} catch (failed) {}
+
assert (!parse_semantic_version ("1.2.3.4"));
// Numeric representation.
diff --git a/tests/sendmail/driver.cxx b/tests/sendmail/driver.cxx
index e73940b..3b97202 100644
--- a/tests/sendmail/driver.cxx
+++ b/tests/sendmail/driver.cxx
@@ -1,34 +1,16 @@
// file : tests/sendmail/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <iostream>
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.process;
-import butl.utility; // operator<<(ostream, exception)
-import butl.sendmail;
-import butl.fdstream;
-
-import butl.optional; // @@ MOD Clang should not be necessary.
-import butl.small_vector; // @@ MOD Clang should not be necessary.
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/sendmail.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/sendmail.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/sha1/driver.cxx b/tests/sha1/driver.cxx
index 2b58113..1e8e254 100644
--- a/tests/sha1/driver.cxx
+++ b/tests/sha1/driver.cxx
@@ -1,29 +1,16 @@
// file : tests/sha1/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.sha1;
-import butl.path;
-import butl.fdstream;
-import butl.filesystem;
-#else
-#include <libbutl/sha1.mxx>
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/filesystem.mxx> // auto_rmfile
-#endif
+
+#include <libbutl/sha1.hxx>
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx> // auto_rmfile
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/sha256/driver.cxx b/tests/sha256/driver.cxx
index 2946755..30dfa49 100644
--- a/tests/sha256/driver.cxx
+++ b/tests/sha256/driver.cxx
@@ -1,29 +1,16 @@
// file : tests/sha256/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.sha256;
-import butl.fdstream;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/sha256.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/filesystem.mxx> // auto_rmfile
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/sha256.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx> // auto_rmfile
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/small-forward-list/driver.cxx b/tests/small-forward-list/driver.cxx
index 670fff1..1cfea77 100644
--- a/tests/small-forward-list/driver.cxx
+++ b/tests/small-forward-list/driver.cxx
@@ -1,24 +1,13 @@
// file : tests/small-forward-list/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.small_forward_list;
-#else
-#include <libbutl/small-forward-list.mxx>
-#endif
+
+#include <libbutl/small-forward-list.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/small-list/driver.cxx b/tests/small-list/driver.cxx
index 9674402..8e2fb6e 100644
--- a/tests/small-list/driver.cxx
+++ b/tests/small-list/driver.cxx
@@ -1,24 +1,13 @@
// file : tests/small-list/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/small-list.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.small_list;
-#else
-#include <libbutl/small-list.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/small-vector/driver.cxx b/tests/small-vector/driver.cxx
index d79a03b..cc012fc 100644
--- a/tests/small-vector/driver.cxx
+++ b/tests/small-vector/driver.cxx
@@ -1,24 +1,13 @@
// file : tests/small-vector/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/small-vector.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.small_vector;
-#else
-#include <libbutl/small-vector.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -111,6 +100,7 @@ main ()
s1.emplace_back ("abc");
vector s2 (move (s1));
assert (s2[0] == "abc" && s2.capacity () == 2 && small (s2));
+ assert (s1.empty ()); // The source vector must be empty now.
}
{
diff --git a/tests/standard-version/driver.cxx b/tests/standard-version/driver.cxx
index 4b985e1..4bddf08 100644
--- a/tests/standard-version/driver.cxx
+++ b/tests/standard-version/driver.cxx
@@ -1,31 +1,18 @@
// file : tests/standard-version/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <ios> // ios::failbit, ios::badbit
#include <string>
#include <cstdint> // uint*_t
#include <iostream>
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // operator<<(ostream,exception), eof()
-import butl.optional;
-import butl.standard_version;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/standard-version.mxx>
-#endif
+
+#include <libbutl/utility.hxx> // operator<<(ostream,exception), eof()
+#include <libbutl/optional.hxx>
+#include <libbutl/standard-version.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -96,7 +83,7 @@ version (const string& s,
if (v.minor () != 99999)
{
- standard_version_constraint c1 ("~" + s);
+ standard_version_constraint c1 ('~' + s);
standard_version_constraint c2 ('[' + s + ' ' + max_ver ('~') + ')');
assert (c1 == c2);
}
@@ -104,7 +91,7 @@ version (const string& s,
if ((v.major () == 0 && v.minor () != 99999) ||
(v.major () != 0 && v.major () != 99999))
{
- standard_version_constraint c1 ("^" + s);
+ standard_version_constraint c1 ('^' + s);
standard_version_constraint c2 ('[' + s + ' ' + max_ver ('^') + ')');
assert (c1 == c2);
}
diff --git a/tests/strcase/driver.cxx b/tests/strcase/driver.cxx
index f9ea3b6..8e964a6 100644
--- a/tests/strcase/driver.cxx
+++ b/tests/strcase/driver.cxx
@@ -1,22 +1,12 @@
// file : tests/strcase/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#endif
-// Other includes.
+#include <libbutl/utility.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.utility;
-#else
-#include <libbutl/utility.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/string-parser/driver.cxx b/tests/string-parser/driver.cxx
index 4e4984e..8cba912 100644
--- a/tests/string-parser/driver.cxx
+++ b/tests/string-parser/driver.cxx
@@ -1,27 +1,15 @@
// file : tests/string-parser/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <vector>
#include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // operator<<(ostream,exception)
-import butl.string_parser;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/string-parser.mxx>
-#endif
+
+#include <libbutl/utility.hxx> // operator<<(ostream,exception)
+#include <libbutl/string-parser.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl::string_parser;
diff --git a/tests/tab-parser/driver.cxx b/tests/tab-parser/driver.cxx
index 5a527cf..99c19d9 100644
--- a/tests/tab-parser/driver.cxx
+++ b/tests/tab-parser/driver.cxx
@@ -1,26 +1,14 @@
// file : tests/tab-parser/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
-#endif
-// Other includes.
+#include <libbutl/utility.hxx> // operator<<(ostream,exception)
+#include <libbutl/tab-parser.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // operator<<(ostream,exception)
-import butl.tab_parser;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/tab-parser.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/target-triplet/driver.cxx b/tests/target-triplet/driver.cxx
index cc375e0..8c08a90 100644
--- a/tests/target-triplet/driver.cxx
+++ b/tests/target-triplet/driver.cxx
@@ -1,25 +1,14 @@
// file : tests/target-triplet/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <iostream>
#include <stdexcept> // invalid_argument
-#endif
-// Other includes.
+#include <libbutl/target-triplet.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.target_triplet;
-#else
-#include <libbutl/target-triplet.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -85,6 +74,10 @@ main ()
"i686-w64-mingw32",
"i686", "w64", "mingw32", "", "windows"));
+ assert (test ("x86_64-w64-windows-gnu",
+ "x86_64-w64-mingw32",
+ "x86_64", "w64", "mingw32", "", "windows"));
+
assert (test ("i686-lfs-linux-gnu",
"i686-lfs-linux-gnu",
"i686", "lfs", "linux-gnu", "", "linux"));
@@ -93,6 +86,10 @@ main ()
"x86_64-linux-gnu",
"x86_64", "", "linux-gnu", "", "linux"));
+ assert (test ("x86_64-redhat-linux",
+ "x86_64-redhat-linux-gnu",
+ "x86_64", "redhat", "linux-gnu", "", "linux"));
+
assert (test ("x86_64-linux-gnux32",
"x86_64-linux-gnux32",
"x86_64", "", "linux-gnux32", "", "linux"));
@@ -109,6 +106,34 @@ main ()
"aarch64-nto-qnx7.0.0",
"aarch64", "", "nto-qnx", "7.0.0", "other"));
+ assert (test ("wasm32-emscripten",
+ "wasm32-emscripten",
+ "wasm32", "", "emscripten", "", "other"));
+
+ assert (test ("arm64-apple-darwin20.1.0",
+ "aarch64-apple-darwin20.1.0",
+ "aarch64", "apple", "darwin", "20.1.0", "macos"));
+
+ assert (test ("arm64-apple-ios14.4",
+ "aarch64-apple-ios14.4",
+ "aarch64", "apple", "ios", "14.4", "ios"));
+
+ assert (test ("arm64-apple-ios",
+ "aarch64-apple-ios",
+ "aarch64", "apple", "ios", "", "ios"));
+
+ assert (test ("arm64-apple-ios14.4-simulator",
+ "aarch64-apple-ios14.4-simulator",
+ "aarch64", "apple", "ios-simulator", "14.4", "ios"));
+
+ assert (test ("arm64-apple-ios-simulator",
+ "aarch64-apple-ios-simulator",
+ "aarch64", "apple", "ios-simulator", "", "ios"));
+
+ assert (test ("x86_64-apple-ios14.4-macabi",
+ "x86_64-apple-ios14.4-macabi",
+ "x86_64", "apple", "ios-macabi", "14.4", "ios"));
+
// Version extraction.
//
assert (test ("x86_64-apple-darwin14.5.0",
diff --git a/tests/timestamp/driver.cxx b/tests/timestamp/driver.cxx
index 6283798..956b295 100644
--- a/tests/timestamp/driver.cxx
+++ b/tests/timestamp/driver.cxx
@@ -3,28 +3,17 @@
#include <time.h> // tzset() (POSIX), _tzset() (Windows)
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <chrono>
#include <locale>
#include <clocale>
#include <sstream>
#include <iomanip>
#include <system_error>
-#endif
-// Other includes.
+#include <libbutl/timestamp.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.timestamp;
-#else
-#include <libbutl/timestamp.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -189,15 +178,15 @@ main ()
assert (parse (".384902285 Feb 21 19:31:10 2016",
"%[.N] %b %d %H:%M:%S %Y",
- "." + ns (384902285) + " Feb 21 19:31:10 2016"));
+ '.' + ns (384902285) + " Feb 21 19:31:10 2016"));
assert (parse (".384902285 2016-02-21 19:31:10",
"%[.N] %Y-%m-%d %H:%M:%S",
- "." + ns (384902285) + " 2016-02-21 19:31:10"));
+ '.' + ns (384902285) + " 2016-02-21 19:31:10"));
assert (parse (".3849022852016-02-21 19:31:10",
"%[.N]%Y-%m-%d %H:%M:%S",
- "." + ns (384902285) + "2016-02-21 19:31:10"));
+ '.' + ns (384902285) + "2016-02-21 19:31:10"));
assert (parse ("Feb 1 2016", "%b %e %Y", "Feb 1 2016"));
assert (parse ("Feb 11 2016", "%b %e %Y", "Feb 11 2016"));
diff --git a/tests/url/driver.cxx b/tests/url/driver.cxx
index 95be244..869eed5 100644
--- a/tests/url/driver.cxx
+++ b/tests/url/driver.cxx
@@ -1,29 +1,17 @@
// file : tests/url/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
#include <utility> // move()
#include <iostream>
#include <iterator> // back_inserter
#include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.url;
-import butl.utility; // operator<<(ostream, exception)
-#else
-#include <libbutl/url.mxx>
-#include <libbutl/utility.mxx>
-#endif
+
+#include <libbutl/url.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/utf8/driver.cxx b/tests/utf8/driver.cxx
index f35e65e..ccc2870 100644
--- a/tests/utf8/driver.cxx
+++ b/tests/utf8/driver.cxx
@@ -1,24 +1,13 @@
// file : tests/utf8/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.utf8;
-import butl.utility;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/utility.mxx>
-#endif
+
+#include <libbutl/utf8.hxx>
+#include <libbutl/utility.hxx>
+
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
diff --git a/tests/uuid/driver.cxx b/tests/uuid/driver.cxx
index d8dae23..63e5bc7 100644
--- a/tests/uuid/driver.cxx
+++ b/tests/uuid/driver.cxx
@@ -5,13 +5,15 @@
# include <rpc.h> // GUID
#endif
-#include <cassert>
#include <sstream>
#include <iostream>
#include <libbutl/uuid.hxx>
#include <libbutl/uuid-io.hxx>
+#undef NDEBUG
+#include <cassert>
+
using namespace std;
using namespace butl;
diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx
index 00a317a..fee2748 100644
--- a/tests/wildcard/driver.cxx
+++ b/tests/wildcard/driver.cxx
@@ -1,36 +1,24 @@
// file : tests/wildcard/driver.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <map>
#include <string>
#include <vector>
-#include <algorithm> // sort()
-#include <exception>
#include <iostream>
-#endif
+#include <algorithm> // sort()
+#include <exception>
+#include <functional>
+#include <system_error>
-// Other includes.
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx>
+#include <libbutl/path-pattern.hxx>
-#ifdef __cpp_modules_ts
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility; // operator<<(ostream, exception)
-import butl.optional;
-import butl.filesystem;
-import butl.path_pattern;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/path-pattern.mxx>
-#endif
+#undef NDEBUG
+#include <cassert>
using namespace std;
using namespace butl;
@@ -74,8 +62,13 @@ int _CRT_glob = 0;
// through contains only the specified entry. The start directory is used if
// the first pattern component is a self-matching wildcard.
//
+// -d (print|stop)
+// If an inaccessible/dangling link is encountered, then print its path to
+// stderr and, optionally, stop the search. Meaningful in combination with
+// -sd and must follow it, if specified in the command line.
+//
// -i
-// Pass psflags::ignorable_components to the match/search functions.
+// Pass path_match_flags::match_absent to the match/search functions.
// Meaningful in combination with -sd or -sp options and must follow it, if
// specified in the command line.
//
@@ -108,6 +101,9 @@ try
bool sort (true);
path_match_flags flags (path_match_flags::follow_symlinks);
+ bool dangle_stop (false);
+ function<bool (const dir_entry&)> dangle_func;
+
int i (2);
for (; i != argc; ++i)
{
@@ -116,6 +112,34 @@ try
sort = false;
else if (o == "-i")
flags |= path_match_flags::match_absent;
+ else if (o == "-d")
+ {
+ ++i;
+
+ assert (op == "-sd" && i != argc);
+
+ string v (argv[i]);
+
+ if (v == "print")
+ {
+ dangle_func = [] (const dir_entry& de)
+ {
+ cerr << de.base () / de.path () << endl;
+ return true;
+ };
+ }
+ else if (v == "stop")
+ {
+ dangle_func = [&dangle_stop] (const dir_entry& de)
+ {
+ cerr << de.base () / de.path () << endl;
+ dangle_stop = true;
+ return false;
+ };
+ }
+ else
+ assert (false);
+ }
else
break; // End of options.
}
@@ -181,10 +205,13 @@ try
};
if (!entry)
- path_search (pattern, add, start, flags);
+ path_search (pattern, add, start, flags, dangle_func);
else
path_search (pattern, *entry, add, start, flags);
+ if (dangle_stop)
+ return 1;
+
// If the search succeeds, then test search in the directory tree
// represented by each matched path. Otherwise, if the directory tree is
// specified, then make sure that it doesn't match the pattern.
@@ -245,8 +272,13 @@ catch (const invalid_path& e)
cerr << e << ": " << e.path << endl;
return 2;
}
+catch (const system_error& e)
+{
+ cerr << e << endl;
+ return 3;
+}
catch (const exception& e)
{
cerr << e << endl;
- return 2;
+ return 4;
}
diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript
index 5f6a767..baa51aa 100644
--- a/tests/wildcard/testscript
+++ b/tests/wildcard/testscript
@@ -650,12 +650,14 @@
{
mkdir a;
touch --no-cleanup a/b;
- ^ln -s b a/l &a/l;
+ ln -s b a/l &a/l;
rm a/b;
touch a/c;
- $* a/* >/'a/c'
+ $* a/* 2>! == 3;
+ $* -d 'print' a/* >/'a/c' 2>/'a/l';
+ $* -d 'stop' a/* >! 2>/'a/l' == 1
}
}