162 files changed, 22581 insertions, 3513 deletions
diff --git a/libbutl/b.cxx b/libbutl/b.cxx
index e1caa4c..0b4472f 100644
--- a/libbutl/b.cxx
+++ b/libbutl/b.cxx
@@ -1,58 +1,19 @@
 // file      : libbutl/b.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/b.mxx>
-#endif
-
-// C includes.
+#include <libbutl/b.hxx>
 
+#include <ios>       // ios::failure
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <cstdint>
-#include <stdexcept>
-#include <functional>
-
-#include <ios>     // ios::failure
-#include <utility> // move()
+#include <utility>   // move()
 #include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.b;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.url;
-import butl.path;
-import butl.process;
-import butl.optional;
-import butl.project_name;
-import butl.standard_version;
-#endif
-
-import butl.utility;      // next_word(), eof(), etc
-import butl.path_io;
-import butl.fdstream;
-import butl.process_io;   // operator<<(ostream, process_path)
-import butl.small_vector;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/process-io.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <algorithm>
+
+#include <libbutl/utility.hxx>      // next_word(), eof(), etc
+#include <libbutl/path-io.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/process-io.hxx>   // operator<<(ostream, process_path)
+#include <libbutl/small-vector.hxx>
 
 using namespace std;
 
@@ -71,15 +32,30 @@ namespace butl
     throw runtime_error ("invalid " + d);
   }
 
-  b_project_info
-  b_info (const dir_path& project,
-          bool ext_mods,
+  void
+  b_info (std::vector<b_project_info>& r,
+          const vector<dir_path>& projects,
+          b_info_flags fl,
           uint16_t verb,
           const function<b_callback>& cmd_callback,
           const path& program,
           const dir_path& search_fallback,
           const vector<string>& ops)
   {
+    // Bail out if the project list is empty.
+    //
+    if (projects.empty ())
+      return;
+
+    // Reserve enough space in the result and save its original size.
+    //
+    size_t rn (r.size ());
+    {
+      size_t n (rn + projects.size ());
+      if (r.capacity () < n)
+        r.reserve (n);
+    }
+
     try
     {
       process_path pp (
@@ -105,6 +81,23 @@ namespace butl
         else
           vops.push_back ("-q");
 
+        string spec ("info(");
+
+        // Note that quoting is essential here.
+        //
+        for (size_t i (0); i != projects.size(); ++i)
+        {
+          if (i != 0)
+            spec += ' ';
+
+          spec += '\'' + projects[i].representation () + '\'';
+        }
+
+        if ((fl & b_info_flags::subprojects) == b_info_flags::none)
+          spec += ",no_subprojects";
+
+        spec += ')';
+
         pr = process_start_callback (
           cmd_callback ? cmd_callback : [] (const char* const*, size_t) {},
           0 /* stdin */,
@@ -112,10 +105,12 @@ namespace butl
           2 /* stderr */,
           pp,
           vops,
-          ext_mods ? nullptr : "--no-external-modules",
+          ((fl & b_info_flags::ext_mods) == b_info_flags::none
+           ? "--no-external-modules"
+           : nullptr),
           "-s",
           ops,
-          "info:", "'" + project.representation () + "'");
+          spec);
 
         pipe.out.close ();
         ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit);
@@ -145,31 +140,52 @@ namespace butl
           }
         };
 
-        b_project_info r;
-        for (string l; !eof (getline (is, l)); )
+        b_project_info pi;
+        auto add_project = [&r, &pi] ()
         {
-          if (l.compare (0, 9, "project: ") == 0)
-          {
-            string v (l, 9);
-            if (!v.empty ())
-              r.project = parse_name (move (v), "project");
-          }
-          else if (l.compare (0, 9, "version: ") == 0)
+          // Parse version string to standard version if the project loaded
+          // the version module.
+          //
+          const auto& ms (pi.modules);
+          if (find (ms.begin (), ms.end (), "version") != ms.end ())
           {
-            string v (l, 9);
-            if (!v.empty ())
             try
             {
-              r.version = standard_version (v, standard_version::allow_stub);
+              pi.version = standard_version (pi.version_string,
+                                             standard_version::allow_stub);
             }
             catch (const invalid_argument& e)
             {
-              bad_value ("version '" + v + "': " + e.what ());
+              bad_value ("version '" + pi.version_string + "': " + e.what ());
             }
           }
+
+          // Add the project info and prepare for the next project info
+          // parsing.
+          //
+          r.push_back (move (pi));
+          pi = b_project_info ();
+        };
+
+        for (string l; !eof (getline (is, l)); )
+        {
+          if (l.empty ())
+          {
+            add_project ();
+          }
+          else if (l.compare (0, 9, "project: ") == 0)
+          {
+            string v (l, 9);
+            if (!v.empty ())
+              pi.project = parse_name (move (v), "project");
+          }
+          else if (l.compare (0, 9, "version: ") == 0)
+          {
+            pi.version_string = string (l, 9);
+          }
           else if (l.compare (0, 9, "summary: ") == 0)
           {
-            r.summary = string (l, 9);
+            pi.summary = string (l, 9);
           }
           else if (l.compare (0, 5, "url: ") == 0)
           {
@@ -177,7 +193,7 @@ namespace butl
             if (!v.empty ())
             try
             {
-              r.url = url (v);
+              pi.url = url (v);
             }
             catch (const invalid_argument& e)
             {
@@ -186,17 +202,17 @@ namespace butl
           }
           else if (l.compare (0, 10, "src_root: ") == 0)
           {
-            r.src_root = parse_dir (string (l, 10), "src_root");
+            pi.src_root = parse_dir (string (l, 10), "src_root");
           }
           else if (l.compare (0, 10, "out_root: ") == 0)
           {
-            r.out_root = parse_dir (string (l, 10), "out_root");
+            pi.out_root = parse_dir (string (l, 10), "out_root");
           }
           else if (l.compare (0, 14, "amalgamation: ") == 0)
           {
             string v (l, 14);
             if (!v.empty ())
-              r.amalgamation = parse_dir (move (v), "amalgamation");
+              pi.amalgamation = parse_dir (move (v), "amalgamation");
           }
           else if (l.compare (0, 13, "subprojects: ") == 0)
           {
@@ -214,7 +230,7 @@ namespace butl
               if (p != 0)
                 sn = parse_name (string (s, 0, p), "subproject");
 
-              r.subprojects.push_back (
+              pi.subprojects.push_back (
                 b_project_info::subproject {move (sn),
                                             parse_dir (string (s, p + 1),
                                                        "subproject")});
@@ -224,20 +240,36 @@ namespace butl
           {
             string v (l, 12);
             for (size_t b (0), e (0); next_word (v, b, e); )
-              r.operations.push_back (string (v, b, e - b));
+              pi.operations.push_back (string (v, b, e - b));
           }
           else if (l.compare (0, 17, "meta-operations: ") == 0)
           {
             string v (l, 17);
             for (size_t b (0), e (0); next_word (v, b, e); )
-              r.meta_operations.push_back (string (v, b, e - b));
+              pi.meta_operations.push_back (string (v, b, e - b));
+          }
+          else if (l.compare (0, 9, "modules: ") == 0)
+          {
+            string v (l, 9);
+            for (size_t b (0), e (0); next_word (v, b, e); )
+              pi.modules.push_back (string (v, b, e - b));
           }
         }
 
         is.close (); // Detect errors.
 
         if (pr.wait ())
-          return r;
+        {
+          add_project (); // Add the remaining project info.
+
+          if (r.size () - rn == projects.size ())
+            return;
+
+          ostringstream os;
+          os << "invalid " << pp << " output: expected information for "
+             << projects.size () << " projects instead of " << r.size () - rn;
+          throw b_error (os.str (), move (pr.exit));
+        }
       }
       // Note that ios::failure inherits from std::runtime_error, so this
       // catch-clause must go last.
@@ -276,7 +308,7 @@ namespace butl
       assert (!pr.wait ());
 
       throw b_error (
-        string ("process ") + pp.recall_string () + " " + to_string (*pr.exit),
+        string ("process ") + pp.recall_string () + ' ' + to_string (*pr.exit),
         move (pr.exit));
     }
     catch (const process_error& e)
diff --git a/libbutl/b.hxx b/libbutl/b.hxx
new file mode 100644
index 0000000..d3fd2bf
--- /dev/null
+++ b/libbutl/b.hxx
@@ -0,0 +1,150 @@
+// file      : libbutl/b.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <utility>    // move()
+#include <cstddef>    // size_t
+#include <cstdint>    // uint16_t
+#include <stdexcept>  // runtime_error
+#include <functional>
+
+#include <libbutl/url.hxx>
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/project-name.hxx>
+#include <libbutl/standard-version.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  class LIBBUTL_SYMEXPORT b_error: public std::runtime_error
+  {
+  public:
+    // Build system program exit information. May be absent if the error
+    // occurred before the process has been started.
+    //
+    // Can be used by the caller to decide if to print the error message to
+    // stderr. Normally, it is not required if the process exited normally
+    // with non-zero code, since presumably it has issued diagnostics. Note
+    // that the normal() function can be used to check for this.
+    //
+    optional<process_exit> exit;
+
+    // Return true if the build2 process exited normally with non-zero code.
+    //
+    bool
+    normal () const {return exit && exit->normal () && !*exit;}
+
+    explicit
+    b_error (const std::string& description, optional<process_exit> = nullopt);
+  };
+
+  // Run `b info: <project-dir>...` command and parse and return (via argument
+  // to allow appending and for error position; see below) the build2 projects
+  // information it prints to stdout. Return the empty list if the specified
+  // project list is empty. Throw b_error on error. Note that the size of the
+  // result vector can be used to determine which project information caused
+  // the error.
+  //
+  // You can also specify the build2 verbosity level, command line callback
+  // (see process_run_callback() for details), build program search details,
+  // and additional options.
+  //
+  // Note that version_string is only parsed to standard_version if a project
+  // uses the version module. Otherwise, standard_version is empty.
+  //
+  struct b_project_info
+  {
+    using url_type = butl::url;
+
+    struct subproject
+    {
+      project_name name;  // Empty if anonymous.
+      dir_path     path;  // Relative to the project root.
+    };
+
+    project_name     project;
+    std::string      version_string;
+    standard_version version;
+    std::string      summary;
+    url_type         url;
+
+    dir_path src_root;
+    dir_path out_root;
+
+    dir_path                amalgamation; // Relative to project root and
+                                          // empty if not amalgamated.
+    std::vector<subproject> subprojects;
+
+    std::vector<std::string> operations;
+    std::vector<std::string> meta_operations;
+
+    std::vector<std::string> modules;
+  };
+
+  enum class b_info_flags: std::uint16_t
+  {
+    // Retrieve information that may come from external modules (operations,
+    // meta-operations, etc). Omitting this flag results in passing
+    // --no-external-modules to the build2 program and speeds up its
+    // execution.
+    //
+    ext_mods = 0x1,
+
+    // Discover subprojects. Omitting this flag results in passing
+    // no_subprojects info meta-operation parameter to the build2 program and
+    // speeds up its execution.
+    //
+    subprojects = 0x2,
+
+    none = 0
+  };
+
+  inline b_info_flags operator& (b_info_flags, b_info_flags);
+  inline b_info_flags operator| (b_info_flags, b_info_flags);
+  inline b_info_flags operator&= (b_info_flags&, b_info_flags);
+  inline b_info_flags operator|= (b_info_flags&, b_info_flags);
+
+  using b_callback = void (const char* const args[], std::size_t n);
+
+  LIBBUTL_SYMEXPORT void
+  b_info (std::vector<b_project_info>& result,
+          const std::vector<dir_path>& projects,
+          b_info_flags,
+          std::uint16_t verb = 1,
+          const std::function<b_callback>& cmd_callback = {},
+          const path& program = path ("b"),
+          const dir_path& search_fallback = {},
+          const std::vector<std::string>& options = {});
+
+  // As above but retrieve information for a single project.
+  //
+  inline b_project_info
+  b_info (const dir_path& project,
+          b_info_flags fl,
+          std::uint16_t verb = 1,
+          const std::function<b_callback>& cmd_callback = {},
+          const path& program = path ("b"),
+          const dir_path& search_fallback = {},
+          const std::vector<std::string>& options = {})
+  {
+    std::vector<b_project_info> r;
+    b_info (r,
+            std::vector<dir_path> ({project}),
+            fl,
+            verb,
+            cmd_callback,
+            program,
+            search_fallback,
+            options);
+
+    return std::move (r[0]);
+  }
+}
+
+#include <libbutl/b.ixx>
diff --git a/libbutl/b.ixx b/libbutl/b.ixx
new file mode 100644
index 0000000..1667101
--- /dev/null
+++ b/libbutl/b.ixx
@@ -0,0 +1,31 @@
+// file      : libbutl/b.ixx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+namespace butl
+{
+  // b_info_flags
+  //
+  inline b_info_flags operator& (b_info_flags x, b_info_flags y)
+  {
+    return x &= y;
+  }
+
+  inline b_info_flags operator| (b_info_flags x, b_info_flags y)
+  {
+    return x |= y;
+  }
+
+  inline b_info_flags operator&= (b_info_flags& x, b_info_flags y)
+  {
+    return x = static_cast<b_info_flags> (
+      static_cast<std::uint16_t> (x) &
+      static_cast<std::uint16_t> (y));
+  }
+
+  inline b_info_flags operator|= (b_info_flags& x, b_info_flags y)
+  {
+    return x = static_cast<b_info_flags> (
+      static_cast<std::uint16_t> (x) |
+      static_cast<std::uint16_t> (y));
+  }
+}
diff --git a/libbutl/b.mxx b/libbutl/b.mxx
deleted file mode 100644
index 9e12711..0000000
--- a/libbutl/b.mxx
+++ /dev/null
@@ -1,115 +0,0 @@
-// file      : libbutl/b.mxx -*- C++ -*-
-// license   : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#pragma once
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>    // size_tu
-#include <cstdint>    // uint16_t
-#include <stdexcept>  // runtime_error
-#include <functional>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.b;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.url;
-import butl.path;
-import butl.process;
-import butl.optional;
-import butl.project_name;
-import butl.standard_version;
-#else
-#include <libbutl/url.mxx>
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/project-name.mxx>
-#include <libbutl/standard-version.mxx>
-#endif
-
-#include <libbutl/export.hxx>
-
-LIBBUTL_MODEXPORT namespace butl
-{
-  class LIBBUTL_SYMEXPORT b_error: public std::runtime_error
-  {
-  public:
-    // Build system program exit information. May be absent if the error
-    // occured before the process has been started.
-    //
-    // Can be used by the caller to decide if to print the error message to
-    // stderr. Normally, it is not required if the process exited normally
-    // with non-zero code, since presumably it has issued diagnostics. Note
-    // that the normal() function can be used to check for this.
-    //
-    optional<process_exit> exit;
-
-    // Return true if the build2 process exited normally with non-zero code.
-    //
-    bool
-    normal () const {return exit && exit->normal () && !*exit;}
-
-    explicit
-    b_error (const std::string& description, optional<process_exit> = nullopt);
-  };
-
-  // Run `b info: <project-dir>` command and parse and return the build2
-  // project information it prints to stdout. Throw b_error on error.
-  //
-  // Unless you need information that may come from external modules
-  // (operations, meta-operations, etc), pass false as the ext_mods argument,
-  // which results in passing --no-external-modules to the build2 program and
-  // speeds up its execution.
-  //
-  // You can also specify the build2 verbosity level, command line callback
-  // (see process_run_callback() for details), build program search details
-  // and additional options.
-  //
-  struct b_project_info
-  {
-    using url_type = butl::url;
-
-    struct subproject
-    {
-      project_name name;  // Empty if anonymous.
-      dir_path     path;  // Relative to the project root.
-    };
-
-    project_name     project;
-    standard_version version;
-    std::string      summary;
-    url_type         url;
-
-    dir_path src_root;
-    dir_path out_root;
-
-    dir_path                amalgamation; // Relative to project root and
-                                          // empty if not amalgmated.
-    std::vector<subproject> subprojects;
-
-    std::vector<std::string> operations;
-    std::vector<std::string> meta_operations;
-  };
-
-  using b_callback = void (const char* const args[], std::size_t n);
-
-  LIBBUTL_SYMEXPORT b_project_info
-  b_info (const dir_path& project,
-          bool ext_mods,
-          std::uint16_t verb = 1,
-          const std::function<b_callback>& cmd_callback = {},
-          const path& program = path ("b"),
-          const dir_path& search_fallback = {},
-          const std::vector<std::string>& options = {});
-}
diff --git a/libbutl/backtrace.cxx b/libbutl/backtrace.cxx
index 8c9c6ae..347e231 100644
--- a/libbutl/backtrace.cxx
+++ b/libbutl/backtrace.cxx
@@ -1,15 +1,14 @@
 // file      : libbutl/backtrace.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/backtrace.mxx>
-#endif
+#include <libbutl/backtrace.hxx>
 
 // We only enable backtrace during bootstrap if we can do it without any
 // complications of the build scripts/makefiles.
 //
 // With glibc linking with -rdynamic gives (non-static) function names.
-// FreeBSD/NetBSD requires explicitly linking -lexecinfo.
+// FreeBSD/NetBSD requires explicitly linking -lexecinfo. OpenBSD only has
+// this functionality built-in from 7.0 and requires -lexecinfo.
 //
 // Note that some libc implementation on Linux (most notably, musl), don't
 // support this, at least not out of the box.
@@ -20,6 +19,11 @@
       defined(__FreeBSD__) || \
       defined(__NetBSD__)
 #    define LIBBUTL_BACKTRACE
+#  elif defined (__OpenBSD__)
+#    include <sys/param.h> // OpenBSD (yyyymm)
+#    if OpenBSD >= 202110  // 7.0 was released in October 2021.
+#      define LIBBUTL_BACKTRACE
+#    endif
 #  endif
 #else
 #  if defined(__GLIBC__) || \
@@ -35,30 +39,12 @@
 
 #include <cassert>
 
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
 #ifdef LIBBUTL_BACKTRACE
 #  include <memory>  // unique_ptr
 #  include <cstddef> // size_t
 #endif
 
 #include <exception>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.backtrace;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-#endif
 
 using namespace std;
 
diff --git a/libbutl/backtrace.mxx b/libbutl/backtrace.hxx
index f5a63d5..6afb6ea 100644
--- a/libbutl/backtrace.mxx
+++ b/libbutl/backtrace.hxx
@@ -1,28 +1,13 @@
-// file      : libbutl/backtrace.mxx -*- C++ -*-
+// file      : libbutl/backtrace.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.backtrace;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Return the calling thread's backtrace or empty string if this
   // functionality is not supported or an error has occurred. The exact
diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx
index 527c6af..282f7c2 100644
--- a/libbutl/base64.cxx
+++ b/libbutl/base64.cxx
@@ -1,37 +1,13 @@
 // file      : libbutl/base64.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/base64.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
+#include <libbutl/base64.hxx>
 
 #include <cstddef>   // size_t
 #include <istream>
 #include <ostream>
 #include <iterator>  // {istreambuf, ostreambuf, back_insert}_iterator
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.base64;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
 
 using namespace std;
 
@@ -40,19 +16,20 @@ namespace butl
   static const char codes[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
+  static const char codes_url[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
   // base64-encode the data in the iterator range [i, e). Write the encoded
-  // data starting at the iterator position o.
+  // data starting at the iterator position o. If url is true, encode using
+  // base64url.
   //
   template <typename I, typename O>
   static void
-  base64_encode (I& i, const I& e, O& o)
+  base64_encode (I& i, const I& e, O& o, bool url = false)
   {
     const size_t un (65); // Non-existing index of the codes string.
     for (size_t n (0); i != e; ++n)
     {
-      if (n && n % 19 == 0)
-        *o++ = '\n'; // Split into lines, like the base64 utility does.
-
       auto next = [&i] () {return static_cast<unsigned char> (*i++);};
 
       unsigned char c (next ());
@@ -75,10 +52,26 @@ namespace butl
         i4 = c & 0x3F;
       }
 
-      *o++ = codes[i1];
-      *o++ = codes[i2];
-      *o++ = i3 == un ? '=' : codes[i3];
-      *o++ = i4 == un ? '=' : codes[i4];
+      if (!url)
+      {
+        if (n && n % 19 == 0)
+          *o++ = '\n'; // Split into lines, like the base64 utility does.
+
+        *o++ = codes[i1];
+        *o++ = codes[i2];
+        *o++ = i3 == un ? '=' : codes[i3];
+        *o++ = i4 == un ? '=' : codes[i4];
+      }
+      // base64url: different 63rd and 64th characters and no padding or
+      // newlines.
+      //
+      else
+      {
+        *o++ = codes_url[i1];
+        *o++ = codes_url[i2];
+        if (i3 != un) *o++ = codes_url[i3];
+        if (i4 != un) *o++ = codes_url[i4];
+      }
     }
   }
 
@@ -194,6 +187,47 @@ namespace butl
     return r;
   }
 
+  string
+  base64url_encode (istream& is)
+  {
+    if (!is.good ())
+      throw invalid_argument ("bad stream");
+
+    string r;
+    istreambuf_iterator<char> i (is);
+    back_insert_iterator<string> o (r);
+
+    base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+    is.setstate (istream::eofbit);
+    return r;
+  }
+
+  void
+  base64url_encode (ostream& os, istream& is)
+  {
+    if (!os.good () || !is.good ())
+      throw invalid_argument ("bad stream");
+
+    istreambuf_iterator<char> i (is);
+    ostreambuf_iterator<char> o (os);
+    base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+
+    if (o.failed ())
+      os.setstate (istream::badbit);
+
+    is.setstate (istream::eofbit);
+  }
+
+  string
+  base64url_encode (const std::vector<char>& v)
+  {
+    string r;
+    back_insert_iterator<string> o (r);
+    auto i (v.begin ());
+    base64_encode (i, v.end (), o, true /* url */);
+    return r;
+  }
+
   void
   base64_decode (ostream& os, istream& is)
   {
diff --git a/libbutl/base64.mxx b/libbutl/base64.hxx
index 698b7e2..a0d1450 100644
--- a/libbutl/base64.mxx
+++ b/libbutl/base64.hxx
@@ -1,31 +1,15 @@
-// file      : libbutl/base64.mxx -*- C++ -*-
+// file      : libbutl/base64.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <iosfwd>
 #include <string>
 #include <vector>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.base64;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Base64-encode a stream or a buffer. Split the output into 76 char-long
   // lines (new line is the 77th). If reading from a stream, check if it has
@@ -43,6 +27,25 @@ LIBBUTL_MODEXPORT namespace butl
   LIBBUTL_SYMEXPORT std::string
   base64_encode (const std::vector<char>&);
 
+  // Encode a stream or a buffer using base64url (RFC4648), a base64 variant
+  // with different 62nd and 63rd alphabet characters (- and _ instead of +
+  // and /; to make it filesystem safe) and optional padding because the
+  // padding character `=` would have to be percent-encoded to be safe in
+  // URLs. This implementation does not output any padding, newlines or any
+  // other whitespace (which is required, for example, by RFC7519: JSON Web
+  // Token (JWT) and RFC7515: JSON Web Signature (JWS)).
+  //
+  // Note that base64url decoding has not yet been implemented.
+  //
+  LIBBUTL_SYMEXPORT void
+  base64url_encode (std::ostream&, std::istream&);
+
+  LIBBUTL_SYMEXPORT std::string
+  base64url_encode (std::istream&);
+
+  LIBBUTL_SYMEXPORT std::string
+  base64url_encode (const std::vector<char>&);
+
   // Base64-decode a stream or a string. Throw invalid_argument if the input
   // is not a valid base64 representation. If reading from a stream, check if
   // it has badbit, failbit, or eofbit set and throw invalid_argument if
diff --git a/libbutl/bufstreambuf.cxx b/libbutl/bufstreambuf.cxx
new file mode 100644
index 0000000..d152166
--- /dev/null
+++ b/libbutl/bufstreambuf.cxx
@@ -0,0 +1,13 @@
+// file      : libbutl/bufstreambuf.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbutl/bufstreambuf.hxx>
+
+namespace butl
+{
+  bufstreambuf::
+  ~bufstreambuf ()
+  {
+    // Vtable.
+  }
+}
diff --git a/libbutl/bufstreambuf.hxx b/libbutl/bufstreambuf.hxx
new file mode 100644
index 0000000..a49b2d0
--- /dev/null
+++ b/libbutl/bufstreambuf.hxx
@@ -0,0 +1,67 @@
+// file      : libbutl/bufstreambuf.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <cstdint>   // uint64_t
+#include <streambuf>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  // A buffered streambuf interface that exposes its buffer for direct scan
+  // and provides a notion of logical position. See fdstreambuf for background
+  // and motivation.
+  //
+  class LIBBUTL_SYMEXPORT bufstreambuf: public std::basic_streambuf<char>
+  {
+  public:
+    using base = std::basic_streambuf<char>;
+
+    using int_type = base::int_type;
+    using traits_type = base::traits_type;
+
+    using pos_type = base::pos_type; // std::streampos
+    using off_type = base::off_type; // std::streamoff
+
+  public:
+    explicit
+    bufstreambuf (std::uint64_t pos = 0): off_ (pos) {}
+
+    virtual
+    ~bufstreambuf ();
+
+    // basic_streambuf input interface.
+    //
+  public:
+
+    // Direct access to the get area. Use with caution.
+    //
+    using base::gptr;
+    using base::egptr;
+    using base::gbump;
+
+    // Return the (logical) position of the next byte to be read.
+    //
+    // Note that on Windows when reading in the text mode the logical position
+    // may differ from the physical file descriptor position due to the CRLF
+    // character sequence translation. See the fdstreambuf::seekoff()
+    // implementation for more background on this issue.
+    //
+    std::uint64_t
+    tellg () const {return off_ - (egptr () - gptr ());}
+
+    // basic_streambuf output interface.
+    //
+  public:
+
+    // Return the (logical) position of the next byte to be written.
+    //
+    std::uint64_t
+    tellp () const {return off_ + (pptr () - pbase ());}
+
+  protected:
+    std::uint64_t off_;
+  };
+}
diff --git a/libbutl/buildfile b/libbutl/buildfile
index 6526900..bbecf43 100644
--- a/libbutl/buildfile
+++ b/libbutl/buildfile
@@ -1,37 +1,37 @@
 # file      : libbutl/buildfile
 # license   : MIT; see accompanying LICENSE file
 
-# This library was modularized using the Modules TS semantics (with support
-# for dual, module/header consumption) which was subsequently partially
-# dismantled. We, however, kept some of the changes in anticipation that they
-# would be useful when attempting to modularize using the merged modules
-# semantics. Specifically, there are currently headers with both .mxx and .hxx
-# extensions and the code is littered with the `#if __cpp_[lib_]modules_ts`
-# blocks. Note that it's important for the auto-generated header support
-# that the default extension for hxx{} is .hxx.
-#
-# @@ If/when going back to using mxx{}, make sure to cleanup explicit .mxx.
-#
-lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io            \
-                                -win32-utility              \
-                                -version                    \
-                                -builtin-options}           \
-  hxx{**.mxx} {hxx}{version} {hxx ixx cxx}{builtin-options}
+lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io         \
+                                -win32-utility           \
+                                -mingw-*                 \
+                                -version                 \
+                                -builtin-options}        \
+           {hxx}{version} {hxx ixx cxx}{builtin-options}
 
 tclass = $cxx.target.class
 tsys   = $cxx.target.system
 
 windows = ($tclass == 'windows')
 
-# Exclude these from compilation on non-Windows targets.
+# Whether to use our own implementation of C++14 threads on MinGW (note:
+# requires Windows 7 or later).
+#
+# Note that for now we use built-in POSIX thread support during bootstrap
+# (which, as a side effect, verifies we still use MinGW GCC configured with
+# POSIX support, which we still need for TLS, exceptions, and thread-safe
+# static locals).
+#
+mingw_stdthread = ($tsys == 'mingw32')
+
+# Exclude these from compilation on targets where they do not apply.
 #
 lib{butl}: {hxx ixx cxx}{win32-utility}: include = $windows
+lib{butl}: hxx{mingw-*}: include = $mingw_stdthread
 
-# Our C-files are included into sha256.cxx (sha256c.c) and timestamp.cxx
-# (strptime.c timelocal.h timelocal.c), so treat them as files exclude from
-# the compilation.
+# Our C-files are always included into C++-files that wrap the corresponding
+# API so treat them as files to exclude from the compilation.
 #
-lib{butl}: file{*.c *.h}
+lib{butl}: file{**.c **.h}
 
 # Platform-specific UUID implementations.
 #
@@ -39,6 +39,13 @@ lib{butl}: cxx{uuid-linux}:   include = ($tclass == 'linux')
 lib{butl}: cxx{uuid-macos}:   include = ($tclass == 'macos')
 lib{butl}: cxx{uuid-windows}: include = $windows
 lib{butl}: cxx{uuid-freebsd}: include = ($tsys == 'freebsd' || $tsys == 'netbsd')
+lib{butl}: cxx{uuid-openbsd}: include = ($tsys == 'openbsd')
+
+# GCC prior to version 6 has flaky `#pragma GCC diagnostic` so we have to
+# disable certain warnings outright.
+#
+if ($cxx.id == 'gcc' && $cxx.version.major < 6)
+  cc.coptions += -Wno-unused-function
 
 # Additional system libraries.
 #
@@ -58,10 +65,18 @@ switch $tclass, $tsys
 
   case 'bsd', 'freebsd' | 'netbsd'
     cxx.libs += -lexecinfo
+
+  case 'bsd', 'openbsd'
+  {
+    # Built-in libexecinfo is only available since OpenBSD 7.0.
+    #
+    if (([uint64] $regex.replace($cxx.target.version, '(\d+)\..+', '\1')) >= 7)
+      cxx.libs += -lexecinfo
+  }
 }
 
 if! $windows
-  cxx.libs += -lpthread
+  cxx.libs += -pthread
 
 # Include the generated version header into the distribution (so that we don't
 # pick up an installed one) and don't remove it when cleaning in src (so that
@@ -78,6 +93,9 @@ hxx{version}:
 #
 cxx.poptions =+ "-I$out_root" "-I$src_root"
 
+if $mingw_stdthread
+  cxx.poptions += -D_WIN32_WINNT=0x0601 -DLIBBUTL_MINGW_STDTHREAD
+
 obja{*} bmia{*}: cxx.poptions += -DLIBBUTL_STATIC_BUILD
 objs{*} bmis{*}: cxx.poptions += -DLIBBUTL_SHARED_BUILD
 
@@ -85,6 +103,9 @@ objs{*} bmis{*}: cxx.poptions += -DLIBBUTL_SHARED_BUILD
 #
 lib{butl}: cxx.export.poptions = "-I$out_root" "-I$src_root"
 
+if $mingw_stdthread
+  lib{butl}: cxx.export.poptions += -D_WIN32_WINNT=0x0601 -DLIBBUTL_MINGW_STDTHREAD
+
 liba{butl}: cxx.export.poptions += -DLIBBUTL_STATIC
 libs{butl}: cxx.export.poptions += -DLIBBUTL_SHARED
 
diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx
index 536f97d..98a47cf 100644
--- a/libbutl/builtin-options.cxx
+++ b/libbutl/builtin-options.cxx
@@ -15,8 +15,10 @@
 #include <set>
 #include <string>
 #include <vector>
+#include <utility>
 #include <ostream>
 #include <sstream>
+#include <cstring>
 
 namespace butl
 {
@@ -25,7 +27,7 @@ namespace butl
     // unknown_option
     //
     unknown_option::
-    ~unknown_option () throw ()
+    ~unknown_option () noexcept
     {
     }
 
@@ -36,7 +38,7 @@ namespace butl
     }
 
     const char* unknown_option::
-    what () const throw ()
+    what () const noexcept
     {
       return "unknown option";
     }
@@ -44,7 +46,7 @@ namespace butl
     // unknown_argument
     //
     unknown_argument::
-    ~unknown_argument () throw ()
+    ~unknown_argument () noexcept
     {
     }
 
@@ -55,7 +57,7 @@ namespace butl
     }
 
     const char* unknown_argument::
-    what () const throw ()
+    what () const noexcept
     {
       return "unknown argument";
     }
@@ -63,7 +65,7 @@ namespace butl
     // missing_value
     //
     missing_value::
-    ~missing_value () throw ()
+    ~missing_value () noexcept
     {
     }
 
@@ -74,7 +76,7 @@ namespace butl
     }
 
     const char* missing_value::
-    what () const throw ()
+    what () const noexcept
     {
       return "missing option value";
     }
@@ -82,7 +84,7 @@ namespace butl
     // invalid_value
     //
     invalid_value::
-    ~invalid_value () throw ()
+    ~invalid_value () noexcept
     {
     }
 
@@ -97,7 +99,7 @@ namespace butl
     }
 
     const char* invalid_value::
-    what () const throw ()
+    what () const noexcept
     {
       return "invalid option value";
     }
@@ -111,7 +113,7 @@ namespace butl
     }
 
     const char* eos_reached::
-    what () const throw ()
+    what () const noexcept
     {
       return "end of argument stream reached";
     }
@@ -158,6 +160,7 @@ namespace butl
         else
           ++i_;
 
+        ++start_position_;
         return r;
       }
       else
@@ -168,11 +171,20 @@ namespace butl
     skip ()
     {
       if (i_ < argc_)
+      {
         ++i_;
+        ++start_position_;
+      }
       else
         throw eos_reached ();
     }
 
+    std::size_t argv_scanner::
+    position ()
+    {
+      return start_position_;
+    }
+
     // vector_scanner
     //
     bool vector_scanner::
@@ -208,6 +220,12 @@ namespace butl
         throw eos_reached ();
     }
 
+    std::size_t vector_scanner::
+    position ()
+    {
+      return start_position_ + i_;
+    }
+
     template <typename X>
     struct parser
     {
@@ -235,10 +253,31 @@ namespace butl
     struct parser<bool>
     {
       static void
-      parse (bool& x, scanner& s)
+      parse (bool& x, bool& xs, scanner& s)
       {
-        s.next ();
-        x = true;
+        const char* o (s.next ());
+
+        if (s.more ())
+        {
+          const char* v (s.next ());
+
+          if (std::strcmp (v, "1")    == 0 ||
+              std::strcmp (v, "true") == 0 ||
+              std::strcmp (v, "TRUE") == 0 ||
+              std::strcmp (v, "True") == 0)
+            x = true;
+          else if (std::strcmp (v, "0")     == 0 ||
+                   std::strcmp (v, "false") == 0 ||
+                   std::strcmp (v, "FALSE") == 0 ||
+                   std::strcmp (v, "False") == 0)
+            x = false;
+          else
+            throw invalid_value (o, v);
+        }
+        else
+          throw missing_value (o);
+
+        xs = true;
       }
     };
 
@@ -260,6 +299,17 @@ namespace butl
     };
 
     template <typename X>
+    struct parser<std::pair<X, std::size_t> >
+    {
+      static void
+      parse (std::pair<X, std::size_t>& x, bool& xs, scanner& s)
+      {
+        x.second = s.position ();
+        parser<X>::parse (x.first, xs, s);
+      }
+    };
+
+    template <typename X>
     struct parser<std::vector<X> >
     {
       static void
@@ -297,6 +347,7 @@ namespace butl
 
         if (s.more ())
         {
+          std::size_t pos (s.position ());
           std::string ov (s.next ());
           std::string::size_type p = ov.find ('=');
 
@@ -316,14 +367,14 @@ namespace butl
           if (!kstr.empty ())
           {
             av[1] = const_cast<char*> (kstr.c_str ());
-            argv_scanner s (0, ac, av);
+            argv_scanner s (0, ac, av, false, pos);
             parser<K>::parse (k, dummy, s);
           }
 
           if (!vstr.empty ())
           {
             av[1] = const_cast<char*> (vstr.c_str ());
-            argv_scanner s (0, ac, av);
+            argv_scanner s (0, ac, av, false, pos);
             parser<V>::parse (v, dummy, s);
           }
 
@@ -336,6 +387,56 @@ namespace butl
       }
     };
 
+    template <typename K, typename V, typename C>
+    struct parser<std::multimap<K, V, C> >
+    {
+      static void
+      parse (std::multimap<K, V, C>& m, bool& xs, scanner& s)
+      {
+        const char* o (s.next ());
+
+        if (s.more ())
+        {
+          std::size_t pos (s.position ());
+          std::string ov (s.next ());
+          std::string::size_type p = ov.find ('=');
+
+          K k = K ();
+          V v = V ();
+          std::string kstr (ov, 0, p);
+          std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ()));
+
+          int ac (2);
+          char* av[] =
+          {
+            const_cast<char*> (o),
+            0
+          };
+
+          bool dummy;
+          if (!kstr.empty ())
+          {
+            av[1] = const_cast<char*> (kstr.c_str ());
+            argv_scanner s (0, ac, av, false, pos);
+            parser<K>::parse (k, dummy, s);
+          }
+
+          if (!vstr.empty ())
+          {
+            av[1] = const_cast<char*> (vstr.c_str ());
+            argv_scanner s (0, ac, av, false, pos);
+            parser<V>::parse (v, dummy, s);
+          }
+
+          m.insert (typename std::multimap<K, V, C>::value_type (k, v));
+        }
+        else
+          throw missing_value (o);
+
+        xs = true;
+      }
+    };
+
     template <typename X, typename T, T X::*M>
     void
     thunk (X& x, scanner& s)
@@ -343,6 +444,14 @@ namespace butl
       parser<T>::parse (x.*M, s);
     }
 
+    template <typename X, bool X::*M>
+    void
+    thunk (X& x, scanner& s)
+    {
+      s.next ();
+      x.*M = true;
+    }
+
     template <typename X, typename T, T X::*M, bool X::*S>
     void
     thunk (X& x, scanner& s)
@@ -353,7 +462,6 @@ namespace butl
 }
 
 #include <map>
-#include <cstring>
 
 namespace butl
 {
@@ -704,15 +812,15 @@ namespace butl
     _cli_cp_options_map_init ()
     {
       _cli_cp_options_map_["--recursive"] =
-      &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+      &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
       _cli_cp_options_map_["-R"] =
-      &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+      &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
       _cli_cp_options_map_["-r"] =
-      &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >;
+      &::butl::cli::thunk< cp_options, &cp_options::recursive_ >;
       _cli_cp_options_map_["--preserve"] =
-      &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >;
+      &::butl::cli::thunk< cp_options, &cp_options::preserve_ >;
       _cli_cp_options_map_["-p"] =
-      &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >;
+      &::butl::cli::thunk< cp_options, &cp_options::preserve_ >;
     }
   };
 
@@ -978,9 +1086,9 @@ namespace butl
     _cli_date_options_map_init ()
     {
       _cli_date_options_map_["--utc"] =
-      &::butl::cli::thunk< date_options, bool, &date_options::utc_ >;
+      &::butl::cli::thunk< date_options, &date_options::utc_ >;
       _cli_date_options_map_["-u"] =
-      &::butl::cli::thunk< date_options, bool, &date_options::utc_ >;
+      &::butl::cli::thunk< date_options, &date_options::utc_ >;
     }
   };
 
@@ -1163,6 +1271,269 @@ namespace butl
     return r;
   }
 
+  // find_options
+  //
+
+  find_options::
+  find_options ()
+  {
+  }
+
+  bool find_options::
+  parse (int& argc,
+         char** argv,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  bool find_options::
+  parse (int start,
+         int& argc,
+         char** argv,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (start, argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  bool find_options::
+  parse (int& argc,
+         char** argv,
+         int& end,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    end = s.end ();
+    return r;
+  }
+
+  bool find_options::
+  parse (int start,
+         int& argc,
+         char** argv,
+         int& end,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (start, argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    end = s.end ();
+    return r;
+  }
+
+  bool find_options::
+  parse (::butl::cli::scanner& s,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  typedef
+  std::map<std::string, void (*) (find_options&, ::butl::cli::scanner&)>
+  _cli_find_options_map;
+
+  static _cli_find_options_map _cli_find_options_map_;
+
+  struct _cli_find_options_map_init
+  {
+    _cli_find_options_map_init ()
+    {
+    }
+  };
+
+  static _cli_find_options_map_init _cli_find_options_map_init_;
+
+  bool find_options::
+  _parse (const char* o, ::butl::cli::scanner& s)
+  {
+    _cli_find_options_map::const_iterator i (_cli_find_options_map_.find (o));
+
+    if (i != _cli_find_options_map_.end ())
+    {
+      (*(i->second)) (*this, s);
+      return true;
+    }
+
+    return false;
+  }
+
+  bool find_options::
+  _parse (::butl::cli::scanner& s,
+          ::butl::cli::unknown_mode opt_mode,
+          ::butl::cli::unknown_mode arg_mode)
+  {
+    // Can't skip combined flags (--no-combined-flags).
+    //
+    assert (opt_mode != ::butl::cli::unknown_mode::skip);
+
+    bool r = false;
+    bool opt = true;
+
+    while (s.more ())
+    {
+      const char* o = s.peek ();
+
+      if (std::strcmp (o, "--") == 0)
+      {
+        opt = false;
+      }
+
+      if (opt)
+      {
+        if (_parse (o, s))
+        {
+          r = true;
+          continue;
+        }
+
+        if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
+        {
+          // Handle combined option values.
+          //
+          std::string co;
+          if (const char* v = std::strchr (o, '='))
+          {
+            co.assign (o, 0, v - o);
+            ++v;
+
+            int ac (2);
+            char* av[] =
+            {
+              const_cast<char*> (co.c_str ()),
+              const_cast<char*> (v)
+            };
+
+            ::butl::cli::argv_scanner ns (0, ac, av);
+
+            if (_parse (co.c_str (), ns))
+            {
+              // Parsed the option but not its value?
+              //
+              if (ns.end () != 2)
+                throw ::butl::cli::invalid_value (co, v);
+
+              s.next ();
+              r = true;
+              continue;
+            }
+            else
+            {
+              // Set the unknown option and fall through.
+              //
+              o = co.c_str ();
+            }
+          }
+
+          // Handle combined flags.
+          //
+          char cf[3];
+          {
+            const char* p = o + 1;
+            for (; *p != '\0'; ++p)
+            {
+              if (!((*p >= 'a' && *p <= 'z') ||
+                    (*p >= 'A' && *p <= 'Z') ||
+                    (*p >= '0' && *p <= '9')))
+                break;
+            }
+
+            if (*p == '\0')
+            {
+              for (p = o + 1; *p != '\0'; ++p)
+              {
+                std::strcpy (cf, "-");
+                cf[1] = *p;
+                cf[2] = '\0';
+
+                int ac (1);
+                char* av[] =
+                {
+                  cf
+                };
+
+                ::butl::cli::argv_scanner ns (0, ac, av);
+
+                if (!_parse (cf, ns))
+                  break;
+              }
+
+              if (*p == '\0')
+              {
+                // All handled.
+                //
+                s.next ();
+                r = true;
+                continue;
+              }
+              else
+              {
+                // Set the unknown option and fall through.
+                //
+                o = cf;
+              }
+            }
+          }
+
+          switch (opt_mode)
+          {
+            case ::butl::cli::unknown_mode::skip:
+            {
+              s.skip ();
+              r = true;
+              continue;
+            }
+            case ::butl::cli::unknown_mode::stop:
+            {
+              break;
+            }
+            case ::butl::cli::unknown_mode::fail:
+            {
+              throw ::butl::cli::unknown_option (o);
+            }
+          }
+
+          break;
+        }
+      }
+
+      switch (arg_mode)
+      {
+        case ::butl::cli::unknown_mode::skip:
+        {
+          s.skip ();
+          r = true;
+          continue;
+        }
+        case ::butl::cli::unknown_mode::stop:
+        {
+          break;
+        }
+        case ::butl::cli::unknown_mode::fail:
+        {
+          throw ::butl::cli::unknown_argument (o);
+        }
+      }
+
+      break;
+    }
+
+    return r;
+  }
+
   // ln_options
   //
 
@@ -1246,9 +1617,9 @@ namespace butl
     _cli_ln_options_map_init ()
     {
       _cli_ln_options_map_["--symbolic"] =
-      &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >;
+      &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >;
       _cli_ln_options_map_["-s"] =
-      &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >;
+      &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >;
     }
   };
 
@@ -1514,9 +1885,9 @@ namespace butl
     _cli_mkdir_options_map_init ()
     {
       _cli_mkdir_options_map_["--parents"] =
-      &::butl::cli::thunk< mkdir_options, bool, &mkdir_options::parents_ >;
+      &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >;
       _cli_mkdir_options_map_["-p"] =
-      &::butl::cli::thunk< mkdir_options, bool, &mkdir_options::parents_ >;
+      &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >;
     }
   };
 
@@ -1782,9 +2153,9 @@ namespace butl
     _cli_mv_options_map_init ()
     {
       _cli_mv_options_map_["--force"] =
-      &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >;
+      &::butl::cli::thunk< mv_options, &mv_options::force_ >;
       _cli_mv_options_map_["-f"] =
-      &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >;
+      &::butl::cli::thunk< mv_options, &mv_options::force_ >;
     }
   };
 
@@ -2051,13 +2422,13 @@ namespace butl
     _cli_rm_options_map_init ()
     {
       _cli_rm_options_map_["--recursive"] =
-      &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >;
+      &::butl::cli::thunk< rm_options, &rm_options::recursive_ >;
       _cli_rm_options_map_["-r"] =
-      &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >;
+      &::butl::cli::thunk< rm_options, &rm_options::recursive_ >;
       _cli_rm_options_map_["--force"] =
-      &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >;
+      &::butl::cli::thunk< rm_options, &rm_options::force_ >;
       _cli_rm_options_map_["-f"] =
-      &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >;
+      &::butl::cli::thunk< rm_options, &rm_options::force_ >;
     }
   };
 
@@ -2323,9 +2694,9 @@ namespace butl
     _cli_rmdir_options_map_init ()
     {
       _cli_rmdir_options_map_["--force"] =
-      &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >;
+      &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >;
       _cli_rmdir_options_map_["-f"] =
-      &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >;
+      &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >;
     }
   };
 
@@ -2594,13 +2965,13 @@ namespace butl
     _cli_sed_options_map_init ()
     {
       _cli_sed_options_map_["--quiet"] =
-      &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >;
+      &::butl::cli::thunk< sed_options, &sed_options::quiet_ >;
       _cli_sed_options_map_["-n"] =
-      &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >;
+      &::butl::cli::thunk< sed_options, &sed_options::quiet_ >;
       _cli_sed_options_map_["--in-place"] =
-      &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >;
+      &::butl::cli::thunk< sed_options, &sed_options::in_place_ >;
       _cli_sed_options_map_["-i"] =
-      &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >;
+      &::butl::cli::thunk< sed_options, &sed_options::in_place_ >;
       _cli_sed_options_map_["--expression"] =
       &::butl::cli::thunk< sed_options, std::vector<std::string>, &sed_options::expression_,
         &sed_options::expression_specified_ >;
@@ -3136,13 +3507,13 @@ namespace butl
     _cli_test_options_map_init ()
     {
       _cli_test_options_map_["--file"] =
-      &::butl::cli::thunk< test_options, bool, &test_options::file_ >;
+      &::butl::cli::thunk< test_options, &test_options::file_ >;
       _cli_test_options_map_["-f"] =
-      &::butl::cli::thunk< test_options, bool, &test_options::file_ >;
+      &::butl::cli::thunk< test_options, &test_options::file_ >;
       _cli_test_options_map_["--directory"] =
-      &::butl::cli::thunk< test_options, bool, &test_options::directory_ >;
+      &::butl::cli::thunk< test_options, &test_options::directory_ >;
       _cli_test_options_map_["-d"] =
-      &::butl::cli::thunk< test_options, bool, &test_options::directory_ >;
+      &::butl::cli::thunk< test_options, &test_options::directory_ >;
     }
   };
 
diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx
index b389298..70179dd 100644
--- a/libbutl/builtin-options.hxx
+++ b/libbutl/builtin-options.hxx
@@ -68,7 +68,7 @@ namespace butl
     {
       public:
       virtual
-      ~unknown_option () throw ();
+      ~unknown_option () noexcept;
 
       unknown_option (const std::string& option);
 
@@ -79,7 +79,7 @@ namespace butl
       print (::std::ostream&) const;
 
       virtual const char*
-      what () const throw ();
+      what () const noexcept;
 
       private:
       std::string option_;
@@ -89,7 +89,7 @@ namespace butl
     {
       public:
       virtual
-      ~unknown_argument () throw ();
+      ~unknown_argument () noexcept;
 
       unknown_argument (const std::string& argument);
 
@@ -100,7 +100,7 @@ namespace butl
       print (::std::ostream&) const;
 
       virtual const char*
-      what () const throw ();
+      what () const noexcept;
 
       private:
       std::string argument_;
@@ -110,7 +110,7 @@ namespace butl
     {
       public:
       virtual
-      ~missing_value () throw ();
+      ~missing_value () noexcept;
 
       missing_value (const std::string& option);
 
@@ -121,7 +121,7 @@ namespace butl
       print (::std::ostream&) const;
 
       virtual const char*
-      what () const throw ();
+      what () const noexcept;
 
       private:
       std::string option_;
@@ -131,7 +131,7 @@ namespace butl
     {
       public:
       virtual
-      ~invalid_value () throw ();
+      ~invalid_value () noexcept;
 
       invalid_value (const std::string& option,
                      const std::string& value,
@@ -150,7 +150,7 @@ namespace butl
       print (::std::ostream&) const;
 
       virtual const char*
-      what () const throw ();
+      what () const noexcept;
 
       private:
       std::string option_;
@@ -165,7 +165,7 @@ namespace butl
       print (::std::ostream&) const;
 
       virtual const char*
-      what () const throw ();
+      what () const noexcept;
     };
 
     // Command line argument scanner interface.
@@ -174,6 +174,14 @@ namespace butl
     // for the two previous arguments up until a call to a third
     // peek() or next().
     //
+    // The position() function returns a monotonically-increasing
+    // number which, if stored, can later be used to determine the
+    // relative position of the argument returned by the following
+    // call to next(). Note that if multiple scanners are used to
+    // extract arguments from multiple sources, then the end
+    // position of the previous scanner should be used as the
+    // start position of the next.
+    //
     class scanner
     {
       public:
@@ -191,13 +199,24 @@ namespace butl
 
       virtual void
       skip () = 0;
+
+      virtual std::size_t
+      position () = 0;
     };
 
     class argv_scanner: public scanner
     {
       public:
-      argv_scanner (int& argc, char** argv, bool erase = false);
-      argv_scanner (int start, int& argc, char** argv, bool erase = false);
+      argv_scanner (int& argc,
+                    char** argv,
+                    bool erase = false,
+                    std::size_t start_position = 0);
+
+      argv_scanner (int start,
+                    int& argc,
+                    char** argv,
+                    bool erase = false,
+                    std::size_t start_position = 0);
 
       int
       end () const;
@@ -214,7 +233,11 @@ namespace butl
       virtual void
       skip ();
 
-      private:
+      virtual std::size_t
+      position ();
+
+      protected:
+      std::size_t start_position_;
       int i_;
       int& argc_;
       char** argv_;
@@ -224,13 +247,15 @@ namespace butl
     class vector_scanner: public scanner
     {
       public:
-      vector_scanner (const std::vector<std::string>&, std::size_t start = 0);
+      vector_scanner (const std::vector<std::string>&,
+                      std::size_t start = 0,
+                      std::size_t start_position = 0);
 
       std::size_t
       end () const;
 
       void
-      reset (std::size_t start = 0);
+      reset (std::size_t start = 0, std::size_t start_position = 0);
 
       virtual bool
       more ();
@@ -244,7 +269,11 @@ namespace butl
       virtual void
       skip ();
 
+      virtual std::size_t
+      position ();
+
       private:
+      std::size_t start_position_;
       const std::vector<std::string>& v_;
       std::size_t i_;
     };
@@ -455,6 +484,67 @@ namespace butl
     bool utc_;
   };
 
+  class find_options
+  {
+    public:
+    find_options ();
+
+    // Return true if anything has been parsed.
+    //
+    bool
+    parse (int& argc,
+           char** argv,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int start,
+           int& argc,
+           char** argv,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int& argc,
+           char** argv,
+           int& end,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int start,
+           int& argc,
+           char** argv,
+           int& end,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (::butl::cli::scanner&,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    // Option accessors.
+    //
+    // Implementation details.
+    //
+    protected:
+    bool
+    _parse (const char*, ::butl::cli::scanner&);
+
+    private:
+    bool
+    _parse (::butl::cli::scanner&,
+            ::butl::cli::unknown_mode option,
+            ::butl::cli::unknown_mode argument);
+
+    public:
+  };
+
   class ln_options
   {
     public:
diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx
index f10f82d..e118156 100644
--- a/libbutl/builtin-options.ixx
+++ b/libbutl/builtin-options.ixx
@@ -107,14 +107,29 @@ namespace butl
     // argv_scanner
     //
     inline argv_scanner::
-    argv_scanner (int& argc, char** argv, bool erase)
-    : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase)
+    argv_scanner (int& argc,
+                  char** argv,
+                  bool erase,
+                  std::size_t sp)
+    : start_position_ (sp + 1),
+      i_ (1),
+      argc_ (argc),
+      argv_ (argv),
+      erase_ (erase)
     {
     }
 
     inline argv_scanner::
-    argv_scanner (int start, int& argc, char** argv, bool erase)
-    : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase)
+    argv_scanner (int start,
+                  int& argc,
+                  char** argv,
+                  bool erase,
+                  std::size_t sp)
+    : start_position_ (sp + static_cast<std::size_t> (start)),
+      i_ (start),
+      argc_ (argc),
+      argv_ (argv),
+      erase_ (erase)
     {
     }
 
@@ -127,8 +142,10 @@ namespace butl
     // vector_scanner
     //
     inline vector_scanner::
-    vector_scanner (const std::vector<std::string>& v, std::size_t i)
-    : v_ (v), i_ (i)
+    vector_scanner (const std::vector<std::string>& v,
+                    std::size_t i,
+                    std::size_t sp)
+    : start_position_ (sp), v_ (v), i_ (i)
     {
     }
 
@@ -139,9 +156,10 @@ namespace butl
     }
 
     inline void vector_scanner::
-    reset (std::size_t i)
+    reset (std::size_t i, std::size_t sp)
     {
       i_ = i;
+      start_position_ = sp;
     }
   }
 }
@@ -175,6 +193,9 @@ namespace butl
     return this->utc_;
   }
 
+  // find_options
+  //
+
   // ln_options
   //
 
diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli
index adc47fa..23a5708 100644
--- a/libbutl/builtin.cli
+++ b/libbutl/builtin.cli
@@ -34,6 +34,11 @@ namespace butl
     bool --utc|-u;
   };
 
+  class find_options
+  {
+    // No options so far (expression/primaries handled as arguments).
+  };
+
   class ln_options
   {
     bool --symbolic|-s;
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index 79ff968..a5861d4 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -1,28 +1,16 @@
 // file      : libbutl/builtin.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/builtin.mxx>
-#endif
+#include <libbutl/builtin.hxx>
 
 #ifdef _WIN32
 #  include <libbutl/win32-utility.hxx>
 #endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <map>
-#include <string>
-#include <vector>
-#include <thread>
-#include <utility>    // move(), forward()
-#include <cstdint>    // uint*_t
-#include <functional>
-
 #include <ios>
 #include <chrono>
 #include <cerrno>
+#include <cassert>
 #include <ostream>
 #include <sstream>
 #include <cstdlib>      // strtoull()
@@ -30,41 +18,16 @@
 #include <exception>
 #include <system_error>
 
-#endif
+#include <libbutl/regex.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx>      // operator<<(ostream,exception),
+                                    // throw_generic_error()
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx>
+#include <libbutl/small-vector.hxx>
 
 #include <libbutl/builtin-options.hxx>
 
-#ifdef __cpp_modules_ts
-module butl.builtin;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.timestamp;
-#endif
-
-import butl.regex;
-import butl.path_io;
-import butl.utility;      // operator<<(ostream,exception),
-                          // throw_generic_error()
-import butl.optional;
-import butl.filesystem;
-import butl.small_vector;
-#else
-#include <libbutl/regex.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
-
 // Strictly speaking a builtin which reads/writes from/to standard streams
 // must be asynchronous so that the caller can communicate with it through
 // pipes without being blocked on I/O operations. However, as an optimization,
@@ -280,7 +243,7 @@ namespace butl
   // completed using the current directory if it is relative. Fail if
   // std::system_error is thrown by the underlying function call.
   //
-  dir_path
+  static dir_path
   current_directory (const dir_path& wd, const function<error_record ()>& fail)
   {
     try
@@ -507,7 +470,7 @@ namespace butl
       if (cbs.create)
         call (fail, cbs.create, to, false /* pre */);
 
-      for (const auto& de: dir_iterator (from, false /* ignore_dangling */))
+      for (const auto& de: dir_iterator (from, dir_iterator::no_follow))
       {
         path f (from / de.path ());
         path t (to / de.path ());
@@ -853,6 +816,314 @@ namespace butl
     return builtin (r = 0);
   }
 
+  // find <start-path>... [-name <pattern>]
+  //                      [-type <type>]
+  //                      [-mindepth <depth>]
+  //                      [-maxdepth <depth>]
+  //
+  // Return 0 if no error occurred and 1 otherwise.
+  //
+  // Note: must be executed asynchronously.
+  //
+  static uint8_t
+  find (const strings& args,
+        auto_fd in, auto_fd out, auto_fd err,
+        const dir_path& cwd,
+        const builtin_callbacks& cbs) noexcept
+  try
+  {
+    uint8_t r (1);
+    ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ()));
+
+    // Note that on some errors we will issue diagnostics but continue the
+    // search and return with non-zero code at the end. This is consistent
+    // with how major implementations behave (see below).
+    //
+    bool error_occurred (false);
+    auto error = [&cerr, &error_occurred] (bool fail = false)
+    {
+      error_occurred = true;
+      return error_record (cerr, fail, "find");
+    };
+
+    auto fail = [&error] () {return error (true /* fail */);};
+
+    try
+    {
+      in.close ();
+      ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+
+      // Parse arguments.
+      //
+      cli::vector_scanner scan (args);
+
+      // Currently, we don't expect any options.
+      //
+      parse<find_options> (scan, args, cbs.parse_option, fail);
+
+      // Parse path arguments until the first primary (starts with '-') is
+      // encountered.
+      //
+      small_vector<path, 1> paths;
+
+      while (scan.more ())
+      {
+        if (*scan.peek () == '-')
+          break;
+
+        try
+        {
+          paths.emplace_back (scan.next ());
+        }
+        catch (const invalid_path& e)
+        {
+          fail () << "invalid path '" << e.path << "'";
+        }
+      }
+
+      // Note that POSIX doesn't explicitly describe the behavior if no paths
+      // are specified on the command line. On Linux the current directory is
+      // assumed in this case. We, however, will follow the FreeBSD behavior
+      // and fail since this seems to be less error-prone.
+      //
+      if (paths.empty ())
+        fail () << "missing start path";
+
+      // Parse primaries.
+      //
+      optional<string>     name;
+      optional<entry_type> type;
+      optional<uint64_t>   min_depth;
+      optional<uint64_t>   max_depth;
+
+      while (scan.more ())
+      {
+        const char* p (scan.next ());
+
+        // Return the string value of the current primary. Fail if absent or
+        // empty, unless empty value is allowed.
+        //
+        auto str = [p, &scan, &fail] (bool allow_empty = false)
+        {
+          if (!scan.more ())
+            fail () << "missing value for primary '" << p << "'";
+
+          string n (p); // Save for diagnostics.
+          string r (scan.next ());
+
+          if (r.empty () && !allow_empty)
+            fail () << "empty value for primary '" << n << "'";
+
+          return r;
+        };
+
+        // Return the unsigned numeric value of the current primary. Fail if
+        // absent or not all-digits (strtoull() alone accepts spaces and sign).
+        //
+        auto num = [p, &str, &fail] ()
+        {
+          string n (p); // Save for diagnostics.
+          string s (str ());
+
+          const char* b (s.c_str ());
+          char* e (nullptr);
+          errno = 0; // We must clear it according to POSIX.
+          uint64_t r (strtoull (b, &e, 10)); // Can't throw.
+
+          if (*b < '0' || *b > '9' || errno == ERANGE || e != b + s.size ())
+            fail () << "invalid value '" << s << "' for primary '" << n << "'";
+
+          return r;
+        };
+
+        if (strcmp (p, "-name") == 0)
+        {
+          // Note that the empty never-matching pattern is allowed.
+          //
+          name = str (true /* allow_empty */);
+        }
+        else if (strcmp (p, "-type") == 0)
+        {
+          string s (str ());
+          char t (s.size () == 1 ? s[0] : '\0');
+
+          switch (t)
+          {
+          case 'f': type = entry_type::regular;   break;
+          case 'd': type = entry_type::directory; break;
+          case 'l': type = entry_type::symlink;   break;
+          default: fail () << "invalid value '" << s << "' for primary '-type'";
+          }
+        }
+        else if (strcmp (p, "-mindepth") == 0)
+        {
+          min_depth = num ();
+        }
+        else if (strcmp (p, "-maxdepth") == 0)
+        {
+          max_depth = num ();
+        }
+        else
+          fail () << "unknown primary '" << p << "'";
+      }
+
+      // Print the path if the expression evaluates to true for it. Traverse
+      // further down if the path refers to a directory and the maximum depth
+      // is not specified or is not reached.
+      //
+      // Note that paths for evaluating/printing (pp) and for
+      // stating/traversing (ap) are passed separately. The former is
+      // potentially relative and the latter is absolute. Also note that
+      // for optimization we separately pass the base name simple path.
+      //
+      auto find = [&cout,
+                   &name,
+                   &type,
+                   &min_depth,
+                   &max_depth,
+                   &fail] (const path& pp,
+                           const path& ap,
+                           const path& bp,
+                           entry_type t,
+                           uint64_t level,
+                           const auto& find) -> void
+      {
+        // Print the path if no primary evaluates to false.
+        //
+        if ((!type      || *type == t)          &&
+            (!min_depth || level >= *min_depth) &&
+            (!name      || path_match (bp.string (), *name)))
+        {
+          // Print the trailing directory separator, if present.
+          //
+          if (pp.to_directory ())
+          {
+            // The trailing directory separator can only be present for
+            // paths specified on the command line.
+            //
+            assert (level == 0);
+
+            cout << pp.representation () << '\n';
+          }
+          else
+            cout << pp << '\n';
+        }
+
+        // Traverse the directory, unless the max depth is specified and
+        // reached.
+        //
+        if (t == entry_type::directory && (!max_depth || level < *max_depth))
+        try
+        {
+          for (const auto& de: dir_iterator (path_cast<dir_path> (ap),
+                                             dir_iterator::no_follow))
+          {
+            find (pp / de.path (),
+                  ap / de.path (),
+                  de.path (),
+                  de.ltype (),
+                  level + 1,
+                  find);
+          }
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to scan directory '" << pp << "': " << e;
+        }
+      };
+
+      dir_path wd;
+
+      for (const path& p: paths)
+      {
+        // Complete the path if it is relative, so that we can properly stat
+        // it and, potentially, traverse. Note that we don't normalize it
+        // since POSIX requires that the paths should be evaluated (by
+        // primaries) and printed unaltered.
+        //
+        path ap;
+
+        if (p.relative ())
+        {
+          if (wd.empty () && cwd.relative ())
+            wd = current_directory (cwd, fail);
+
+          ap = (!wd.empty () ? wd : cwd) / p;
+        }
+
+        // Issue an error if the path is empty, doesn't exist, or has the
+        // trailing directory separator but refers to a non-directory.
+        //
+        // Note that POSIX doesn't explicitly describe the behavior if any of
+        // the above happens. We will follow the behavior which is common for
+        // both Linux and FreeBSD by issuing the diagnostics, proceeding to
+        // the subsequent paths, and returning with non-zero code at the end.
+        //
+        if (p.empty ())
+        {
+          error () << "empty path";
+          continue;
+        }
+
+        const path& fp (!ap.empty () ? ap : p);
+        pair<bool, entry_stat> pe;
+
+        try
+        {
+          pe = path_entry (fp);
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to stat '" << p << "': " << e;
+        }
+
+        if (!pe.first)
+        {
+          error () << "'" << p << "' doesn't exists";
+          continue;
+        }
+
+        entry_type t (pe.second.type);
+
+        if (p.to_directory () && t != entry_type::directory)
+        {
+          error () << "'" << p << "' is not a directory";
+          continue;
+        }
+
+        find (p, fp, p.leaf (), t, 0 /* level */, find);
+      }
+
+      cout.close ();
+      r = !error_occurred ? 0 : 1;
+    }
+    // Can be thrown while closing cin or creating, writing to, or closing
+    // cout or writing to cerr.
+    //
+    catch (const io_error& e)
+    {
+      error () << e;
+    }
+    catch (const failed&)
+    {
+      // Diagnostics has already been issued.
+    }
+    catch (const cli::exception& e)
+    {
+      error () << e;
+    }
+
+    cerr.close ();
+    return r;
+  }
+  // In particular, handles io_error exception potentially thrown while
+  // creating, writing to, or closing cerr.
+  //
+  catch (const std::exception&)
+  {
+    return 1;
+  }
+
   // Create a symlink to a file or directory at the specified path and calling
   // the hook for the created filesystem entries. The paths must be absolute
   // and normalized. Fall back to creating a hardlink, if symlink creation is
@@ -1632,15 +1903,6 @@ namespace butl
         string replacement;
         bool global;
         bool print;
-
-        subst (const string& re, bool ic, string rp, bool gl, bool pr)
-            //
-            // Note that ECMAScript is implied if no grammar flag is specified.
-            //
-            : regex (re, ic ? regex::icase : regex::ECMAScript),
-              replacement (move (rp)),
-              global (gl),
-              print (pr) {}
       };
 
       small_vector<subst, 1> substs;
@@ -1663,57 +1925,59 @@ namespace butl
         if (delim == '\\' || delim == '\n')
           fail () << "invalid delimiter for 's' command in '" << v << "'";
 
-        size_t p (v.find (delim, 2));
-        if (p == string::npos)
-          fail () << "unterminated 's' command regex in '" << v << "'";
-
-        string regex (v, 2, p - 2);
-
-        // Empty regex matches nothing, so not of much use.
-        //
-        if (regex.empty ())
-          fail () << "empty regex in 's' command in '" << v << "'";
-
-        size_t b (p + 1);
-        p = v.find (delim, b);
-        if (p == string::npos)
-          fail () << "unterminated 's' command replacement in '" << v << "'";
-
-        string replacement (v, b, p - b);
-
-        // Parse the substitute command flags.
+        // Parse the substitute command regex (as string), replacement, and
+        // flags.
         //
+        pair<string, string> rf;
         bool icase  (false);
         bool global (false);
         bool print  (false);
 
-        char c;
-        for (++p; (c = v[p]) != '\0'; ++p)
+        try
         {
-          switch (c)
+          size_t e;
+          rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e);
+
+          char c;
+          for (size_t i (e + 1); (c = v[i]) != '\0'; ++i)
           {
-          case 'i': icase  = true; break;
-          case 'g': global = true; break;
-          case 'p': print  = true; break;
-          default:
+            switch (c)
             {
-              fail () << "invalid 's' command flag '" << c << "' in '" << v
-                      << "'";
+            case 'i': icase  = true; break;
+            case 'g': global = true; break;
+            case 'p': print  = true; break;
+            default:
+              {
+                fail () << "invalid 's' command flag '" << c << "' in '" << v
+                        << "'";
+              }
             }
           }
         }
+        catch (const invalid_argument& e)
+        {
+          fail () << "invalid 's' command '" << v << "': " << e;
+        }
 
+        // Parse the regex and add the substitution to the list.
+        //
         try
         {
-          substs.emplace_back (regex, icase,
-                               move (replacement),
-                               global, print);
+          // Note that ECMAScript is implied if no grammar flag is specified.
+          //
+          regex re (rf.first, icase ? regex::icase : regex::ECMAScript);
+
+          substs.push_back ({move (re),
+                             move (rf.second),
+                             global,
+                             print});
         }
         catch (const regex_error& e)
         {
           // Print regex_error description if meaningful (no space).
           //
-          fail () << "invalid regex '" << regex << "' in '" << v << "'" << e;
+          fail () << "invalid regex '" << rf.first << "' in '" << v << "'"
+                  << e;
         }
       }
 
@@ -1936,6 +2200,7 @@ namespace butl
         if (!a.empty () && a[0] != '-' && a[0] != '+')
         {
           char* e (nullptr);
+          errno = 0; // We must clear it according to POSIX.
           n = strtoull (a.c_str (), &e, 10); // Can't throw.
 
           if (errno != ERANGE && e == a.c_str () + a.size ())
@@ -2220,17 +2485,17 @@ namespace butl
   {
     unique_ptr<builtin::async_state> s (
       new builtin::async_state (
+        r,
         [fn,
-         &r,
          &args,
          in = move (in), out = move (out), err = move (err),
          &cwd,
-         &cbs] () mutable noexcept
+         &cbs] () mutable noexcept -> uint8_t
         {
-          r = fn (args,
-                  move (in), move (out), move (err),
-                  cwd,
-                  cbs);
+          return fn (args,
+                     move (in), move (out), move (err),
+                     cwd,
+                     cbs);
         }));
 
     return builtin (r, move (s));
@@ -2270,6 +2535,7 @@ namespace butl
     {"diff",  {nullptr,            2}},
     {"echo",  {&async_impl<&echo>, 2}},
     {"false", {&false_,            0}},
+    {"find",  {&async_impl<&find>, 2}},
     {"ln",    {&sync_impl<&ln>,    2}},
     {"mkdir", {&sync_impl<&mkdir>, 2}},
     {"mv",    {&sync_impl<&mv>,    2}},
@@ -2289,7 +2555,7 @@ namespace butl
   {
     if (state_ != nullptr)
     {
-      unique_lock<mutex> l (state_->mutex);
+      unique_lock l (state_->mutex);
 
       if (!state_->finished)
         state_->condv.wait (l, [this] {return state_->finished;});
@@ -2304,7 +2570,7 @@ namespace butl
   {
     if (state_ != nullptr)
     {
-      unique_lock<mutex> l (state_->mutex);
+      unique_lock l (state_->mutex);
 
       if (!state_->finished &&
           !state_->condv.wait_for (l, tm, [this] {return state_->finished;}))
diff --git a/libbutl/builtin.mxx b/libbutl/builtin.hxx
index a99d6f4..b301f8a 100644
--- a/libbutl/builtin.mxx
+++ b/libbutl/builtin.hxx
@@ -1,47 +1,35 @@
-// file      : libbutl/builtin.mxx -*- C++ -*-
+// file      : libbutl/builtin.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
-#ifndef __cpp_lib_modules_ts
 #include <map>
-#include <mutex>
 #include <string>
 #include <vector>
-#include <thread>
 #include <chrono>
 #include <memory>             // unique_ptr
 #include <cstddef>            // size_t
 #include <utility>            // move()
 #include <cstdint>            // uint8_t
 #include <functional>
-#include <condition_variable>
-#endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.builtin;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.threading;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.timestamp;
+#ifndef LIBBUTL_MINGW_STDTHREAD
+#  include <mutex>
+#  include <thread>
+#  include <condition_variable>
 #else
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/timestamp.mxx>
+#  include <libbutl/mingw-mutex.hxx>
+#  include <libbutl/mingw-thread.hxx>
+#  include <libbutl/mingw-condition_variable.hxx>
 #endif
 
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/timestamp.hxx>
+
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // A process/thread-like object representing a running builtin.
   //
@@ -75,12 +63,26 @@ LIBBUTL_MODEXPORT namespace butl
     ~builtin () {if (state_ != nullptr) state_->thread.join ();}
 
   public:
+#ifndef LIBBUTL_MINGW_STDTHREAD
+    using mutex_type = std::mutex;
+    using condition_variable_type = std::condition_variable;
+    using thread_type = std::thread;
+
+    using unique_lock = std::unique_lock<mutex_type>;
+#else
+    using mutex_type = mingw_stdthread::mutex;
+    using condition_variable_type = mingw_stdthread::condition_variable;
+    using thread_type = mingw_stdthread::thread;
+
+    using unique_lock = mingw_stdthread::unique_lock<mutex_type>;
+#endif
+
     struct async_state
     {
       bool finished = false;
-      std::mutex mutex;
-      std::condition_variable condv;
-      std::thread thread;
+      mutex_type mutex;
+      condition_variable_type condv;
+      thread_type thread;
 
       // Note that we can't use std::function as an argument type to get rid
       // of the template since std::function can only be instantiated with a
@@ -88,8 +90,7 @@ LIBBUTL_MODEXPORT namespace butl
       // be able to capture auto_fd by value in a lambda, etc).
       //
       template <typename F>
-      explicit
-      async_state (F);
+      async_state (uint8_t&, F);
     };
 
     builtin (std::uint8_t& r, std::unique_ptr<async_state>&& s = nullptr)
diff --git a/libbutl/builtin.ixx b/libbutl/builtin.ixx
index 0356f8b..d77590b 100644
--- a/libbutl/builtin.ixx
+++ b/libbutl/builtin.ixx
@@ -25,7 +25,7 @@ namespace butl
   {
     if (state_ != nullptr)
     {
-      std::unique_lock<std::mutex> l (state_->mutex);
+      unique_lock l (state_->mutex);
 
       if (!state_->finished)
         return nullopt;
@@ -47,13 +47,14 @@ namespace butl
   //
   template <typename F>
   inline builtin::async_state::
-  async_state (F f)
-      : thread ([f = std::move (f), this] () mutable noexcept
+  async_state (uint8_t& r, F f)
+      : thread ([this, &r, f = std::move (f)] () mutable noexcept
                 {
-                  f ();
+                  uint8_t t (f ());
 
                   {
-                    std::unique_lock<std::mutex> l (this->mutex);
+                    unique_lock l (this->mutex);
+                    r = t;
                     finished = true;
                   }
 
@@ -68,9 +69,10 @@ namespace butl
   {
     std::unique_ptr<builtin::async_state> s (
       new builtin::async_state (
-        [f = std::move (f), &r] () mutable noexcept
+        r,
+        [f = std::move (f)] () mutable noexcept -> uint8_t
         {
-          r = f ();
+          return f ();
         }));
 
     return builtin (r, move (s));
diff --git a/libbutl/char-scanner.mxx b/libbutl/char-scanner.hxx
index 60994cf..24865b7 100644
--- a/libbutl/char-scanner.mxx
+++ b/libbutl/char-scanner.hxx
@@ -1,37 +1,21 @@
-// file      : libbutl/char-scanner.mxx -*- C++ -*-
+// file      : libbutl/char-scanner.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>  // char_traits
+#include <cassert>
 #include <cstddef> // size_t
 #include <cstdint> // uint64_t
 #include <climits> // INT_*
 #include <utility> // pair, make_pair()
 #include <istream>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.char_scanner;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.fdstream;
-#else
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/bufstreambuf.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Refer to utf8_validator for details.
   //
@@ -59,23 +43,25 @@ LIBBUTL_MODEXPORT namespace butl
     // 0x0D is treated "as if" it was followed by 0x0A and multiple 0x0D
     // are treated as one.
     //
-    // Note also that if the stream happens to be ifdstream, then it includes
-    // a number of optimizations that assume nobody else is messing with the
-    // stream.
+    // Note also that if the stream happens to be bufstreambuf-based, then it
+    // includes a number of optimizations that assume nobody else is messing
+    // with the stream.
     //
-    // The line and position arguments can be used to override the start line
-    // and position in the stream (useful when re-scanning data saved with the
-    // save_* facility).
+    // The line, column, and position arguments can be used to override the
+    // start line, column, and position in the stream (useful when re-scanning
+    // data saved with the save_* facility).
     //
     char_scanner (std::istream&,
                   bool crlf = true,
                   std::uint64_t line = 1,
+                  std::uint64_t column = 1,
                   std::uint64_t position = 0);
 
     char_scanner (std::istream&,
                   validator_type,
                   bool crlf = true,
                   std::uint64_t line = 1,
+                  std::uint64_t column = 1,
                   std::uint64_t position = 0);
 
     char_scanner (const char_scanner&) = delete;
@@ -106,8 +92,9 @@ LIBBUTL_MODEXPORT namespace butl
       std::uint64_t line;
       std::uint64_t column;
 
-      // Logical character position (see ifdstream for details on the logical
-      // part) if the scanned stream is ifdstream and always zero otherwise.
+      // Logical character position (see bufstreambuf for details on the
+      // logical part) if the scanned stream is bufstreambuf-based and always
+      // zero otherwise.
       //
       std::uint64_t position;
 
@@ -240,7 +227,7 @@ LIBBUTL_MODEXPORT namespace butl
     // the hairy details; realistically, you would probably only direct-scan
     // ASCII fragments).
     //
-    fdbuf* buf_; // NULL if not ifdstream.
+    bufstreambuf* buf_; // NULL if not bufstreambuf-based.
     const char_type* gptr_;
     const char_type* egptr_;
 
diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx
index 57aefc2..2dc41de 100644
--- a/libbutl/char-scanner.ixx
+++ b/libbutl/char-scanner.ixx
@@ -5,8 +5,10 @@ namespace butl
 {
   template <typename V, std::size_t N>
   inline char_scanner<V, N>::
-  char_scanner (std::istream& is, bool crlf, std::uint64_t l, std::uint64_t p)
-      : char_scanner (is, validator_type (), crlf, l, p)
+  char_scanner (std::istream& is,
+                bool crlf,
+                std::uint64_t l, std::uint64_t c, std::uint64_t p)
+      : char_scanner (is, validator_type (), crlf, l, c, p)
   {
   }
 
diff --git a/libbutl/char-scanner.txx b/libbutl/char-scanner.txx
index 35edf42..75ea189 100644
--- a/libbutl/char-scanner.txx
+++ b/libbutl/char-scanner.txx
@@ -1,9 +1,7 @@
 // file      : libbutl/char-scanner.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_lib_modules_ts
 #include <utility> // move
-#endif
 
 namespace butl
 {
@@ -13,13 +11,14 @@ namespace butl
                 validator_type v,
                 bool crlf,
                 std::uint64_t l,
+                std::uint64_t c,
                 std::uint64_t p)
       : line (l),
-        column (1),
+        column (c),
         position (p),
         is_ (is),
         val_ (std::move (v)),
-        buf_ (dynamic_cast<fdbuf*> (is.rdbuf ())),
+        buf_ (dynamic_cast<bufstreambuf*> (is.rdbuf ())),
         gptr_ (nullptr),
         egptr_ (nullptr),
         crlf_ (crlf)
diff --git a/libbutl/command.cxx b/libbutl/command.cxx
index c23dfd5..2df52dd 100644
--- a/libbutl/command.cxx
+++ b/libbutl/command.cxx
@@ -1,48 +1,18 @@
 // file      : libbutl/command.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/command.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <map>
-#include <string>
-#include <cstddef>
-#include <functional>
+#include <libbutl/command.hxx>
 
 #include <ios>          // ios::failure
 #include <vector>
+#include <cassert>
 #include <utility>      // move()
 #include <stdexcept>    // invalid_argument
 #include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.command;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.optional;
-#endif
-
-import butl.builtin;
-import butl.fdstream;
-import butl.string_parser;
-#else
-#include <libbutl/builtin.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/string-parser.mxx>
-#endif
+
+#include <libbutl/builtin.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/string-parser.hxx>
 
 using namespace std;
 
@@ -81,7 +51,7 @@ namespace butl
       //
       if (p == string::npos)
         throw invalid_argument (string ("unmatched substitution character '") +
-                                open + "'");
+                                open + '\'');
 
       if (p == sp)
         throw invalid_argument ("empty substitution variable");
@@ -90,12 +60,12 @@ namespace butl
 
       if (vn.find_first_of (" \t") != string::npos)
         throw invalid_argument ("whitespace in substitution variable '" +
-                                vn + "'");
+                                vn + '\'');
 
       // Find the variable and append its value or fail if it's unknown.
       //
       if (!sc (vn, r))
-        throw invalid_argument ("unknown substitution variable '" + vn + "'");
+        throw invalid_argument ("unknown substitution variable '" + vn + '\'');
     }
 
     // Append the source string tail following the last substitution.
@@ -198,7 +168,7 @@ namespace butl
         catch (const invalid_path& e)
         {
           throw invalid_argument ("invalid stdout redirect file path '" +
-                                  e.path + "'");
+                                  e.path + '\'');
         }
 
         if (redir->empty ())
diff --git a/libbutl/command.mxx b/libbutl/command.hxx
index 143d406..fb7258f 100644
--- a/libbutl/command.mxx
+++ b/libbutl/command.hxx
@@ -1,34 +1,19 @@
-// file      : libbutl/command.mxx -*- C++ -*-
+// file      : libbutl/command.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#ifndef __cpp_lib_modules_ts
 #include <map>
 #include <string>
 #include <cstddef>    // size_t
 #include <functional>
-#endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.command;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.optional;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Run a process or a builtin, interpreting the command line as
   // whitespace-separated, potentially quoted program path/builtin name,
diff --git a/libbutl/const-ptr.mxx b/libbutl/const-ptr.hxx
index 343ecf6..1474e17 100644
--- a/libbutl/const-ptr.mxx
+++ b/libbutl/const-ptr.hxx
@@ -1,28 +1,11 @@
-// file      : libbutl/const-ptr.mxx -*- C++ -*-
+// file      : libbutl/const-ptr.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <cstddef> // nullptr_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.const_ptr;
-#ifdef __cpp_lib_modules_ts
-import std.core; // @@ MOD std.fundamental.
-#endif
-#endif
-
-#include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Const-propagating pointer.
   //
diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx
index ac3d0cb..5649965 100644
--- a/libbutl/curl.cxx
+++ b/libbutl/curl.cxx
@@ -1,41 +1,14 @@
 // file      : libbutl/curl.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/curl.mxx>
-#endif
-
-// C includes.
+#include <libbutl/curl.hxx>
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
 #include <utility>   // move()
+#include <cstdlib>   // strtoul(), size_t
 #include <exception> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.curl;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
 
-import butl.utility; // icasecmp()
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx>
 
 using namespace std;
 
@@ -49,7 +22,17 @@ namespace butl
     case ftp_put:
       throw invalid_argument ("no input specified for PUT method");
     case http_post:
-      throw invalid_argument ("no input specified for POST method");
+      {
+        // Post the empty data.
+        //
+        // Note that while it's tempting to specify the --request POST option
+        // instead, that can potentially overwrite the request methods for the
+        // HTTP 30X response code redirects.
+        //
+        d.options.push_back ("--data-raw");
+        d.options.push_back ("");
+      }
+      // Fall through.
     case ftp_get:
     case http_get:
       {
@@ -170,7 +153,7 @@ namespace butl
   }
 
   curl::method_proto curl::
-  translate (method_type m, const string& u, method_proto_options& o)
+  translate (method_type m, const string& u, method_proto_options& o, flags fs)
   {
     size_t n (u.find ("://"));
 
@@ -189,8 +172,11 @@ namespace butl
     }
     else if (icasecmp (u, "http", n) == 0 || icasecmp (u, "https", n) == 0)
     {
-      o.push_back ("--fail");     // Fail on HTTP errors (e.g., 404).
-      o.push_back ("--location"); // Follow redirects.
+      if ((fs & flags::no_fail) == flags::none)
+        o.push_back ("--fail");     // Fail on HTTP errors (e.g., 404).
+
+      if ((fs & flags::no_location) == flags::none)
+        o.push_back ("--location"); // Follow redirects.
 
       switch (m)
       {
@@ -203,4 +189,123 @@ namespace butl
 
     throw invalid_argument ("unsupported protocol");
   }
+
+  uint16_t curl::
+  parse_http_status_code (const string& s)
+  {
+    char* e (nullptr);
+    unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw.
+    assert (e != nullptr);
+
+    return *e == '\0' && c >= 100 && c < 600
+           ? static_cast<uint16_t> (c)
+           : 0;
+  }
+
+  string curl::
+  read_http_response_line (ifdstream& is)
+  {
+    string r;
+    getline (is, r); // Strips the trailing LF (0xA).
+
+    // Note that on POSIX CRLF is not automatically translated into LF, so we
+    // need to strip CR (0xD) manually.
+    //
+    if (!r.empty () && r.back () == '\r')
+      r.pop_back ();
+
+    return r;
+  }
+
+  curl::http_status curl::
+  read_http_status (ifdstream& is, bool skip_headers)
+  {
+    // After getting the status line, if requested, we will read until the
+    // empty line (containing just CRLF). Not being able to reach such a line
+    // is an error, which is the reason for the exception mask choice. When
+    // done, we will restore the original exception mask.
+    //
+    ifdstream::iostate es (is.exceptions ());
+    is.exceptions (ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+    auto read_status = [&is, es] ()
+    {
+      string l (read_http_response_line (is));
+
+      for (;;) // Breakout loop.
+      {
+        if (l.compare (0, 5, "HTTP/") != 0)
+          break;
+
+        size_t p (l.find (' ', 5));             // The protocol end.
+        if (p == string::npos)
+          break;
+
+        p = l.find_first_not_of (' ', p + 1);   // The code start.
+        if (p == string::npos)
+          break;
+
+        size_t e (l.find (' ', p + 1));         // The code end.
+        if (e == string::npos)
+          break;
+
+        uint16_t c (parse_http_status_code (string (l, p, e - p)));
+        if (c == 0)
+          break;
+
+        string r;
+        p = l.find_first_not_of (' ', e + 1);   // The reason start.
+        if (p != string::npos)
+        {
+          e = l.find_last_not_of (' ');         // The reason end.
+          assert (e != string::npos && e >= p);
+
+          r = string (l, p, e - p + 1);
+        }
+
+        return http_status {c, move (r)};
+      }
+
+      is.exceptions (es); // Restore the exception mask.
+
+      throw invalid_argument ("invalid status line '" + l + "'");
+    };
+
+    // The curl output for a successful request looks like this:
+    //
+    // HTTP/1.1 100 Continue
+    //
+    // HTTP/1.1 200 OK
+    // Content-Length: 83
+    // Content-Type: text/manifest;charset=utf-8
+    //
+    // <response-body>
+    //
+    // curl normally sends the 'Expect: 100-continue' header for uploads, so
+    // we need to handle the interim HTTP server response with the continue
+    // (100) status code.
+    //
+    // Interestingly, Apache can respond with the continue (100) code and with
+    // the not found (404) code afterwards.
+    //
+    http_status rs (read_status ());
+
+    if (rs.code == 100)
+    {
+      // Skips the interim response.
+      //
+      while (!read_http_response_line (is).empty ()) ;
+
+      rs = read_status (); // Reads the final status code.
+    }
+
+    if (skip_headers)
+    {
+      while (!read_http_response_line (is).empty ()) ; // Skips headers.
+    }
+
+    is.exceptions (es);
+
+    return rs;
+  }
 }
diff --git a/libbutl/curl.mxx b/libbutl/curl.hxx
index 03aac99..ea91807 100644
--- a/libbutl/curl.mxx
+++ b/libbutl/curl.hxx
@@ -1,42 +1,20 @@
-// file      : libbutl/curl.mxx -*- C++ -*-
+// file      : libbutl/curl.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
+#include <cstdint>     // uint16_t
 #include <type_traits>
 
-#include <cstddef>   // size_t
-#include <utility>   // forward()
-#include <exception> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.curl;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;      //@@ MOD TODO: should we re-export?
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Perform a method (GET, POST, PUT) on a URL using the curl(1) program.
   // Throw process_error and io_error (both derive from system_error) in case
@@ -113,6 +91,19 @@ LIBBUTL_MODEXPORT namespace butl
   public:
     enum method_type {get, put, post};
 
+    // By default the -sS and, for the HTTP protocol, --fail and --location
+    // options are passed to curl on the command line. Optionally, these
+    // options can be suppressed.
+    //
+    enum class flags: std::uint16_t
+    {
+      no_fail     = 0x01, // Don't pass --fail.
+      no_location = 0x02, // Don't pass --location.
+      no_sS       = 0x04, // Don't pass -sS.
+
+      none = 0            // Default options set.
+    };
+
     ifdstream in;
     ofdstream out;
 
@@ -143,12 +134,77 @@ LIBBUTL_MODEXPORT namespace butl
           const std::string& url,
           A&&... options);
 
+    // Similar to the above, but allows adjusting curl's default command
+    // line.
+    //
+    template <typename I,
+              typename O,
+              typename E,
+              typename... A>
+    curl (I&& in,
+          O&& out,
+          E&& err,
+          method_type,
+          flags,
+          const std::string& url,
+          A&&... options);
+
+    template <typename C,
+              typename I,
+              typename O,
+              typename E,
+              typename... A>
+    curl (const C&,
+          I&& in,
+          O&& out,
+          E&& err,
+          method_type,
+          flags,
+          const std::string& url,
+          A&&... options);
+
+    // Read the HTTP response status from an input stream.
+    //
+    // Specifically, read and parse the HTTP status line, by default skip over
+    // the remaining headers (leaving the stream at the beginning of the
+    // response body), and return the status code and the reason phrase. Throw
+    // std::invalid_argument if the status line could not be parsed. Pass
+    // through the ios::failure exception on the stream error.
+    //
+    // Note that if ios::failure is thrown the stream's exception mask may not
+    // be preserved.
+    //
+    struct http_status
+    {
+      std::uint16_t code;
+      std::string reason;
+    };
+
+    static http_status
+    read_http_status (ifdstream&, bool skip_headers = true);
+
+    // Parse and return the HTTP status code. Return 0 if the argument is
+    // invalid.
+    //
+    static std::uint16_t
+    parse_http_status_code (const std::string&);
+
+    // Read the CRLF-terminated line from an input stream, stripping the
+    // trailing CRLF. Pass through the ios::failure exception on the stream
+    // error.
+    //
+    static std::string
+    read_http_response_line (ifdstream&);
+
   private:
     enum method_proto {ftp_get, ftp_put, http_get, http_post};
     using method_proto_options = small_vector<const char*, 2>;
 
     method_proto
-    translate (method_type, const std::string& url, method_proto_options&);
+    translate (method_type,
+               const std::string& url,
+               method_proto_options&,
+               flags);
 
   private:
     template <typename T>
@@ -188,6 +244,11 @@ LIBBUTL_MODEXPORT namespace butl
     typename std::enable_if<is_other<O>::value, O>::type
     map_out (O&&, method_proto, io_data&);
   };
+
+  curl::flags operator&  (curl::flags, curl::flags);
+  curl::flags operator|  (curl::flags, curl::flags);
+  curl::flags operator&= (curl::flags&, curl::flags);
+  curl::flags operator|= (curl::flags&, curl::flags);
 }
 
 #include <libbutl/curl.ixx>
diff --git a/libbutl/curl.ixx b/libbutl/curl.ixx
index 61a4ff5..6dcfe13 100644
--- a/libbutl/curl.ixx
+++ b/libbutl/curl.ixx
@@ -1,7 +1,11 @@
 // file      : libbutl/curl.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef>   // size_t
+#include <utility>   // forward()
+#include <exception> // invalid_argument
+
+namespace butl
 {
   template <typename I,
             typename O,
@@ -12,6 +16,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
         O&& out,
         E&& err,
         method_type m,
+        flags fs,
         const std::string& url,
         A&&... options)
       : curl ([] (const char* [], std::size_t) {},
@@ -19,8 +24,80 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
               std::forward<O> (out),
               std::forward<E> (err),
               m,
+              fs,
+              url,
+              std::forward<A> (options)...)
+  {
+  }
+
+  template <typename C,
+            typename I,
+            typename O,
+            typename E,
+            typename... A>
+  inline curl::
+  curl (const C& cmdc,
+        I&& in,
+        O&& out,
+        E&& err,
+        method_type m,
+        const std::string& url,
+        A&&... options)
+      : curl (cmdc,
+              std::forward<I> (in),
+              std::forward<O> (out),
+              std::forward<E> (err),
+              m,
+              flags::none,
+              url,
+              std::forward<A> (options)...)
+  {
+  }
+
+  template <typename I,
+            typename O,
+            typename E,
+            typename... A>
+  inline curl::
+  curl (I&& in,
+        O&& out,
+        E&& err,
+        method_type m,
+        const std::string& url,
+        A&&... options)
+      : curl (std::forward<I> (in),
+              std::forward<O> (out),
+              std::forward<E> (err),
+              m,
+              flags::none,
               url,
               std::forward<A> (options)...)
   {
   }
+
+  inline curl::flags
+  operator&= (curl::flags& x, curl::flags y)
+  {
+    return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) &
+                                         static_cast<std::uint16_t> (y));
+  }
+
+  inline curl::flags
+  operator|= (curl::flags& x, curl::flags y)
+  {
+    return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) |
+                                         static_cast<std::uint16_t> (y));
+  }
+
+  inline curl::flags
+  operator& (curl::flags x, curl::flags y)
+  {
+    return x &= y;
+  }
+
+  inline curl::flags
+  operator| (curl::flags x, curl::flags y)
+  {
+    return x |= y;
+  }
 }
diff --git a/libbutl/curl.txx b/libbutl/curl.txx
index 0c07d35..fc74470 100644
--- a/libbutl/curl.txx
+++ b/libbutl/curl.txx
@@ -1,7 +1,7 @@
 // file      : libbutl/curl.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   template <typename I>
   typename std::enable_if<curl::is_other<I>::value, I>::type curl::
@@ -65,11 +65,12 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
         O&& out,
         E&& err,
         method_type m,
+        flags fs,
         const std::string& url,
         A&&... options)
   {
     method_proto_options mpo;
-    method_proto mp (translate (m, url, mpo));
+    method_proto mp (translate (m, url, mpo, fs));
 
     io_data in_data;
     io_data out_data;
@@ -81,8 +82,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
       map_out (std::forward<O> (out), mp, out_data),
       std::forward<E> (err),
       "curl",
-      "-s", // Silent.
-      "-S", // But do show diagnostics.
+      ((fs & flags::no_sS) == flags::none
+       ? "-sS" // Silent but do show diagnostics.
+       : nullptr),
       mpo,
       in_data.options,
       out_data.options,
diff --git a/libbutl/default-options.cxx b/libbutl/default-options.cxx
deleted file mode 100644
index 28f6fb7..0000000
--- a/libbutl/default-options.cxx
+++ /dev/null
@@ -1,73 +0,0 @@
-// file      : libbutl/default-options.cxx -*- C++ -*-
-// license   : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#include <libbutl/default-options.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <vector>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.default_options;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.small_vector;
-#endif
-
-#endif
-
-using namespace std;
-
-namespace butl
-{
-  optional<dir_path>
-  default_options_start (const optional<dir_path>& home,
-                         const vector<dir_path>& dirs)
-  {
-    if (home)
-      assert (home->absolute () && home->normalized ());
-
-    if (dirs.empty ())
-      return nullopt;
-
-    // Use the first directory as a start.
-    //
-    auto i (dirs.begin ());
-    dir_path d (*i);
-
-    // Try to find a common prefix for each subsequent directory.
-    //
-    for (++i; i != dirs.end (); ++i)
-    {
-      bool p (false);
-
-      for (;
-           !(d.root () || (home && d == *home));
-           d = d.directory ())
-      {
-        if (i->sub (d))
-        {
-          p = true;
-          break;
-        }
-      }
-
-      if (!p)
-        return nullopt;
-    }
-
-    return d;
-  }
-}
diff --git a/libbutl/default-options.mxx b/libbutl/default-options.hxx
index 11f7bb2..1d363b6 100644
--- a/libbutl/default-options.mxx
+++ b/libbutl/default-options.hxx
@@ -1,44 +1,18 @@
-// file      : libbutl/default-options.mxx -*- C++ -*-
+// file      : libbutl/default-options.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 
-#include <utility>      // move(), forward(), make_pair()
-#include <algorithm>    // reverse()
-#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.default_options;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.small_vector;
-
-import butl.git;
-import butl.filesystem;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/small-vector.mxx>
-
-#include <libbutl/git.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/small-vector.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Default options files helper implementation.
   //
@@ -107,6 +81,15 @@ LIBBUTL_MODEXPORT namespace butl
   //
   // Note that the extra directory options files are never considered remote.
   //
+  // For the convenience of implementation, the function parses the option
+  // files in the reverse order. Thus, to make sure that positions in the
+  // options list monotonically increase, it needs the maximum number of
+  // arguments, globally and per file, to be specified. This way the starting
+  // options position for each file will be less than for the previously
+  // parsed file by arg_max_file and equal to arg_max - arg_max_file for the
+  // first file. If the actual number of arguments exceeds the specified limit,
+  // then invalid_argument is thrown.
+  //
   template <typename O, typename S, typename U, typename F>
   default_options<O>
   load_default_options (const optional<dir_path>& sys_dir,
@@ -115,6 +98,8 @@ LIBBUTL_MODEXPORT namespace butl
                         const default_options_files&,
                         F&&,
                         const std::string& option,
+                        std::size_t arg_max,
+                        std::size_t arg_max_file,
                         bool args = false);
 
   // Merge the default options/arguments and the command line
@@ -152,12 +137,25 @@ LIBBUTL_MODEXPORT namespace butl
   AS
   merge_default_arguments (const default_options<O>&, const AS&, F&&);
 
-  // Find a common start (parent) directory stopping at home or root
-  // (excluding).
+  // Find a common start (parent) directory for directories specified as an
+  // iterator range, stopping at home or root (excluding). Optionally pass a
+  // function resolving an iterator into a directory in a way other than just
+  // dereferencing it. The function signature is:
+  //
+  // const dir_path& (I)
   //
-  LIBBUTL_SYMEXPORT optional<dir_path>
-  default_options_start (const optional<dir_path>& home_dir,
-                         const std::vector<dir_path>&);
+  template <typename I, typename F>
+  optional<dir_path>
+  default_options_start (const optional<dir_path>& home, I, I, F&&);
+
+  template <typename I>
+  inline optional<dir_path>
+  default_options_start (const optional<dir_path>& home, I b, I e)
+  {
+    return default_options_start (home,
+                                  b, e,
+                                  [] (I i) -> const dir_path& {return *i;});
+  }
 }
 
 #include <libbutl/default-options.ixx>
diff --git a/libbutl/default-options.ixx b/libbutl/default-options.ixx
index 4a551ac..7248d7d 100644
--- a/libbutl/default-options.ixx
+++ b/libbutl/default-options.ixx
@@ -1,7 +1,7 @@
 // file      : libbutl/default-options.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   template <typename O>
   inline O
diff --git a/libbutl/default-options.txx b/libbutl/default-options.txx
index eaf4235..aa254b2 100644
--- a/libbutl/default-options.txx
+++ b/libbutl/default-options.txx
@@ -1,7 +1,15 @@
 // file      : libbutl/default-options.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility>      // move(), forward(), make_pair()
+#include <algorithm>    // reverse()
+#include <stdexcept>    // invalid_argument
+#include <system_error>
+
+#include <libbutl/git.hxx>
+#include <libbutl/filesystem.hxx>
+
+namespace butl
 {
   inline bool
   options_dir_exists (const dir_path& d)
@@ -14,10 +22,11 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
     throw std::make_pair (path_cast<path> (d), std::move (e));
   }
 
-  // Search for and parse the options files in the specified directory and
-  // its local/ subdirectory, if exists, in the reverse order and append the
-  // options to the resulting list. Return false if --no-default-options is
-  // encountered.
+  // Search for and parse the options files in the specified directory and its
+  // local/ subdirectory, if exists, in the reverse order and append the
+  // options to the resulting list. Verify that the number of arguments
+  // doesn't exceed the limits and decrement arg_max by arg_max_file after
+  // parsing each file. Return false if --no-default-options is encountered.
   //
   // Note that by default we check for the local/ subdirectory even if we
   // don't think it belongs to the remote directory; the user may move things
@@ -36,6 +45,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                               bool remote,
                               const small_vector<path, 2>& fs,
                               F&& fn,
+                              std::size_t& arg_max,
+                              std::size_t  arg_max_file,
                               default_options<O>& def_ops,
                               bool load_sub = true,
                               bool load_dir = true)
@@ -44,7 +55,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
     bool r (true);
 
-    auto load = [&opt, args, &fs, &fn, &def_ops, &r]
+    auto load = [&opt, args, &fs, &fn, &def_ops, &arg_max, arg_max_file, &r]
                 (const dir_path& d, bool rem)
     {
       using namespace std;
@@ -57,9 +68,14 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
         {
           if (file_exists (p)) // Follows symlinks.
           {
+            if (arg_max < arg_max_file)
+              throw invalid_argument ("too many options files");
+
+            size_t start_pos (arg_max - arg_max_file);
+
             fn (p, rem, false /* overwrite */);
 
-            S s (p.string (), opt);
+            S s (p.string (), opt, start_pos);
 
             // @@ Note that the potentially thrown exceptions (unknown option,
             //    unexpected argument, etc) will not contain any location
@@ -81,6 +97,15 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
             else
               o.parse (s, U::fail, U::fail);
 
+            if (s.position () > arg_max)
+              throw invalid_argument ("too many options in file " +
+                                      p.string ());
+
+            // Don't decrement arg_max for the empty option files.
+            //
+            if (s.position () != start_pos)
+              arg_max = start_pos;
+
             if (o.no_default_options ())
               r = false;
 
@@ -119,6 +144,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                         const default_options_files& ofs,
                         F&& fn,
                         const std::string& opt,
+                        std::size_t arg_max,
+                        std::size_t arg_max_file,
                         bool args)
   {
     if (sys_dir)
@@ -214,6 +241,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                                         false /* remote */,
                                                         ofs.files,
                                                         std::forward<F> (fn),
+                                                        arg_max,
+                                                        arg_max_file,
                                                         r);
 
             load_extra        = false;
@@ -228,6 +257,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                                         remote,
                                                         ofs.files,
                                                         std::forward<F> (fn),
+                                                        arg_max,
+                                                        arg_max_file,
                                                         r,
                                                         load_build2_local,
                                                         load_build2);
@@ -245,6 +276,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                                   false /* remote */,
                                                   ofs.files,
                                                   std::forward<F> (fn),
+                                                  arg_max,
+                                                  arg_max_file,
                                                   r);
 
     if (load && home_dir)
@@ -258,6 +291,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                                     false /* remote */,
                                                     ofs.files,
                                                     std::forward<F> (fn),
+                                                    arg_max,
+                                                    arg_max_file,
                                                     r);
     }
 
@@ -268,6 +303,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                            false /* remote */,
                                            ofs.files,
                                            std::forward<F> (fn),
+                                           arg_max,
+                                           arg_max_file,
                                            r);
 
     std::reverse (r.begin (), r.end ());
@@ -318,4 +355,43 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
     r.insert (r.end (), cmd_args.begin (), cmd_args.end ());
     return r;
   }
+
+  template <typename I, typename F>
+  optional<dir_path>
+  default_options_start (const optional<dir_path>& home, I b, I e, F&& f)
+  {
+    if (home)
+      assert (home->absolute () && home->normalized ());
+
+    if (b == e)
+      return nullopt;
+
+    // Use the first directory as a start.
+    //
+    I i (b);
+    dir_path d (f (i));
+
+    // Try to find a common prefix for each subsequent directory.
+    //
+    for (++i; i != e; ++i)
+    {
+      bool p (false);
+
+      for (;
+           !(d.root () || (home && d == *home));
+           d = d.directory ())
+      {
+        if (f (i).sub (d))
+        {
+          p = true;
+          break;
+        }
+      }
+
+      if (!p)
+        return nullopt;
+    }
+
+    return d;
+  }
 }
diff --git a/libbutl/diagnostics.cxx b/libbutl/diagnostics.cxx
index b038e5d..6ac8192 100644
--- a/libbutl/diagnostics.cxx
+++ b/libbutl/diagnostics.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/diagnostics.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/diagnostics.mxx>
-#endif
+#include <libbutl/diagnostics.hxx>
 
 #ifndef _WIN32
 #  include <unistd.h> // write()
@@ -12,49 +10,36 @@
 #  include <io.h> //_write()
 #endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <utility>
-#include <exception>
-
 #include <ios>      // ios::failure
 #include <mutex>
 #include <string>
+#include <cassert>
 #include <cstddef>  // size_t
 #include <iostream> // cerr
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-module butl.diagnostics;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import std.threading;
-import butl.utility;
-import butl.optional;
-import butl.fdstream; // stderr_fd(), fdterm()
+#ifndef LIBBUTL_MINGW_STDTHREAD
+#  include <mutex>
 #else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
+#  include <libbutl/mingw-mutex.hxx>
 #endif
 
+#include <libbutl/ft/lang.hxx> // thread_local
+
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+
 using namespace std;
 
 namespace butl
 {
   ostream* diag_stream = &cerr;
 
-  static mutex diag_mutex;
+#ifndef LIBBUTL_MINGW_STDTHREAD
+  static std::mutex diag_mutex;
+#else
+  static mingw_stdthread::mutex diag_mutex;
+#endif
 
   string diag_progress;
   static string diag_progress_blank; // Being printed blanks out the line.
@@ -158,28 +143,28 @@ namespace butl
   default_writer (const diag_record& r)
   {
     r.os.put ('\n');
-    diag_stream_lock () << r.os.str ();
+
+    diag_stream_lock l;
+    (*diag_stream) << r.os.str ();
 
     // We can endup flushing the result of several writes. The last one may
     // possibly be incomplete, but that's not a problem as it will also be
     // followed by the flush() call.
     //
-    // @@ Strange: why not just hold the lock for both write and flush?
-    //
     diag_stream->flush ();
   }
 
-  void (*diag_record::writer) (const diag_record&) = &default_writer;
+  diag_writer* diag_record::writer = &default_writer;
 
   void diag_record::
-  flush () const
+  flush (void (*w) (const diag_record&)) const
   {
     if (!empty_)
     {
       if (epilogue_ == nullptr)
       {
-        if (writer != nullptr)
-          writer (*this);
+        if (w != nullptr || (w = writer) != nullptr)
+          w (*this);
 
         empty_ = true;
       }
@@ -189,8 +174,8 @@ namespace butl
         //
         auto e (epilogue_);
         epilogue_ = nullptr;
-        e (*this); // Can throw.
-        flush ();  // Call ourselves to write the data in case it returns.
+        e (*this, w); // Can throw.
+        flush (w);    // Call ourselves to write the data in case it returns.
       }
     }
   }
@@ -213,4 +198,28 @@ namespace butl
       flush ();
 #endif
   }
+
+  // Diagnostics stack.
+  //
+  static
+#ifdef __cpp_thread_local
+  thread_local
+#else
+  __thread
+#endif
+  const diag_frame* diag_frame_stack = nullptr;
+
+  const diag_frame* diag_frame::
+  stack () noexcept
+  {
+    return diag_frame_stack;
+  }
+
+  const diag_frame* diag_frame::
+  stack (const diag_frame* f) noexcept
+  {
+    const diag_frame* r (diag_frame_stack);
+    diag_frame_stack = f;
+    return r;
+  }
 }
diff --git a/libbutl/diagnostics.mxx b/libbutl/diagnostics.hxx
index d41ba74..c6db34b 100644
--- a/libbutl/diagnostics.mxx
+++ b/libbutl/diagnostics.hxx
@@ -1,32 +1,19 @@
-// file      : libbutl/diagnostics.mxx -*- C++ -*-
+// file      : libbutl/diagnostics.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <ostream>
 #include <sstream>
 #include <utility>   // move(), forward()
 #include <exception> // uncaught_exception[s]()
-#endif
 
 #include <libbutl/ft/exception.hxx> // uncaught_exceptions
 
-#ifdef __cpp_modules_ts
-export module butl.diagnostics;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Diagnostic facility base infrastructure.
   //
@@ -40,8 +27,11 @@ LIBBUTL_MODEXPORT namespace butl
   LIBBUTL_SYMEXPORT extern std::ostream* diag_stream;
 
   // Acquire the diagnostics exclusive access mutex in ctor, release in dtor.
-  // An object of the type must be created prior to writing to diag_stream (see
-  // above).
+  // An object of the type must be created prior to writing to diag_stream
+  // (see above).
+  //
+  // Note that this class also manages the interaction with the progress
+  // printing (see below).
   //
   struct LIBBUTL_SYMEXPORT diag_stream_lock
   {
@@ -87,13 +77,26 @@ LIBBUTL_MODEXPORT namespace butl
     ~diag_progress_lock ();
   };
 
+  // Diagnostic record and marks (error, warn, etc).
   //
+  // There are two ways to use this facility in a project: simple, where we
+  // just alias the types in our namespace, and complex, where instead we
+  // derive from them and "override" (hide, really) operator<< (and a few
+  // other functions) in order to make ADL look in our namespace rather than
+  // butl. In the simple case we may have to resort to defining some
+  // operator<< overloads in namespace std in order to satisfy ADL. This is
+  // usually not an acceptable approach for libraries, which is where the
+  // complex case comes in (see libbuild2 for a "canonical" example of the
+  // complex case). Note also that it doesn't seem worth templatizing epilogue
+  // so the complex case may also need to do a few casts but those should be
+  // limited to the diagnostics infrastructure.
   //
   struct diag_record;
   template <typename> struct diag_prologue;
   template <typename> struct diag_mark;
 
-  using diag_epilogue = void (const diag_record&);
+  using diag_writer = void (const diag_record&);
+  using diag_epilogue = void (const diag_record&, diag_writer*);
 
   struct LIBBUTL_SYMEXPORT diag_record
   {
@@ -130,7 +133,7 @@ LIBBUTL_MODEXPORT namespace butl
     full () const {return !empty_;}
 
     void
-    flush () const;
+    flush (diag_writer* = nullptr) const;
 
     void
     append (const char* indent, diag_epilogue* e) const
@@ -163,7 +166,7 @@ LIBBUTL_MODEXPORT namespace butl
 #endif
         empty_ (r.empty_),
         epilogue_ (r.epilogue_),
-        os (std::move (r.os))
+        os (std::move (r.os)) // Note: can throw.
     {
       if (!empty_)
       {
@@ -181,7 +184,7 @@ LIBBUTL_MODEXPORT namespace butl
     // Diagnostics writer. The default implementation writes the record text
     // to diag_stream. If it is NULL, then the record text is ignored.
     //
-    static void (*writer) (const diag_record&);
+    static diag_writer* writer;
 
   protected:
 #ifdef __cpp_lib_uncaught_exceptions
@@ -276,4 +279,97 @@ LIBBUTL_MODEXPORT namespace butl
       e.B::operator() (r);
     }
   };
+
+  // Diagnostics stack. Each frame is "applied" to the diag record.
+  //
+  // Unfortunately most of our use-cases don't fit into the 2-pointer small
+  // object optimization of std::function. So we have to complicate things
+  // a bit here.
+  //
+  struct LIBBUTL_SYMEXPORT diag_frame
+  {
+    explicit
+    diag_frame (void (*f) (const diag_frame&, const diag_record&))
+        : func_ (f)
+    {
+      if (func_ != nullptr)
+        prev_ = stack (this); // Become the tip, remember the previous one.
+    }
+
+    diag_frame (diag_frame&& x)
+        : func_ (x.func_)
+    {
+      if (func_ != nullptr)
+      {
+        prev_ = x.prev_;
+        stack (this); // Note: this frame becomes the tip of the stack.
+
+        x.func_ = nullptr; // Disarm x so its destructor leaves the stack alone.
+      }
+    }
+
+    diag_frame& operator= (diag_frame&&) = delete;
+
+    diag_frame (const diag_frame&) = delete;
+    diag_frame& operator= (const diag_frame&) = delete;
+
+    ~diag_frame ()
+    {
+      if (func_ != nullptr)
+        stack (prev_); // Restore the previous tip.
+    }
+
+    // Normally passed as an epilogue. Writer is not used.
+    //
+    static void
+    apply (const diag_record& r, diag_writer* = nullptr)
+    {
+      for (const diag_frame* f (stack ()); f != nullptr; f = f->prev_)
+        f->func_ (*f, r);
+    }
+
+    // Tip of the stack.
+    //
+    static const diag_frame*
+    stack () noexcept;
+
+    // Set the new and return the previous tip of the stack.
+    //
+    static const diag_frame*
+    stack (const diag_frame*) noexcept;
+
+    struct stack_guard
+    {
+      explicit stack_guard (const diag_frame* s): s_ (stack (s)) {}
+      ~stack_guard () {stack (s_);}
+      const diag_frame* s_; // Tip that was replaced (restored in dtor).
+    };
+
+  private:
+    void (*func_) (const diag_frame&, const diag_record&);
+    const diag_frame* prev_ = nullptr; // Never indeterminate (null/moved-from).
+  };
+
+  template <typename F>
+  struct diag_frame_impl: diag_frame
+  {
+    explicit
+    diag_frame_impl (F f): diag_frame (&thunk), func_ (std::move (f)) {}
+
+  private:
+    static void
+    thunk (const diag_frame& f, const diag_record& r)
+    {
+      static_cast<const diag_frame_impl&> (f).func_ (r); // Call the callable.
+    }
+
+    const F func_; // Callable applied to each diag record via thunk().
+  };
+
+  template <typename F>
+  inline diag_frame_impl<F>
+  make_diag_frame (F f)
+  {
+    return diag_frame_impl<F> (std::move (f)); // Note: qualified (no ADL).
+  }
 }
diff --git a/libbutl/export.hxx b/libbutl/export.hxx
index 3353ca8..dc04f85 100644
--- a/libbutl/export.hxx
+++ b/libbutl/export.hxx
@@ -3,14 +3,6 @@
 
 #pragma once
 
-// If modules are available, setup the module export.
-//
-#ifdef __cpp_modules_ts
-#  define LIBBUTL_MODEXPORT export
-#else
-#  define LIBBUTL_MODEXPORT
-#endif
-
 // Normally we don't export class templates (but do complete specializations),
 // inline functions, and classes with only inline member functions. Exporting
 // classes that inherit from non-exported/imported bases (e.g., std::string)
diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx
index 4948052..df5b531 100644
--- a/libbutl/fdstream.cxx
+++ b/libbutl/fdstream.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/fdstream.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/fdstream.hxx>
 
 #include <errno.h> // errno, E*
 
@@ -12,72 +10,54 @@
 #  include <unistd.h>     // close(), read(), write(), lseek(), dup(), pipe(),
                           // ftruncate(), isatty(), ssize_t, STD*_FILENO
 #  include <sys/uio.h>    // writev(), iovec
-#  include <sys/stat.h>   // stat(), S_I*
+#  include <sys/stat.h>   // stat(), fstat(), S_I*
 #  include <sys/time.h>   // timeval
 #  include <sys/types.h>  // stat, off_t
 #  include <sys/select.h>
 #else
 #  include <libbutl/win32-utility.hxx>
 
-#  include <io.h>       // _close(), _read(), _write(), _setmode(), _sopen(),
-                        // _lseek(), _dup(), _pipe(), _chsize_s,
-                        // _get_osfhandle()
-#  include <share.h>    // _SH_DENYNO
-#  include <stdio.h>    // _fileno(), stdin, stdout, stderr, SEEK_*
-#  include <fcntl.h>    // _O_*
-#  include <sys/stat.h> // S_I*
+#  ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+#    define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x04
+#  endif
+
+#  include <io.h>        // _close(), _read(), _write(), _setmode(), _sopen(),
+                         // _lseek(), _dup(), _pipe(), _chsize_s,
+                         // _get_osfhandle()
+#  include <share.h>     // _SH_DENYNO
+#  include <stdio.h>     // _fileno(), stdin, stdout, stderr, SEEK_*
+#  include <fcntl.h>     // _O_*
+#  include <sys/types.h> // _stat
+#  include <sys/stat.h>  // fstat(), S_I*
+
+#  ifdef _MSC_VER // Unlikely to be fixed in newer versions.
+#    define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#    define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#    define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+#  endif
 
 #  include <wchar.h> // wcsncmp(), wcsstr()
 
+#  include <thread>    // this_thread::yield()
 #  include <algorithm> // count()
 #endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <vector>
-#include <string>
-#include <chrono>
-#include <istream>
-#include <ostream>
-#include <utility>
-#include <cstdint>
-#include <cstddef>
-
 #include <ios>          // ios_base::openmode, ios_base::failure
 #include <new>          // bad_alloc
 #include <limits>       // numeric_limits
-#include <cstring>      // memcpy(), memmove()
+#include <cassert>
+#include <cstring>      // memcpy(), memmove(), memchr(), strcmp()
+#include <cstdlib>      // getenv()
 #include <iostream>     // cin, cout
 #include <exception>    // uncaught_exception[s]()
 #include <stdexcept>    // invalid_argument
 #include <system_error>
-#endif
 
-#include <libbutl/ft/exception.hxx>     // uncaught_exceptions
+#include <libbutl/ft/exception.hxx>    // uncaught_exceptions
 #include <libbutl/process-details.hxx>
 
-#ifdef __cpp_modules_ts
-module butl.fdstream;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading; // Clang wants it in purview (see process-details.hxx).
-#endif
-import butl.path;
-import butl.filesystem;
-import butl.small_vector;
-#endif
-
-import butl.utility; // throw_*_ios_failure(), function_cast()
-import butl.timestamp;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/timestamp.mxx>
-#endif
+#include <libbutl/utility.hxx>   // throw_*_ios_failure(), function_cast()
+#include <libbutl/timestamp.hxx>
 
 using namespace std;
 
@@ -167,7 +147,7 @@ namespace butl
   }
 #endif
 
-  // fdbuf
+  // fdstreambuf
   //
   // Return true if the file descriptor is in the non-blocking mode. Throw
   // ios::failure on the underlying OS error.
@@ -188,7 +168,7 @@ namespace butl
 #endif
   }
 
-  void fdbuf::
+  void fdstreambuf::
   open (auto_fd&& fd, uint64_t pos)
   {
     close ();
@@ -201,7 +181,7 @@ namespace butl
     fd_ = move (fd);
   }
 
-  bool fdbuf::
+  bool fdstreambuf::
   blocking (bool m)
   {
     // Verify that the file descriptor is open.
@@ -225,7 +205,7 @@ namespace butl
     return !m;
   }
 
-  streamsize fdbuf::
+  streamsize fdstreambuf::
   showmanyc ()
   {
     if (!is_open ())
@@ -260,7 +240,7 @@ namespace butl
     return 0;
   }
 
-  fdbuf::int_type fdbuf::
+  fdstreambuf::int_type fdstreambuf::
   underflow ()
   {
     int_type r (traits_type::eof ());
@@ -282,7 +262,7 @@ namespace butl
     return r;
   }
 
-  bool fdbuf::
+  bool fdstreambuf::
   load ()
   {
     // Doesn't handle blocking mode and so should not be called.
@@ -299,7 +279,7 @@ namespace butl
     return n != 0;
   }
 
-  void fdbuf::
+  void fdstreambuf::
   seekg (uint64_t off)
   {
     // In the future we may implement the blocking behavior for a non-blocking
@@ -334,7 +314,7 @@ namespace butl
     setg (buf_, buf_, buf_);
   }
 
-  fdbuf::int_type fdbuf::
+  fdstreambuf::int_type fdstreambuf::
   overflow (int_type c)
   {
     int_type r (traits_type::eof ());
@@ -362,7 +342,7 @@ namespace butl
     return r;
   }
 
-  int fdbuf::
+  int fdstreambuf::
   sync ()
   {
     if (!is_open ())
@@ -379,15 +359,7 @@ namespace butl
     return save () ? 0 : -1;
   }
 
-#ifdef _WIN32
-  static inline int
-  write (int fd, const void* buf, size_t n)
-  {
-    return _write (fd, buf, static_cast<unsigned int> (n));
-  }
-#endif
-
-  bool fdbuf::
+  bool fdstreambuf::
   save ()
   {
     size_t n (pptr () - pbase ());
@@ -398,7 +370,7 @@ namespace butl
       // descriptor opened for read-only access (while -1 with errno EBADF is
       // expected). This is in contrast with VC's _write() and POSIX's write().
       //
-      auto m (write (fd_.get (), buf_, n));
+      auto m (fdwrite (fd_.get (), buf_, n));
 
       if (m == -1)
         throw_generic_ios_failure (errno);
@@ -414,7 +386,7 @@ namespace butl
     return true;
   }
 
-  streamsize fdbuf::
+  streamsize fdstreambuf::
   xsputn (const char_type* s, streamsize sn)
   {
     // The xsputn() function interface doesn't support the non-blocking
@@ -513,7 +485,7 @@ namespace butl
     // Flush the buffer.
     //
     size_t wn (bn + an);
-    int r (wn > 0 ? write (fd_.get (), buf_, wn) : 0);
+    streamsize r (wn > 0 ? fdwrite (fd_.get (), buf_, wn) : 0);
 
     if (r == -1)
       throw_generic_ios_failure (errno);
@@ -556,7 +528,7 @@ namespace butl
 
     // The data tail doesn't fit the buffer so write it to the file.
     //
-    r = write (fd_.get (), s, n);
+    r = fdwrite (fd_.get (), s, n);
 
     if (r == -1)
       throw_generic_ios_failure (errno);
@@ -571,13 +543,13 @@ namespace butl
   //
   // - basic_ostream::seekp(pos)                    ->
   //     basic_streambuf::pubseekpos(pos, ios::out) ->
-  //       fdbuf::seekpos(pos, ios::out)
+  //       fdstreambuf::seekpos(pos, ios::out)
   //
   // - basic_istream::seekg(pos)                   ->
   //     basic_streambuf::pubseekpos(pos, ios::in) ->
-  //       fdbuf::seekpos(pos, ios::in)
+  //       fdstreambuf::seekpos(pos, ios::in)
   //
-  fdbuf::pos_type fdbuf::
+  fdstreambuf::pos_type fdstreambuf::
   seekpos (pos_type pos, ios_base::openmode which)
   {
     // Note that the position type provides an explicit conversion to the
@@ -592,21 +564,21 @@ namespace butl
   //
   // - basic_ostream::seekp(off, dir)                    ->
   //     basic_streambuf::pubseekoff(off, dir, ios::out) ->
-  //       fdbuf::seekoff(off, dir, ios::out)
+  //       fdstreambuf::seekoff(off, dir, ios::out)
   //
   // - basic_ostream::tellp()                               ->
   //     basic_streambuf::pubseekoff(0, ios::cur, ios::out) ->
-  //       fdbuf::seekoff(0, ios::cur, ios::out)
+  //       fdstreambuf::seekoff(0, ios::cur, ios::out)
   //
   // - basic_istream::seekg(off, dir)                   ->
   //     basic_streambuf::pubseekoff(off, dir, ios::in) ->
-  //       fdbuf::seekoff(off, dir, ios::in)
+  //       fdstreambuf::seekoff(off, dir, ios::in)
   //
   // - basic_istream::tellg()                              ->
   //     basic_streambuf::pubseekoff(0, ios::cur, ios::in) ->
-  //       fdbuf::seekoff(0, ios::cur, ios::in)
+  //       fdstreambuf::seekoff(0, ios::cur, ios::in)
   //
-  fdbuf::pos_type fdbuf::
+  fdstreambuf::pos_type fdstreambuf::
   seekoff (off_type off, ios_base::seekdir dir, ios_base::openmode which)
   {
     // The seekoff() function interface doesn't support the non-blocking
@@ -830,9 +802,8 @@ namespace butl
       catch (const ios_base::failure&) {}
     }
 
-    // Underlying file descriptor is closed by fdbuf dtor with errors (if any)
-    // being ignored.
-    //
+    // Underlying file descriptor is closed by fdstreambuf dtor with errors
+    // (if any) being ignored.
   }
 
   void ifdstream::
@@ -873,7 +844,7 @@ namespace butl
   }
 
   ifdstream&
-  getline (ifdstream& is, string& s, char delim)
+  getline (ifdstream& is, string& l, char delim)
   {
     ifdstream::iostate eb (is.exceptions ());
     assert (eb & ifdstream::badbit);
@@ -881,16 +852,16 @@ namespace butl
     // Amend the exception mask to prevent exceptions being thrown by the C++
     // IO runtime to avoid incompatibility issues due to ios_base::failure ABI
     // fiasco (#66145). We will not restore the mask when ios_base::failure is
-    // thrown by fdbuf since there is no way to "silently" restore it if the
-    // corresponding bits are in the error state without the exceptions() call
-    // throwing ios_base::failure. Not restoring exception mask on throwing
-    // because of badbit should probably be ok since the stream is no longer
-    // usable.
+    // thrown by fdstreambuf since there is no way to "silently" restore it if
+    // the corresponding bits are in the error state without the exceptions()
+    // call throwing ios_base::failure. Not restoring exception mask on
+    // throwing because of badbit should probably be ok since the stream is no
+    // longer usable.
     //
     if (eb != ifdstream::badbit)
       is.exceptions (ifdstream::badbit);
 
-    std::getline (is, s, delim);
+    std::getline (is, l, delim);
 
     // Throw if any of the newly set bits are present in the exception mask.
     //
@@ -903,6 +874,58 @@ namespace butl
     return is;
   }
 
+  bool
+  getline_non_blocking (ifdstream& is, string& l, char delim)
+  {
+    assert (!is.blocking () && (is.exceptions () & ifdstream::badbit) != 0);
+
+    fdstreambuf& sb (*static_cast<fdstreambuf*> (is.rdbuf ()));
+
+    // Read until blocked (0), EOF (-1) or encounter the delimiter.
+    //
+    // Note that here we reasonably assume that any failure in in_avail()
+    // will lead to badbit and thus an exception (see showmanyc()).
+    //
+    streamsize s;
+    while ((s = sb.in_avail ()) > 0)
+    {
+      const char* p (sb.gptr ());
+      size_t n (sb.egptr () - p);
+
+      const char* e (static_cast<const char*> (memchr (p, delim, n)));
+      if (e != nullptr)
+        n = e - p;
+
+      l.append (p, n);
+
+      // Note: consume the delimiter if found.
+      //
+      sb.gbump (static_cast<int> (n + (e != nullptr ? 1 : 0)));
+
+      if (e != nullptr)
+        break;
+    }
+
+    // Here s can be:
+    //
+    // -1 -- EOF.
+    //  0 -- blocked before encountering delimiter/EOF.
+    // >0 -- encountered the delimiter.
+    //
+    if (s == -1)
+    {
+      is.setstate (ifdstream::eofbit);
+
+      // If we couldn't extract anything, not even the delimiter, then this is
+      // a failure per the getline() interface.
+      //
+      if (l.empty ())
+        is.setstate (ifdstream::failbit);
+    }
+
+    return s != 0;
+  }
+
   // ofdstream
   //
   ofdstream::
@@ -1052,10 +1075,11 @@ namespace butl
 #endif
 
     // Unlike other platforms, *BSD allows opening a directory as a file which
-    // will cause all kinds of problems upstream (e.g., cpfile()). So we detect
-    // and diagnose this.
+    // will cause all kinds of problems upstream (e.g., cpfile()). So we
+    // detect and diagnose this. Note: not certain this is the case for NetBSD
+    // and OpenBSD.
     //
-#if defined(__FreeBSD__) || defined(__NetBSD__)
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
     {
       struct stat s;
       if (stat (f, &s) == 0 && S_ISDIR (s.st_mode))
@@ -1141,12 +1165,17 @@ namespace butl
     // underlying CreateFile() function call (see mventry() for details). If
     // that's the case, we will keep trying to open the file for two seconds.
     //
-    for (size_t i (0); i < 21; ++i)
+    // Also, it turns out, if someone memory-maps a file, it takes Windows
+    // some time to realize it's been unmapped and until then any attempt to
+    // open it results in EINVAL POSIX error, ERROR_USER_MAPPED_FILE system
+    // error. So we retry those as well.
+    //
+    for (size_t i (0); i < 41; ++i)
     {
-      // Sleep 100 milliseconds before the open retry.
+      // Sleep 50 milliseconds before the open retry.
       //
       if (i != 0)
-        Sleep (100);
+        Sleep (50);
 
       fd = pass_perm
            ? _sopen (f, of, _SH_DENYNO, pf)
@@ -1160,10 +1189,11 @@ namespace butl
       // Note that MinGW's _sopen() is just a stub forwarding the call to the
       // (publicly available) MSVCRT's implementation.
       //
-      if (!(fd == -1        &&
-            out             &&
-            errno == EACCES &&
-            GetLastError () == ERROR_SHARING_VIOLATION))
+      if (!(fd == -1                             &&
+            out                                  &&
+            (errno == EACCES || errno == EINVAL) &&
+            (GetLastError () == ERROR_SHARING_VIOLATION ||
+             GetLastError () == ERROR_USER_MAPPED_FILE)))
         break;
     }
 
@@ -1372,6 +1402,28 @@ namespace butl
       throw_generic_ios_failure (errno);
   }
 
+  entry_stat
+  fdstat (int fd)
+  {
+    struct stat s;
+    if (fstat (fd, &s) != 0)
+      throw_generic_ios_failure (errno); // ios::failure, as documented.
+
+    auto m (s.st_mode);
+    entry_type t (entry_type::unknown);
+
+    // Note: cannot be a symlink.
+    //
+    if (S_ISREG (m))
+      t = entry_type::regular;
+    else if (S_ISDIR (m))
+      t = entry_type::directory;
+    else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m))
+      t = entry_type::other;
+
+    return entry_stat {t, static_cast<uint64_t> (s.st_size)};
+  }
+
   bool
   fdterm (int fd)
   {
@@ -1392,6 +1444,16 @@ namespace butl
     throw_generic_ios_failure (errno);
   }
 
+  bool
+  fdterm_color (int, bool)
+  {
+    const char* t (std::getenv ("TERM"));
+
+    // This test was lifted from GCC (Emacs shell sets TERM=dumb).
+    //
+    return t != nullptr && strcmp (t, "dumb") != 0;
+  }
+
   static pair<size_t, size_t>
   fdselect (fdselect_set& read,
             fdselect_set& write,
@@ -1410,6 +1472,8 @@ namespace butl
 
       for (fdselect_state& s: from)
       {
+        s.ready = false;
+
         if (s.fd == nullfd)
           continue;
 
@@ -1417,7 +1481,6 @@ namespace butl
           throw invalid_argument ("invalid file descriptor");
 
         FD_SET (s.fd, &to);
-        s.ready = false;
 
         if (max_fd < s.fd)
           max_fd = s.fd;
@@ -1524,6 +1587,12 @@ namespace butl
     return read (fd, buf, n);
   }
 
+  streamsize
+  fdwrite (int fd, const void* buf, size_t n)
+  {
+    return write (fd, buf, n);
+  }
+
 #else
 
   auto_fd
@@ -1779,9 +1848,34 @@ namespace butl
       throw_generic_ios_failure (e);
   }
 
+  entry_stat
+  fdstat (int fd)
+  {
+    // Since symlinks have been taken care of, we can just _fstat64().
+    //
+    struct __stat64 s;
+    if (_fstat64 (fd, &s) != 0)
+      throw_generic_ios_failure (errno); // ios::failure, as documented.
+
+    auto m (s.st_mode);
+    entry_type t (entry_type::unknown);
+
+    if (S_ISREG (m))
+      t = entry_type::regular;
+    else if (S_ISDIR (m))
+      t = entry_type::directory;
+    else if (S_ISCHR (m))
+      t = entry_type::other;
+
+    return entry_stat {t, static_cast<uint64_t> (s.st_size)};
+  }
+
   bool
   fdterm (int fd)
   {
+    // @@ Both GCC and Clang simply call GetConsoleMode() for this check. I
+    //    wonder why we don't do the same? See also fdterm_color() below.
+
     // We don't need to close it (see fd_to_handle()).
     //
     HANDLE h (fd_to_handle (fd));
@@ -1795,7 +1889,13 @@ namespace butl
       throw_system_ios_failure (e);
 
     if (t == FILE_TYPE_CHAR) // Terminal.
-      return true;
+    {
+      // One notable special file that has this type is nul (as returned by
+      // fdopen_null()). So tighten this case with the GetConsoleMode() call.
+      //
+      DWORD m;
+      return GetConsoleMode (h, &m) != 0;
+    }
 
     if (t != FILE_TYPE_PIPE) // Pipe still can be a terminal (see below).
       return false;
@@ -1867,6 +1967,42 @@ namespace butl
     return false;
   }
 
+  bool
+  fdterm_color (int fd, bool enable)
+  {
+    // We don't need to close it (see fd_to_handle()).
+    //
+    HANDLE h (fd_to_handle (fd));
+
+    // See GH issue #312 for background on this logic.
+    //
+    DWORD m;
+    if (!GetConsoleMode (h, &m))
+      throw_system_ios_failure (GetLastError ());
+
+    // Some terminals (e.g. Windows Terminal) enable VT processing by default.
+    //
+    if ((m & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0)
+      return true;
+
+    if (enable)
+    {
+      // If SetConsoleMode() fails, assume VT processing is unsupported (it
+      // is only supported from a certain build of Windows 10).
+      //
+      // Note that Wine pretends to support this but doesn't handle the escape
+      // sequences. See https://bugs.winehq.org/show_bug.cgi?id=49780.
+      //
+      if (SetConsoleMode (h,
+                          (m                                 |
+                           ENABLE_PROCESSED_OUTPUT           |
+                           ENABLE_VIRTUAL_TERMINAL_PROCESSING)))
+        return true;
+    }
+
+    return false;
+  }
+
   static pair<size_t, size_t>
   fdselect (fdselect_set& read,
             fdselect_set& write,
@@ -1883,13 +2019,14 @@ namespace butl
 
     for (fdselect_state& s: read)
     {
+      s.ready = false;
+
       if (s.fd == nullfd)
         continue;
 
       if (s.fd < 0)
         throw invalid_argument ("invalid file descriptor");
 
-      s.ready = false;
       ++n;
     }
 
@@ -1910,7 +2047,7 @@ namespace butl
     //
     size_t r (0);
 
-    while (true)
+    for (size_t i (0);; ++i)
     {
       for (fdselect_state& s: read)
       {
@@ -1983,7 +2120,11 @@ namespace butl
       if (r != 0)
         break;
 
-      DWORD t (50);
+      // Yield for the first ~1000 iterations, then ramp up linearly to 25ms.
+      //
+      DWORD t (
+        static_cast<DWORD> (i <= 1000  ?  0 :
+                            i >= 1000 + 100 ? 25 : 1 + ((i - 1000) / 4)));
 
       if (timeout)
       {
@@ -2000,7 +2141,10 @@ namespace butl
           break;
       }
 
-      Sleep (t);
+      if (t == 0)
+        this_thread::yield ();
+      else
+        Sleep (t);
     }
 
     return make_pair (r, 0);
@@ -2043,6 +2187,12 @@ namespace butl
     return r;
   }
 
+  streamsize
+  fdwrite (int fd, const void* buf, size_t n)
+  {
+    return _write (fd, buf, static_cast<unsigned int> (n));
+  }
+
 #endif
 
   pair<size_t, size_t>
diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.hxx
index c863d2c..9c8f786 100644
--- a/libbutl/fdstream.mxx
+++ b/libbutl/fdstream.hxx
@@ -1,13 +1,8 @@
-// file      : libbutl/fdstream.mxx -*- C++ -*-
+// file      : libbutl/fdstream.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-#include <cassert>
 
-#ifndef __cpp_lib_modules_ts
 #include <ios>     // streamsize
 #include <vector>
 #include <string>
@@ -18,29 +13,14 @@
 #include <cstdint> // uint16_t, uint64_t
 #include <cstddef> // size_t
 
-#include <iterator>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.fdstream;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.filesystem; // permissions
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/filesystem.hxx>   // permissions, entry_stat
+#include <libbutl/small-vector.hxx>
+#include <libbutl/bufstreambuf.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // RAII type for file descriptors. Note that failure to close the descriptor
   // is silently ignored by both the destructor and reset().
@@ -54,9 +34,6 @@ LIBBUTL_MODEXPORT namespace butl
     constexpr operator int () const {return -1;}
   };
 
-#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym.
-  inline
-#endif
   constexpr nullfd_t nullfd (-1);
 
   class LIBBUTL_SYMEXPORT auto_fd
@@ -142,9 +119,9 @@ LIBBUTL_MODEXPORT namespace butl
   // - input or output but not both (can use a union of two streams for that)
   // - no support for put back
   // - use of tell[gp]() and seek[gp]() is discouraged on Windows for
-  //   fdstreams opened in the text mode (see fdbuf::seekoff() implementation
-  //   for reasoning and consider using non-standard tellg() and seekg() in
-  //   fdbuf, instead)
+  //   fdstreams opened in the text mode (see fdstreambuf::seekoff()
+  //   implementation for reasoning and consider using non-standard tellg()
+  //   and seekg() in fdstreambuf, instead)
   // - non-blocking file descriptor is supported only by showmanyc() function
   //   and only for pipes on Windows, in contrast to POSIX systems
   // - throws ios::failure in case of open(), read(), write(), close(),
@@ -157,20 +134,26 @@ LIBBUTL_MODEXPORT namespace butl
   // - passing to constructor auto_fd with a negative file descriptor is valid
   //   and results in the creation of an unopened object
   //
-  class LIBBUTL_SYMEXPORT fdbuf: public std::basic_streambuf<char>
+  class LIBBUTL_SYMEXPORT fdstreambuf: public bufstreambuf
   {
   public:
-    fdbuf () = default;
+    // Reasonable (for stack allocation) buffer size that provides decent
+    // performance.
+    //
+    static const std::size_t buffer_size = 8192;
+
+    fdstreambuf () = default;
 
     // Unless specified, the current read/write position is assumed to
     // be 0 (note: not queried).
     //
-    fdbuf (auto_fd&&, std::uint64_t pos = 0);
+    fdstreambuf (auto_fd&&, std::uint64_t pos = 0);
 
-    // Before we invented auto_fd into fdstreams we keept fdbuf opened on
-    // faulty close attempt. Now fdbuf is always closed by close() function.
-    // This semantics change seems to be the right one as there is no reason to
-    // expect fdclose() to succeed after it has already failed once.
+    // Before we introduced auto_fd into fdstreams we kept fdstreambuf opened
+    // on faulty close attempt. Now fdstreambuf is always closed by close()
+    // function. This semantics change seems to be the right one as there is
+    // no reason to expect fdclose() to succeed after it has already failed
+    // once.
     //
     void
     close () {fd_.close ();}
@@ -196,14 +179,11 @@ LIBBUTL_MODEXPORT namespace butl
     bool
     blocking (bool);
 
-  public:
-    using base = std::basic_streambuf<char>;
-
-    using int_type = base::int_type;
-    using traits_type = base::traits_type;
+    bool
+    blocking () const {return !non_blocking_;}
 
-    using pos_type = base::pos_type; // std::streampos
-    using off_type = base::off_type; // std::streamoff
+  public:
+    using base = bufstreambuf;
 
     // basic_streambuf input interface.
     //
@@ -222,13 +202,7 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Return the (logical) position of the next byte to be read.
     //
-    // Note that on Windows when reading in the text mode the logical position
-    // may differ from the physical file descriptor position due to the CRLF
-    // character sequence translation. See the seekoff() implementation for
-    // more background on this issue.
-    //
-    std::uint64_t
-    tellg () const {return off_ - (egptr () - gptr ());}
+    using base::tellg;
 
     // Seek to the (logical) position as if by reading the specified number of
     // bytes from the beginning of the stream. Throw ios::failure on the
@@ -255,8 +229,7 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Return the (logical) position of the next byte to be written.
     //
-    std::uint64_t
-    tellp () const {return off_ + (pptr () - buf_);}
+    using base::tellp;
 
     // basic_streambuf positioning interface (both input/output).
     //
@@ -273,8 +246,7 @@ LIBBUTL_MODEXPORT namespace butl
 
   private:
     auto_fd fd_;
-    std::uint64_t off_;
-    char buf_[8192];
+    char buf_[buffer_size];
     bool non_blocking_ = false;
   };
 
@@ -307,7 +279,9 @@ LIBBUTL_MODEXPORT namespace butl
     binary       = 0x02,
     skip         = 0x04,
     blocking     = 0x08,
-    non_blocking = 0x10
+    non_blocking = 0x10,
+
+    none = 0
   };
 
   inline fdstream_mode operator& (fdstream_mode, fdstream_mode);
@@ -347,8 +321,11 @@ LIBBUTL_MODEXPORT namespace butl
     int
     fd () const {return buf_.fd ();}
 
+    bool
+    blocking () const {return buf_.blocking ();}
+
   protected:
-    fdbuf buf_;
+    fdstreambuf buf_;
   };
 
   // iofdstream constructors and open() functions that take openmode as an
@@ -677,6 +654,54 @@ LIBBUTL_MODEXPORT namespace butl
   LIBBUTL_SYMEXPORT ifdstream&
   getline (ifdstream&, std::string&, char delim = '\n');
 
+  // The non-blocking getline() version that reads the line in potentially
+  // multiple calls. Key differences compared to getline():
+  //
+  // - Stream must be in the non-blocking mode and exception mask must have
+  //   at least badbit.
+  //
+  // - Return type is bool instead of stream. Return true if the line has been
+  //   read or false if it should be called again once the stream has more
+  //   data to read. Also return true on failure.
+  //
+  // - The string must be empty on the first call.
+  //
+  // - There could still be data to read in the stream's buffer (as opposed to
+  //   file descriptor) after this function returns true and you should be
+  //   careful not to block on fdselect() in this case. In fact, the
+  //   recommended pattern is to call this function first and only call
+  //   fdselect() if it returns false.
+  //
+  // The typical usage in combination with the eof() helper:
+  //
+  // fdselect_set fds {is.fd (), ...};
+  // fdselect_state& ist (fds[0]);
+  // fdselect_state& ...;
+  //
+  // for (string l; ist.fd != nullfd || ...; )
+  // {
+  //   if (ist.fd != nullfd && getline_non_blocking (is, l))
+  //   {
+  //     if (eof (is))
+  //       ist.fd = nullfd;
+  //     else
+  //     {
+  //       // Consume line.
+  //
+  //       l.clear ();
+  //     }
+  //
+  //     continue;
+  //   }
+  //
+  //   ifdselect (fds);
+  //
+  //   // Handle other ready fds.
+  // }
+  //
+  LIBBUTL_SYMEXPORT bool
+  getline_non_blocking (ifdstream&, std::string&, char delim = '\n');
+
   // Open a file returning an auto_fd that holds its file descriptor on
   // success and throwing ios::failure otherwise.
   //
@@ -862,12 +887,28 @@ LIBBUTL_MODEXPORT namespace butl
   LIBBUTL_SYMEXPORT void
   fdtruncate (int, std::uint64_t);
 
-  // Test whether a file descriptor refers to a terminal. Throw ios::failure on
+  // Return filesystem entry stat from file descriptor. Throw ios::failure on
   // the underlying OS error.
   //
+  // See also path_entry() in filesystem.
+  //
+  LIBBUTL_SYMEXPORT entry_stat
+  fdstat (int);
+
+  // Test whether a file descriptor refers to a terminal. Throw ios::failure
+  // on the underlying OS error.
+  //
   LIBBUTL_SYMEXPORT bool
   fdterm (int);
 
+  // Test whether a terminal file descriptor supports ANSI color output. If
+  // the enable argument is true, then also try to enable color output (only
+  // applicable on some platforms, such as Windows). Throw ios::failure on the
+  // underlying OS error.
+  //
+  LIBBUTL_SYMEXPORT bool
+  fdterm_color (int, bool enable);
+
   // Wait until one or more file descriptors becomes ready for input (reading)
   // or output (writing). Return the pair of numbers of descriptors that are
   // ready. Throw std::invalid_argument if anything is wrong with arguments
@@ -875,7 +916,7 @@ LIBBUTL_MODEXPORT namespace butl
   // underlying OS error.
   //
   // Note that the function clears all the previously-ready entries on each
-  // call. Entries with nullfd are ignored.
+  // call. Entries with nullfd are ignored (but cleared).
   //
   // On Windows only pipes and only their input (read) ends are supported.
   //
@@ -883,11 +924,13 @@ LIBBUTL_MODEXPORT namespace butl
   {
     int  fd;
     bool ready;
+    void* data; // Arbitrary data which can be associated with the descriptor.
 
     // Note: intentionally non-explicit to allow implicit initialization when
     // pushing to fdselect_set.
     //
-    fdselect_state (int fd): fd (fd), ready (false) {}
+    fdselect_state (int fd, void* d = nullptr)
+        : fd (fd), ready (false), data (d) {}
   };
 
   using fdselect_set = small_vector<fdselect_state, 4>;
@@ -940,6 +983,11 @@ LIBBUTL_MODEXPORT namespace butl
   //
   LIBBUTL_SYMEXPORT std::streamsize
   fdread (int, void*, std::size_t);
+
+  // POSIX write() function wrapper, for uniformity.
+  //
+  LIBBUTL_SYMEXPORT std::streamsize
+  fdwrite (int, const void*, std::size_t);
 }
 
 #include <libbutl/fdstream.ixx>
diff --git a/libbutl/fdstream.ixx b/libbutl/fdstream.ixx
index 4ef5b1d..e024af9 100644
--- a/libbutl/fdstream.ixx
+++ b/libbutl/fdstream.ixx
@@ -1,6 +1,8 @@
 // file      : libbutl/fdstream.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
+#include <cassert>
+
 namespace butl
 {
   // auto_fd
@@ -27,16 +29,16 @@ namespace butl
     reset ();
   }
 
-  // fdbuf
+  // fdstreambuf
   //
-  inline fdbuf::
-  fdbuf (auto_fd&& fd, std::uint64_t pos)
+  inline fdstreambuf::
+  fdstreambuf (auto_fd&& fd, std::uint64_t pos)
   {
     if (fd.get () >= 0)
       open (std::move (fd), pos);
   }
 
-  inline auto_fd fdbuf::
+  inline auto_fd fdstreambuf::
   release ()
   {
     return std::move (fd_);
@@ -165,6 +167,8 @@ namespace butl
   inline std::vector<char> ifdstream::
   read_binary ()
   {
+    // @@ TODO: surely there is a more efficient way! See sha256!
+
     std::vector<char> v (std::istreambuf_iterator<char> (*this),
                          std::istreambuf_iterator<char> ());
     return v;
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
index 18be8a9..28a0de8 100644
--- a/libbutl/filesystem.cxx
+++ b/libbutl/filesystem.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/filesystem.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/filesystem.hxx>
 
 #include <errno.h> // errno, E*
 
@@ -18,61 +16,34 @@
 #else
 #  include <libbutl/win32-utility.hxx>
 
-#  include <io.h>        // _find*(), _unlink(), _chmod()
+#  include <io.h>        // _unlink(), _chmod()
 #  include <direct.h>    // _mkdir(), _rmdir()
 #  include <winioctl.h>  // FSCTL_SET_REPARSE_POINT
 #  include <sys/types.h> // _stat
 #  include <sys/stat.h>  // _stat(), S_I*
 
-#  include <cwchar>  // mbsrtowcs(), wcsrtombs(), mbstate_t
-#  include <cstring> // strncmp()
-
 #  ifdef _MSC_VER // Unlikely to be fixed in newer versions.
 #    define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
 #    define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#    define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
 #  endif
-#endif
-
-#include <cassert>
 
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-#include <iterator>
-#include <functional>
+#  include <cwchar>      // mbsrtowcs(), wcsrtombs(), mbstate_t
+#  include <cstring>     // strncmp()
+#  include <type_traits> // is_same
+#endif
 
+#include <chrono>
 #include <vector>
 #include <memory>       // unique_ptr
+#include <cassert>
 #include <algorithm>    // find(), copy()
 #include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.filesystem;
 
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.timestamp;
-import butl.path_pattern;
-#endif
-
-import butl.utility;      // throw_generic_error()
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx>      // throw_generic_error()
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
 
 #ifndef _WIN32
 #  ifndef PATH_MAX
@@ -213,6 +184,19 @@ namespace butl
   // static inline constexpr int
   // ansec (...) {return 0;}
 
+  static inline entry_time
+  entry_tm (const struct stat& s) noexcept
+  {
+    auto tm = [] (time_t sec, auto nsec) -> timestamp
+    {
+      return system_clock::from_time_t (sec) +
+        chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
+    };
+
+    return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
+            tm (s.st_atime, ansec<struct stat> (&s, true))};
+  }
+
   // Return the modification and access times of a regular file or directory.
   //
   static entry_time
@@ -230,14 +214,7 @@ namespace butl
     if (dir ? !S_ISDIR (s.st_mode) : !S_ISREG (s.st_mode))
       return {timestamp_nonexistent, timestamp_nonexistent};
 
-    auto tm = [] (time_t sec, auto nsec) -> timestamp
-    {
-      return system_clock::from_time_t (sec) +
-        chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
-    };
-
-    return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
-            tm (s.st_atime, ansec<struct stat> (&s, true))};
+    return entry_tm (s);
   }
 
   // Set the modification and access times for a regular file or directory.
@@ -339,16 +316,15 @@ namespace butl
 
   // Open a filesystem entry for reading and optionally writing its
   // meta-information and return the entry handle and meta-information if the
-  // path refers to an existing entry and nullhandle otherwise. Follow reparse
-  // points by default. Underlying OS errors are reported by throwing
-  // std::system_error, unless ignore_error is true in which case nullhandle
-  // is returned. In the latter case the error code can be obtained by calling
-  // GetLastError().
+  // path refers to an existing entry and nullhandle otherwise. Underlying OS
+  // errors are reported by throwing std::system_error, unless ignore_error is
+  // true in which case nullhandle is returned. In the latter case the error
+  // code can be obtained by calling GetLastError().
   //
   static inline pair<win32::auto_handle, BY_HANDLE_FILE_INFORMATION>
   entry_info_handle (const char* p,
                      bool write,
-                     bool fr = true,
+                     bool follow_reparse_points,
                      bool ie = false)
   {
     // Open the entry for reading/writing its meta-information. Follow reparse
@@ -363,7 +339,7 @@ namespace butl
                   nullptr,
                   OPEN_EXISTING,
                   FILE_FLAG_BACKUP_SEMANTICS | // Required for a directory.
-                  (fr ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
+                  (follow_reparse_points ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
                   nullptr));
 
     if (h == nullhandle)
@@ -388,13 +364,15 @@ namespace butl
   }
 
   // Return a flag indicating whether the path is to an existing filesystem
-  // entry and its meta-information if so. Follow reparse points by default.
+  // entry and its meta-information if so.
   //
   static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
-  path_entry_info (const char* p, bool fr = true, bool ie = false)
+  path_entry_handle_info (const char* p,
+                          bool follow_reparse_points,
+                          bool ie = false)
   {
     pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
-      entry_info_handle (p, false /* write */, fr, ie));
+      entry_info_handle (p, false /* write */, follow_reparse_points, ie));
 
     if (hi.first == nullhandle)
       return make_pair (false, BY_HANDLE_FILE_INFORMATION ());
@@ -406,9 +384,34 @@ namespace butl
   }
 
   static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
-  path_entry_info (const path& p, bool fr = true, bool ie = false)
+  path_entry_handle_info (const path& p, bool fr, bool ie = false)
   {
-    return path_entry_info (p.string ().c_str (), fr, ie);
+    return path_entry_handle_info (p.string ().c_str (), fr, ie);
+  }
+
+  // Return a flag indicating whether the path is to an existing filesystem
+  // entry and its extended attributes if so. Don't follow reparse points.
+  //
+  static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+  path_entry_info (const char* p, bool ie = false)
+  {
+    WIN32_FILE_ATTRIBUTE_DATA r;
+    if (!GetFileAttributesExA (p, GetFileExInfoStandard, &r))
+    {
+      DWORD ec;
+      if (ie || error_file_not_found (ec = GetLastError ()))
+        return make_pair (false, WIN32_FILE_ATTRIBUTE_DATA ());
+
+      throw_system_error (ec);
+    }
+
+    return make_pair (true, r);
+  }
+
+  static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+  path_entry_info (const path& p, bool ie = false)
+  {
+    return path_entry_info (p.string ().c_str (), ie);
   }
 
   // Reparse point data.
@@ -644,8 +647,48 @@ namespace butl
     return reparse_point_entry (p.string ().c_str (), ie);
   }
 
-  pair<bool, entry_stat>
-  path_entry (const char* p, bool fl, bool ie)
+  static inline timestamp
+  to_timestamp (const FILETIME& t)
+  {
+    // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+    // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
+    // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+    //
+    uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
+                   t.dwLowDateTime);
+
+    nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
+    nsec *= 100;                       // Now in nanoseconds.
+
+    return timestamp (
+      chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+  }
+
+  static inline FILETIME
+  to_filetime (timestamp t)
+  {
+    // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+    // (1601-01-01T00:00:00Z). To convert "UNIX epoch" (1970-01-01T00:00:00Z)
+    // to it we need to add 11644473600 seconds.
+    //
+    uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
+                      t.time_since_epoch ()).count ());
+
+    ticks /= 100;                       // Now in 100 nanosecond "ticks".
+    ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
+
+    FILETIME r;
+    r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
+    r.dwLowDateTime = ticks & 0xFFFFFFFF;
+    return r;
+  }
+
+  // If the entry type being returned is regular or directory and et is not
+  // NULL, then also save the entry modification and access times into the
+  // variable it points to.
+  //
+  static inline pair<bool, entry_stat>
+  path_entry (const char* p, bool fl, bool ie, entry_time* et)
   {
     // A path like 'C:', while being a root path in our terminology, is not as
     // such for Windows, that maintains current directory for each drive, and
@@ -656,73 +699,105 @@ namespace butl
     string d;
     if (path::traits_type::root (p))
     {
-      d = p;
+      d = string (p); // GCC bug #105329.
       d += path::traits_type::directory_separator;
       p = d.c_str ();
     }
 
     // Stat the entry not following reparse points.
     //
-    pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
-      path_entry_info (p, false /* follow_reparse_points */, ie));
+    pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p, ie));
 
     if (!pi.first)
       return make_pair (false, entry_stat {entry_type::unknown, 0});
 
-    if (reparse_point (pi.second.dwFileAttributes))
+    auto entry_info = [et] (const auto& ei)
     {
-      pair<entry_type, path> rp (reparse_point_entry (p, ie));
+      if (et != nullptr)
+      {
+        et->modification = to_timestamp (ei.ftLastWriteTime);
+        et->access = to_timestamp (ei.ftLastAccessTime);
+      }
+
+      if (directory (ei.dwFileAttributes))
+        return make_pair (true, entry_stat {entry_type::directory, 0});
+      else
+        return make_pair (
+          true,
+          entry_stat {entry_type::regular,
+                      ((uint64_t (ei.nFileSizeHigh) << 32) | ei.nFileSizeLow)});
+    };
+
+    if (!reparse_point (pi.second.dwFileAttributes))
+      return entry_info (pi.second);
 
-      if (rp.first == entry_type::symlink)
+    pair<entry_type, path> rp (reparse_point_entry (p, ie));
+
+    if (rp.first == entry_type::symlink)
+    {
+      // If following symlinks is requested, then follow the reparse point and
+      // return its target information. Otherwise, return the symlink entry
+      // type.
+      //
+      if (fl)
       {
-        // If following symlinks is requested, then follow the reparse point,
-        // overwrite its own information with the resolved target information,
-        // and fall through. Otherwise, return the symlink entry type.
-        //
-        if (fl)
-        {
-          pi = path_entry_info (p, true /* follow_reparse_points */, ie);
+        pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
+          path_entry_handle_info (p, true /* follow_reparse_points */, ie));
 
-          if (!pi.first)
-            return make_pair (false, entry_stat {entry_type::unknown, 0});
-        }
-        else
-          return make_pair (true, entry_stat {entry_type::symlink, 0});
+        return pi.first
+               ? entry_info (pi.second)
+               : make_pair (false, entry_stat {entry_type::unknown, 0});
       }
-      else if (rp.first == entry_type::unknown)
-        return make_pair (false, entry_stat {entry_type::unknown, 0});
-      else // entry_type::other
-        return make_pair (true, entry_stat {entry_type::other, 0});
+      else
+        return make_pair (true, entry_stat {entry_type::symlink, 0});
     }
+    else if (rp.first == entry_type::unknown)
+      return make_pair (false, entry_stat {entry_type::unknown, 0});
+    else // entry_type::other
+      return make_pair (true, entry_stat {entry_type::other, 0});
+  }
 
-    if (directory (pi.second.dwFileAttributes))
-      return make_pair (true, entry_stat {entry_type::directory, 0});
-    else
-      return make_pair (
-        true,
-        entry_stat {entry_type::regular,
-                    ((uint64_t (pi.second.nFileSizeHigh) << 32) |
-                     pi.second.nFileSizeLow)});
+  static inline pair<bool, entry_stat>
+  path_entry (const path& p, bool fl, bool ie, entry_time* et)
+  {
+    return path_entry (p.string ().c_str (), fl, ie, et);
+  }
+
+  pair<bool, entry_stat>
+  path_entry (const char* p, bool fl, bool ie)
+  {
+    return path_entry (p, fl, ie, nullptr /* entry_time */);
   }
 
   permissions
   path_permissions (const path& p)
   {
-    pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
+    // Let's optimize for the common case when the entry is not a reparse
+    // point.
+    //
+    auto attr_to_perm = [] (const auto& pi) -> permissions
+    {
+      if (!pi.first)
+        throw_generic_error (ENOENT);
 
-    if (!pi.first)
-      throw_generic_error (ENOENT);
+      // On Windows a filesystem entry is always readable. Also there is no
+      // notion of group/other permissions at OS level, so we extrapolate user
+      // permissions to group/other permissions (as the _stat() function
+      // does).
+      //
+      permissions r (permissions::ru | permissions::rg | permissions::ro);
 
-    // On Windows a filesystem entry is always readable. Also there is no
-    // notion of group/other permissions at OS level, so we extrapolate user
-    // permissions to group/other permissions (as the _stat() function does).
-    //
-    permissions r (permissions::ru | permissions::rg | permissions::ro);
+      if (!readonly (pi.second.dwFileAttributes))
+        r |= permissions::wu | permissions::wg | permissions::wo;
 
-    if (!readonly (pi.second.dwFileAttributes))
-      r |= permissions::wu | permissions::wg | permissions::wo;
+      return r;
+    };
 
-    return r;
+    pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+    return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+           ? attr_to_perm (pi)
+           : attr_to_perm (
+               path_entry_handle_info (p, true /* follow_reparse_points */));
   }
 
   void
@@ -748,50 +823,26 @@ namespace butl
   static entry_time
   entry_tm (const char* p, bool dir)
   {
-    pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
-
-    // If the entry is of the wrong type, then let's pretend that it doesn't
-    // exists.
+    // Let's optimize for the common case when the entry is not a reparse
+    // point.
     //
-    if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
-      return {timestamp_nonexistent, timestamp_nonexistent};
-
-    auto tm = [] (const FILETIME& t) -> timestamp
+    auto attr_to_time = [dir] (const auto& pi) -> entry_time
     {
-      // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
-      // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
-      // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+      // If the entry is of the wrong type, then let's pretend that it doesn't
+      // exist.
       //
-      uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
-                     t.dwLowDateTime);
-
-      nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
-      nsec *= 100;                       // Now in nanoseconds.
+      if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
+        return entry_time {timestamp_nonexistent, timestamp_nonexistent};
 
-      return timestamp (
-        chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+      return entry_time {to_timestamp (pi.second.ftLastWriteTime),
+                         to_timestamp (pi.second.ftLastAccessTime)};
     };
 
-    return {tm (pi.second.ftLastWriteTime), tm (pi.second.ftLastAccessTime)};
-  }
-
-  static inline FILETIME
-  to_filetime (timestamp t)
-  {
-    // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
-    // (1601-01-01T00:00:00Z). To convert "UNIX epoch"
-    // (1970-01-01T00:00:00Z) to it we need to add 11644473600 seconds.
-    //
-    uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
-                      t.time_since_epoch ()).count ());
-
-    ticks /= 100;                       // Now in 100 nanosecond "ticks".
-    ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
-
-    FILETIME r;
-    r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
-    r.dwLowDateTime = ticks & 0xFFFFFFFF;
-    return r;
+    pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+    return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+           ? attr_to_time (pi)
+           : attr_to_time (
+               path_entry_handle_info (p, true /* follow_reparse_points */));
   }
 
   // Set the modification and access times for a regular file or directory.
@@ -802,7 +853,9 @@ namespace butl
     // See also touch_file() below.
     //
     pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
-      entry_info_handle (p, true /* write */));
+      entry_info_handle (p,
+                         true /* write */,
+                         true /* follow_reparse_points */));
 
     // If the entry is of the wrong type, then let's pretend that it doesn't
     // exist.
@@ -887,7 +940,9 @@ namespace butl
     // implicitly.
     //
     pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
-      entry_info_handle (p.string ().c_str (), true /* write */));
+      entry_info_handle (p.string ().c_str (),
+                         true /* write */,
+                         true /* follow_reparse_points */));
 
     if (hi.first != nullhandle)
     {
@@ -1036,7 +1091,7 @@ namespace butl
     //
     try
     {
-      for (const dir_entry& de: dir_iterator (p, false /* ignore_dangling */))
+      for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow))
       {
         path ep (p / de.path ()); //@@ Would be good to reuse the buffer.
 
@@ -1063,8 +1118,8 @@ namespace butl
     }
   }
 
-  rmfile_status
-  try_rmfile (const path& p, bool ignore_error)
+  optional<rmfile_status>
+  try_rmfile_maybe_ignore_error (const path& p, bool ignore_error)
   {
     rmfile_status r (rmfile_status::success);
     const char* f (p.string ().c_str ());
@@ -1087,12 +1142,12 @@ namespace butl
     // failure (see mventry() for details). If that's the case, we will keep
     // trying to move the file for two seconds.
     //
-    for (size_t i (0); i < 21; ++i)
+    for (size_t i (0); i < 41; ++i)
     {
-      // Sleep 100 milliseconds before the removal retry.
+      // Sleep 50 milliseconds before the removal retry.
       //
       if (i != 0)
-        Sleep (100);
+        Sleep (50);
 
       ur = _unlink (f);
 
@@ -1143,6 +1198,8 @@ namespace butl
         r = rmfile_status::not_exist;
       else if (!ignore_error)
         throw_generic_error (errno);
+      else
+        return nullopt;
     }
 
     return r;
@@ -1596,7 +1653,7 @@ namespace butl
 
     rm = auto_rmfile (to);
 
-    // Throws ios::failure on fdbuf read/write failures.
+    // Throws ios::failure on fdstreambuf read/write failures.
     //
     // Note that the eof check is important: if the stream is at eof (empty
     // file) then this write will fail.
@@ -1638,9 +1695,12 @@ namespace butl
   }
 
   void
-  cpfile (const path& from, const path& to, cpflags fl)
+  cpfile (const path& from,
+          const path& to,
+          cpflags fl,
+          optional<permissions> cperm)
   {
-    permissions perm (path_permissions (from));
+    permissions perm (cperm ? *cperm : path_permissions (from));
     auto_rmfile rm;
 
     cpfile<is_base_of<system_error, ios_base::failure>::value> (
@@ -1732,12 +1792,12 @@ namespace butl
     // fdopen().
     //
     DWORD ec;
-    for (size_t i (0); i < 21; ++i)
+    for (size_t i (0); i < 41; ++i)
     {
-      // Sleep 100 milliseconds before the move retry.
+      // Sleep 50 milliseconds before the move retry.
       //
       if (i != 0)
-        Sleep (100);
+        Sleep (50);
 
       if (MoveFileExA (f, t, mfl))
         return;
@@ -1839,7 +1899,7 @@ namespace butl
       h_ = x.h_;
       x.h_ = nullptr;
 
-      ignore_dangling_ = x.ignore_dangling_;
+      mode_ = x.mode_;
     }
     return *this;
   }
@@ -1860,6 +1920,11 @@ namespace butl
   entry_type dir_entry::
   type (bool follow_symlinks) const
   {
+    // Note that this function can only be used for resolving an entry type
+    // lazily and thus can't be used with the detect_dangling dir_iterator
+    // mode (see dir_iterator::next () implementation for details). Thus, we
+    // always throw on the stat()/lstat() failure.
+    //
     path_type p (b_ / p_);
     struct stat s;
     if ((follow_symlinks
@@ -1867,7 +1932,18 @@ namespace butl
          : lstat (p.string ().c_str (), &s)) != 0)
       throw_generic_error (errno);
 
-    return butl::type (s);
+    entry_type r (butl::type (s));
+
+    // While at it, also save the entry modification and access times.
+    //
+    if (r != entry_type::symlink)
+    {
+      entry_time t (entry_tm (s));
+      mtime_ = t.modification;
+      atime_ = t.access;
+    }
+
+    return r;
   }
 
   // dir_iterator
@@ -1878,8 +1954,8 @@ namespace butl
   };
 
   dir_iterator::
-  dir_iterator (const dir_path& d, bool ignore_dangling)
-    : ignore_dangling_ (ignore_dangling)
+  dir_iterator (const dir_path& d, mode m)
+    : mode_ (m)
   {
     unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ()));
     h_ = h.get ();
@@ -1895,7 +1971,7 @@ namespace butl
   }
 
   template <typename D>
-  static inline /*constexpr*/ entry_type
+  static inline /*constexpr*/ optional<entry_type>
   d_type (const D* d, decltype(d->d_type)*)
   {
     switch (d->d_type)
@@ -1923,13 +1999,13 @@ namespace butl
 #endif
       return entry_type::other;
 
-    default: return entry_type::unknown;
+    default: return nullopt;
     }
   }
 
   template <typename D>
-  static inline constexpr entry_type
-  d_type (...) {return entry_type::unknown;}
+  static inline constexpr optional<entry_type>
+  d_type (...) {return nullopt;}
 
   void dir_iterator::
   next ()
@@ -1951,25 +2027,43 @@ namespace butl
 
         e_.p_ = move (p);
         e_.t_ = d_type<struct dirent> (de, nullptr);
-        e_.lt_ = entry_type::unknown;
+        e_.lt_ = nullopt;
+
+        e_.mtime_ = timestamp_unknown;
+        e_.atime_ = timestamp_unknown;
 
         // If requested, we ignore dangling symlinks, skipping ones with
-        // non-existing or inaccessible targets.
+        // non-existing or inaccessible targets (ignore_dangling mode), or set
+        // the entry_type::unknown type for them (detect_dangling mode).
         //
-        if (ignore_dangling_)
+        if (mode_ != no_follow)
         {
-          // Note that ltype () can potentially lstat() (see d_type() for
+          bool dd (mode_ == detect_dangling);
+
+          // Note that ltype () can potentially lstat() (see type() for
           // details) and so throw. We, however, need to skip the entry if it
           // is already removed (due to a race) and throw on any other error.
           //
           path fp (e_.base () / e_.path ());
           const char* p (fp.string ().c_str ());
 
-          if (e_.t_ == entry_type::unknown)
+          if (!e_.t_)
           {
             struct stat s;
             if (lstat (p, &s) != 0)
             {
+              // Given that we have already enumerated the filesystem entry,
+              // these error codes can only mean that the entry doesn't exist
+              // anymore and so we always skip it.
+              //
+              // If errno is EACCES, then the permission to search a directory
+              // we currently iterate over has been revoked. Throwing in this
+              // case sounds like the best choice.
+              //
+              // Note that according to POSIX the filesystem entry we call
+              // lstat() on doesn't require any specific permissions to be
+              // granted.
+              //
               if (errno == ENOENT || errno == ENOTDIR)
                 continue;
 
@@ -1977,21 +2071,53 @@ namespace butl
             }
 
             e_.t_ = type (s);
+
+            if (*e_.t_ != entry_type::symlink)
+            {
+              entry_time t (entry_tm (s));
+              e_.mtime_ = t.modification;
+              e_.atime_ = t.access;
+            }
           }
 
-          if (e_.t_ == entry_type::symlink)
+          // The entry type should be present and may not be
+          // entry_type::unknown.
+          //
+          //assert (e_.t_ && *e_.t_ != entry_type::unknown);
+
+          // Check if the symlink target exists and is accessible and set the
+          // target type.
+          //
+          if (*e_.t_ == entry_type::symlink)
           {
             struct stat s;
             if (stat (p, &s) != 0)
             {
               if (errno == ENOENT || errno == ENOTDIR || errno == EACCES)
-                continue;
-
-              throw_generic_error (errno);
+              {
+                if (dd)
+                  e_.lt_ = entry_type::unknown;
+                else
+                  continue;
+              }
+              else
+                throw_generic_error (errno);
             }
+            else
+            {
+              e_.lt_ = type (s);
 
-            e_.lt_ = type (s); // While at it, set the target type.
+              entry_time t (entry_tm (s));
+              e_.mtime_ = t.modification;
+              e_.atime_ = t.access;
+            }
           }
+
+          // The symlink target type should be present and in the
+          // ignore_dangling mode it may not be entry_type::unknown.
+          //
+          //assert (*e_.t_ != entry_type::symlink ||
+          //        (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
         }
       }
       else if (errno == 0)
@@ -2012,11 +2138,49 @@ namespace butl
 
   // dir_entry
   //
+  entry_type dir_entry::
+  type (bool follow_symlinks) const
+  {
+    // Note that this function can only be used for resolving an entry type
+    // lazily and thus can't be used with the detect_dangling dir_iterator
+    // mode (see dir_iterator::next () implementation for details). Thus, we
+    // always throw if the entry info can't be retrieved.
+    //
+    // While at it, also save the entry modification and access times.
+    //
+    path_type p (base () / path ());
+    entry_time et;
+    pair<bool, entry_stat> e (
+      path_entry (p, follow_symlinks, false /* ignore_error */, &et));
+
+    if (!e.first)
+      throw_generic_error (ENOENT);
+
+    if (e.second.type == entry_type::regular ||
+        e.second.type == entry_type::directory)
+    {
+      mtime_ = et.modification;
+      atime_ = et.access;
+    }
+
+    return e.second.type;
+  }
+
+  // dir_iterator
+  //
+  static_assert(is_same<HANDLE, void*>::value, "HANDLE is not void*");
+
+  static inline HANDLE
+  to_handle (intptr_t h)
+  {
+    return reinterpret_cast<HANDLE> (h);
+  }
+
   dir_iterator::
   ~dir_iterator ()
   {
     if (h_ != -1)
-      _findclose (h_); // Ignore any errors.
+      FindClose (to_handle (h_)); // Ignore any errors.
   }
 
   dir_iterator& dir_iterator::
@@ -2026,56 +2190,32 @@ namespace butl
     {
       e_ = move (x.e_);
 
-      if (h_ != -1 && _findclose (h_) == -1)
-        throw_generic_error (errno);
+      if (h_ != -1 && !FindClose (to_handle (h_)))
+        throw_system_error (GetLastError ());
 
       h_ = x.h_;
       x.h_ = -1;
 
-      ignore_dangling_ = x.ignore_dangling_;
+      mode_ = x.mode_;
     }
     return *this;
   }
 
-  entry_type dir_entry::
-  type (bool follow_symlinks) const
-  {
-    path_type p (base () / path ());
-    pair<bool, entry_stat> e (path_entry (p, follow_symlinks));
-
-    if (!e.first)
-      throw_generic_error (ENOENT);
-
-    return e.second.type;
-  }
-
-  // dir_iterator
-  //
-  struct auto_dir
+  dir_iterator::
+  dir_iterator (const dir_path& d, mode m)
+    : mode_ (m)
   {
-    explicit
-    auto_dir (intptr_t& h): h_ (&h) {}
-
-    auto_dir (const auto_dir&) = delete;
-    auto_dir& operator= (const auto_dir&) = delete;
-
-    ~auto_dir ()
+    struct deleter
     {
-      if (h_ != nullptr && *h_ != -1)
-        _findclose (*h_);
-    }
-
-    void release () {h_ = nullptr;}
+      void operator() (intptr_t* p) const
+      {
+        if (p != nullptr && *p != -1)
+          FindClose (to_handle (*p));
+      }
+    };
 
-  private:
-    intptr_t* h_;
-  };
+    unique_ptr<intptr_t, deleter> h (&h_);
 
-  dir_iterator::
-  dir_iterator (const dir_path& d, bool ignore_dangling)
-    : ignore_dangling_ (ignore_dangling)
-  {
-    auto_dir h (h_);
     e_.b_ = d; // Used by next().
 
     next ();
@@ -2088,31 +2228,37 @@ namespace butl
     for (;;)
     {
       bool r;
-      _finddata_t fi;
+      WIN32_FIND_DATA fi;
 
       if (h_ == -1)
       {
         // The call is made from the constructor. Any other call with h_ == -1
         // is illegal.
         //
-
-        // Check to distinguish non-existent vs empty directories.
+        // Note that we used to check for the directory existence before
+        // iterating over it. However, let's not pessimize things and only
+        // check for the directory existence if FindFirstFileExA() fails.
         //
-        if (!dir_exists (e_.base ()))
-          throw_generic_error (ENOENT);
 
-        h_ = _findfirst ((e_.base () / path ("*")).string ().c_str (), &fi);
-        r = h_ != -1;
+        h_ = reinterpret_cast<intptr_t> (
+          FindFirstFileExA ((e_.base () / path ("*")).string ().c_str (),
+                            FindExInfoBasic,
+                            &fi,
+                            FindExSearchNameMatch,
+                            NULL,
+                            0));
+
+        r = (h_ != -1);
       }
       else
-        r = _findnext (h_, &fi) == 0;
+        r = FindNextFileA (to_handle (h_), &fi);
 
       if (r)
       {
         // We can accept some overhead for '.' and '..' (relying on short
         // string optimization) in favor of a more compact code.
         //
-        path p (fi.name);
+        path p (fi.cFileName);
 
         // Skip '.' and '..'.
         //
@@ -2121,26 +2267,47 @@ namespace butl
 
         e_.p_ = move (p);
 
-        // Note that the entry type detection always requires to additionally
-        // query the entry information. Thus, we evaluate its type lazily.
+        DWORD a (fi.dwFileAttributes);
+        bool rp (reparse_point (a));
+
+        // Evaluate the entry type lazily if this is a reparse point since it
+        // requires to additionally query the entry information (see
+        // reparse_point_entry() for details).
         //
-        e_.t_ = entry_type::unknown;
+        e_.t_ = rp            ? nullopt                                      :
+                directory (a) ? optional<entry_type> (entry_type::directory) :
+                                optional<entry_type> (entry_type::regular)   ;
+
+        e_.lt_ = nullopt;
 
-        e_.lt_ = entry_type::unknown;
+        e_.mtime_ = rp ? timestamp_unknown : to_timestamp (fi.ftLastWriteTime);
+
+        // Note that according to MSDN for the FindFirstFile[Ex]() function
+        // "the NTFS file system delays updates to the last access time for a
+        // file by up to 1 hour after the last access" and "on the FAT file
+        // system access time has a resolution of 1 day".
+        //
+        e_.atime_ = timestamp_unknown;
 
         // If requested, we ignore dangling symlinks and junctions, skipping
-        // ones with non-existing or inaccessible targets.
+        // ones with non-existing or inaccessible targets (ignore_dangling
+        // mode), or set the entry_type::unknown type for them
+        // (detect_dangling mode).
         //
-        if (ignore_dangling_)
+        if (rp && mode_ != no_follow)
         {
+          bool dd (mode_ == detect_dangling);
+
           // Check the last error code throwing for codes other than "path not
-          // found" and "access denied".
+          // found" and "access denied" and returning this error code
+          // otherwise.
           //
           auto verify_error = [] ()
           {
             DWORD ec (GetLastError ());
             if (!error_file_not_found (ec) && ec != ERROR_ACCESS_DENIED)
               throw_system_error (ec);
+            return ec;
           };
 
           // Note that ltype() queries the entry information due to the type
@@ -2151,48 +2318,50 @@ namespace butl
           path fp (e_.base () / e_.path ());
           const char* p (fp.string ().c_str ());
 
-          DWORD a (GetFileAttributesA (p));
-          if (a == INVALID_FILE_ATTRIBUTES)
-          {
-            // Note that sometimes trying to obtain attributes for a
-            // filesystem entry that was potentially removed ends up with
-            // ERROR_ACCESS_DENIED. One can argue that there can be another
-            // reason for this error (antivirus, indexer, etc). However, given
-            // that the entry is seen by a _find*() function and normally you
-            // can retrieve attributes for a read-only entry and for an entry
-            // opened in the non-shared mode (see the CreateFile() function
-            // documentation for details) the only meaningful explanation for
-            // ERROR_ACCESS_DENIED is that the entry is being removed. Also
-            // the DeleteFile() documentation mentions such a possibility.
-            //
-            verify_error ();
-            continue;
-          }
+          pair<entry_type, path> rpe (
+            reparse_point_entry (p, true /* ignore_error */));
 
-          if (reparse_point (a))
+          if (rpe.first == entry_type::unknown)
           {
-            pair<entry_type, path> rp (
-              reparse_point_entry (p, true /* ignore_error */));
+            DWORD ec (verify_error ());
 
-            if (rp.first == entry_type::unknown)
-            {
-              verify_error ();
+            // Silently skip the entry if it is not found (being already
+            // deleted) or we are in the ignore dangling mode. Otherwise, set
+            // the entry type to unknown.
+            //
+            // Note: sometimes trying to obtain information for a filesystem
+            // entry being removed ends up with ERROR_ACCESS_DENIED (see
+            // DeleteFile() and CreateFile() for details). Probably getting
+            // this error code while trying to obtain the reparse point
+            // information (involves calling CreateFile(FILE_READ_EA) and
+            // DeviceIoControl()) can also be interpreted differently. We,
+            // however, always treat it as "access denied" in the detect
+            // dangling mode for good measure. Let's see if that won't be too
+            // noisy.
+            //
+            if (ec != ERROR_ACCESS_DENIED || !dd)
               continue;
-            }
 
-            e_.t_ = rp.first;
+            // Fall through.
           }
-          else
-            e_.t_ = directory (a)
-                    ? entry_type::directory
-                    : entry_type::regular;
 
-          if (e_.t_ == entry_type::symlink)
+          e_.t_ = rpe.first;
+
+          // In this mode the entry type should be present and in the
+          // ignore_dangling mode it may not be entry_type::unknown.
+          //
+          //assert (e_.t_ && (dd || *e_.t_ != entry_type::unknown));
+
+          // Check if the symlink target exists and is accessible and set the
+          // target type.
+          //
+          if (*e_.t_ == entry_type::symlink)
           {
             // Query the target info.
             //
             // Note that we use entry_info_handle() rather than
-            // path_entry_info() to be able to verify an error on failure.
+            // path_entry_handle_info() to be able to verify an error on
+            // failure.
             //
             pair<auto_handle, BY_HANDLE_FILE_INFORMATION> ti (
               entry_info_handle (p,
@@ -2203,31 +2372,59 @@ namespace butl
             if (ti.first == nullhandle)
             {
               verify_error ();
-              continue;
+
+              if (dd)
+                e_.lt_ = entry_type::unknown;
+              else
+                continue;
             }
+            else
+            {
+              ti.first.close (); // Checks for error.
 
-            ti.first.close (); // Checks for error.
+              e_.lt_ = directory (ti.second.dwFileAttributes)
+                       ? entry_type::directory
+                       : entry_type::regular;
 
-            // While at it, set the target type.
-            //
-            e_.lt_ = directory (ti.second.dwFileAttributes)
-                     ? entry_type::directory
-                     : entry_type::regular;
+              e_.mtime_ = to_timestamp (ti.second.ftLastWriteTime);
+              e_.atime_ = to_timestamp (ti.second.ftLastAccessTime);
+            }
           }
+
+          // In this mode the symlink target type should be present and in the
+          // ignore_dangling mode it may not be entry_type::unknown.
+          //
+          //assert (*e_.t_ != entry_type::symlink ||
+          //        (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
         }
       }
-      else if (errno == ENOENT)
+      else
       {
-        // End of stream.
+        DWORD ec (GetLastError ());
+        bool first (h_ == -1);
+
+        // Check to distinguish non-existent vs empty directories.
+        //
+        // Note that dir_exists() handles not only the "filesystem entry does
+        // not exist" case but also the case when the entry exists but is not
+        // a directory.
         //
-        if (h_ != -1)
+        if (first && !dir_exists (e_.base ()))
+          throw_generic_error (ENOENT);
+
+        if (ec == (first ? ERROR_FILE_NOT_FOUND : ERROR_NO_MORE_FILES))
         {
-          _findclose (h_);
-          h_ = -1;
+          // End of stream.
+          //
+          if (h_ != -1)
+          {
+            FindClose (to_handle (h_));
+            h_ = -1;
+          }
         }
+        else
+          throw_system_error (ec);
       }
-      else
-        throw_generic_error (errno);
 
       break;
     }
@@ -2235,14 +2432,27 @@ namespace butl
 #endif
 
   // Search for paths matching the pattern and call the specified function for
-  // each matching path. Return false if the underlying func() call returns
-  // false. Otherwise the function conforms to the path_search() description.
+  // each matching path. Return false if the underlying func() or
+  // dangling_func() call returns false. Otherwise the function conforms to
+  // the path_search() description.
   //
   // Note that the access to the traversed directory tree (real or virtual) is
   // performed through the provided filesystem object.
   //
   static const string any_dir ("*/");
 
+  // Filesystem traversal callbacks.
+  //
+  // Called before entering a directory for the recursive traversal. If
+  // returns false, then the directory is not entered.
+  //
+  using preopen = function<bool (const dir_path&)>;
+
+  // Called before skipping a dangling link. If returns false, then the
+  // traversal is stopped.
+  //
+  using preskip = function<bool (const dir_entry&)>;
+
   template <typename FS>
   static bool
   search (
@@ -2250,11 +2460,14 @@ namespace butl
     dir_path pattern_dir,
     path_match_flags fl,
     const function<bool (path&&, const string& pattern, bool interm)>& func,
+    const function<bool (const dir_entry&)>& dangling_func,
     FS& filesystem)
   {
     bool follow_symlinks ((fl & path_match_flags::follow_symlinks) !=
                           path_match_flags::none);
 
+    assert (follow_symlinks || dangling_func == nullptr);
+
     // Fast-forward the leftmost pattern non-wildcard components. So, for
     // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
     //
@@ -2301,17 +2514,47 @@ namespace butl
     //
     bool simple (pattern.simple ());
 
-    // Note that we rely on "small function object" optimization here.
+    // If symlinks need to be followed, then pass the preskip callback for the
+    // filesystem iterator.
+    //
+    bool fs (follow_symlinks || !simple);
+    preskip ps;
+    bool dangling_stop (false);
+
+    if (fs)
+    {
+      if (dangling_func != nullptr)
+      {
+        // Note that we rely on the "small function object" optimization here.
+        //
+        ps = [&dangling_func, &dangling_stop] (const dir_entry& de) -> bool
+        {
+          dangling_stop = !dangling_func (de);
+          return !dangling_stop;
+        };
+      }
+      else
+      {
+        ps = [] (const dir_entry& de) -> bool
+        {
+          throw_generic_error (
+            de.ltype () == entry_type::symlink ? ENOENT : EACCES);
+        };
+      }
+    }
+
+    // Note that we rely on the "small function object" optimization here.
     //
     typename FS::iterator_type i (filesystem.iterator (
       pattern_dir,
       path_pattern_recursive (pcr),
       path_pattern_self_matching (pcr),
-      follow_symlinks || !simple,
+      fs,
       [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
       {
         return func (pattern_dir / p, any_dir, true);
-      }));
+      },
+      move (ps)));
 
     // Canonicalize the pattern component collapsing consecutive stars (used to
     // express that it is recursive) into a single one.
@@ -2357,7 +2600,7 @@ namespace butl
       // represented by the iterator as an empty path, and so we need to
       // compute it (the leaf would actually be enough) for matching. This
       // leaf can be acquired from the pattern_dir (if not empty) or
-      // start_dir.  We don't expect the start_dir to be empty, as the
+      // start_dir. We don't expect the start_dir to be empty, as the
       // filesystem object must replace an empty start directory with the
       // current one. This is the case when we search in the current directory
       // (start_dir is empty) with a pattern that starts with a *** wildcard
@@ -2396,10 +2639,14 @@ namespace butl
                               pattern_dir / path_cast<dir_path> (move (p)),
                               fl,
                               func,
+                              dangling_func,
                               filesystem))
         return false;
     }
 
+    if (dangling_stop)
+      return false;
+
     // If requested, also search with the absent-matching pattern path
     // component omitted, unless this is the only pattern component.
     //
@@ -2407,8 +2654,15 @@ namespace butl
         pc.to_directory ()                                              &&
         (!pattern_dir.empty () || !simple)                              &&
         pc.string ().find_first_not_of ('*') == string::npos            &&
-        !search (pattern.leaf (pc), pattern_dir, fl, func, filesystem))
+        !search (pattern.leaf (pc),
+                 pattern_dir,
+                 fl,
+                 func,
+                 dangling_func,
+                 filesystem))
+    {
       return false;
+    }
 
     return true;
   }
@@ -2417,8 +2671,6 @@ namespace butl
   //
   static const dir_path empty_dir;
 
-  using preopen = function<bool (const dir_path&)>;
-
   // Base for filesystem (see above) implementations.
   //
   // Don't copy start directory. It is expected to exist till the end of the
@@ -2468,13 +2720,17 @@ namespace butl
                             bool recursive,
                             bool self,
                             bool fs,
-                            preopen po)
+                            preopen po,
+                            preskip ps)
         : start_ (move (p)),
           recursive_ (recursive),
           self_ (self),
           follow_symlinks_ (fs),
-          preopen_ (move (po))
+          preopen_ (move (po)),
+          preskip_ (move (ps))
     {
+      assert (fs || preskip_ == nullptr); // Note: ps is moved-from here.
+
       open (dir_path (), self_);
     }
 
@@ -2484,12 +2740,16 @@ namespace butl
     recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete;
     recursive_dir_iterator (recursive_dir_iterator&&) = default;
 
-    // Return false if no more entries left. Otherwise save the next entry path
-    // and return true. The path is relative to the directory being
+    // Return false if no more entries left. Otherwise save the next entry
+    // path and return true. The path is relative to the directory being
     // traversed and contains a trailing separator for sub-directories. Throw
     // std::system_error in case of a failure (insufficient permissions,
     // dangling symlink encountered, etc).
     //
+    // If symlinks need to be followed, then skip inaccessible/dangling
+    // entries or, if the preskip callback is specified and returns false for
+    // such an entry, stop the entire traversal.
+    //
     bool
     next (path& p)
     {
@@ -2498,44 +2758,64 @@ namespace butl
 
       auto& i (iters_.back ());
 
-      // If we got to the end of directory sub-entries, then go one level up
-      // and return this directory path.
-      //
-      if (i.first == dir_iterator ())
+      for (;;) // Skip inaccessible/dangling entries.
       {
-        path d (move (i.second));
-        iters_.pop_back ();
+        // If we got to the end of directory sub-entries, then go one level up
+        // and return this directory path.
+        //
+        if (i.first == dir_iterator ())
+        {
+          path d (move (i.second));
+          iters_.pop_back ();
+
+          // Return the path unless it is the last one (the directory we
+          // started to iterate from) and the self flag is not set.
+          //
+          if (iters_.empty () && !self_)
+            return false;
+
+          p = move (d);
+          return true;
+        }
+
+        const dir_entry& de (*i.first);
 
-        // Return the path unless it is the last one (the directory we started
-        // to iterate from) and the self flag is not set.
+        // Append separator if a directory. Note that dir_entry::type() can
+        // throw.
         //
-        if (iters_.empty () && !self_)
-          return false;
+        entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
 
-        p = move (d);
-        return true;
-      }
+        // If the entry turned out to be inaccessible/dangling, then skip it
+        // if the preskip function is not specified or returns true and stop
+        // the entire traversal otherwise.
+        //
+        if (et == entry_type::unknown)
+        {
+          if (preskip_ != nullptr && !preskip_ (de))
+          {
+            iters_.clear ();
+            return false;
+          }
 
-      const dir_entry& de (*i.first);
+          ++i.first;
+          continue;
+        }
 
-      // Append separator if a directory. Note that dir_entry::type() can
-      // throw.
-      //
-      entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
-      path pe (et == entry_type::directory
-               ? path_cast<dir_path> (i.second / de.path ())
-               : i.second / de.path ());
+        path pe (et == entry_type::directory
+                 ? path_cast<dir_path> (i.second / de.path ())
+                 : i.second / de.path ());
 
-      ++i.first;
+        ++i.first;
 
-      if (recursive_ && pe.to_directory ())
-      {
-        open (path_cast<dir_path> (move (pe)), true);
-        return next (p);
-      }
+        if (recursive_ && pe.to_directory ())
+        {
+          open (path_cast<dir_path> (move (pe)), true);
+          return next (p);
+        }
 
-      p = move (pe);
-      return true;
+        p = move (pe);
+        return true;
+      }
     }
 
   private:
@@ -2557,10 +2837,15 @@ namespace butl
         {
           dir_path d (start_ / p);
 
-          // If we follow symlinks, then we ignore the dangling ones.
+          // If we follow symlinks, then we may need to skip the dangling
+          // ones. Note, however, that we will be skipping them not at the
+          // dir_iterator level but ourselves, after calling the preskip
+          // callback function (see next() for details).
           //
           i = dir_iterator (!d.empty () ? d : dir_path ("."),
-                            follow_symlinks_);
+                            follow_symlinks_
+                            ? dir_iterator::detect_dangling
+                            : dir_iterator::no_follow);
         }
 
         iters_.emplace_back (move (i), move (p));
@@ -2590,6 +2875,7 @@ namespace butl
     bool self_;
     bool follow_symlinks_;
     preopen preopen_;
+    preskip preskip_;
     small_vector<pair<dir_iterator, dir_path>, 1> iters_;
   };
 
@@ -2613,13 +2899,15 @@ namespace butl
               bool recursive,
               bool self,
               bool follow_symlinks,
-              preopen po) const
+              preopen po,
+              preskip ps) const
     {
       return iterator_type (start_ / p,
                             recursive,
                             self,
                             follow_symlinks,
-                            move (po));
+                            move (po),
+                            move (ps));
     }
   };
 
@@ -2628,10 +2916,11 @@ namespace butl
     const path& pattern,
     const function<bool (path&&, const string& pattern, bool interm)>& func,
     const dir_path& start,
-    path_match_flags flags)
+    path_match_flags flags,
+    const function<bool (const dir_entry&)>& dangling_func)
   {
     real_filesystem fs (pattern.relative () ? start : empty_dir);
-    search (pattern, dir_path (), flags, func, fs);
+    search (pattern, dir_path (), flags, func, dangling_func, fs);
   }
 
   // Search path in the directory tree represented by a path.
@@ -2789,7 +3078,8 @@ namespace butl
               bool recursive,
               bool self,
               bool /*follow_symlinks*/,
-              preopen po)
+              preopen po,
+              preskip)
     {
       // If path and sub-path are non-empty, and both are absolute or relative,
       // then no extra effort is required (prior to checking if one is a
@@ -2848,6 +3138,6 @@ namespace butl
     path_match_flags flags)
   {
     path_filesystem fs (start, entry);
-    search (pattern, dir_path (), flags, func, fs);
+    search (pattern, dir_path (), flags, func, nullptr /* dangling_func */, fs);
   }
 }
diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.hxx
index 935fc3f..0f5fb0b 100644
--- a/libbutl/filesystem.mxx
+++ b/libbutl/filesystem.hxx
@@ -1,9 +1,7 @@
-// file      : libbutl/filesystem.mxx -*- C++ -*-
+// file      : libbutl/filesystem.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
 #include <errno.h> // E*
 
@@ -22,7 +20,6 @@
    using mode_t = int;
 #endif
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <cstddef>    // ptrdiff_t
 #include <cstdint>    // uint16_t, etc
@@ -30,37 +27,45 @@
 #include <iterator>   // input_iterator_tag
 #include <functional>
 
-#include <chrono>     //@@ MOD needed by timestamp module (no re-export).
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/timestamp.hxx>
+#include <libbutl/path-pattern.hxx> // path_match_flags
 
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.filesystem;
+#include <libbutl/export.hxx>
 
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
+namespace butl
+{
+  // Path permissions.
+  //
+  enum class permissions: std::uint16_t
+  {
+    // Note: matching POSIX values.
+    //
+    xo = 0001,
+    wo = 0002,
+    ro = 0004,
 
-import butl.path;
-import butl.timestamp;
-import butl.path_pattern; // path_match_flags
+    xg = 0010,
+    wg = 0020,
+    rg = 0040,
 
-import butl.utility; // operator<<(ostream,exception), throw_generic_error()
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/timestamp.mxx>
-#include <libbutl/path-pattern.mxx>
+    xu = 0100,
+    wu = 0200,
+    ru = 0400,
 
-#include <libbutl/utility.mxx>
-#endif
+    none = 0
+  };
 
-#include <libbutl/export.hxx>
+  inline permissions operator& (permissions, permissions);
+  inline permissions operator| (permissions, permissions);
+  inline permissions operator&= (permissions&, permissions);
+  inline permissions operator|= (permissions&, permissions);
 
-LIBBUTL_MODEXPORT namespace butl
-{
   // Return true if the path is to an existing regular file. Note that by
   // default this function follows symlinks. Underlying OS errors are reported
-  // by throwing std::system_error, unless ignore_error is true.
+  // by throwing std::system_error, unless ignore_error is true (in which case
+  // erroneous entries are treated as non-existent).
   //
   LIBBUTL_SYMEXPORT bool
   file_exists (const char*,
@@ -73,7 +78,8 @@ LIBBUTL_MODEXPORT namespace butl
 
   // Return true if the path is to an existing directory. Note that this
   // function follows symlinks. Underlying OS errors are reported by throwing
-  // std::system_error, unless ignore_error is true.
+  // std::system_error, unless ignore_error is true (in which case erroneous
+  // entries are treated as non-existent).
   //
   LIBBUTL_SYMEXPORT bool
   dir_exists (const char*, bool ignore_error = false);
@@ -84,7 +90,8 @@ LIBBUTL_MODEXPORT namespace butl
 
   // Return true if the path is to an existing file system entry. Note that by
   // default this function doesn't follow symlinks. Underlying OS errors are
-  // reported by throwing std::system_error, unless ignore_error is true.
+  // reported by throwing std::system_error, unless ignore_error is true (in
+  // which case erroneous entries are treated as non-existent).
   //
   LIBBUTL_SYMEXPORT bool
   entry_exists (const char*,
@@ -117,7 +124,10 @@ LIBBUTL_MODEXPORT namespace butl
   // Return a flag indicating if the path is to an existing filesystem entry
   // and its info if so. Note that by default this function doesn't follow
   // symlinks. Underlying OS errors are reported by throwing
-  // std::system_error, unless ignore_error is true.
+  // std::system_error, unless ignore_error is true (in which case erroneous
+  // entries are treated as non-existent).
+  //
+  // See also fdstat() in fdstream.
   //
   LIBBUTL_SYMEXPORT std::pair<bool, entry_stat>
   path_entry (const char*,
@@ -206,9 +216,12 @@ LIBBUTL_MODEXPORT namespace butl
   // is not atomic. It is also not atomic for the directory-type reparse point
   // removal.
   //
-  LIBBUTL_SYMEXPORT rmfile_status
+  rmfile_status
   try_rmfile (const path&, bool ignore_error = false);
 
+  optional<rmfile_status>
+  try_rmfile_ignore_error (const path&);
+
   // Automatically try to remove a non-empty path on destruction unless
   // cancelled. Since the non-cancelled destruction will normally happen as a
   // result of an exception, the failure to remove the path is silently
@@ -228,8 +241,8 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Movable-only type. Move-assignment cancels the lhs object.
     //
-    auto_rm (auto_rm&&);
-    auto_rm& operator= (auto_rm&&);
+    auto_rm (auto_rm&&) noexcept;
+    auto_rm& operator= (auto_rm&&) noexcept;
     auto_rm (const auto_rm&) = delete;
     auto_rm& operator= (const auto_rm&) = delete;
 
@@ -394,11 +407,13 @@ LIBBUTL_MODEXPORT namespace butl
   inline cpflags operator&= (cpflags&, cpflags);
   inline cpflags operator|= (cpflags&, cpflags);
 
-  // Copy a regular file, including its permissions, and optionally timestamps.
-  // Throw std::system_error on failure. Fail if the destination file exists
-  // and the overwrite_content flag is not set. Leave permissions of an
-  // existing destination file intact unless the overwrite_permissions flag is
-  // set. Delete incomplete copies before throwing.
+  // Copy a regular file, including its permissions (unless custom permissions
+  // are specified), and optionally timestamps. Throw std::system_error on
+  // failure. Fail if the destination file exists and the overwrite_content
+  // flag is not set. Leave permissions of an existing destination file intact
+  // (including if custom permissions are specified) unless the
+  // overwrite_permissions flag is set. Delete incomplete copies before
+  // throwing.
   //
   // Note that in case of overwriting, the existing destination file gets
   // truncated (not deleted) prior to being overwritten. As a side-effect,
@@ -410,7 +425,10 @@ LIBBUTL_MODEXPORT namespace butl
   // fail.
   //
   LIBBUTL_SYMEXPORT void
-  cpfile (const path& from, const path& to, cpflags = cpflags::none);
+  cpfile (const path& from,
+          const path& to,
+          cpflags = cpflags::none,
+          optional<permissions> perm = nullopt);
 
   // Copy a regular file into (inside) an existing directory.
   //
@@ -618,32 +636,6 @@ LIBBUTL_MODEXPORT namespace butl
     return dir_atime (p.string ().c_str (), t);
   }
 
-  // Path permissions.
-  //
-  enum class permissions: std::uint16_t
-  {
-    // Note: matching POSIX values.
-    //
-    xo = 0001,
-    wo = 0002,
-    ro = 0004,
-
-    xg = 0010,
-    wg = 0020,
-    rg = 0040,
-
-    xu = 0100,
-    wu = 0200,
-    ru = 0400,
-
-    none = 0
-  };
-
-  inline permissions operator& (permissions, permissions);
-  inline permissions operator| (permissions, permissions);
-  inline permissions operator&= (permissions&, permissions);
-  inline permissions operator|= (permissions&, permissions);
-
   // Get path permissions. Throw std::system_error on failure. Note that this
   // function resolves symlinks.
   //
@@ -665,12 +657,45 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Symlink target type in case of the symlink, ltype() otherwise.
     //
+    // If type() returns entry_type::unknown then this entry is inaccessible
+    // (ltype() also returns entry_type::unknown) or is a dangling symlink
+    // (ltype() returns entry_type::symlink). Used with the detect_dangling
+    // dir_iterator mode. Note that on POSIX ltype() can never return unknown
+    // (because it is part of the directory iteration result).
+    //
     entry_type
     type () const;
 
     entry_type
     ltype () const;
 
+    // Modification and access times of the filesystem entry if it is not a
+    // symlink and of the symlink target otherwise.
+    //
+    // These are provided as an optimization if they can be obtained as a
+    // byproduct of work that is already being done anyway (iteration itself,
+    // calls to [l]type(), etc). If (not yet) available, timestamp_unknown is
+    // returned.
+    //
+    // Specifically:
+    //
+    // - On Windows mtime is always set by dir_iterator for entries other than
+    //   reparse points.
+    //
+    // - On all platforms mtime and atime are always set for symlink targets
+    //   by dir_iterator in the {detect,ignore}_dangling modes.
+    //
+    // - On all platforms mtime and atime can potentially be set by [l]type()
+    //   if the stat() call is required to retrieve the type information (the
+    //   native directory entry iterating API doesn't provide it, the type of
+    //   the symlink target is queried, etc).
+    //
+    timestamp
+    mtime () const {return mtime_;}
+
+    timestamp
+    atime () const {return atime_;}
+
     // Entry path (excluding the base). To get the full path, do
     // base () / path ().
     //
@@ -681,8 +706,17 @@ LIBBUTL_MODEXPORT namespace butl
     base () const {return b_;}
 
     dir_entry () = default;
-    dir_entry (entry_type t, path_type p, dir_path b)
-        : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {}
+
+    dir_entry (entry_type t,
+               path_type p,
+               dir_path b,
+               timestamp mt = timestamp_unknown,
+               timestamp at = timestamp_unknown)
+      : t_ (t),
+        mtime_ (mt),
+        atime_ (at),
+        p_ (std::move (p)),
+        b_ (std::move (b)) {}
 
   private:
     entry_type
@@ -691,8 +725,14 @@ LIBBUTL_MODEXPORT namespace butl
   private:
     friend class dir_iterator;
 
-    mutable entry_type t_ = entry_type::unknown;  // Lazy evaluation.
-    mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation.
+    // Note: lazy evaluation.
+    //
+    mutable optional<entry_type> t_;  // Entry type.
+    mutable optional<entry_type> lt_; // Symlink target type.
+
+    mutable timestamp mtime_ = timestamp_unknown;
+    mutable timestamp atime_ = timestamp_unknown;
+
     path_type p_;
     dir_path b_;
   };
@@ -709,12 +749,15 @@ LIBBUTL_MODEXPORT namespace butl
     ~dir_iterator ();
     dir_iterator () = default;
 
-    // If it is requested to ignore dangling symlinks, then the increment
-    // operator will skip symlinks that refer to non-existing or inaccessible
-    // targets. That implies that it will always try to stat() symlinks.
+    // If the mode is either ignore_dangling or detect_dangling, then stat()
+    // the entry and either ignore inaccessible/dangling entry or return it
+    // with the corresponding dir_entry type set to unknown (see dir_entry
+    // type()/ltype() for details).
     //
+    enum mode {no_follow, detect_dangling, ignore_dangling};
+
     explicit
-    dir_iterator (const dir_path&, bool ignore_dangling);
+    dir_iterator (const dir_path&, mode);
 
     dir_iterator (const dir_iterator&) = delete;
     dir_iterator& operator= (const dir_iterator&) = delete;
@@ -740,10 +783,10 @@ LIBBUTL_MODEXPORT namespace butl
 #ifndef _WIN32
     DIR* h_ = nullptr;
 #else
-    intptr_t h_ = -1;
+    intptr_t h_ = -1; // INVALID_HANDLE_VALUE
 #endif
 
-    bool ignore_dangling_ = false;
+    mode mode_ = no_follow;
   };
 
   // Range-based for loop support.
@@ -769,7 +812,7 @@ LIBBUTL_MODEXPORT namespace butl
 
   // Wildcard pattern search (aka glob).
   //
-  // For details on the wildcard patterns see <libbutl/path-pattern.mxx>
+  // For details on the wildcard patterns see <libbutl/path-pattern.hxx>
 
   // Search for paths matching the pattern calling the specified function for
   // each matching path (see below for details).
@@ -834,9 +877,20 @@ LIBBUTL_MODEXPORT namespace butl
   // (a/b/,   b*/, true)
   // (a/b/c/, c*/, false)
   //
-  // Note that recursive iterating through directories currently goes
-  // depth-first which make sense for the cleanup use cases. In future we may
-  // want to make it controllable.
+  // Note that recursive iterating through directories currently goes depth-
+  // first which makes sense for the cleanup use cases. In the future we may
+  // want to make this controllable.
+  //
+  // If the match flags contain follow_symlinks, then call the dangling
+  // callback function for inaccessible/dangling entries if specified, and
+  // throw appropriate std::system_error otherwise. If the callback function
+  // returns true, then inaccessible/dangling entry is ignored. Otherwise,
+  // the entire search is stopped.
+  //
+  // Note also that if pattern is not simple (that is, contains directory
+  // components), then some symlinks (those that are matched against the
+  // directory components) may still be followed and thus the dangling
+  // function called.
   //
   LIBBUTL_SYMEXPORT void
   path_search (const path& pattern,
@@ -844,7 +898,8 @@ LIBBUTL_MODEXPORT namespace butl
                                          const std::string& pattern,
                                          bool interm)>&,
                const dir_path& start = dir_path (),
-               path_match_flags = path_match_flags::follow_symlinks);
+               path_match_flags = path_match_flags::follow_symlinks,
+               const std::function<bool (const dir_entry&)>& dangling = nullptr);
 
   // Same as above, but behaves as if the directory tree being searched
   // through contains only the specified entry. The start directory is used if
diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx
index f7c3777..b3f9224 100644
--- a/libbutl/filesystem.ixx
+++ b/libbutl/filesystem.ixx
@@ -1,6 +1,9 @@
 // file      : libbutl/filesystem.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
+#include <libbutl/utility.hxx> // operator<<(ostream,exception),
+                               // throw_generic_error()
+
 namespace butl
 {
   inline bool
@@ -8,7 +11,7 @@ namespace butl
   {
     // @@ Could 0 size be a valid and faster way?
     //
-    return dir_iterator (d, false /* ignore_dangling */) == dir_iterator ();
+    return dir_iterator (d, dir_iterator::no_follow) == dir_iterator ();
   }
 
   inline bool
@@ -38,6 +41,23 @@ namespace butl
     return e ? rmdir_status::success : rmdir_status::not_exist;
   }
 
+  LIBBUTL_SYMEXPORT optional<rmfile_status>
+  try_rmfile_maybe_ignore_error (const path&, bool ignore_error);
+
+  inline rmfile_status
+  try_rmfile (const path& p, bool ignore_error)
+  {
+    auto r (try_rmfile_maybe_ignore_error (p, ignore_error));
+    return r ? *r : rmfile_status::success;
+  }
+
+  inline optional<rmfile_status>
+  try_rmfile_ignore_error (const path& p)
+  {
+    return try_rmfile_maybe_ignore_error (p, true);
+  }
+
+
   inline path
   followsymlink (const path& p)
   {
@@ -53,7 +73,7 @@ namespace butl
   //
   template <typename P>
   inline auto_rm<P>::
-  auto_rm (auto_rm&& x)
+  auto_rm (auto_rm&& x) noexcept
       : path (std::move (x.path)), active (x.active)
   {
     x.active = false;
@@ -61,7 +81,7 @@ namespace butl
 
   template <typename P>
   inline auto_rm<P>& auto_rm<P>::
-  operator= (auto_rm&& x)
+  operator= (auto_rm&& x) noexcept
   {
     if (this != &x)
     {
@@ -117,54 +137,28 @@ namespace butl
       static_cast<std::uint16_t> (y));
   }
 
-  // path_match_flags
-  //
-  inline path_match_flags operator& (path_match_flags x, path_match_flags y)
-  {
-    return x &= y;
-  }
-
-  inline path_match_flags operator| (path_match_flags x, path_match_flags y)
-  {
-    return x |= y;
-  }
-
-  inline path_match_flags operator&= (path_match_flags& x, path_match_flags y)
-  {
-    return x = static_cast<path_match_flags> (
-      static_cast<std::uint16_t> (x) &
-      static_cast<std::uint16_t> (y));
-  }
-
-  inline path_match_flags operator|= (path_match_flags& x, path_match_flags y)
-  {
-    return x = static_cast<path_match_flags> (
-      static_cast<std::uint16_t> (x) |
-      static_cast<std::uint16_t> (y));
-  }
-
   // dir_entry
   //
   inline entry_type dir_entry::
   ltype () const
   {
-    return t_ != entry_type::unknown ? t_ : (t_ = type (false));
+    return t_ ? *t_ : *(t_ = type (false /* follow_symlinks */));
   }
 
   inline entry_type dir_entry::
   type () const
   {
     entry_type t (ltype ());
-    return t != entry_type::symlink
-      ? t
-      : lt_ != entry_type::unknown ? lt_ : (lt_ = type (true));
+    return t != entry_type::symlink ? t    :
+           lt_                      ? *lt_ :
+           *(lt_ = type (true /* follow_symlinks */));
   }
 
   // dir_iterator
   //
   inline dir_iterator::
   dir_iterator (dir_iterator&& x) noexcept
-    : e_ (std::move (x.e_)), h_ (x.h_), ignore_dangling_ (x.ignore_dangling_)
+    : e_ (std::move (x.e_)), h_ (x.h_), mode_ (x.mode_)
   {
 #ifndef _WIN32
     x.h_ = nullptr;
diff --git a/libbutl/ft/lang.hxx b/libbutl/ft/lang.hxx
index 567f5a4..82971d2 100644
--- a/libbutl/ft/lang.hxx
+++ b/libbutl/ft/lang.hxx
@@ -7,9 +7,14 @@
 // __cpp_thread_local (extension)
 //
 // If this macro is undefined then one may choose to fallback to __thread.
-// Note, however, that it only for values that do not require dynamic
+// Note, however, that it only works for values that do not require dynamic
 // (runtime) initialization.
 //
+// Note that thread_local with dynamic allocation/destruction appears to be
+// broken when we use our own implementation of C++14 threads on MinGW. So
+// we restrict ourselves to __thread which appears to be functioning, at
+// least in the POSIX threads GCC configuration.
+//
 #ifndef __cpp_thread_local
    //
    // Apparently Apple's Clang "temporarily disabled" C++11 thread_local until
@@ -20,7 +25,7 @@
 #    if __apple_build_version__ >= 8000000
 #      define __cpp_thread_local 201103
 #    endif
-#  else
+#  elif !defined(LIBBUTL_MINGW_STDTHREAD)
 #    define __cpp_thread_local 201103
 #  endif
 #endif
diff --git a/libbutl/git.cxx b/libbutl/git.cxx
index b9dd9bc..f37e16a 100644
--- a/libbutl/git.cxx
+++ b/libbutl/git.cxx
@@ -1,43 +1,11 @@
 // file      : libbutl/git.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/git.mxx>
-#endif
+#include <libbutl/git.hxx>
 
-// C includes.
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
-#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.git;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.semantic_version
-#endif
-
-import butl.utility;    // digit()
-import butl.filesystem; // entry_exists()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx>       // entry_exists()
+#include <libbutl/semantic-version.hxx>
 
 using namespace std;
 
@@ -68,7 +36,9 @@ namespace butl
     // MinGit: git version 2.16.1.windows.1
     //
     if (s.compare (0, 12, "git version ") == 0)
-      return parse_semantic_version (s, 12, "" /* build_separators */);
+      return parse_semantic_version (s, 12,
+                                     semantic_version::allow_build,
+                                     "" /* build_separators */);
 
     return nullopt;
   }
diff --git a/libbutl/git.mxx b/libbutl/git.hxx
index 3f003be..add721e 100644
--- a/libbutl/git.mxx
+++ b/libbutl/git.hxx
@@ -1,35 +1,17 @@
-// file      : libbutl/git.mxx -*- C++ -*-
+// file      : libbutl/git.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.git;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-import butl.semantic_version;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/semantic-version.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Return true if the specified directory is a git repository root (contains
   // the .git filesystem entry).
diff --git a/libbutl/host-os-release.cxx b/libbutl/host-os-release.cxx
new file mode 100644
index 0000000..f13f62c
--- /dev/null
+++ b/libbutl/host-os-release.cxx
@@ -0,0 +1,323 @@
+// file      : libbutl/host-os-release.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbutl/host-os-release.hxx>
+
+#include <sstream>
+#include <stdexcept> // runtime_error
+
+#include <libbutl/path.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/filesystem.hxx>    // file_exists()
+#include <libbutl/string-parser.hxx> // parse_quoted()
+
+#ifdef _WIN32
+#  include <libbutl/win32-utility.hxx>
+#endif
+
+using namespace std;
+
+namespace butl
+{
+  // Note: exported for access from the test.
+  //
+  LIBBUTL_SYMEXPORT os_release
+  host_os_release_linux (path f = {})
+  {
+    os_release r;
+
+    // According to os-release(5), we should use /etc/os-release and fallback
+    // to /usr/lib/os-release if the former does not exist. It also lists the
+    // fallback values for individual variables, in case some are not present.
+    //
+    auto exists = [] (const path& f)
+    {
+      try
+      {
+        return file_exists (f);
+      }
+      catch (const system_error& e)
+      {
+        ostringstream os;
+        os << "unable to stat path " << f << ": " << e;
+        throw runtime_error (os.str ());
+      }
+    };
+
+    if (!f.empty ()
+        ? exists (f)
+        : (exists (f = path ("/etc/os-release")) ||
+           exists (f = path ("/usr/lib/os-release"))))
+    {
+      try
+      {
+        ifdstream ifs (f, ifdstream::badbit);
+
+        string l;
+        for (uint64_t ln (1); !eof (getline (ifs, l)); ++ln)
+        {
+          trim (l);
+
+          // Skip blank lines and comments.
+          //
+          if (l.empty () || l[0] == '#')
+            continue;
+
+          // The variable assignments are in the "shell style" and so can be
+          // quoted/escaped. For now we only handle quoting, which is what all
+          // the instances seen in the wild seem to use.
+          //
+          size_t p (l.find ('='));
+          if (p == string::npos)
+            continue;
+
+          string n (l, 0, p);
+          l.erase (0, p + 1);
+
+          using string_parser::parse_quoted;
+          using string_parser::invalid_string;
+
+          try
+          {
+            if (n == "ID_LIKE")
+            {
+              r.like_ids.clear ();
+
+              vector<string> vs (parse_quoted (l, true /* unquote */));
+              for (const string& v: vs)
+              {
+                for (size_t b (0), e (0); next_word (v, b, e); )
+                {
+                  r.like_ids.push_back (string (v, b, e - b));
+                }
+              }
+            }
+            else if (string* p = (n == "ID"               ?  &r.name_id :
+                                  n == "VERSION_ID"       ?  &r.version_id :
+                                  n == "VARIANT_ID"       ?  &r.variant_id :
+                                  n == "NAME"             ?  &r.name :
+                                  n == "VERSION_CODENAME" ?  &r.version_codename :
+                                  n == "VARIANT"          ?  &r.variant :
+                                  nullptr))
+            {
+              vector<string> vs (parse_quoted (l, true /* unquote */));
+              switch (vs.size ())
+              {
+              case 0:  *p =  ""; break;
+              case 1:  *p = move (vs.front ()); break;
+              default: throw invalid_string (0, "multiple values");
+              }
+            }
+          }
+          catch (const invalid_string& e)
+          {
+            ostringstream os;
+            os << "invalid " << n << " value in " << f << ':' << ln << ": "
+               << e;
+            throw runtime_error (os.str ());
+          }
+        }
+
+        ifs.close ();
+      }
+      catch (const ios::failure& e)
+      {
+        ostringstream os;
+        os << "unable to read from " << f << ": " << e;
+        throw runtime_error (os.str ());
+      }
+    }
+
+    // Assign fallback values.
+    //
+    if (r.name_id.empty ()) r.name_id = "linux";
+    if (r.name.empty ())    r.name    = "Linux";
+
+    return r;
+  }
+
+  static os_release
+  host_os_release_macos ()
+  {
+    // Run sw_vers -productVersion to get Mac OS version.
+    //
+    try
+    {
+      process pr;
+      try
+      {
+        fdpipe pipe (fdopen_pipe ());
+
+        pr = process_start (0, pipe, 2, "sw_vers", "-productVersion");
+
+        pipe.out.close ();
+        ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit);
+
+        // The output should be one line containing the version.
+        //
+        optional<string> v;
+        for (string l; !eof (getline (is, l)); )
+        {
+          if (l.empty () || v)
+          {
+            v = nullopt;
+            break;
+          }
+
+          v = move (l);
+        }
+
+        is.close (); // Detect errors.
+
+        if (pr.wait ())
+        {
+          if (!v)
+            throw runtime_error ("unexpected sw_vers -productVersion output");
+
+          return os_release {"macos", {}, move (*v), "", "Mac OS", "", ""};
+        }
+
+      }
+      catch (const ios::failure& e)
+      {
+        if (pr.wait ())
+        {
+          ostringstream os;
+          os << "error reading sw_vers output: " << e;
+          throw runtime_error (os.str ());
+        }
+
+        // Fall through.
+      }
+
+      // We should only get here if the child exited with an error status.
+      //
+      assert (!pr.wait ());
+      throw runtime_error ("process sw_vers exited with non-zero code");
+    }
+    catch (const process_error& e)
+    {
+      ostringstream os;
+      os << "unable to execute sw_vers: " << e;
+      throw runtime_error (os.str ());
+    }
+  }
+
+  static os_release
+  host_os_release_windows ()
+  {
+#ifdef _WIN32
+    // The straightforward way to get the version would be the GetVersionEx()
+    // Win32 function. However, if the application is built with a certain
+    // assembly manifest, this function will return the version the
+    // application was built for rather than what's actually running.
+    //
+    // The other plausible options are to call the `ver` program and parse its
+    // output (of questionable regularity) or to call RtlGetVersion(). The
+    // latter combined with GetProcAddress() seems to be a widely-used
+    // approach, so we are going with that (seeing that we employ a similar
+    // technique in quite a few places).
+    //
+    HMODULE nh (GetModuleHandle ("ntdll.dll"));
+    if (nh == nullptr)
+      throw runtime_error ("unable to get handle to ntdll.dll");
+
+    using RtlGetVersion = LONG /*NTSTATUS*/ (WINAPI*)(PRTL_OSVERSIONINFOW);
+
+    RtlGetVersion gv (
+      function_cast<RtlGetVersion> (
+        GetProcAddress (nh, "RtlGetVersion")));
+
+    // RtlGetVersion() is available from Windows 2000 which is way before
+    // anything we might possibly care about (e.g., XP or 7).
+    //
+    if (gv == nullptr)
+      throw runtime_error ("unable to get address of RtlGetVersion()");
+
+    RTL_OSVERSIONINFOW vi;
+    vi.dwOSVersionInfoSize = sizeof (vi);
+    gv (&vi); // Always succeeds, according to documentation.
+
+    // Ok, the real mess starts here. Here is how the commonly known Windows
+    // versions correspond to the major/minor/build numbers and how we will
+    // map them (note that there are also Server versions in the mix; see the
+    // OSVERSIONINFOEXW struct documentation for the complete picture):
+    //
+    //                        major  minor  build      mapped
+    // Windows 11             10     0      >=22000    11
+    // Windows 10             10     0      <22000     10
+    // Windows 8.1             6     3                 8.1
+    // Windows 8               6     2                 8
+    // Windows 7               6     1                 7
+    // Windows Vista           6     0                 6
+    // Windows XP Pro/64-bit   5     2                 5.2
+    // Windows XP              5     1                 5.1
+    // Windows 2000            5     0                 5
+    //
+    // Based on this it's probably not wise to try to map any future versions
+    // automatically (so an unrecognized version is reported as runtime_error).
+    //
+    string v;
+    if (vi.dwMajorVersion == 10 && vi.dwMinorVersion == 0)
+    {
+      v = vi.dwBuildNumber >= 22000 ? "11" : "10";
+    }
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 3) v = "8.1";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 2) v = "8";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 1) v = "7";
+    else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 0) v = "6";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 2) v = "5.2";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 1) v = "5.1";
+    else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 0) v = "5";
+    else throw runtime_error ("unknown windows version " +
+                              std::to_string (vi.dwMajorVersion) + '.' +
+                              std::to_string (vi.dwMinorVersion) + '.' +
+                              std::to_string (vi.dwBuildNumber));
+
+    return os_release {"windows", {}, move (v), "", "Windows", "", ""};
+#else
+    throw runtime_error ("unexpected host operating system");
+#endif
+  }
+
+  optional<os_release>
+  host_os_release (const target_triplet& h)
+  {
+    const string& c (h.class_);
+    const string& s (h.system);
+
+    if (c == "linux")
+      return host_os_release_linux ();
+
+    if (c == "macos")
+      return host_os_release_macos ();
+
+    if (c == "windows")
+      return host_os_release_windows ();
+
+    if (c == "bsd")
+    {
+      // @@ TODO: ideally we would want to run uname and obtain the actual
+      //    version we are running on rather than what we've been built for.
+      //    (Think also how this will affect tests).
+      //
+      if (s == "freebsd")
+        return os_release {"freebsd", {}, h.version, "", "FreeBSD", "", ""};
+
+      if (s == "netbsd")
+        return os_release {"netbsd", {}, h.version, "", "NetBSD", "", ""};
+
+      if (s == "openbsd")
+        return os_release {"openbsd", {}, h.version, "", "OpenBSD", "", ""};
+
+      // Assume some other BSD.
+      //
+      return os_release {s, {}, h.version, "", s, "", ""};
+    }
+
+    return nullopt;
+  }
+}
diff --git a/libbutl/host-os-release.hxx b/libbutl/host-os-release.hxx
new file mode 100644
index 0000000..058afdc
--- /dev/null
+++ b/libbutl/host-os-release.hxx
@@ -0,0 +1,86 @@
+// file      : libbutl/host-os-release.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <libbutl/optional.hxx>
+#include <libbutl/target-triplet.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  // Information extracted from /etc/os-release on Linux. See os-release(5)
+  // for background. For other platforms we derive the equivalent information
+  // from other sources. Some examples:
+  //
+  // {"debian", {}, "10", "",
+  //  "Debian GNU/Linux", "buster", ""}
+  //
+  // {"fedora", {}, "35", "workstation",
+  //  "Fedora Linux", "", "Workstation Edition"}
+  //
+  // {"ubuntu", {"debian"}, "20.04", "",
+  //  "Ubuntu", "focal", ""}
+  //
+  // {"macos", {}, "12.5", "",
+  //  "Mac OS", "", ""}
+  //
+  // {"freebsd", {}, "13.1", "",
+  //  "FreeBSD", "", ""}
+  //
+  // {"windows", {}, "10", "",
+  //  "Windows", "", ""}
+  //
+  // Note that for Mac OS, the version is the Mac OS version (as printed by
+  // sw_vers) rather than Darwin version (as printed by uname).
+  //
+  // For Windows we currently do not distinguish the Server edition and the
+  // version mapping is as follows:
+  //
+  // Windows 11             11
+  // Windows 10             10
+  // Windows 8.1            8.1
+  // Windows 8              8
+  // Windows 7              7
+  // Windows Vista          6
+  // Windows XP Pro/64-bit  5.2
+  // Windows XP             5.1
+  // Windows 2000           5
+  //
+  // Note that version_id may be empty, for example, on Debian testing:
+  //
+  // {"debian", {}, "", "",
+  //  "Debian GNU/Linux", "", ""}
+  //
+  // Note also that we don't extract PRETTY_NAME because its content is
+  // unpredictable. For example, it may include variant, as in "Fedora Linux
+  // 35 (Workstation Edition)". Instead, construct it from the individual
+  // components as appropriate, normally "$name $version ($version_codename)".
+  //
+  struct os_release
+  {
+    std::string              name_id;    // ID
+    std::vector<std::string> like_ids;   // ID_LIKE
+    std::string              version_id; // VERSION_ID
+    std::string              variant_id; // VARIANT_ID
+
+    std::string name;             // NAME
+    std::string version_codename; // VERSION_CODENAME
+    std::string variant;          // VARIANT
+  };
+
+  // Return the release information for the specified host or nullopt if the
+  // specific host is unknown/unsupported. Throw std::runtime_error if
+  // anything goes wrong.
+  //
+  // Note that "host" here implies that we may be running programs, reading
+  // files, examining environment variables, etc., of the machine we are
+  // running on.
+  //
+  LIBBUTL_SYMEXPORT optional<os_release>
+  host_os_release (const target_triplet& host);
+}
diff --git a/libbutl/json/event.hxx b/libbutl/json/event.hxx
new file mode 100644
index 0000000..77185cc
--- /dev/null
+++ b/libbutl/json/event.hxx
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace butl
+{
+  namespace json
+  {
+    // Parsing/serialization event.
+    //
+    enum class event: std::uint8_t
+    {
+      begin_object = 1,
+      end_object,
+      begin_array,
+      end_array,
+      name,
+      string,
+      number,
+      boolean,
+      null
+    };
+
+    constexpr std::size_t event_count = 9;
+  }
+}
diff --git a/libbutl/json/parser.cxx b/libbutl/json/parser.cxx
new file mode 100644
index 0000000..8ef7422
--- /dev/null
+++ b/libbutl/json/parser.cxx
@@ -0,0 +1,645 @@
+#define PDJSON_SYMEXPORT static // See below.
+
+#include <libbutl/json/parser.hxx>
+
+#include <istream>
+
+// There is an issue (segfault) with using std::current_exception() and
+// std::rethrow_exception() with older versions of libc++ on Linux. While the
+// exact root cause hasn't been determined, the suspicion is that something
+// gets messed up if we "smuggle" std::exception_ptr through extern "C" call
+// frames (we cannot even destroy such an exception without a segfault). We
+// also could not determine in which version exactly this has been fixed but
+// we know that libc++ 6.0.0 doesn't appear to have this issue (though we are
+// not entirely sure the issue is (only) in libc++; libgcc_s could also be
+// involved).
+//
+// The workaround is to just catch (and note) the exception and then throw a
+// new instance of generic std::istream::failure. In order not to drag the
+// below test into the header, we wrap exception_ptr with optional<> and use
+// NULL to indicate the presence of the exception when the workaround is
+// required.
+//
+// Note that if/when we drop this workaround, we should also get rid of
+// optional<> in stream::exception member.
+//
+#undef LIBBUTL_JSON_NO_EXCEPTION_PTR
+
+#if defined (__linux__) && defined(__clang__)
+#  if __has_include(<__config>)
+#    include <__config> // _LIBCPP_VERSION
+#    if _LIBCPP_VERSION < 6000
+#      define LIBBUTL_JSON_NO_EXCEPTION_PTR 1
+#    endif
+#  endif
+#endif
+
+namespace butl
+{
+  namespace json
+  {
+    using namespace std;
+
+    parser::
+    ~parser ()
+    {
+      json_close (impl_);
+    }
+
+    static int
+    stream_get (void* x)
+    {
+      auto& s (*static_cast<parser::stream*> (x));
+
+      // In the multi-value mode reading of whitespaces/separators is split
+      // between our code and pdjson's. As a result, these functions may end
+      // up being called more than once after EOF is reached. Which is
+      // something iostream does not handle gracefully.
+      //
+      if (!s.is->eof ())
+      {
+        try
+        {
+          // We first peek not to trip failbit on EOF.
+          //
+          if (s.is->peek () != istream::traits_type::eof ())
+            return static_cast<char> (s.is->get ());
+        }
+        catch (...)
+        {
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+          s.exception = current_exception ();
+#else
+          s.exception = nullptr;
+#endif
+        }
+      }
+
+      return EOF;
+    }
+
+    static int
+    stream_peek (void* x)
+    {
+      auto& s (*static_cast<parser::stream*> (x));
+
+      if (!s.is->eof ())
+      {
+        try
+        {
+          auto c (s.is->peek ());
+          if (c != istream::traits_type::eof ())
+            return static_cast<char> (c);
+        }
+        catch (...)
+        {
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+          s.exception = current_exception ();
+#else
+          s.exception = nullptr;
+#endif
+        }
+      }
+
+      return EOF;
+    }
+
+    // NOTE: watch out for exception safety (specifically, doing anything that
+    // might throw after opening the stream).
+    //
+    parser::
+    parser (istream& is, const char* n, bool mv, const char* sep) noexcept
+        : input_name (n),
+          stream_ {&is, nullopt},
+          multi_value_ (mv),
+          separators_ (sep),
+          raw_s_ (nullptr),
+          raw_n_ (0)
+    {
+      json_open_user (impl_, &stream_get, &stream_peek, &stream_);
+      json_set_streaming (impl_, multi_value_);
+    }
+
+    parser::
+    parser (const void* t,
+            size_t s,
+            const char* n,
+            bool mv,
+            const char* sep) noexcept
+        : input_name (n),
+          stream_ {nullptr, nullopt},
+          multi_value_ (mv),
+          separators_ (sep),
+          raw_s_ (nullptr),
+          raw_n_ (0)
+    {
+      json_open_buffer (impl_, t, s);
+      json_set_streaming (impl_, multi_value_);
+    }
+
+    optional<event> parser::
+    next ()
+    {
+      name_p_ = value_p_ = location_p_ = false;
+
+      // Note that for now we don't worry about the state of the parser if
+      // next_impl() throws assuming it is not going to be reused.
+      //
+      if (peeked_)
+      {
+        parsed_ = peeked_;
+        peeked_ = nullopt;
+      }
+      else
+        parsed_ = next_impl ();
+
+      return translate (*parsed_);
+    }
+
+    optional<event> parser::
+    peek ()
+    {
+      if (!peeked_)
+      {
+        if (parsed_)
+        {
+          cache_parsed_data ();
+          cache_parsed_location ();
+        }
+        peeked_ = next_impl ();
+      }
+      return translate (*peeked_);
+    }
+
+    static inline const char*
+    event_name (event e)
+    {
+      switch (e)
+      {
+      case event::begin_object: return "beginning of object";
+      case event::end_object:   return "end of object";
+      case event::begin_array:  return "beginning of array";
+      case event::end_array:    return "end of array";
+      case event::name:         return "member name";
+      case event::string:       return "string value";
+      case event::number:       return "numeric value";
+      case event::boolean:      return "boolean value";
+      case event::null:         return "null value";
+      }
+
+      return "";
+    }
+
+    bool parser::
+    next_expect (event p, optional<event> s)
+    {
+      optional<event> e (next ());
+      bool r;
+      if (e && ((r = *e == p) || (s && *e == *s)))
+        return r;
+
+      string d ("expected ");
+      d += event_name (p);
+
+      if (s)
+      {
+        d += " or ";
+        d += event_name (*s);
+      }
+
+      if (e)
+      {
+        d += " instead of ";
+        d += event_name (*e);
+      }
+
+      throw invalid_json_input (input_name != nullptr ? input_name : "",
+                                line (),
+                                column (),
+                                position (),
+                                move (d));
+    }
+
+    void parser::
+    next_expect_name (const char* n, bool su)
+    {
+      for (;;)
+      {
+        next_expect (event::name);
+
+        if (name () == n)
+          return;
+
+        if (!su)
+          break;
+
+        next_expect_value_skip ();
+      }
+
+      string d ("expected object member name '");
+      d += n;
+      d += "' instead of '";
+      d += name ();
+      d += '\'';
+
+      throw invalid_json_input (input_name != nullptr ? input_name : "",
+                                line (),
+                                column (),
+                                position (),
+                                move (d));
+    }
+
+    void parser::
+    next_expect_value_skip ()
+    {
+      optional<event> e (next ());
+
+      if (e)
+      {
+        switch (*e)
+        {
+        case event::begin_object:
+        case event::begin_array:
+          {
+            // Skip until matching end_object/array keeping track of nesting.
+            // We are going to rely on the fact that we should either get such
+            // an event or next() should throw.
+            //
+            event be (*e);
+            event ee (be == event::begin_object
+                      ? event::end_object
+                      : event::end_array);
+
+            for (size_t n (0);; )
+            {
+              event e (*next ());
+
+              if (e == ee)
+              {
+                if (n == 0)
+                  break;
+
+                --n;
+              }
+              else if (e == be)
+                ++n;
+            }
+
+            return;
+          }
+        case event::string:
+        case event::number:
+        case event::boolean:
+        case event::null:
+          return;
+        case event::name:
+        case event::end_object:
+        case event::end_array:
+          break;
+        }
+      }
+
+      string d ("expected value");
+
+      if (e)
+      {
+        d += " instead of ";
+        d += event_name (*e);
+      }
+
+      throw invalid_json_input (input_name != nullptr ? input_name : "",
+                                line (),
+                                column (),
+                                position (),
+                                move (d));
+    }
+
+    std::uint64_t parser::
+    line () const noexcept
+    {
+      if (!location_p_)
+      {
+        if (!parsed_)
+          return 0;
+
+        assert (!peeked_);
+
+        return static_cast<uint64_t> (
+            json_get_lineno (const_cast<json_stream*> (impl_)));
+      }
+
+      return line_;
+    }
+
+    std::uint64_t parser::
+    column () const noexcept
+    {
+      if (!location_p_)
+      {
+        if (!parsed_)
+          return 0;
+
+        assert (!peeked_);
+
+        return static_cast<uint64_t> (
+            json_get_column (const_cast<json_stream*> (impl_)));
+      }
+
+      return column_;
+    }
+
+    // Return the position for the most recently parsed event: the cached
+    // value if one is present, 0 if nothing has been parsed yet, and the
+    // position reported by the underlying parser otherwise.
+    //
+    std::uint64_t parser::
+    position () const noexcept
+    {
+      if (location_p_)
+        return position_;
+
+      if (!parsed_)
+        return 0;
+
+      // Without a cached location we query the underlying parser directly,
+      // which is only expected to happen when there is no peeked event.
+      //
+      assert (!peeked_);
+
+      json_stream* s (const_cast<json_stream*> (impl_));
+      return static_cast<uint64_t> (json_get_position (s));
+    }
+
+    // Advance the underlying parser to the next event and return it in the
+    // raw (json_type) form. The cached raw value (raw_s_/raw_n_) is reset on
+    // entry and then set for strings, numbers, and the true/false/null
+    // literals.
+    //
+    // In the multi-value mode also skip (and, where required, validate) the
+    // separators around JSON values.
+    //
+    // Throw invalid_json_input if the input is not valid JSON, if a required
+    // separator is missing, or if the input stream has failed. If the stream
+    // has a pending exception, rethrow it (or throw istream::failure if
+    // LIBBUTL_JSON_NO_EXCEPTION_PTR is defined).
+    //
+    json_type parser::
+    next_impl ()
+    {
+      raw_s_ = nullptr;
+      raw_n_ = 0;
+      json_type e;
+
+      // Read characters between values skipping required separators and JSON
+      // whitespaces. Return whether a required separator was encountered as
+      // well as the first non-separator/whitespace character (which, if EOF,
+      // should trigger a check for input/output errors).
+      //
+      // Note that the returned non-separator will not have been extracted
+      // from the input (so position, column, etc. will still refer to its
+      // predecessor).
+      //
+      auto skip_separators = [this] () -> pair<bool, int>
+      {
+        bool r (separators_ == nullptr);
+
+        int c;
+        for (; (c = json_source_peek (impl_)) != EOF; json_source_get (impl_))
+        {
+          // User separator.
+          //
+          // Note that strchr() considers the terminating `\0` to be part of
+          // the string, so exclude the NUL character explicitly: otherwise a
+          // NUL byte in the input would match any separator list.
+          //
+          if (separators_ != nullptr && *separators_ != '\0')
+          {
+            if (c != '\0' && strchr (separators_, c) != nullptr)
+            {
+              r = true;
+              continue;
+            }
+          }
+
+          // JSON separator.
+          //
+          if (json_isspace (c))
+          {
+            if (separators_ != nullptr && *separators_ == '\0')
+              r = true;
+
+            continue;
+          }
+
+          break;
+        }
+
+        return make_pair (r, c);
+      };
+
+      // In the multi-value mode skip any instances of required separators
+      // (and any other JSON whitespace) preceding the first JSON value.
+      //
+      if (multi_value_ && !parsed_ && !peeked_)
+      {
+        if (skip_separators ().second == EOF && stream_.is != nullptr)
+        {
+          if (stream_.exception)   goto fail_rethrow;
+          if (stream_.is->fail ()) goto fail_stream;
+        }
+      }
+
+      e = json_next (impl_);
+
+      // First check for a pending input/output error.
+      //
+      if (stream_.is != nullptr)
+      {
+        if (stream_.exception)   goto fail_rethrow;
+        if (stream_.is->fail ()) goto fail_stream;
+      }
+
+      // There are two ways to view separation between two values: as following
+      // the first value or as preceding the second value. And one aspect that
+      // is determined by this is whether a separation violation is a problem
+      // with the first value or with the second, which becomes important if
+      // the user bails out before parsing the second value.
+      //
+      // Consider these two unseparated values (yes, in JSON they are two
+      // values, leading zeros are not allowed in JSON numbers):
+      //
+      // 01
+      //
+      // If the user bails out after parsing 0 in a stream that should have
+      // been newline-delimited, they most likely would want to get an error
+      // since this is most definitely an invalid value rather than two
+      // values that are not properly separated. So in this light we handle
+      // separators at the end of the first value.
+      //
+      switch (e)
+      {
+      case JSON_DONE:
+        {
+          // Deal with the following value separators.
+          //
+          // Note that we must not do this for the second JSON_DONE (or the
+          // first one in case there are no values) that signals the end of
+          // input.
+          //
+          if (multi_value_         &&
+              (parsed_ || peeked_) &&
+              (peeked_ ? *peeked_ : *parsed_) != JSON_DONE)
+          {
+            auto p (skip_separators ());
+
+            if (p.second == EOF && stream_.is != nullptr)
+            {
+              if (stream_.exception)   goto fail_rethrow;
+              if (stream_.is->fail ()) goto fail_stream;
+            }
+
+            // Note that we don't require separators after the last value.
+            //
+            if (!p.first && p.second != EOF)
+            {
+              json_source_get (impl_); // Consume to update column number.
+              goto fail_separation;
+            }
+
+            json_reset (impl_);
+          }
+          break;
+        }
+      case JSON_ERROR: goto fail_json;
+      case JSON_STRING:
+      case JSON_NUMBER:
+        raw_s_ = json_get_string (impl_, &raw_n_);
+        raw_n_--; // Includes terminating `\0`.
+        break;
+      case JSON_TRUE:  raw_s_ = "true";  raw_n_ = 4; break;
+      case JSON_FALSE: raw_s_ = "false"; raw_n_ = 5; break;
+      case JSON_NULL:  raw_s_ = "null";  raw_n_ = 4; break;
+      default: break;
+      }
+
+      return e;
+
+    fail_json:
+      throw invalid_json_input (
+          input_name != nullptr ? input_name : "",
+          static_cast<uint64_t> (json_get_lineno (impl_)),
+          static_cast<uint64_t> (json_get_column (impl_)),
+          static_cast<uint64_t> (json_get_position (impl_)),
+          json_get_error (impl_));
+
+    fail_separation:
+      throw invalid_json_input (
+          input_name != nullptr ? input_name : "",
+          static_cast<uint64_t> (json_get_lineno (impl_)),
+          static_cast<uint64_t> (json_get_column (impl_)),
+          static_cast<uint64_t> (json_get_position (impl_)),
+          "missing separator between JSON values");
+
+    fail_stream:
+      throw invalid_json_input (
+          input_name != nullptr ? input_name : "",
+          static_cast<uint64_t> (json_get_lineno (impl_)),
+          static_cast<uint64_t> (json_get_column (impl_)),
+          static_cast<uint64_t> (json_get_position (impl_)),
+          "unable to read JSON input text");
+
+    fail_rethrow:
+#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR
+      rethrow_exception (move (*stream_.exception));
+#else
+      throw istream::failure ("unable to read");
+#endif
+    }
+
+    // Map a raw event of the underlying parser to the corresponding parser
+    // event. JSON_DONE is mapped to nullopt.
+    //
+    optional<event> parser::
+    translate (json_type e) const noexcept
+    {
+      optional<event> r; // Stays absent for JSON_DONE (and JSON_ERROR).
+
+      switch (e)
+      {
+      case JSON_DONE:
+        break;
+      case JSON_OBJECT:
+        r = event::begin_object;
+        break;
+      case JSON_OBJECT_END:
+        r = event::end_object;
+        break;
+      case JSON_ARRAY:
+        r = event::begin_array;
+        break;
+      case JSON_ARRAY_END:
+        r = event::end_array;
+        break;
+      case JSON_STRING:
+        {
+          // A string is a member name if we are inside an object and the
+          // count reported for the current context is odd. Otherwise it is
+          // a value.
+          //
+          size_t n;
+          const bool member (
+              json_get_context (const_cast<json_stream*> (impl_), &n) ==
+                  JSON_OBJECT &&
+              n % 2 == 1);
+
+          if (member)
+            r = event::name;
+          else
+            r = event::string;
+
+          break;
+        }
+      case JSON_NUMBER:
+        r = event::number;
+        break;
+      case JSON_TRUE:
+      case JSON_FALSE:
+        r = event::boolean;
+        break;
+      case JSON_NULL:
+        r = event::null;
+        break;
+      case JSON_ERROR:
+        assert (false); // Should've been handled by caller.
+        break;
+      }
+
+      return r;
+    }
+
+    // Cache the raw data of the current parsed event as the member name or
+    // value, depending on the event kind, clearing any previously cached
+    // name/value first.
+    //
+    void parser::
+    cache_parsed_data ()
+    {
+      name_p_ = false;
+      value_p_ = false;
+
+      const optional<event> e (translate (*parsed_));
+
+      if (!e)
+        return;
+
+      if (*e == event::name)
+      {
+        name_.assign (raw_s_, raw_n_);
+        name_p_ = true;
+      }
+      else if (value_event (e))
+      {
+        value_.assign (raw_s_, raw_n_);
+        value_p_ = true;
+      }
+    }
+
+    // Save the line, column, and position currently reported by the
+    // underlying parser and mark the cached location as present.
+    //
+    void parser::
+    cache_parsed_location () noexcept
+    {
+      json_stream* s (impl_);
+
+      line_     = static_cast<uint64_t> (json_get_lineno (s));
+      column_   = static_cast<uint64_t> (json_get_column (s));
+      position_ = static_cast<uint64_t> (json_get_position (s));
+
+      location_p_ = true;
+    }
+
+    // Return true if the event is present and is one of the value events
+    // (string, number, boolean, or null).
+    //
+    bool parser::
+    value_event (optional<event> e) noexcept
+    {
+      return e && (*e == event::string  ||
+                   *e == event::number  ||
+                   *e == event::boolean ||
+                   *e == event::null);
+    }
+
+    // Throw invalid_json_input describing an invalid value of the specified
+    // type. The value is given as a pointer/length pair and is quoted in the
+    // description. The location is the current line/column/position.
+    //
+    [[noreturn]] void parser::
+    throw_invalid_value (const char* type, const char* v, size_t n) const
+    {
+      string d ("invalid ");
+      d += type;
+      d += " value: '";
+      d.append (v, n);
+      d += '\'';
+
+      const char* in (input_name != nullptr ? input_name : "");
+
+      throw invalid_json_input (in, line (), column (), position (), move (d));
+    }
+  } // namespace json
+} // namespace butl
+
+// Include the implementation into our translation unit (instead of compiling
+// it separately) to (hopefully) get function inlining without LTO.
+//
+// Let's keep it last since the implementation defines a couple of macros.
+//
+#if defined(__clang__) || defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+extern "C"
+{
+#define PDJSON_STACK_INC 16
+#define PDJSON_STACK_MAX 2048
+#include "pdjson.c"
+}
diff --git a/libbutl/json/parser.hxx b/libbutl/json/parser.hxx
new file mode 100644
index 0000000..95d9c4e
--- /dev/null
+++ b/libbutl/json/parser.hxx
@@ -0,0 +1,705 @@
+#pragma once
+
+#ifdef BUILD2_BOOTSTRAP
+#  error JSON parser not available during bootstrap
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <cstddef>   // size_t
+#include <cstdint>   // uint64_t
+#include <utility>   // pair
+#include <exception> // exception_ptr
+#include <stdexcept> // invalid_argument
+
+#include <libbutl/optional.hxx> // butl::optional is std::optional or similar.
+
+#include <libbutl/json/event.hxx>
+
+#include <libbutl/json/pdjson.h> // Implementation details.
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  // Using the RFC8259 terminology: JSON (input) text, JSON value, object
+  // member.
+  //
+  namespace json
+  {
+    // Exception used to report invalid JSON input. In addition to the
+    // description (passed to the std::invalid_argument base), it carries the
+    // name of the input and the location the error refers to.
+    //
+    class invalid_json_input: public std::invalid_argument
+    {
+    public:
+      // Input name and error location as passed to the constructor.
+      //
+      std::string   name;
+      std::uint64_t line;
+      std::uint64_t column;
+      std::uint64_t position;
+
+      // Construct with the description specified as std::string.
+      //
+      invalid_json_input (std::string name,
+                          std::uint64_t line,
+                          std::uint64_t column,
+                          std::uint64_t position,
+                          const std::string& description);
+
+      // Construct with the description specified as a C-string.
+      //
+      invalid_json_input (std::string name,
+                          std::uint64_t line,
+                          std::uint64_t column,
+                          std::uint64_t position,
+                          const char* description);
+    };
+
+    class LIBBUTL_SYMEXPORT parser
+    {
+    public:
+      const char* input_name;
+
+      // Construction.
+      //
+
+      // Parse JSON input text from std::istream.
+      //
+      // The name argument is used to identify the input being parsed. Note
+      // that the stream, name, and separators are kept as references so they
+      // must outlive the parser instance.
+      //
+      // If stream exceptions are enabled then the std::ios_base::failure
+      // exception is used to report input/output errors (badbit and failbit).
+      // Otherwise, those are reported as the invalid_json_input exception.
+      //
+      // If multi_value is true, enable the multi-value mode in which case the
+      // input stream may contain multiple JSON values (more precisely, zero
+      // or more). If false (the default), parsing will fail unless there is
+      // exactly one JSON value in the input stream.
+      //
+      // If multi_value is true, the separators argument specifies the
+      // required separator characters between JSON values. At least one of
+      // them must be present between every pair of JSON values (in addition
+      // to any number of JSON whitespaces). No separators are required after
+      // the last JSON value (but any found will be skipped).
+      //
+      // Specifically, if it is NULL, then no separation is required (that is,
+      // both `{...}{...}` and `{...}  {...}` would be valid). If it is empty,
+      // then at least one JSON whitespace is required. And if it is non-
+      // empty, then at least one of its characters must be present (for
+      // example, "\n\t" would require at least one newline or TAB character
+      // between JSON values).
+      //
+      // Note that a separator need not be valid JSON whitespace: any
+      // character is acceptable (though it probably shouldn't be an object,
+      // array, or string delimiter and should not occur within a non-self-
+      // delimited top-level value, such as `true`, `false`, `null`, or a
+      // number). All instances of required separators before and after a
+      // value are skipped. Therefore JSON Text Sequences (RFC 7464; AKA
+      // Record Separator-delimited JSON), which requires the RS (0x1E)
+      // character before each value, can be handled as well.
+      //
+      parser (std::istream&,
+              const std::string& name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (std::istream&,
+              const char* name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (std::istream&,
+              std::string&&,
+              bool = false,
+              const char* = nullptr) = delete;
+
+      // Parse a memory buffer that contains the entire JSON input text.
+      //
+      // The name argument is used to identify the input being parsed. Note
+      // that the buffer, name, and separators are kept as references so they
+      // must outlive the parser instance.
+      //
+      parser (const void* text,
+              std::size_t size,
+              const std::string& name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const void* text,
+              std::size_t size,
+              const char* name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const void*,
+              std::size_t,
+              std::string&&,
+              bool = false,
+              const char* = nullptr) = delete;
+
+      // Similar to the above but parse a string.
+      //
+      parser (const std::string& text,
+              const std::string& name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const std::string& text,
+              const char* name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const std::string&,
+              std::string&&,
+              bool = false,
+              const char* = nullptr) = delete;
+
+      // Similar to the above but parse a C-string.
+      //
+      parser (const char* text,
+              const std::string& name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const char* text,
+              const char* name,
+              bool multi_value = false,
+              const char* separators = nullptr) noexcept;
+
+      parser (const char*,
+              std::string&&,
+              bool = false,
+              const char* = nullptr) = delete;
+
+      parser (parser&&) = delete;
+      parser (const parser&) = delete;
+
+      parser& operator= (parser&&) = delete;
+      parser& operator= (const parser&) = delete;
+
+      // Event iteration.
+      //
+
+      // Return the next event or nullopt if end of input is reached.
+      //
+      // In the single-value parsing mode (default) the parsing code could
+      // look like this:
+      //
+      //     while (optional<event> e = p.next ())
+      //     {
+      //       switch (*e)
+      //       {
+      //         // ...
+      //       }
+      //     }
+      //
+      // In the multi-value mode the parser additionally returns nullopt after
+      // every JSON value parsed (so there will be two nullopt's after the
+      // last JSON value, the second indicating the end of input).
+      //
+      // One way to perform multi-value parsing is with the help of the peek()
+      // function (see below):
+      //
+      //     while (p.peek ())
+      //     {
+      //       while (optional<event> e = p.next ())
+      //       {
+      //         switch (*e)
+      //         {
+      //           //...
+      //         }
+      //       }
+      //     }
+      //
+      // Note that while the single-value mode will always parse exactly one
+      // value, the multi-value mode will accept zero values in which case a
+      // single nullopt is returned.
+      //
+      optional<event>
+      next ();
+
+      // The range-based for loop support.
+      //
+      // In the single-value parsing mode (default) the parsing code could
+      // look like this:
+      //
+      //     for (event e: p)
+      //     {
+      //       switch (e)
+      //       {
+      //         //...
+      //       }
+      //     }
+      //
+      // And in the multi-value mode (see next() for more information) like
+      // this:
+      //
+      //     while (p.peek ())
+      //     {
+      //       for (event e: p)
+      //       {
+      //         switch (e)
+      //         {
+      //           //...
+      //         }
+      //       }
+      //     }
+      //
+      // Note that generally, the iterator interface doesn't make much sense
+      // for the parser so for now we have an implementation that is just
+      // enough for the range-based for.
+      //
+      struct iterator;
+
+      // Return an iterator positioned on the first event, which is parsed
+      // right away by calling next().
+      //
+      iterator
+      begin ()
+      {
+        optional<event> e (next ());
+        return iterator (this, e);
+      }
+
+      // Return the end iterator, which has no parser and no event.
+      //
+      iterator
+      end ()
+      {
+        return iterator ();
+      }
+
+      // Return the next event without considering it parsed. In other words,
+      // after this call, any subsequent calls to peek() and the next call to
+      // next() (if any) will all return the same event.
+      //
+      // Note that the name, value, and line corresponding to the peeked event
+      // are not accessible with name(), value() and line(); these functions
+      // will still return values corresponding to the most recent call to
+      // next(). The peeked values, however, can be accessed in the raw form
+      // using data().
+      //
+      optional<event>
+      peek ();
+
+
+      // Event data access.
+      //
+
+      // Return the object member name.
+      //
+      const std::string&
+      name ();
+
+      // Any value (string, number, boolean, and null) can be retrieved as a
+      // string. Calling this function after any non-value events is illegal.
+      //
+      // Note that the value is returned as a non-const string reference and
+      // you are allowed to move the value out of it. However, this should not
+      // be done unnecessarily or in cases where the small string optimization
+      // is likely since the string's buffer is reused to store subsequent
+      // values.
+      //
+      std::string&
+      value ();
+
+      // Convert the value to an integer, floating point, or bool. Throw
+      // invalid_json_input if the conversion is impossible without a loss.
+      //
+      template <typename T>
+      T
+      value () const;
+
+      // Return the value or object member name in the raw form.
+      //
+      // Calling this function on non-value/name events is legal in which case
+      // NULL is returned. Note also that the returned data corresponds to the
+      // most recent event, whether peeked or parsed.
+      //
+      std::pair<const char*, std::size_t>
+      data () const
+      {
+        // The cached raw value as a pointer/length pair.
+        //
+        return std::pair<const char*, std::size_t> (raw_s_, raw_n_);
+      }
+
+
+      // Higher-level API suitable for parsing specific JSON vocabularies.
+      //
+      // The API summary:
+      //
+      // void next_expect (event);
+      // bool next_expect (event primary, event secondary);
+      //
+      // void next_expect_name (string name, bool skip_unknown = false);
+      //
+      // std::string& next_expect_string    ();
+      // T            next_expect_string<T> ();
+      // std::string& next_expect_number    ();
+      // T            next_expect_number<T> ();
+      // std::string& next_expect_boolean   ();
+      // T            next_expect_boolean<T>();
+      //
+      // std::string* next_expect_string_null    ();
+      // optional<T>  next_expect_string_null<T> ();
+      // std::string* next_expect_number_null    ();
+      // optional<T>  next_expect_number_null<T> ();
+      // std::string* next_expect_boolean_null   ();
+      // optional<T>  next_expect_boolean_null<T>();
+      //
+      // std::string& next_expect_member_string    (string name, bool = false);
+      // T            next_expect_member_string<T> (string name, bool = false);
+      // std::string& next_expect_member_number    (string name, bool = false);
+      // T            next_expect_member_number<T> (string name, bool = false);
+      // std::string& next_expect_member_boolean   (string name, bool = false);
+      // T            next_expect_member_boolean<T>(string name, bool = false);
+      //
+      // std::string* next_expect_member_string_null    (string, bool = false);
+      // optional<T>  next_expect_member_string_null<T> (string, bool = false);
+      // std::string* next_expect_member_number_null    (string, bool = false);
+      // optional<T>  next_expect_member_number_null<T> (string, bool = false);
+      // std::string* next_expect_member_boolean_null   (string, bool = false);
+      // optional<T>  next_expect_member_boolean_null<T>(string, bool = false);
+      //
+      // void next_expect_member_object     (string name, bool = false);
+      // bool next_expect_member_object_null(string name, bool = false);
+      //
+      // void next_expect_member_array     (string name, bool = false);
+      // bool next_expect_member_array_null(string name, bool = false);
+      //
+      // void next_expect_value_skip();
+
+      // Get the next event and make sure that it's what's expected: primary
+      // or, if specified, secondary event. If it is not either, then throw
+      // invalid_json_input with appropriate description. Return true if it is
+      // primary.
+      //
+      // The secondary expected event is primarily useful for handling
+      // optional members. For example:
+      //
+      //     while (p.next_expect (event::name, event::end_object))
+      //     {
+      //       // Handle object member.
+      //     }
+      //
+      // Or homogeneous arrays:
+      //
+      //     while (p.next_expect (event::string, event::end_array))
+      //     {
+      //       // Handle array element.
+      //     }
+      //
+      // Or values that can be null:
+      //
+      //     if (p.next_expect (event::begin_object, event::null))
+      //     {
+      //       // Parse object.
+      //     }
+      //
+      bool
+      next_expect (event primary, optional<event> secondary = nullopt);
+
+      // Get the next event and make sure it is event::name and the object
+      // member matches the specified name. If either is not, then throw
+      // invalid_json_input with appropriate description. If skip_unknown is
+      // true, then skip over unknown member names until a match is found.
+      //
+      void
+      next_expect_name (const char* name, bool skip_unknown = false);
+
+      void
+      next_expect_name (const std::string&, bool = false);
+
+      // Get the next event and make sure it is event::<type> returning its
+      // value similar to the value() functions. If it is not, then throw
+      // invalid_json_input with appropriate description.
+      //
+      std::string&
+      next_expect_string ();
+
+      template <typename T>
+      T
+      next_expect_string ();
+
+      std::string&
+      next_expect_number ();
+
+      template <typename T>
+      T
+      next_expect_number ();
+
+      std::string&
+      next_expect_boolean ();
+
+      template <typename T>
+      T
+      next_expect_boolean ();
+
+      // Similar to next_expect_<type>() but in addition to event::<type> also
+      // allow event::null, in which case returning no value.
+      //
+      std::string*
+      next_expect_string_null ();
+
+      template <typename T>
+      optional<T>
+      next_expect_string_null ();
+
+      std::string*
+      next_expect_number_null ();
+
+      template <typename T>
+      optional<T>
+      next_expect_number_null ();
+
+      std::string*
+      next_expect_boolean_null ();
+
+      template <typename T>
+      optional<T>
+      next_expect_boolean_null ();
+
+      // Call next_expect_name() followed by next_expect_<type>[_null]()
+      // returning its result. In other words, parse the entire object member
+      // with the specified name and of type <type>, returning its value.
+
+      // next_expect_member_string()
+      //
+      std::string&
+      next_expect_member_string (const char* name, bool skip_unknown = false);
+
+      std::string&
+      next_expect_member_string (const std::string&, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_string (const char*, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_string (const std::string&, bool = false);
+
+      // next_expect_member_number()
+      //
+      std::string&
+      next_expect_member_number (const char* name, bool skip_unknown = false);
+
+      std::string&
+      next_expect_member_number (const std::string&, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_number (const char*, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_number (const std::string&, bool = false);
+
+      // next_expect_member_boolean()
+      //
+      std::string&
+      next_expect_member_boolean (const char* name, bool skip_unknown = false);
+
+      std::string&
+      next_expect_member_boolean (const std::string&, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_boolean (const char*, bool = false);
+
+      template <typename T>
+      T
+      next_expect_member_boolean (const std::string&, bool = false);
+
+      // next_expect_member_string_null()
+      //
+      std::string*
+      next_expect_member_string_null (const char*, bool = false);
+
+      std::string*
+      next_expect_member_string_null (const std::string&, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_string_null (const char*, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_string_null (const std::string&, bool = false);
+
+      // next_expect_member_number_null()
+      //
+      std::string*
+      next_expect_member_number_null (const char*, bool = false);
+
+      std::string*
+      next_expect_member_number_null (const std::string&, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_number_null (const char*, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_number_null (const std::string&, bool = false);
+
+      // next_expect_member_boolean_null()
+      //
+      std::string*
+      next_expect_member_boolean_null (const char*, bool = false);
+
+      std::string*
+      next_expect_member_boolean_null (const std::string&, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_boolean_null (const char*, bool = false);
+
+      template <typename T>
+      optional<T>
+      next_expect_member_boolean_null (const std::string&, bool = false);
+
+      // Call next_expect_name() followed by next_expect(event::begin_object).
+      // In the _null version also allow event::null, in which case return
+      // false.
+      //
+      void
+      next_expect_member_object (const char* name, bool skip_unknown = false);
+
+      void
+      next_expect_member_object (const std::string&, bool = false);
+
+      bool
+      next_expect_member_object_null (const char*, bool = false);
+
+      bool
+      next_expect_member_object_null (const std::string&, bool = false);
+
+      // Call next_expect_name() followed by next_expect(event::begin_array).
+      // In the _null version also allow event::null, in which case return
+      // false.
+      //
+      void
+      next_expect_member_array (const char* name, bool skip_unknown = false);
+
+      void
+      next_expect_member_array (const std::string&, bool = false);
+
+      bool
+      next_expect_member_array_null (const char*, bool = false);
+
+      bool
+      next_expect_member_array_null (const std::string&, bool = false);
+
+      // Get the next event and make sure it is the beginning of a value
+      // (begin_object, begin_array, string, number, boolean, null). If it is
+      // not, then throw invalid_json_input with appropriate description.
+      // Otherwise, skip until the end of the value, recursively in case of
+      // object and array.
+      //
+      // This function is primarily useful for skipping unknown object
+      // members, for example:
+      //
+      //     while (p.next_expect (event::name, event::end_object))
+      //     {
+      //       if (p.name () == "known")
+      //       {
+      //         // Handle known member.
+      //       }
+      //       else
+      //         p.next_expect_value_skip ();
+      //     }
+      //
+      void
+      next_expect_value_skip ();
+
+      // Parsing location.
+      //
+
+      // Return the line number (1-based) corresponding to the most recently
+      // parsed event or 0 if nothing has been parsed yet.
+      //
+      std::uint64_t
+      line () const noexcept;
+
+      // Return the column number (1-based) corresponding to the beginning of
+      // the most recently parsed event or 0 if nothing has been parsed yet.
+      //
+      std::uint64_t
+      column () const noexcept;
+
+      // Return the position (byte offset) pointing immediately after the most
+      // recently parsed event or 0 if nothing has been parsed yet.
+      //
+      std::uint64_t
+      position () const noexcept;
+
+      // Implementation details.
+      //
+    public:
+      // Minimal iterator over parser events: holds the parser and the
+      // current event, with the absent event denoting the end.
+      //
+      struct iterator
+      {
+        using value_type = event;
+
+        explicit
+        iterator (parser* p = nullptr, optional<event> e = nullopt)
+            : p_ (p), e_ (e)
+        {
+        }
+
+        // Return the current event (which must be present).
+        //
+        event
+        operator* () const
+        {
+          return *e_;
+        }
+
+        // Advance by parsing the next event.
+        //
+        iterator&
+        operator++ ()
+        {
+          e_ = p_->next ();
+          return *this;
+        }
+
+        // Comparison only makes sense when comparing to end (eof): two
+        // iterators are equal only if neither holds an event.
+        //
+        bool
+        operator== (iterator y) const
+        {
+          return !(e_ || y.e_);
+        }
+
+        bool
+        operator!= (iterator y) const
+        {
+          return e_ || y.e_;
+        }
+
+      private:
+        parser* p_;
+        optional<event> e_;
+      };
+
+      // Input stream state: the stream itself plus an optional exception
+      // pointer. next_impl() only examines this state if `is` is not NULL,
+      // in which case it rethrows `exception` if present and otherwise
+      // checks the stream's fail() state.
+      //
+      // NOTE(review): presumably `is` is NULL when parsing a memory buffer
+      // and `exception` is set by the stream reading code -- confirm against
+      // the constructors.
+      //
+      struct stream
+      {
+        std::istream*                is;
+        optional<std::exception_ptr> exception;
+      };
+
+      [[noreturn]] void
+      throw_invalid_value (const char* type, const char*, std::size_t) const;
+
+      ~parser ();
+
+    private:
+      // Functionality shared by next() and peek().
+      //
+      json_type
+      next_impl ();
+
+      // Translate the event produced by the most recent call to next_impl().
+      //
+      // Note that the underlying parser state determines whether name or
+      // value is returned when translating JSON_STRING.
+      //
+      optional<event>
+      translate (json_type) const noexcept;
+
+      // Cache state (name/value) produced by the most recent call to
+      // next_impl().
+      //
+      void
+      cache_parsed_data ();
+
+      // Cache the location numbers as determined by the most recent call to
+      // next_impl().
+      //
+      void
+      cache_parsed_location () noexcept;
+
+      // Return true if this is a value event (string, number, boolean, or
+      // null).
+      //
+      static bool
+      value_event (optional<event>) noexcept;
+
+      stream stream_;
+
+      bool multi_value_;
+      const char* separators_;
+
+      // The *_p_ members indicate whether the value is present (cached).
+      // Note: not using optional not to reallocate the string's buffer.
+      //
+      std::string name_;                       bool name_p_     = false;
+      std::string value_;                      bool value_p_    = false;
+      std::uint64_t line_, column_, position_; bool location_p_ = false;
+
+      optional<json_type> parsed_; // Current parsed event if any.
+      optional<json_type> peeked_; // Current peeked event if any.
+
+      ::json_stream impl_[1];
+
+      // Cached raw value.
+      //
+      const char* raw_s_;
+      std::size_t raw_n_;
+    };
+  }
+}
+
+#include <libbutl/json/parser.ixx>
diff --git a/libbutl/json/parser.ixx b/libbutl/json/parser.ixx
new file mode 100644
index 0000000..cf6dca3
--- /dev/null
+++ b/libbutl/json/parser.ixx
@@ -0,0 +1,552 @@
+#include <cerrno>
+#include <limits>      // numeric_limits
+#include <utility>     // move()
+#include <cassert>
+#include <cstdlib>     // strto*()
+#include <type_traits> // enable_if, is_*
+#include <cstring>     // strlen(), memchr()
+
+namespace butl
+{
+  namespace json
+  {
+    inline invalid_json_input::
+    invalid_json_input (std::string n,
+                        std::uint64_t l,
+                        std::uint64_t c,
+                        std::uint64_t p,
+                        const std::string& d) // Description (as C string).
+        : invalid_json_input (std::move (n), l, c, p, d.c_str ())
+    {
+    }
+
+    inline invalid_json_input::
+    invalid_json_input (std::string n,
+                        std::uint64_t l,
+                        std::uint64_t c,
+                        std::uint64_t p,
+                        const char* d)
+        : invalid_argument (d),     // Base class receives the description.
+          name (std::move (n)),     // Taken by value and moved into place.
+          line (l), column (c), position (p)
+    {
+    }
+
+    inline parser:: // Delegates with the name as a C string.
+    parser (std::istream& is,
+            const std::string& n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (is, n.c_str (), mv, sep)
+    {
+    }
+
+    inline parser:: // Delegates with the name as a C string.
+    parser (const void* t,
+            std::size_t s,
+            const std::string& n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (t, s, n.c_str (), mv, sep)
+    {
+    }
+
+    inline parser:: // Delegates: text as (data, size), name as C string.
+    parser (const std::string& t,
+            const std::string& n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (t.data (), t.size (), n.c_str (), mv, sep)
+    {
+    }
+
+    inline parser:: // Delegates: text as (data, size).
+    parser (const std::string& t,
+            const char* n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (t.data (), t.size (), n, mv, sep)
+    {
+    }
+
+    inline parser:: // Delegates: text length via strlen(), name as C string.
+    parser (const char* t,
+            const std::string& n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (t, std::strlen (t), n.c_str (), mv, sep)
+    {
+    }
+
+    inline parser:: // Delegates: text length via strlen().
+    parser (const char* t,
+            const char* n,
+            bool mv,
+            const char* sep) noexcept
+        : parser (t, std::strlen (t), n, mv, sep)
+    {
+    }
+
+    inline const std::string& parser::
+    name ()
+    {
+      if (name_p_) // Already cached.
+        return name_;
+
+      assert (parsed_ && !peeked_ && !value_p_);
+      cache_parsed_data ();
+      assert (name_p_);
+      return name_;
+    }
+
+    inline std::string& parser::
+    value ()
+    {
+      if (value_p_) // Already cached.
+        return value_;
+
+      assert (parsed_ && !peeked_ && !name_p_);
+      cache_parsed_data ();
+      assert (value_p_);
+      return value_;
+    }
+
+    // Note: one day we will be able to use C++17 from_chars() which was made
+    // exactly for this.
+    //
+    template <typename T> // The bool case.
+    inline typename std::enable_if<std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t, const parser&)
+    {
+      return *b == 't'; // Only the first character is examined.
+    }
+
+    template <typename T> // The signed integer case.
+    inline typename std::enable_if<
+      std::is_integral<T>::value &&
+      std::is_signed<T>::value &&
+      !std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      std::int64_t v (strtoll (b, &e, 10)); // Can't throw.
+      // Reject an empty or partial parse and values outside of T's range.
+      if (e == b || e != b + n || errno == ERANGE ||
+          v < std::numeric_limits<T>::min () ||
+          v > std::numeric_limits<T>::max ())
+        p.throw_invalid_value ("signed integer", b, n);
+
+      return static_cast<T> (v);
+    }
+
+    template <typename T> // The unsigned integer case.
+    inline typename std::enable_if<
+      std::is_integral<T>::value &&
+      std::is_unsigned<T>::value &&
+      !std::is_same<T, bool>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      std::uint64_t v (strtoull (b, &e, 10)); // Can't throw.
+      // Note: strtoull() accepts '-' (negating the result), so reject it.
+      if (e == b || e != b + n || errno == ERANGE ||
+          v > std::numeric_limits<T>::max () || std::memchr (b, '-', n))
+        p.throw_invalid_value ("unsigned integer", b, n);
+
+      return static_cast<T> (v);
+    }
+
+    template <typename T> // The float case.
+    inline typename std::enable_if<std::is_same<T, float>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      T r (std::strtof (b, &e));
+      // Reject an empty or partial parse as well as a range error.
+      if (e == b || e != b + n || errno == ERANGE)
+        p.throw_invalid_value ("float", b, n);
+
+      return r;
+    }
+
+    template <typename T> // The double case.
+    inline typename std::enable_if<std::is_same<T, double>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      T r (std::strtod (b, &e));
+      // Reject an empty or partial parse as well as a range error.
+      if (e == b || e != b + n || errno == ERANGE)
+        p.throw_invalid_value ("double", b, n);
+
+      return r;
+    }
+
+    template <typename T> // The long double case.
+    inline typename std::enable_if<std::is_same<T, long double>::value, T>::type
+    parse_value (const char* b, size_t n, const parser& p)
+    {
+      char* e (nullptr);
+      errno = 0; // We must clear it according to POSIX.
+      T r (std::strtold (b, &e));
+      // Reject an empty or partial parse as well as a range error.
+      if (e == b || e != b + n || errno == ERANGE)
+        p.throw_invalid_value ("long double", b, n);
+
+      return r;
+    }
+
+    template <typename T>
+    inline T parser::
+    value () const
+    {
+      // Prefer the cached copy; otherwise parse the raw value in place.
+      //
+      if (value_p_)
+        return parse_value<T> (value_.data (), value_.size (), *this);
+
+      assert (parsed_ && !peeked_ && value_event (translate (*parsed_)));
+      return parse_value<T> (raw_s_, raw_n_, *this);
+    }
+
+    inline void parser:: // Forwards to the const char* overload.
+    next_expect_name (const std::string& n, bool su)
+    {
+      next_expect_name (n.c_str (), su);
+    }
+
+    // next_expect_<type>()
+    //
+    inline std::string& parser::
+    next_expect_string ()
+    {
+      next_expect (event::string); // Consume, then return the value.
+      return value ();
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_string ()
+    {
+      next_expect (event::string); // Consume, then return the value as T.
+      return value<T> ();
+    }
+
+    inline std::string& parser::
+    next_expect_number ()
+    {
+      next_expect (event::number); // Consume, then return the value.
+      return value ();
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_number ()
+    {
+      next_expect (event::number); // Consume, then return the value as T.
+      return value<T> ();
+    }
+
+    inline std::string& parser::
+    next_expect_boolean ()
+    {
+      next_expect (event::boolean); // Consume, then return the value.
+      return value ();
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_boolean ()
+    {
+      next_expect (event::boolean); // Consume, then return the value as T.
+      return value<T> ();
+    }
+
+    // next_expect_<type>_null()
+    //
+    inline std::string* parser:: // nullptr if next_expect() returns false.
+    next_expect_string_null ()
+    {
+      return next_expect (event::string, event::null) ? &value () : nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_string_null ()
+    {
+      if (next_expect (event::string, event::null))
+        return optional<T> (value<T> ()); // Converted by value<T>().
+      return nullopt;
+    }
+
+    inline std::string* parser:: // nullptr if next_expect() returns false.
+    next_expect_number_null ()
+    {
+      return next_expect (event::number, event::null) ? &value () : nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_number_null ()
+    {
+      if (next_expect (event::number, event::null))
+        return optional<T> (value<T> ()); // Converted by value<T>().
+      return nullopt;
+    }
+
+    inline std::string* parser:: // nullptr if next_expect() returns false.
+    next_expect_boolean_null ()
+    {
+      return next_expect (event::boolean, event::null) ? &value () : nullptr;
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_boolean_null ()
+    {
+      if (next_expect (event::boolean, event::null))
+        return optional<T> (value<T> ()); // Converted by value<T>().
+      return nullopt;
+    }
+
+    // next_expect_member_string()
+    //
+    inline std::string& parser::
+    next_expect_member_string (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value.
+      return next_expect_string ();
+    }
+
+    inline std::string& parser:: // Forwards to the const char* overload.
+    next_expect_member_string (const std::string& n, bool su)
+    {
+      return next_expect_member_string (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_member_string (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value as T.
+      return next_expect_string<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline T parser::
+    next_expect_member_string (const std::string& n, bool su)
+    {
+      return next_expect_member_string<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_number()
+    //
+    inline std::string& parser::
+    next_expect_member_number (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value.
+      return next_expect_number ();
+    }
+
+    inline std::string& parser:: // Forwards to the const char* overload.
+    next_expect_member_number (const std::string& n, bool su)
+    {
+      return next_expect_member_number (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_member_number (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value as T.
+      return next_expect_number<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline T parser::
+    next_expect_member_number (const std::string& n, bool su)
+    {
+      return next_expect_member_number<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_boolean()
+    //
+    inline std::string& parser::
+    next_expect_member_boolean (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value.
+      return next_expect_boolean ();
+    }
+
+    inline std::string& parser:: // Forwards to the const char* overload.
+    next_expect_member_boolean (const std::string& n, bool su)
+    {
+      return next_expect_member_boolean (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline T parser::
+    next_expect_member_boolean (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value as T.
+      return next_expect_boolean<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline T parser::
+    next_expect_member_boolean (const std::string& n, bool su)
+    {
+      return next_expect_member_boolean<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_string_null()
+    //
+    inline std::string* parser::
+    next_expect_member_string_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_string_null ();
+    }
+
+    inline std::string* parser:: // Forwards to the const char* overload.
+    next_expect_member_string_null (const std::string& n, bool su)
+    {
+      return next_expect_member_string_null (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_member_string_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_string_null<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline optional<T> parser::
+    next_expect_member_string_null (const std::string& n, bool su)
+    {
+      return next_expect_member_string_null<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_number_null()
+    //
+    inline std::string* parser::
+    next_expect_member_number_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_number_null ();
+    }
+
+    inline std::string* parser:: // Forwards to the const char* overload.
+    next_expect_member_number_null (const std::string& n, bool su)
+    {
+      return next_expect_member_number_null (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_member_number_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_number_null<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline optional<T> parser::
+    next_expect_member_number_null (const std::string& n, bool su)
+    {
+      return next_expect_member_number_null<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_boolean_null()
+    //
+    inline std::string* parser::
+    next_expect_member_boolean_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_boolean_null ();
+    }
+
+    inline std::string* parser:: // Forwards to the const char* overload.
+    next_expect_member_boolean_null (const std::string& n, bool su)
+    {
+      return next_expect_member_boolean_null (n.c_str (), su);
+    }
+
+    template <typename T>
+    inline optional<T> parser::
+    next_expect_member_boolean_null (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the value (or null).
+      return next_expect_boolean_null<T> ();
+    }
+
+    template <typename T> // Forwards to the const char* overload.
+    inline optional<T> parser::
+    next_expect_member_boolean_null (const std::string& n, bool su)
+    {
+      return next_expect_member_boolean_null<T> (n.c_str (), su);
+    }
+
+    // next_expect_member_object[_null]()
+    //
+    inline void parser::
+    next_expect_member_object (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the object's beginning.
+      next_expect (event::begin_object);
+    }
+
+    inline void parser:: // Forwards to the const char* overload.
+    next_expect_member_object (const std::string& n, bool su)
+    {
+      next_expect_member_object (n.c_str (), su);
+    }
+
+    inline bool parser:: // Returns the two-event next_expect() result.
+    next_expect_member_object_null (const char* n, bool su)
+    {
+      next_expect_name (n, su);
+      return next_expect (event::begin_object, event::null);
+    }
+
+    inline bool parser:: // Forwards to the const char* overload.
+    next_expect_member_object_null (const std::string& n, bool su)
+    {
+      return next_expect_member_object_null (n.c_str (), su);
+    }
+
+    // next_expect_member_array[_null]()
+    //
+    inline void parser::
+    next_expect_member_array (const char* n, bool su)
+    {
+      next_expect_name (n, su); // Name first, then the array's beginning.
+      next_expect (event::begin_array);
+    }
+
+    inline void parser:: // Forwards to the const char* overload.
+    next_expect_member_array (const std::string& n, bool su)
+    {
+      next_expect_member_array (n.c_str (), su);
+    }
+
+    inline bool parser:: // Returns the two-event next_expect() result.
+    next_expect_member_array_null (const char* n, bool su)
+    {
+      next_expect_name (n, su);
+      return next_expect (event::begin_array, event::null);
+    }
+
+    inline bool parser:: // Forwards to the const char* overload.
+    next_expect_member_array_null (const std::string& n, bool su)
+    {
+      return next_expect_member_array_null (n.c_str (), su);
+    }
+  }
+}
diff --git a/libbutl/json/pdjson.c b/libbutl/json/pdjson.c
new file mode 100644
index 0000000..ae10c95
--- /dev/null
+++ b/libbutl/json/pdjson.c
@@ -0,0 +1,1044 @@
+#ifndef _POSIX_C_SOURCE
+#  define _POSIX_C_SOURCE 200112L
+#elif _POSIX_C_SOURCE < 200112L
+#  error incompatible _POSIX_C_SOURCE level
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifndef PDJSON_H
+#  include "pdjson.h"
+#endif
+
+#define JSON_FLAG_ERROR      (1u << 0)
+#define JSON_FLAG_STREAMING  (1u << 1)
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+
+#define json_error(json, format, ...)                             \
+    if (!(json->flags & JSON_FLAG_ERROR)) {                       \
+        json->flags |= JSON_FLAG_ERROR;                           \
+        _snprintf_s(json->errmsg, sizeof(json->errmsg),           \
+                 _TRUNCATE,                                       \
+                 format,                                          \
+                 __VA_ARGS__);                                    \
+    }                                                             \
+
+#else
+
+#define json_error(json, format, ...)                             \
+    if (!(json->flags & JSON_FLAG_ERROR)) {                       \
+        json->flags |= JSON_FLAG_ERROR;                           \
+        snprintf(json->errmsg, sizeof(json->errmsg),              \
+                 format,                                          \
+                 __VA_ARGS__);                                    \
+    }                                                             \
+
+#endif /* _MSC_VER */
+
+/* See also PDJSON_STACK_MAX below. */
+#ifndef PDJSON_STACK_INC
+#  define PDJSON_STACK_INC 4
+#endif
+
+struct json_stack {
+    enum json_type type; /* Frame kind, as passed to push(). */
+    long count;          /* Zeroed by push(); presumably an item count. */
+};
+
+static enum json_type
+push(json_stream *json, enum json_type type)
+{
+    /* Enter the new frame up front; every failure path below undoes this so
+     * that stack_top never names a frame that was not actually stored. */
+    json->stack_top++;
+#ifdef PDJSON_STACK_MAX
+    if (json->stack_top > PDJSON_STACK_MAX) {
+        json->stack_top--;
+        json_error(json, "%s", "maximum depth of nesting reached");
+        return JSON_ERROR;
+    }
+#endif
+
+    if (json->stack_top >= json->stack_size) {
+        size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack);
+        struct json_stack *stack =
+            (struct json_stack *)json->alloc.realloc(json->stack, size);
+        if (stack == NULL) {
+            json->stack_top--;
+            json_error(json, "%s", "out of memory");
+            return JSON_ERROR;
+        }
+        json->stack_size += PDJSON_STACK_INC;
+        json->stack = stack;
+    }
+    json->stack[json->stack_top].type = type;
+    json->stack[json->stack_top].count = 0;
+    return type;
+}
+
+/* Note: c is assumed not to be EOF. */
+static enum json_type
+pop(json_stream *json, int c, enum json_type expected)
+{
+    if (json->stack != NULL && json->stack[json->stack_top].type == expected) {
+        json->stack_top--;
+        return expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END;
+    }
+    json_error(json, "unexpected byte '%c'", c);
+    return JSON_ERROR;
+}
+
+static int buffer_peek(struct json_source *source)
+{
+    /* Widen via unsigned char (as fgetc() does) so byte 0xFF != EOF. */
+    if (source->position >= source->source.buffer.length)
+        return EOF;
+    return (unsigned char)source->source.buffer.buffer[source->position];
+}
+
+static int buffer_get(struct json_source *source)
+{
+    /* Look at the next byte and step past it unless the input has ended. */
+    const int byte = source->peek(source);
+    source->position += (byte != EOF);
+    return byte;
+}
+
+static int stream_get(struct json_source *source)
+{
+    /* Count the byte toward the source position unless the stream ended. */
+    const int byte = fgetc(source->source.stream.stream);
+    source->position += (byte != EOF);
+    return byte;
+}
+
+static int stream_peek(struct json_source *source)
+{
+    int c = fgetc(source->source.stream.stream);
+    ungetc(c, source->source.stream.stream); /* no effect when c is EOF */
+    return c;
+}
+
+static void init(json_stream *json)
+{
+    json->lineno = 1;                  /* line numbers start at 1 */
+    json->linepos = 0;
+    json->lineadj = 0;
+    json->linecon = 0;
+    json->colno = 0;
+    json->flags = JSON_FLAG_STREAMING; /* streaming on, error flag clear */
+    json->errmsg[0] = '\0';            /* no error message yet */
+    json->ntokens = 0;
+    json->next = (enum json_type)0;
+
+    json->stack = NULL;                /* allocated on demand by push() */
+    json->stack_top = -1;              /* no frame; push() increments first */
+    json->stack_size = 0;
+
+    json->data.string = NULL;          /* allocated by init_string() */
+    json->data.string_size = 0;
+    json->data.string_fill = 0;
+    json->source.position = 0;
+
+    json->alloc.malloc = malloc;       /* default: C library allocator */
+    json->alloc.realloc = realloc;
+    json->alloc.free = free;
+}
+
+static enum json_type
+is_match(json_stream *json, const char *pattern, enum json_type type)
+{
+    for (const char *p = pattern; *p != '\0'; p++) {
+        const int c = json->source.get(&json->source); /* next input byte */
+        if (c != *p) {
+            if (c == EOF) {
+                json_error(json, "expected '%c' instead of end of text", *p);
+            } else {
+                json_error(json, "expected '%c' instead of byte '%c'", *p, c);
+            }
+            return JSON_ERROR;
+        }
+    }
+    return type;
+}
+
+static int pushchar(json_stream *json, int c)
+{
+    /* Buffer full: double its capacity before storing the byte. */
+    if (json->data.string_fill == json->data.string_size) {
+        size_t grown = json->data.string_size * 2;
+        char *data = (char *)json->alloc.realloc(json->data.string, grown);
+        if (data == NULL) {
+            json_error(json, "%s", "out of memory");
+            return -1;
+        }
+        json->data.string = data;
+        json->data.string_size = grown;
+    }
+    json->data.string[json->data.string_fill++] = c;
+    return 0;
+}
+
+static int init_string(json_stream *json)
+{
+    json->data.string_fill = 0; /* start a new, empty string */
+    if (json->data.string == NULL) { /* first use: allocate the buffer */
+        json->data.string_size = 1024;
+        json->data.string = (char *)json->alloc.malloc(json->data.string_size);
+        if (json->data.string == NULL) {
+            json_error(json, "%s", "out of memory");
+            return -1;
+        }
+    }
+    json->data.string[0] = '\0';
+    return 0;
+}
+
+static int encode_utf8(json_stream *json, unsigned long c)
+{
+    if (c < 0x80UL) { /* 1 byte */
+        return pushchar(json, c);
+    } else if (c < 0x0800UL) { /* 2 bytes; note: failure yields 1, not -1 */
+        return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) &&
+                 (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0));
+    } else if (c < 0x010000UL) { /* 3 bytes */
+        if (c >= 0xd800 && c <= 0xdfff) { /* surrogate range is rejected */
+            json_error(json, "invalid codepoint %06lx", c);
+            return -1;
+        }
+        return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) &&
+                 (pushchar(json, (c >>  6 & 0x3F) | 0x80) == 0) &&
+                 (pushchar(json, (c >>  0 & 0x3F) | 0x80) == 0));
+    } else if (c < 0x110000UL) { /* 4 bytes */
+        return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) &&
+                (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) &&
+                (pushchar(json, (c >> 6  & 0x3F) | 0x80) == 0) &&
+                (pushchar(json, (c >> 0  & 0x3F) | 0x80) == 0));
+    } else { /* 0x110000 and up cannot be encoded */
+        json_error(json, "unable to encode %06lx as UTF-8", c);
+        return -1;
+    }
+}
+
+/* Map a hexadecimal digit character to its numeric value.
+ *
+ * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
+ * input (which includes EOF).
+ */
+static int hexchar(int c)
+{
+    /* The C standard guarantees that '0'-'9' are contiguous. */
+    if (c >= '0' && c <= '9')
+        return c - '0';
+
+    /* Letters: accept either case for each digit value. */
+    switch (c) {
+    case 'a': case 'A':
+        return 10;
+    case 'b': case 'B':
+        return 11;
+    case 'c': case 'C':
+        return 12;
+    case 'd': case 'D':
+        return 13;
+    case 'e': case 'E':
+        return 14;
+    case 'f': case 'F':
+        return 15;
+    default:
+        return -1;
+    }
+}
+
+/* Read the four hex digits of a Unicode escape and return them as a
+ * 16-bit value, or -1 (with the error set) on end of input or a non-hex
+ * byte. */
+static long
+read_unicode_cp(json_stream *json)
+{
+    long cp = 0;
+
+    /* Most significant nibble first: shifts of 12, 8, 4, and 0 bits. */
+    for (int shift = 12; shift >= 0; shift -= 4) {
+        int c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        }
+
+        int digit = hexchar(c);
+        if (digit == -1) {
+            json_error(json, "invalid escape Unicode byte '%c'", c);
+            return -1;
+        }
+        cp |= (long)digit << shift;
+    }
+    return cp;
+}
+
+static int read_unicode(json_stream *json)
+{
+    long cp, h, l; /* code point; high and low surrogate halves */
+
+    if ((cp = read_unicode_cp(json)) == -1) {
+        return -1;
+    }
+
+    if (cp >= 0xd800 && cp <= 0xdbff) {
+        /* This is the high portion of a surrogate pair; we need to read the
+         * lower portion to get the codepoint
+         */
+        h = cp;
+
+        int c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        } else if (c != '\\') {
+            json_error(json, "invalid continuation for surrogate pair '%c', "
+                             "expected '\\'", c);
+            return -1;
+        }
+
+        c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal in Unicode");
+            return -1;
+        } else if (c != 'u') {
+            json_error(json, "invalid continuation for surrogate pair '%c', "
+                             "expected 'u'", c);
+            return -1;
+        }
+
+        if ((l = read_unicode_cp(json)) == -1) {
+            return -1;
+        }
+
+        if (l < 0xdc00 || l > 0xdfff) {
+            json_error(json, "surrogate pair continuation \\u%04lx out "
+                             "of range (dc00-dfff)", l);
+            return -1;
+        }
+        /* Combine: 10 bits from each half, offset by 0x10000. */
+        cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
+    } else if (cp >= 0xdc00 && cp <= 0xdfff) { /* low half on its own */
+            json_error(json, "dangling surrogate \\u%04lx", cp);
+            return -1;
+    }
+
+    return encode_utf8(json, cp); /* append as UTF-8 to the string buffer */
+}
+
+/* Decode the escape sequence that follows a backslash inside a string and
+ * append the resulting character(s) to the string buffer.
+ *
+ * Returns 0 on success and -1 on failure (end of input, an unknown escape
+ * character, or a failure reported by read_unicode() or pushchar()).
+ */
+static int
+read_escaped(json_stream *json)
+{
+    /* The byte after the backslash selects the escape. */
+    int c = json->source.get(&json->source);
+    int decoded;
+
+    switch (c) {
+    case EOF:
+        json_error(json, "%s", "unterminated string literal in escape");
+        return -1;
+    case 'u':
+        return read_unicode(json) != 0 ? -1 : 0;
+    case '\\': decoded = '\\'; break;
+    case 'b':  decoded = '\b'; break;
+    case 'f':  decoded = '\f'; break;
+    case 'n':  decoded = '\n'; break;
+    case 'r':  decoded = '\r'; break;
+    case 't':  decoded = '\t'; break;
+    case '/':  decoded = '/';  break;
+    case '"':  decoded = '"';  break;
+    default:
+        json_error(json, "invalid escaped byte '%c'", c);
+        return -1;
+    }
+
+    return pushchar(json, decoded) != 0 ? -1 : 0;
+}
+
+/* Return 1 if c may not appear unescaped in a JSON string: a control
+ * character below 0x20, the double quote (0x22), or the backslash (0x5c).
+ * Return 0 otherwise, including for negative values. */
+static int
+char_needs_escaping(int c)
+{
+    const int is_control = c >= 0 && c < 0x20;
+    return is_control || c == 0x22 || c == 0x5c;
+}
+
+/* Classify a UTF-8 lead byte.
+ *
+ * Returns the length in bytes (1-4) of the sequence that byte starts, or
+ * 0 if byte cannot start a valid sequence.
+ */
+static int
+utf8_seq_length(char byte)
+{
+    const unsigned char lead = (unsigned char) byte;
+
+    if (lead < 0x80) {
+        /* ASCII: the byte is the whole character. */
+        return 1;
+    }
+    if (lead < 0xC0) {
+        /* 0x80-0xBF: second, third or fourth byte of a multi-byte
+         * sequence, i.e. a "continuation byte". */
+        return 0;
+    }
+    if (lead < 0xC2) {
+        /* 0xC0, 0xC1: overlong encoding of an ASCII byte. */
+        return 0;
+    }
+    if (lead < 0xE0) {
+        /* 0xC2-0xDF: 2-byte sequence. */
+        return 2;
+    }
+    if (lead < 0xF0) {
+        /* 0xE0-0xEF: 3-byte sequence. */
+        return 3;
+    }
+    if (lead < 0xF5) {
+        /* 0xF0-0xF4: 4-byte sequence. */
+        return 4;
+    }
+    /* 0xF5 and up: restricted (start of a 4-, 5- or 6-byte sequence) or
+     * invalid UTF-8. */
+    return 0;
+}
+
+static int
+is_legal_utf8(const unsigned char *bytes, int length)
+{
+    if (0 == bytes || 0 == length) return 0; /* nothing to validate */
+
+    unsigned char a;
+    const unsigned char* srcptr = bytes + length; /* one past the last byte */
+    switch (length)
+    {
+    default:
+        return 0;
+        // Everything else falls through when true.
+    case 4:
+        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+        /* FALLTHRU */
+    case 3:
+        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+        /* FALLTHRU */
+    case 2:
+        a = (*--srcptr); /* second byte: its range depends on the lead byte */
+        switch (*bytes)
+        {
+        case 0xE0: /* excludes overlong 3-byte forms */
+            if (a < 0xA0 || a > 0xBF) return 0;
+            break;
+        case 0xED: /* excludes the surrogate range */
+            if (a < 0x80 || a > 0x9F) return 0;
+            break;
+        case 0xF0: /* excludes overlong 4-byte forms */
+            if (a < 0x90 || a > 0xBF) return 0;
+            break;
+        case 0xF4: /* excludes values above 0x10FFFF */
+            if (a < 0x80 || a > 0x8F) return 0;
+            break;
+        default:
+            if (a < 0x80 || a > 0xBF) return 0;
+            break;
+        }
+        /* FALLTHRU */
+    case 1:
+        if (*bytes >= 0x80 && *bytes < 0xC2) return 0; /* not a lead byte */
+    }
+    return *bytes <= 0xF4;
+}
+
+static int
+read_utf8(json_stream* json, int next_char)
+{
+    int count = utf8_seq_length(next_char); /* 0: invalid lead byte */
+    if (!count)
+    {
+        json_error(json, "%s", "invalid UTF-8 character");
+        return -1;
+    }
+
+    char buffer[4]; /* utf8_seq_length() returns at most 4 */
+    buffer[0] = next_char;
+    int i;
+    for (i = 1; i < count; ++i) /* read the rest of the sequence */
+    {
+        if ((next_char = json->source.get(&json->source)) == EOF)
+            break;
+
+        buffer[i] = next_char;
+        json->lineadj++; /* NOTE(review): presumably column bookkeeping */
+    }
+
+    if (i != count || !is_legal_utf8((unsigned char*) buffer, count))
+    {
+        json_error(json, "%s", "invalid UTF-8 text");
+        return -1;
+    }
+
+    for (i = 0; i < count; ++i) /* copy the validated bytes verbatim */
+    {
+        if (pushchar(json, buffer[i]) != 0)
+            return -1;
+    }
+    return 0;
+}
+
+static enum json_type
+read_string(json_stream *json)
+{
+    if (init_string(json) != 0) /* reset (or allocate) the string buffer */
+        return JSON_ERROR;
+    while (1) {
+        int c = json->source.get(&json->source);
+        if (c == EOF) {
+            json_error(json, "%s", "unterminated string literal");
+            return JSON_ERROR;
+        } else if (c == '"') { /* closing quote: terminate and finish */
+            if (pushchar(json, '\0') == 0)
+                return JSON_STRING;
+            else
+                return JSON_ERROR;
+        } else if (c == '\\') {
+            if (read_escaped(json) != 0)
+                return JSON_ERROR;
+        } else if ((unsigned) c >= 0x80) { /* non-ASCII (or negative) byte */
+            if (read_utf8(json, c) != 0)
+                return JSON_ERROR;
+        } else {
+            if (char_needs_escaping(c)) {
+                json_error(json, "%s", "unescaped control character in string");
+                return JSON_ERROR;
+            }
+
+            if (pushchar(json, c) != 0)
+                return JSON_ERROR;
+        }
+    }
+    return JSON_ERROR; /* not reached: the loop only exits via return */
+}
+
+static int
+is_digit(int c)
+{
+    return !(c < 48 /*0*/ || c > 57 /*9*/);
+}
+
+static int
+read_digits(json_stream *json)
+{
+    /* Copy digits to the string buffer until a non-digit is peeked. */
+    unsigned count = 0;
+    int c = json->source.peek(&json->source);
+    for (; is_digit(c); c = json->source.peek(&json->source)) {
+        if (pushchar(json, json->source.get(&json->source)) != 0)
+            return -1;
+        count++;
+    }
+
+    if (count > 0)
+        return 0;
+
+    /* At least one digit is required: report what was found instead. */
+    if (c == EOF) {
+        json_error(json, "%s", "expected digit instead of end of text");
+    } else {
+        json_error(json, "expected digit instead of byte '%c'", c);
+    }
+    return -1;
+}
+
+static enum json_type
+read_number(json_stream *json, int c)
+{
+    if (pushchar(json, c) != 0) /* c was already consumed by the caller */
+        return JSON_ERROR;
+    if (c == '-') {
+        c = json->source.get(&json->source);
+        if (is_digit(c)) {
+            return read_number(json, c); /* continue with the first digit */
+        } else {
+            if (c != EOF) {
+                json_error(json, "unexpected byte '%c' in number", c);
+            } else {
+                json_error(json, "%s", "unexpected end of text in number");
+            }
+            return JSON_ERROR;
+        }
+    } else if (strchr("123456789", c) != NULL) {
+        c = json->source.peek(&json->source); /* more digits are optional */
+        if (is_digit(c)) {
+            if (read_digits(json) != 0)
+                return JSON_ERROR;
+        }
+    }
+    /* Up to decimal or exponent has been read. */
+    c = json->source.peek(&json->source);
+    if (strchr(".eE", c) == NULL) { /* integer: terminate and finish */
+        if (pushchar(json, '\0') != 0)
+            return JSON_ERROR;
+        else
+            return JSON_NUMBER;
+    }
+    if (c == '.') {
+        json->source.get(&json->source); // consume .
+        if (pushchar(json, c) != 0)
+            return JSON_ERROR;
+        if (read_digits(json) != 0) /* at least one fraction digit */
+            return JSON_ERROR;
+    }
+    /* Check for exponent. */
+    c = json->source.peek(&json->source);
+    if (c == 'e' || c == 'E') {
+        json->source.get(&json->source); // consume e/E
+        if (pushchar(json, c) != 0)
+            return JSON_ERROR;
+        c = json->source.peek(&json->source);
+        if (c == '+' || c == '-') { /* optional exponent sign */
+            json->source.get(&json->source); // consume
+            if (pushchar(json, c) != 0)
+                return JSON_ERROR;
+            if (read_digits(json) != 0)
+                return JSON_ERROR;
+        } else if (is_digit(c)) {
+            if (read_digits(json) != 0)
+                return JSON_ERROR;
+        } else {
+            json->source.get(&json->source); // consume (for column)
+            if (c != EOF) {
+                json_error(json, "unexpected byte '%c' in number", c);
+            } else {
+                json_error(json, "%s", "unexpected end of text in number");
+            }
+            return JSON_ERROR;
+        }
+    }
+    if (pushchar(json, '\0') != 0) /* terminate the number text */
+        return JSON_ERROR;
+    else
+        return JSON_NUMBER;
+}
+
+/* Tell whether c is one of the four whitespace bytes permitted between JSON
+ * tokens: horizontal tab, line feed, carriage return, or space.
+ *
+ * Any other value, EOF included, is reported as non-whitespace.
+ */
+bool
+json_isspace(int c)
+{
+    if (c == 0x09 || c == 0x0a)
+        return true;
+
+    return c == 0x0d || c == 0x20;
+}
+
+static void newline(json_stream *json) /* Line bookkeeping after a '\n'. */
+{
+    /* The new line starts at the current position with no UTF-8 state. */
+    json->linecon = json->lineadj = 0;
+    json->linepos = json->source.position;
+    json->lineno += 1;
+}
+
+/* Consume bytes up to and including the first one that is not JSON
+ * whitespace and return it (possibly EOF). Apart from the user-facing
+ * json_source_get(), this is the only place that has to do the newline
+ * housekeeping.
+ */
+static int next(json_stream *json)
+{
+    for (;;) {
+        int ch = json->source.get(&json->source);
+        if (!json_isspace(ch)) return ch;
+        if (ch == '\n') newline(json);
+    }
+}
+
+/* Parse the value that begins with the already-consumed byte c and return
+ * its event type (JSON_ERROR on failure).
+ *
+ * The column in effect on entry is remembered and, if the value is valid,
+ * cached in json->colno so that json_get_column() reports where the value
+ * started rather than where it ended.
+ */
+static enum json_type
+read_value(json_stream *json, int c)
+{
+    const size_t start_col = json_get_column(json);
+    enum json_type result;
+
+    json->ntokens++;
+
+    switch (c) {
+    case EOF:
+        json_error(json, "%s", "unexpected end of text");
+        result = JSON_ERROR;
+        break;
+    case '{':
+        result = push(json, JSON_OBJECT);
+        break;
+    case '[':
+        result = push(json, JSON_ARRAY);
+        break;
+    case '"':
+        result = read_string(json);
+        break;
+    case 'n':
+        result = is_match(json, "ull", JSON_NULL);
+        break;
+    case 'f':
+        result = is_match(json, "alse", JSON_FALSE);
+        break;
+    case 't':
+        result = is_match(json, "rue", JSON_TRUE);
+        break;
+    case '-':
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+        result = init_string(json) == 0 ? read_number(json, c) : JSON_ERROR;
+        break;
+    default:
+        json_error(json, "unexpected byte '%c' in value", c);
+        result = JSON_ERROR;
+        break;
+    }
+
+    if (result == JSON_ERROR)
+        return JSON_ERROR;
+
+    json->colno = start_col;
+    return result;
+}
+
+/* Return the upcoming event without consuming it: the event is parsed once
+ * with json_next() and cached in json->next until json_next() hands it out.
+ */
+enum json_type json_peek(json_stream *json)
+{
+    if (json->next == 0)
+        json->next = json_next(json);
+    return json->next;
+}
+
+enum json_type json_next(json_stream *json)
+{
+    if (json->flags & JSON_FLAG_ERROR) /* json_reset() clears this flag. */
+        return JSON_ERROR;
+    if (json->next != 0) { /* Hand out the event cached by json_peek(). */
+        enum json_type next = json->next;
+        json->next = (enum json_type)0;
+        return next;
+    }
+    /* Drop the cached start column: json_get_column() is dynamic again. */
+    json->colno = 0;
+    /* Has a complete top-level value already been returned? */
+    if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
+
+        /* In the streaming mode leave any trailing whitespaces in the stream.
+         * This allows the user to validate any desired separation between
+         * values (such as newlines) using json_source_get/peek() with any
+         * remaining whitespaces ignored as leading when we parse the next
+         * value. */
+        if (!(json->flags & JSON_FLAG_STREAMING)) {
+            int c = next(json);
+            if (c != EOF) {
+                json_error(json, "expected end of text instead of byte '%c'", c);
+                return JSON_ERROR;
+            }
+        }
+
+        return JSON_DONE;
+    }
+    int c = next(json); /* First non-whitespace byte (or EOF). */
+    if (json->stack_top == (size_t)-1) { /* Not inside any array/object. */
+        if (c == EOF && (json->flags & JSON_FLAG_STREAMING))
+            return JSON_DONE;
+
+        return read_value(json, c);
+    }
+    if (json->stack[json->stack_top].type == JSON_ARRAY) {
+        if (json->stack[json->stack_top].count == 0) { /* No elements yet. */
+            if (c == ']') {
+                return pop(json, c, JSON_ARRAY);
+            }
+            json->stack[json->stack_top].count++;
+            return read_value(json, c);
+        } else if (c == ',') {
+            json->stack[json->stack_top].count++;
+            return read_value(json, next(json));
+        } else if (c == ']') {
+            return pop(json, c, JSON_ARRAY);
+        } else {
+            if (c != EOF) {
+                json_error(json, "unexpected byte '%c'", c);
+            } else {
+                json_error(json, "%s", "unexpected end of text");
+            }
+            return JSON_ERROR;
+        }
+    } else if (json->stack[json->stack_top].type == JSON_OBJECT) {
+        if (json->stack[json->stack_top].count == 0) {
+            if (c == '}') {
+                return pop(json, c, JSON_OBJECT);
+            }
+
+            /* No member name/value pairs yet. */
+            enum json_type value = read_value(json, c);
+            if (value != JSON_STRING) {
+                if (value != JSON_ERROR)
+                    json_error(json, "%s", "expected member name or '}'");
+                return JSON_ERROR;
+            } else {
+                json->stack[json->stack_top].count++;
+                return value;
+            }
+        } else if ((json->stack[json->stack_top].count % 2) == 0) {
+            /* Expecting comma followed by member name. */
+            if (c != ',' && c != '}') {
+                json_error(json, "%s", "expected ',' or '}' after member value");
+                return JSON_ERROR;
+            } else if (c == '}') {
+                return pop(json, c, JSON_OBJECT);
+            } else {
+                enum json_type value = read_value(json, next(json));
+                if (value != JSON_STRING) {
+                    if (value != JSON_ERROR)
+                        json_error(json, "%s", "expected member name");
+                    return JSON_ERROR;
+                } else {
+                    json->stack[json->stack_top].count++;
+                    return value;
+                }
+            }
+        } else if ((json->stack[json->stack_top].count % 2) == 1) {
+            /* Expecting colon followed by value. */
+            if (c != ':') {
+                json_error(json, "%s", "expected ':' after member name");
+                return JSON_ERROR;
+            } else {
+                json->stack[json->stack_top].count++;
+                return read_value(json, next(json));
+            }
+        }
+    }
+    json_error(json, "%s", "invalid parser state"); /* No branch applied. */
+    return JSON_ERROR;
+}
+
+void json_reset(json_stream *json) /* Get ready to parse another value. */
+{
+    json->errmsg[0] = '\0';
+    json->flags &= ~JSON_FLAG_ERROR;  /* NOTE(review): json->next is kept. */
+    json->ntokens = 0;
+    json->stack_top = (size_t)-1;     /* Nesting stack is empty again. */
+}
+
+/* Consume one event and, if it opens an array or object, everything up to
+ * the matching end event. Returns the type of that first event, or
+ * JSON_ERROR/JSON_DONE as soon as json_next() yields one of those.
+ */
+enum json_type json_skip(json_stream *json)
+{
+    const enum json_type first = json_next(json);
+    enum json_type ev = first;
+    size_t open_arrays = 0, open_objects = 0;
+
+    for (;;) {
+        switch (ev) {
+        case JSON_ERROR:
+        case JSON_DONE:       return ev;
+        case JSON_ARRAY:      open_arrays++; break;
+        case JSON_OBJECT:     open_objects++; break;
+        case JSON_ARRAY_END:  if (open_arrays > 0) open_arrays--; break;
+        case JSON_OBJECT_END: if (open_objects > 0) open_objects--; break;
+        default:              break;
+        }
+
+        if (open_arrays == 0 && open_objects == 0)
+            return first;
+        ev = json_next(json);
+    }
+}
+
+/* Call json_skip() repeatedly until it reports an event of the requested
+ * type, which is then returned. If JSON_ERROR or JSON_DONE comes up first,
+ * that is returned instead.
+ */
+enum json_type json_skip_until(json_stream *json, enum json_type type)
+{
+    enum json_type seen;
+
+    do {
+        seen = json_skip(json);
+    } while (seen != type && seen != JSON_ERROR && seen != JSON_DONE);
+
+    return seen;
+}
+
+/* Return the text buffer of the current value ("" if there is no buffer).
+ * If length is not NULL, it receives json->data.string_fill. */
+const char *json_get_string(json_stream *json, size_t *length)
+{
+    const char *text = json->data.string;
+    if (length != NULL)
+        *length = json->data.string_fill;
+    return text != NULL ? text : "";
+}
+
+double json_get_number(json_stream *json) /* strtod() of the value text. */
+{
+    if (json->data.string == NULL) return 0;
+    return strtod(json->data.string, NULL);
+}
+
+const char *json_get_error(json_stream *json)
+{
+    return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL;
+}
+
+/* Report the line counter that newline() advances. */
+size_t json_get_lineno(json_stream *json) {
+    return json->lineno;
+}
+
+/* Report json->source.position, the running read position of the source. */
+size_t json_get_position(json_stream *json) {
+    return json->source.position;
+}
+
+size_t json_get_column(json_stream *json) /* Cached start column, if any. */
+{
+    if (json->colno != 0) return json->colno;
+    if (json->source.position == 0) return 1; /* Nothing read yet. */
+    return json->source.position - json->linepos - json->lineadj;
+}
+
+/* Number of arrays/objects currently open (0 while the stack is empty). */
+size_t json_get_depth(json_stream *json) {
+    return 1 + json->stack_top;
+}
+
+/* Report the kind of container the parser is currently inside: JSON_OBJECT
+   or JSON_ARRAY, or JSON_DONE when no container is open (before the first
+   one is entered or after the last one is left).
+
+   For an object or array, and if count is not NULL, also store in *count the
+   number of parsing events already observed at this level with
+   json_next/peek(). In particular, inside an object an odd count means that
+   the JSON_STRING event just observed is a member name. For JSON_DONE *count
+   is not touched. */
+enum json_type json_get_context(json_stream *json, size_t *count)
+{
+    const struct json_stack *top;
+    if (json->stack_top == (size_t)-1)
+        return JSON_DONE;
+
+    top = &json->stack[json->stack_top];
+    if (count != NULL) *count = top->count;
+    return top->type;
+}
+
+int json_source_get(json_stream *json)
+{
+    /* A caller that starts reading a multi-byte UTF-8 sequence is expected to
+     * read all of it. Invalid bytes inside such a sequence are treated as part
+     * of the same column rather than as the start of a new one. */
+    const int byte = json->source.get(&json->source);
+
+    if (json->linecon == 0) {
+        if (byte == '\n')
+            newline(json);
+        else if (byte >= 0xC2 && byte <= 0xF4) /* Lead byte of a sequence. */
+            json->linecon = utf8_seq_length(byte) - 1;
+        return byte;
+    }
+
+    /* In the middle of a sequence: this should be a continuation byte. */
+    json->linecon--;
+    if (byte != EOF) json->lineadj++;
+    return byte;
+}
+
+/* Look at the next byte of the underlying source without consuming it. */
+int json_source_peek(json_stream *json) {
+    return (json->source.peek)(&json->source);
+}
+
+/* Set json up to read from the size-byte memory block at buffer. */
+void json_open_buffer(json_stream *json, const void *buffer, size_t size) {
+    init(json);
+    json->source.source.buffer.length = size;
+    json->source.source.buffer.buffer = (const char *)buffer;
+    json->source.peek = buffer_peek;
+    json->source.get = buffer_get;
+}
+
+/* Same as json_open_buffer() over the bytes of a NUL-terminated string. */
+void json_open_string(json_stream *json, const char *string) {
+    json_open_buffer(json, (const void *)string, strlen(string));
+}
+
+/* Set json up to read from the given stdio stream. */
+void json_open_stream(json_stream *json, FILE * stream) {
+    init(json);
+    json->source.source.stream.stream = stream;
+    json->source.peek = stream_peek;
+    json->source.get = stream_get;
+}
+
+static int user_get(struct json_source *json) /* get() for user callbacks. */
+{
+    const int byte = json->source.user.get(json->source.user.ptr);
+    /* Only bytes actually delivered advance the position. */
+    json->position += (byte != EOF);
+    return byte;
+}
+
+/* peek() adapter: forward to the user's peek callback with its pointer. */
+static int user_peek(struct json_source *json) {
+    return (json->source.user.peek)(json->source.user.ptr);
+}
+
+/* Set json up to read via the get/peek callbacks, each called with user. */
+void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user) {
+    init(json);
+    json->source.source.user.peek = peek;
+    json->source.source.user.get = get;
+    json->source.source.user.ptr = user;
+    json->source.peek = user_peek;
+    json->source.get = user_get;
+}
+
+/* Copy the three allocator functions from *a into the stream. */
+void json_set_allocator(json_stream *json, json_allocator *a) {
+    json->alloc = a[0];
+}
+
+/* Turn the streaming mode on or off. In this mode json_next() leaves any
+ * whitespace that follows a completed top-level value in the source. */
+void json_set_streaming(json_stream *json, bool streaming)
+{
+    json->flags &= ~JSON_FLAG_STREAMING;
+    if (streaming) json->flags |= JSON_FLAG_STREAMING;
+}
+
+void json_close(json_stream *json) { /* Return buffers to the allocator. */
+    void (*const release)(void *) = json->alloc.free;
+    release(json->stack);
+    release(json->data.string);
+}
diff --git a/libbutl/json/pdjson.h b/libbutl/json/pdjson.h
new file mode 100644
index 0000000..ac698e4
--- /dev/null
+++ b/libbutl/json/pdjson.h
@@ -0,0 +1,147 @@
+#ifndef PDJSON_H
+#define PDJSON_H
+
+#ifndef PDJSON_SYMEXPORT
+#   define PDJSON_SYMEXPORT
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#else
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+    #include <stdbool.h>
+#else
+    #ifndef bool
+        #define bool int
+        #define true 1
+        #define false 0
+    #endif /* bool */
+#endif /* __STDC_VERSION__ */
+#endif /* __cplusplus */
+
+#include <stdio.h>
+
+enum json_type { /* Parsing events; 0 is kept free to mean "none cached". */
+    JSON_ERROR = 1, JSON_DONE, /* Failure and end of a top-level value. */
+    JSON_OBJECT, JSON_OBJECT_END, JSON_ARRAY, JSON_ARRAY_END, /* Containers. */
+    JSON_STRING, JSON_NUMBER, JSON_TRUE, JSON_FALSE, JSON_NULL /* Scalars. */
+};
+
+struct json_allocator { /* Memory functions used by a stream. */
+    void *(*malloc)(size_t);
+    void *(*realloc)(void *, size_t);
+    void (*free)(void *); /* json_close() releases both buffers with it. */
+};
+
+typedef int (*json_user_io)(void *user); /* Yields a byte or EOF. */
+
+typedef struct json_stream json_stream;
+typedef struct json_allocator json_allocator;
+/* Opening a stream over a byte source, and closing it. */
+PDJSON_SYMEXPORT void json_open_buffer(json_stream *json, const void *buffer, size_t size);
+PDJSON_SYMEXPORT void json_open_string(json_stream *json, const char *string);
+PDJSON_SYMEXPORT void json_open_stream(json_stream *json, FILE *stream);
+PDJSON_SYMEXPORT void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user);
+PDJSON_SYMEXPORT void json_close(json_stream *json);
+/* Stream configuration. */
+PDJSON_SYMEXPORT void json_set_allocator(json_stream *json, json_allocator *a);
+PDJSON_SYMEXPORT void json_set_streaming(json_stream *json, bool mode);
+/* Event iteration and access to the current value's text. */
+PDJSON_SYMEXPORT enum json_type json_next(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_peek(json_stream *json);
+PDJSON_SYMEXPORT void json_reset(json_stream *json);
+PDJSON_SYMEXPORT const char *json_get_string(json_stream *json, size_t *length);
+PDJSON_SYMEXPORT double json_get_number(json_stream *json);
+/* Skipping over values. */
+PDJSON_SYMEXPORT enum json_type json_skip(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_skip_until(json_stream *json, enum json_type type);
+/* Location, nesting, and error information. */
+PDJSON_SYMEXPORT size_t json_get_lineno(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_position(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_column(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_depth(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_get_context(json_stream *json, size_t *count);
+PDJSON_SYMEXPORT const char *json_get_error(json_stream *json);
+/* Direct access to the underlying byte source. */
+PDJSON_SYMEXPORT int json_source_get(json_stream *json);
+PDJSON_SYMEXPORT int json_source_peek(json_stream *json);
+PDJSON_SYMEXPORT bool json_isspace(int c);
+
+/* internal */
+
+struct json_source {
+    int (*get)(struct json_source *);  /* Read and consume the next byte. */
+    int (*peek)(struct json_source *); /* Look at the next byte. */
+    size_t position;                   /* user_get() bumps it per byte read. */
+    union {                            /* State of the selected backend. */
+        struct {
+            FILE *stream;
+        } stream;                      /* Set by json_open_stream(). */
+        struct {
+            const char *buffer;
+            size_t length;
+        } buffer;                      /* Set by json_open_buffer(). */
+        struct {
+            void *ptr;                 /* Passed to both callbacks. */
+            json_user_io get;
+            json_user_io peek;
+        } user;                        /* Set by json_open_user(). */
+    } source;
+};
+
+struct json_stream {
+    size_t lineno; /* Incremented by newline(). */
+
+    /* While counting lines is straightforward, columns are tricky because we
+     * have to count codepoints, not bytes. We could have peppered the code
+     * with increments in all the relevant places but that seems inelegant.
+     * So instead we calculate the column dynamically, based on the current
+     * position.
+     *
+     * Specifically, we will remember the position at the beginning of each
+     * line (linepos) and, assuming only the ASCII characters on the line, the
+     * column will be the difference between the current position and linepos.
+     * Of course there could also be multi-byte UTF-8 sequences which we will
+     * handle by keeping an adjustment (lineadj) -- the number of continuation
+     * bytes encountered on this line so far. Finally, for json_source_get()
+     * we also have to keep the number of remaining continuation bytes in the
+     * current multi-byte UTF-8 sequence (linecon).
+     *
+     * This is not the end of the story, however: with only the just described
+     * approach we will always end up with the column of the latest character
+     * read which is not what we want when returning potentially multi-
+     * character value events (string, number, etc); in these cases we want to
+     * return the column of the first character (note that if the value itself
+     * is invalid and we are returning JSON_ERROR, we still want the current
+     * column). So to handle this we will cache the start column (colno) for
+     * such events.
+     */
+    size_t linepos; /* Position at the beginning of the current line. */
+    size_t lineadj; /* Adjustment for multi-byte UTF-8 sequences. */
+    size_t linecon; /* Number of remaining UTF-8 continuation bytes. */
+    size_t colno;   /* Start column for value events or 0. */
+
+    struct json_stack *stack; /* Entries for the open arrays/objects. */
+    size_t stack_top;         /* Innermost entry; (size_t)-1 if none. */
+    size_t stack_size;
+    enum json_type next;      /* Event cached by json_peek(), or 0. */
+    unsigned flags;           /* JSON_FLAG_* bits. */
+
+    struct {
+        char *string;         /* Value text, see json_get_string(). */
+        size_t string_fill;   /* Reported as its length. */
+        size_t string_size;
+    } data;
+
+    size_t ntokens;           /* Values read; zeroed by json_reset(). */
+
+    struct json_source source;
+    struct json_allocator alloc;
+    char errmsg[128];         /* Returned by json_get_error(). */
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif
diff --git a/libbutl/json/serializer.cxx b/libbutl/json/serializer.cxx
new file mode 100644
index 0000000..fbd569a
--- /dev/null
+++ b/libbutl/json/serializer.cxx
@@ -0,0 +1,671 @@
+#include <cstdio>   // snprintf
+#include <cstdarg>  // va_list
+#include <cstring>  // memcpy
+#include <ostream>
+
+#include <libbutl/json/serializer.hxx>
+
+using namespace std;
+
+namespace butl
+{
+  namespace json
+  {
+    using buffer     = buffer_serializer::buffer;
+    using error_code = invalid_json_output::error_code;
+
+    // Overflow callback: grow the container *d and repoint the buffer at it.
+    //
+    template <typename T>
+    static void
+    dynarray_overflow (void* d, event, buffer& b, size_t ex)
+    {
+      auto c (static_cast<T*> (d));
+      c->resize (b.capacity + ex);
+      c->resize (c->capacity ()); // Make all the allocated storage usable.
+      b.capacity = c->size ();
+      b.data = const_cast<typename T::value_type*> (c->data ()); // Pre-C++17.
+    }
+
+    template <typename T>
+    static void
+    dynarray_flush (void* d, event, buffer& b) // Trim *d to what was written.
+    {
+      auto c (static_cast<T*> (d));
+      c->resize (b.size);
+      b.capacity = b.size;
+      b.data = const_cast<typename T::value_type*> (c->data ());
+    }
+
+    buffer_serializer::
+    buffer_serializer (string& s, size_t i) // Serialize into s, growing it.
+        : buffer_serializer (const_cast<char*> (s.data ()), size_, s.size (),
+                             dynarray_overflow<string>,
+                             dynarray_flush<string>,
+                             &s,
+                             i)
+    {
+      size_ = s.size (); // NOTE(review): presumably bound by reference above.
+    }
+
+    buffer_serializer::
+    buffer_serializer (vector<char>& v, size_t i) // Serialize into v, growing it.
+        : buffer_serializer (v.data (), size_, v.size (),
+                             dynarray_overflow<vector<char>>,
+                             dynarray_flush<vector<char>>,
+                             &v,
+                             i)
+    {
+      size_ = v.size (); // NOTE(review): presumably bound by reference above.
+    }
+
+    static void
+    ostream_overflow (void* d, event e, buffer& b, size_t)
+    {
+      // Write out everything buffered so far, failing if the stream does.
+      auto os (static_cast<ostream*> (d));
+      if (os->write (static_cast<char*> (b.data), b.size).fail ())
+        throw invalid_json_output (
+            e, error_code::buffer_overflow, "unable to write JSON output text");
+      b.size = 0; // The buffer is empty again.
+    }
+
+    static void
+    ostream_flush (void* d, event e, buffer& b)
+    {
+      // First push out whatever is still buffered, then flush the stream.
+      //
+      ostream_overflow (d, e, b, 0);
+
+      if (static_cast<ostream*> (d)->flush ().fail ())
+        throw invalid_json_output (
+            e, error_code::buffer_overflow, "unable to write JSON output text");
+    }
+
+    stream_serializer::
+    stream_serializer (ostream& os, size_t i) // Buffer in tmp_, write to os.
+        : buffer_serializer (tmp_, sizeof (tmp_),
+                             ostream_overflow,
+                             ostream_flush,
+                             &os,
+                             i)
+    {
+    }
+
+    bool buffer_serializer::
+    next (optional<event> e, pair<const char*, size_t> val, bool check)
+    {
+      if (absent_ == 2) // Two absent events in a row have already been seen.
+        goto fail_complete;
+
+      if (e == nullopt)
+      {
+        if (!state_.empty ()) // Still inside an object or array.
+          goto fail_incomplete;
+
+        absent_++;
+        return false;
+      }
+
+      absent_ = 0; // Clear inter-value absent event.
+
+      {
+        state* st (state_.empty () ? nullptr : &state_.back ());
+
+        auto name_expected = [] (const state& s)
+        {
+          return s.type == event::begin_object && s.count % 2 == 0;
+        };
+
+        auto make_str = [] (const char* s, size_t n)
+        {
+          return make_pair (s, n);
+        };
+
+        // When it comes to pretty-printing, the common way to do it is along
+        // these lines:
+        //
+        // {
+        //   "str": "value",
+        //   "obj": {
+        //     "arr": [
+        //       1,
+        //       2,
+        //       3
+        //     ]
+        //   },
+        //   "num": 123
+        // }
+        //
+        // Empty objects and arrays are printed without a newline:
+        //
+        // {
+        //   "obj": {},
+        //   "arr": []
+        // }
+        //
+        // There are two types of separators: between name and value, which is
+        // always ": ", and before/after value inside an object or array which
+        // is either newline followed by indentation, or comma followed by
+        // newline followed by indentation (we also have separation between
+        // top-level values but that's orthogonal to pretty-printing).
+        //
+        // Based on this observation, we are going to handle the latter case by
+        // starting with the ",\n" string (in this->sep_) and pushing/popping
+        // indentation spaces as we enter/leave objects and arrays. We handle
+        // the cases where we don't need the comma by simply skipping it in the
+        // C-string pointer.
+        //
+        bool pp (indent_ != 0);
+
+        pair<const char*, size_t> sep;
+        if (st != nullptr)
+        {
+          // The name-value separator.
+          //
+          if (st->type == event::begin_object && st->count % 2 == 1)
+          {
+            sep = !pp ? make_str (":", 1) : make_str (": ", 2);
+          }
+          // We don't need the comma if we are closing the object or array.
+          //
+          else if (e == event::end_array || e == event::end_object)
+          {
+            // But in this case we need to unindent one level prior to writing
+            // the brace. Also handle the empty object/array as a special case.
+            //
+            sep = !pp || st->count == 0
+              ? make_str (nullptr, 0)
+              : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_);
+          }
+          // Or if this is the first value (note: must come after end_*).
+          //
+          else if (st->count == 0)
+          {
+            sep = !pp
+              ? make_str (nullptr, 0)
+              : make_str (sep_.c_str () + 1, sep_.size () - 1);
+          }
+          else
+          {
+            sep = !pp
+              ? make_str (",", 1)
+              : make_str (sep_.c_str (), sep_.size ());
+          }
+        }
+        else if (values_ != 0) // Subsequent top-level value.
+        {
+          // Top-level value separation. For now we always separate them with
+          // newlines, which is the most common/sensible way.
+          //
+          sep = make_str ("\n", 1);
+        }
+
+        switch (*e)
+        {
+        case event::begin_array:
+        case event::begin_object:
+          {
+            if (st != nullptr && name_expected (*st))
+              goto fail_unexpected_event;
+
+            write (*e,
+                   sep,
+                   make_str (e == event::begin_array ? "[" : "{", 1),
+                   false);
+
+            if (st != nullptr)
+              st->count++;
+
+            if (pp)
+              sep_.append (indent_, ' ');
+
+            state_.push_back (state {*e, 0});
+            break;
+          }
+        case event::end_array:
+        case event::end_object:
+          {
+            if (st == nullptr || (e == event::end_array
+                                  ? st->type != event::begin_array
+                                  : !name_expected (*st)))
+              goto fail_unexpected_event;
+
+            write (*e,
+                   sep,
+                   make_str (e == event::end_array ? "]" : "}", 1),
+                   false);
+
+            if (pp)
+              sep_.erase (sep_.size () - indent_);
+
+            state_.pop_back ();
+            break;
+          }
+        case event::name:
+        case event::string:
+          {
+            if (e == event::name
+                ? (st == nullptr || !name_expected (*st))
+                : (st != nullptr && name_expected (*st)))
+              goto fail_unexpected_event;
+
+            write (*e, sep, val, check, '"');
+
+            if (st != nullptr)
+              st->count++;
+            break;
+          }
+        case event::null:
+        case event::boolean:
+          {
+            if (e == event::null && val.first == nullptr)
+              val = {"null", 4};
+            else if (check)
+            {
+              auto eq = [&val] (const char* v, size_t n)
+              {
+                return val.second == n && memcmp (val.first, v, n) == 0;
+              };
+
+              if (e == event::null)
+              {
+                if (!eq ("null", 4))
+                  goto fail_null;
+              }
+              else
+              {
+                if (!eq ("true", 4) && !eq ("false", 5))
+                  goto fail_bool;
+              }
+            }
+          }
+          // Fall through.
+        case event::number:
+          {
+            // Note: this event is also used by value_json_text().
+
+            if (st != nullptr && name_expected (*st))
+              goto fail_unexpected_event;
+
+            write (*e, sep, val, check);
+
+            if (st != nullptr)
+              st->count++;
+            break;
+          }
+        }
+      }
+
+      if (state_.empty ()) // A top-level value has just been completed.
+      {
+        values_++;
+        if (flush_ != nullptr)
+          flush_ (data_, *e, buf_);
+
+        return false;
+      }
+
+      return true; // Still inside an object or array.
+
+    fail_complete:
+      throw invalid_json_output (
+          e, error_code::invalid_value, "value sequence is complete");
+    fail_incomplete:
+      throw invalid_json_output (
+          e, error_code::invalid_value, "value is incomplete");
+    fail_null:
+      throw invalid_json_output (
+          e, error_code::invalid_value, "invalid null value");
+    fail_bool:
+      throw invalid_json_output (
+          e, error_code::invalid_value, "invalid boolean value");
+    fail_unexpected_event:
+      throw invalid_json_output (
+          e, error_code::unexpected_event, "unexpected event");
+    }
+
+    // Escape sequences for control characters <= 0x1F, indexed by byte value.
+    //
+    static const char* json_escapes[] =
+    {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
+     "\\u0006", "\\u0007", "\\b",     "\\t",     "\\n",     "\\u000B",
+     "\\f",     "\\r",     "\\u000E", "\\u000F", "\\u0010", "\\u0011",
+     "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
+     "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D",
+     "\\u001E", "\\u001F"};
+
+    void buffer_serializer::
+    write (event e,
+           pair<const char*, size_t> sep,
+           pair<const char*, size_t> val,
+           bool check,
+           char q)
+    {
+      // Assumptions:
+      //
+      // 1. A call to overflow should be able to provide enough capacity to
+      //    write the entire separator (in other words, we are not going to
+      //    bother with chunking the separator).
+      //
+      // 2. Similarly, a call to overflow should be able to provide enough
+      //    capacity to write an entire UTF-8 multi-byte sequence.
+      //
+      // 3. Performance-wise, we do not expect very long contiguous sequences
+      //    of characters that require escaping.
+
+      // Total number of bytes remaining to be written and the capacity
+      // currently available.
+      //
+      size_t size (sep.second + val.second + (q != '\0' ? 2 : 0));
+      size_t cap (buf_.capacity - buf_.size);
+
+      auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0)
+      {
+        if (overflow_ == nullptr)
+          return false;
+
+        extra += size;
+        extra -= cap;
+        overflow_ (data_, e, buf_, extra > min ? extra : min);
+        cap = buf_.capacity - buf_.size;
+
+        return cap >= min;
+      };
+
+      auto append = [this, &cap, &size] (const char* d, size_t s)
+      {
+        memcpy (static_cast<char*> (buf_.data) + buf_.size, d, s);
+        buf_.size += s;
+        cap -= s;
+        size -= s;
+      };
+
+      // Return the longest chunk of input that fits into the buffer and does
+      // not end in the middle of a multi-byte UTF-8 sequence. Assume value
+      // size and capacity are not 0. Return NULL in first if no chunk could
+      // be found that fits into the remaining space. In this case, second is
+      // the additional (to size) required space (used to handle escapes in
+      // the checked version).
+      //
+      // The basic idea is to seek in the input buffer to the capacity of the
+      // output buffer (unless the input is shorter than the output). If we
+      // ended up in the middle of a multi-byte UTF-8 sequence, then seek back
+      // until we end up at the UTF-8 sequence boundary. Note that this
+      // implementation assumes valid UTF-8.
+      //
+      auto chunk = [&cap, &val] () -> pair<const char*, size_t>
+      {
+        pair<const char*, size_t> r (nullptr, 0);
+
+        if (cap >= val.second)
+          r = val;
+        else
+        {
+          // Start from the character past capacity and search for a UTF-8
+          // sequence boundary.
+          //
+          for (const char* p (val.first + cap); p != val.first; --p)
+          {
+            const auto u (static_cast<uint8_t> (*p));
+            if (u < 0x80 || u > 0xBF) // Not a continuation byte
+            {
+              r = {val.first, p - val.first};
+              break;
+            }
+          }
+        }
+
+        val.first += r.second;
+        val.second -= r.second;
+
+        return r;
+      };
+
+      // Escaping and UTF-8-validating version of chunk().
+      //
+      // There are three classes of mandatory escapes in a JSON string:
+      //
+      // - \\ and \"
+      //
+      // - \b \f \n \r \t for popular control characters
+      //
+      // - \u00NN for other control characters <= 0x1F
+      //
+      // If the input begins with a character that must be escaped, return
+      // only its escape sequence. Otherwise validate and return everything up
+      // to the end of input or buffer capacity, but cutting it short before
+      // the next character that must be escaped or the first UTF-8 sequence
+      // that would not fit.
+      //
+      // Return string::npos in second in case of a stray continuation byte or
+      // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte
+      // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence
+      // that would encode a codepoint beyond the U+10FFFF Unicode limit).
+      //
+      auto chunk_checked = [&cap, &size, &val] () -> pair<const char*, size_t>
+      {
+        pair<const char*, size_t> r (nullptr, 0);
+
+        // Check whether the first character needs to be escaped.
+        //
+        const uint8_t c (val.first[0]);
+        if (c == '"')
+          r = {"\\\"", 2};
+        else if (c == '\\')
+          r = {"\\\\", 2};
+        else if (c <= 0x1F)
+        {
+          auto s (json_escapes[c]);
+          r = {s, s[1] == 'u' ? 6 : 2};
+        }
+
+        if (r.first != nullptr)
+        {
+          // Return in second the additional (to size) space required.
+          //
+          if (r.second > cap)
+            return {nullptr, r.second - 1};
+
+          // If we had to escape the character then adjust size accordingly
+          // (see append() above).
+          //
+          size += r.second - 1;
+
+          val.first += 1;
+          val.second -= 1;
+          return r;
+        }
+
+        // First character doesn't need to be escaped. Return as much of the
+        // rest of the input as possible.
+        //
+        size_t i (0);
+        for (size_t n (min (cap, val.second)); i != n; i++)
+        {
+          const uint8_t c1 (val.first[i]);
+
+          if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped.
+            break;
+          else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence.
+          {
+            size_t i1 (i); // Position of the first byte.
+
+            // The control flow here is to continue if valid and to fall
+            // through to return on error.
+            //
+            if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence.
+            {
+              if (i + 2 <= val.second) // Sequence is complete in JSON value.
+              {
+                if (i + 2 > cap) // Sequence won't fit.
+                  break;
+
+                const uint8_t c2 (val.first[++i]);
+
+                if (c2 >= 0x80 && c2 <= 0xBF)
+                  continue;
+              }
+            }
+            else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence.
+            {
+              if (i + 3 <= val.second)
+              {
+                if (i + 3 > cap)
+                  break;
+
+                const uint8_t c2 (val.first[++i]), c3 (val.first[++i]);
+
+                if (c3 >= 0x80 && c3 <= 0xBF)
+                {
+                  switch (c1)
+                  {
+                  case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break;
+                  case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break;
+                  default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+                  }
+                }
+              }
+            }
+            else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence.
+            {
+              if (i + 4 <= val.second)
+              {
+                if (i + 4 > cap)
+                  break;
+
+                const uint8_t c2 (val.first[++i]),
+                              c3 (val.first[++i]),
+                              c4 (val.first[++i]);
+
+                if (c3 >= 0x80 && c3 <= 0xBF &&
+                    c4 >= 0x80 && c4 <= 0xBF)
+                {
+                  switch (c1)
+                  {
+                  case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break;
+                  case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break;
+                  default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
+                  }
+                }
+              }
+            }
+
+            r = {val.first, string::npos};
+
+            // Update val to point to the beginning of the invalid sequence.
+            //
+            val.first += i1;
+            val.second -= i1;
+
+            return r;
+          }
+        }
+
+        if (i != 0) // We have a chunk.
+        {
+          r = {val.first, i};
+
+          val.first += i;
+          val.second -= i;
+        }
+
+        return r;
+      };
+
+      // Value's original size (used to calculate the offset of the errant
+      // character in case of a validation failure).
+      //
+      const size_t vn (val.second);
+
+      // Write the separator, if any.
+      //
+      if (sep.second != 0)
+      {
+        if (cap < sep.second && !grow (sep.second))
+          goto fail_nospace;
+
+        append (sep.first, sep.second);
+      }
+
+      // Write the value's opening quote, if requested.
+      //
+      if (q != '\0')
+      {
+        if (cap == 0 && !grow (1))
+          goto fail_nospace;
+
+        append ("\"", 1);
+      }
+
+      // Write the value, unless empty.
+      //
+      while (val.second != 0)
+      {
+        pair<const char*, size_t> ch (nullptr, 0);
+
+        if (cap != 0)
+          ch = check ? chunk_checked () : chunk ();
+
+        if (ch.first == nullptr)
+        {
+          // The minimum extra bytes we need the overflow function to be able
+          // to provide is based on these sequences that we do not break:
+          //
+          // - 4 bytes for a UTF-8 sequence
+          // - 6 bytes for an escaped Unicode sequence (\uXXXX).
+          //
+          if (!grow (6, ch.second))
+            goto fail_nospace;
+        }
+        else if (ch.second != string::npos)
+          append (ch.first, ch.second);
+        else
+          goto fail_utf8;
+      }
+
+      // Write the value's closing quote, if requested.
+      //
+      if (q != '\0')
+      {
+        if (cap == 0 && !grow (1))
+          goto fail_nospace;
+
+        append ("\"", 1);
+      }
+
+      return;
+
+      // Note: keep descriptions consistent with the parser.
+      //
+    fail_utf8:
+      throw invalid_json_output (e,
+                                 e == event::name ? error_code::invalid_name
+                                                  : error_code::invalid_value,
+                                 "invalid UTF-8 text",
+                                 vn - val.second);
+
+    fail_nospace:
+      throw invalid_json_output (
+          e, error_code::buffer_overflow, "insufficient space in buffer");
+    }
+
+    size_t buffer_serializer::
+    to_chars_impl (char* b, size_t n, const char* f, ...)
+    {
+      va_list a;
+      va_start (a, f);
+      const int r (vsnprintf (b, n, f, a)); // Writes at most n - 1 chars.
+      va_end (a);
+
+      if (r < 0 || static_cast<size_t> (r) >= n) // Error or truncated.
+      {
+        throw invalid_json_output (event::number,
+                                   error_code::invalid_value,
+                                   "unable to convert number to string");
+      }
+
+      return static_cast<size_t> (r); // Length excluding the trailing NUL.
+    }
+  }
+}
diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx
new file mode 100644
index 0000000..5192cb4
--- /dev/null
+++ b/libbutl/json/serializer.hxx
@@ -0,0 +1,413 @@
+#pragma once
+
+#ifdef BUILD2_BOOTSTRAP
+#  error JSON serializer not available during bootstrap
+#endif
+
+#include <array>
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <cstddef>     // size_t, nullptr_t
+#include <utility>     // pair
+#include <stdexcept>   // invalid_argument
+#include <type_traits> // enable_if, is_*
+
+#include <libbutl/optional.hxx> // butl::optional is std::optional or similar.
+
+#include <libbutl/json/event.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  // Using the RFC8259 terminology: JSON (output) text, JSON value, object
+  // member.
+  //
+  namespace json
+  {
+    class invalid_json_output: public std::invalid_argument
+    {
+    public:
+      using event_type = json::event;
+
+      enum class error_code
+      {
+        buffer_overflow,  // Insufficient space in the output buffer.
+        unexpected_event, // NOTE(review): presumably an out-of-sequence event.
+        invalid_name,     // Invalid member name (for example, bad UTF-8).
+        invalid_value     // Invalid value (bad UTF-8, unconvertible number).
+      };
+
+      invalid_json_output (optional<event_type> event,
+                           error_code code,
+                           const char* description,
+                           std::size_t offset = std::string::npos);
+
+      invalid_json_output (optional<event_type> event,
+                           error_code code,
+                           const std::string& description,
+                           std::size_t offset = std::string::npos);
+
+      // Event that triggered the error. If the error is in the value, then
+      // offset points to the offending byte (for example, the beginning of an
+      // invalid UTF-8 byte sequence). Otherwise, offset is string::npos.
+      //
+      optional<event_type> event;
+      error_code           code;
+      std::size_t          offset;
+    };
+
+    // The serializer makes sure the resulting JSON is syntactically but not
+    // necessarily semantically correct. For example, it's possible to
+    // serialize a number event with non-numeric data.
+    //
+    // Note that unlike the parser, the serializer is always in the multi-
+    // value mode allowing the serialization of zero or more values. Note also
+    // that while values are separated with newlines, there is no trailing
+    // newline after the last (or only) value and the user is expected to add
+    // it manually if needed.
+    //
+    // Also note that while RFC8259 recommends object members to have unique
+    // names, the serializer does not enforce this.
+    //
+    class LIBBUTL_SYMEXPORT buffer_serializer
+    {
+    public:
+      // Serialize to string growing it as necessary.
+      //
+      // The indentation argument specifies the number of indentation spaces
+      // that should be used for pretty-printing. If 0 is passed, no
+      // pretty-printing is performed.
+      //
+      explicit
+      buffer_serializer (std::string&, std::size_t indentation = 2);
+
+      // Serialize to vector of characters growing it as necessary.
+      //
+      explicit
+      buffer_serializer (std::vector<char>&, std::size_t indentation = 2);
+
+      // Serialize to a fixed array.
+      //
+      // The length of the output text written is tracked in the size
+      // argument.
+      //
+      // If the array is not big enough to store the entire output text, the
+      // next() call that reaches the limit will throw invalid_json_output.
+      //
+      template <std::size_t N>
+      buffer_serializer (std::array<char, N>&, std::size_t& size,
+                         std::size_t indentation = 2);
+
+      // Serialize to a fixed buffer.
+      //
+      // The length of the output text written is tracked in the size
+      // argument.
+      //
+      // If the buffer is not big enough to store the entire output text, the
+      // next() call that reaches the limit will throw invalid_json_output.
+      //
+      buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+                         std::size_t indentation = 2);
+
+      // The overflow function is called when the output buffer is out of
+      // space. The extra argument is a hint indicating the extra space likely
+      // to be required.
+      //
+      // Possible strategies include re-allocating a larger buffer or flushing
+      // the contents of the original buffer to the output destination. In
+      // case of a reallocation, the implementation is responsible for copying
+      // the contents of the original buffer over.
+      //
+      // The flush function is called when the complete JSON value has been
+      // serialized to the buffer. It can be used to write the contents of the
+      // buffer to the output destination. Note that flush is not called after
+      // the second absent (nullopt) event (or the only absent event; see
+      // next() for details).
+      //
+      // Both functions are passed the original buffer, its size (the amount
+      // of output text), and its capacity. They return (by modifying the
+      // argument) the replacement buffer and its size and capacity (these may
+      // refer to the original buffer). If space cannot be made available, the
+      // implementation can throw an appropriate exception (for example,
+      // std::bad_alloc or std::ios_base::failure). Any exception thrown is
+      // propagated to the user.
+      //
+      struct buffer
+      {
+        void*        data;
+        std::size_t& size;
+        std::size_t  capacity;
+      };
+
+      using overflow_function = void (void* data,
+                                      event,
+                                      buffer&,
+                                      std::size_t extra);
+      using flush_function    = void (void* data, event, buffer&);
+
+      // Serialize using a custom buffer and overflow/flush functions (both
+      // are optional).
+      //
+      buffer_serializer (void* buf, std::size_t capacity,
+                         overflow_function*,
+                         flush_function*,
+                         void* data,
+                         std::size_t indentation = 2);
+
+      // As above but the length of the output text written is tracked in the
+      // size argument.
+      //
+      buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
+                         overflow_function*,
+                         flush_function*,
+                         void* data,
+                         std::size_t indentation = 2);
+
+      // Begin/end an object.
+      //
+      // The member_begin_object() version is a shortcut for:
+      //
+      //     member_name (name, check);
+      //     begin_object ();
+      //
+      void
+      begin_object ();
+
+      void
+      member_begin_object (const char*, bool check = true);
+
+      void
+      member_begin_object (const std::string&, bool check = true);
+
+      void
+      end_object ();
+
+      // Serialize an object member (name and value).
+      //
+      // If check is false, then don't check whether the name (or value, if
+      // it's a string) is valid UTF-8 and don't escape any characters.
+      //
+      template <typename T>
+      void
+      member (const char* name, const T& value, bool check = true);
+
+      template <typename T>
+      void
+      member (const std::string& name, const T& value, bool check = true);
+
+      // Serialize an object member name.
+      //
+      // If check is false, then don't check whether the name is valid UTF-8
+      // and don't escape any characters.
+      //
+      void
+      member_name (const char*, bool check = true);
+
+      void
+      member_name (const std::string&, bool check = true);
+
+      // Begin/end an array.
+      //
+      // The member_begin_array() version is a shortcut for:
+      //
+      //     member_name (name, check);
+      //     begin_array ();
+      //
+      void
+      begin_array ();
+
+      void
+      member_begin_array (const char*, bool check = true);
+
+      void
+      member_begin_array (const std::string&, bool check = true);
+
+      void
+      end_array ();
+
+      // Serialize a string.
+      //
+      // If check is false, then don't check whether the value is valid UTF-8
+      // and don't escape any characters.
+      //
+      // Note that a NULL C-string pointer is serialized as a null value.
+      //
+      void
+      value (const char*, bool check = true);
+
+      void
+      value (const std::string&, bool check = true);
+
+      // Serialize a number.
+      //
+      template <typename T>
+      typename std::enable_if<std::is_integral<T>::value ||
+                              std::is_floating_point<T>::value>::type
+      value (T);
+
+      // Serialize a boolean value.
+      //
+      void
+      value (bool);
+
+      // Serialize a null value.
+      //
+      void
+      value (std::nullptr_t);
+
+      // Serialize value as a pre-serialized JSON value.
+      //
+      // Note that the value is expected to be a valid (and suitable) UTF-8-
+      // encoded JSON text. Note also that if pretty-printing is enabled,
+      // the resulting output may not be correctly indented.
+      //
+      void
+      value_json_text (const char*);
+
+      void
+      value_json_text (const std::string&);
+
+      // Serialize next JSON event.
+      //
+      // If check is false, then don't check whether the value is valid UTF-8
+      // and don't escape any characters.
+      //
+      // Return true if more events are required to complete the (top-level)
+      // value (that is, it is currently incomplete) and false otherwise.
+      // Throw invalid_json_output exception in case of an invalid event or
+      // value.
+      //
+      // At the end of the value an optional absent (nullopt) event can be
+      // serialized to verify the value is complete. If it is incomplete an
+      // invalid_json_output exception is thrown. An optional followup absent
+      // event can be serialized to indicate the completion of a multi-value
+      // sequence (one and only absent event indicates a zero value sequence).
+      // If anything is serialized to a complete value sequence an
+      // invalid_json_output exception is thrown.
+      //
+      // Note that this function was designed to be easily invoked with the
+      // output from parser::next() and parser::data(). For example, for a
+      // single-value mode:
+      //
+      //   optional<event> e;
+      //   do
+      //   {
+      //     e = p.next ();
+      //     s.next (e, p.data ());
+      //   }
+      //   while (e);
+      //
+      // For a multi-value mode:
+      //
+      //   while (p.peek ())
+      //   {
+      //     optional<event> e;
+      //     do
+      //     {
+      //       e = p.next ();
+      //       s.next (e, p.data ());
+      //     }
+      //     while (e);
+      //   }
+      //   s.next (nullopt); // End of value sequence.
+      //
+      bool
+      next (optional<event> event,
+            std::pair<const char*, std::size_t> value = {},
+            bool check = true);
+
+    private:
+      void
+      write (event,
+             std::pair<const char*, std::size_t> sep,
+             std::pair<const char*, std::size_t> val,
+             bool check, char quote = '\0');
+
+      // Forward a value(v, check) call to value(v) ignoring the check
+      // argument. Used in the member() implementation.
+      //
+      template <typename T>
+      void
+      value (const T& v, bool /*check*/)
+      {
+        value (v);
+      }
+
+      // Convert numbers to string.
+      //
+      static std::size_t to_chars (char*, std::size_t, int);
+      static std::size_t to_chars (char*, std::size_t, long);
+      static std::size_t to_chars (char*, std::size_t, long long);
+      static std::size_t to_chars (char*, std::size_t, unsigned int);
+      static std::size_t to_chars (char*, std::size_t, unsigned long);
+      static std::size_t to_chars (char*, std::size_t, unsigned long long);
+      static std::size_t to_chars (char*, std::size_t, double);
+      static std::size_t to_chars (char*, std::size_t, long double);
+
+      static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...);
+
+      buffer buf_;
+      std::size_t size_;
+      overflow_function* overflow_;
+      flush_function* flush_;
+      void* data_;
+
+      // State of a "structured type" (array or object; as per the RFC
+      // terminology).
+      //
+      struct state
+      {
+        const event type;  // Type kind (begin_array or begin_object).
+        std::size_t count; // Number of events serialized inside this type.
+      };
+
+      // Stack of nested structured type states.
+      //
+      // @@ TODO: would have been nice to use small_vector.
+      //
+      std::vector<state> state_;
+
+      // The number of consecutive absent events (nullopt) serialized thus
+      // far.
+      //
+      // Note: initialized to 1 to naturally handle a single absent event
+      // (declares an empty value sequence complete).
+      //
+      std::size_t absent_ = 1;
+
+      // The number of spaces with which to indent (once for each level of
+      // nesting). If zero, pretty-printing is disabled.
+      //
+      std::size_t indent_;
+
+      // Separator and indentation before/after value inside an object or
+      // array (see pretty-printing implementation for details).
+      //
+      std::string sep_;
+
+      // The number of complete top-level values serialized thus far.
+      //
+      std::size_t values_ = 0;
+    };
+
+    class LIBBUTL_SYMEXPORT stream_serializer: public buffer_serializer
+    {
+    public:
+      // Serialize to std::ostream.
+      //
+      // If stream exceptions are enabled then the std::ios_base::failure
+      // exception is used to report input/output errors (badbit and failbit).
+      // Otherwise, those are reported as the invalid_json_output exception.
+      //
+      explicit
+      stream_serializer (std::ostream&, std::size_t indentation = 2);
+
+    protected:
+      char tmp_[4096];
+    };
+  }
+}
+
+#include <libbutl/json/serializer.ixx>
diff --git a/libbutl/json/serializer.ixx b/libbutl/json/serializer.ixx
new file mode 100644
index 0000000..a719ef6
--- /dev/null
+++ b/libbutl/json/serializer.ixx
@@ -0,0 +1,247 @@
+#include <cstring> // strlen()
+
+namespace butl
+{
+  namespace json
+  {
+    inline invalid_json_output::
+    invalid_json_output (optional<event_type> e,
+                         error_code c,
+                         const char* d,
+                         std::size_t o)
+        : std::invalid_argument (d), event (e), code (c), offset (o)
+    {
+    }
+
+    inline invalid_json_output::
+    invalid_json_output (optional<event_type> e,
+                         error_code c,
+                         const std::string& d,
+                         std::size_t o)
+        : invalid_json_output (e, c, d.c_str (), o)
+    {
+    }
+
+    inline buffer_serializer::
+    buffer_serializer (void* b, std::size_t& s, std::size_t c,
+                       overflow_function* o, flush_function* f, void* d,
+                       std::size_t i)
+        : buf_ {b, s, c},
+          overflow_ (o),
+          flush_ (f),
+          data_ (d),
+          indent_ (i),
+          sep_ (indent_ != 0 ? ",\n" : "")
+    {
+    }
+
+    template <std::size_t N>
+    inline buffer_serializer::
+    buffer_serializer (std::array<char, N>& a, std::size_t& s, std::size_t i)
+        : buffer_serializer (a.data (), s, a.size (),
+                             nullptr, nullptr, nullptr,
+                             i)
+    {
+    }
+
+    inline buffer_serializer::
+    buffer_serializer (void* b, std::size_t& s, std::size_t c, std::size_t i)
+        : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i)
+    {
+    }
+
+    inline buffer_serializer::
+    buffer_serializer (void* b, std::size_t c,
+                       overflow_function* o, flush_function* f, void* d,
+                       std::size_t i)
+        : buffer_serializer (b, size_, c, o, f, d, i)
+    {
+      size_ = 0;
+    }
+
+    inline void buffer_serializer::
+    begin_object ()
+    {
+      next (event::begin_object);
+    }
+
+    inline void buffer_serializer::
+    end_object ()
+    {
+      next (event::end_object);
+    }
+
+    inline void buffer_serializer::
+    member_name (const char* n, bool c)
+    {
+      next (event::name, {n, n != nullptr ? std::strlen (n) : 0}, c);
+    }
+
+    inline void buffer_serializer::
+    member_name (const std::string& n, bool c)
+    {
+      next (event::name, {n.c_str (), n.size ()}, c);
+    }
+
+    inline void buffer_serializer::
+    member_begin_object (const char* n, bool c)
+    {
+      member_name (n, c);
+      begin_object ();
+    }
+
+    inline void buffer_serializer::
+    member_begin_object (const std::string& n, bool c)
+    {
+      member_name (n, c);
+      begin_object ();
+    }
+
+    template <typename T>
+    inline void buffer_serializer::
+    member (const char* n, const T& v, bool c)
+    {
+      member_name (n, c);
+      value (v, c);
+    }
+
+    template <typename T>
+    inline void buffer_serializer::
+    member (const std::string& n, const T& v, bool c)
+    {
+      member_name (n, c);
+      value (v, c);
+    }
+
+    inline void buffer_serializer::
+    begin_array ()
+    {
+      next (event::begin_array);
+    }
+
+    inline void buffer_serializer::
+    member_begin_array (const char* n, bool c)
+    {
+      member_name (n, c);
+      begin_array ();
+    }
+
+    inline void buffer_serializer::
+    member_begin_array (const std::string& n, bool c)
+    {
+      member_name (n, c);
+      begin_array ();
+    }
+
+    inline void buffer_serializer::
+    end_array ()
+    {
+      next (event::end_array);
+    }
+
+    inline void buffer_serializer::
+    value (const char* v, bool c)
+    {
+      if (v != nullptr)
+        next (event::string, {v, std::strlen (v)}, c);
+      else
+        next (event::null);
+    }
+
+    inline void buffer_serializer::
+    value (const std::string& v, bool c)
+    {
+      next (event::string, {v.c_str (), v.size ()}, c);
+    }
+
+    template <typename T>
+    typename std::enable_if<std::is_integral<T>::value ||
+                            std::is_floating_point<T>::value>::type
+    buffer_serializer::
+    value (T v)
+    {
+      // The largest 128-bit integer has 39 digits, and long floating point
+      // numbers will fit because they are output in scientific notation.
+      //
+      char b[40];
+      const std::size_t n (to_chars (b, sizeof (b), v));
+      next (event::number, {b, n});
+    }
+
+    inline void buffer_serializer::
+    value (bool b)
+    {
+      next (event::boolean,
+            b ? std::make_pair ("true", 4) : std::make_pair ("false", 5));
+    }
+
+    inline void buffer_serializer::
+    value (std::nullptr_t)
+    {
+      next (event::null);
+    }
+
+    inline void buffer_serializer::
+    value_json_text (const char* v)
+    {
+      // Use event::number (which doesn't involve any quoting) with a disabled
+      // check.
+      //
+      next (event::number, {v, std::strlen (v)}, false /* check */);
+    }
+
+    inline void buffer_serializer::
+    value_json_text (const std::string& v)
+    {
+      next (event::number, {v.c_str (), v.size ()}, false /* check */);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, int v)
+    {
+      return to_chars_impl (b, s, "%d", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, long v)
+    {
+      return to_chars_impl (b, s, "%ld", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, long long v)
+    {
+      return to_chars_impl (b, s, "%lld", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, unsigned v)
+    {
+      return to_chars_impl (b, s, "%u", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, unsigned long v)
+    {
+      return to_chars_impl (b, s, "%lu", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, unsigned long long v)
+    {
+      return to_chars_impl (b, s, "%llu", v);
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, double v)
+    {
+      return to_chars_impl (b, s, "%.10g", v); // 10 significant digits.
+    }
+
+    inline std::size_t buffer_serializer::
+    to_chars (char* b, std::size_t s, long double v)
+    {
+      return to_chars_impl (b, s, "%.10Lg", v); // 10 significant digits.
+    }
+  }
+}
diff --git a/libbutl/lz4-stream.cxx b/libbutl/lz4-stream.cxx
new file mode 100644
index 0000000..8001770
--- /dev/null
+++ b/libbutl/lz4-stream.cxx
@@ -0,0 +1,281 @@
+// file      : libbutl/lz4-stream.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbutl/lz4-stream.hxx>
+
+#include <cstring>   // memcpy()
+#include <stdexcept> // invalid_argument
+
+#include <libbutl/utility.hxx> // eof()
+
+using namespace std;
+
+namespace butl
+{
+  namespace lz4
+  {
+    // istream
+    //
+
+    // Read into the specified buffer returning the number of bytes read and
+    // the eof flag.
+    //
+    pair<size_t, bool> istreambuf::
+    read (char* b, size_t c)
+    {
+      size_t n (0);   // Number of bytes read so far.
+      bool e (false); // True once the underlying stream reports EOF.
+
+      // @@ TODO: would it be faster to do a direct buffer copy if input
+      //    stream is bufstreambuf-based (see sha*.cxx for code)?
+      do
+      {
+        e = eof (is_->read (b + n, c - n));
+        n += static_cast<size_t> (is_->gcount ());
+      }
+      while (!e && n != c);
+
+      return make_pair (n, e);
+    }
+
+    optional<uint64_t> istreambuf::
+    open (std::istream& is, bool end)
+    {
+      assert (is.exceptions () == std::istream::badbit);
+
+      is_ = &is;
+      end_ = end;
+
+      // Read in the header and allocate the buffers.
+      //
+      // What if we hit EOF here? And could begin() return 0? Turns out the
+      // answer to both questions is yes: 0-byte content compresses to 15
+      // bytes (with or without content size; 1-byte -- to 20/28 bytes). We
+      // can ignore EOF here since an attempt to read more will result in
+      // another EOF. And our load() is prepared to handle 0 hint.
+      //
+      // @@ We could end up leaving some of the input content from the header
+      //    in the input buffer which the caller will have no way of using
+      //    (e.g., in a stream of compressed contents). Doesn't look like
+      //    there is much we can do (our streams don't support putback) other
+      //    than document this limitation.
+      //
+      optional<uint64_t> r;
+
+      d_.hn = read (d_.hb, sizeof (d_.hb)).first;
+      h_ = d_.begin (&r); // Hint for the first chunk (may be 0).
+
+      ib_.reset ((d_.ib = new char[d_.ic]));
+      ob_.reset ((d_.ob = new char[d_.oc]));
+
+      // Copy over whatever is left in the header buffer.
+      //
+      memcpy (d_.ib, d_.hb, (d_.in = d_.hn));
+
+      setg (d_.ob, d_.ob, d_.ob); // Empty get area (see load()).
+      return r;
+    }
+
+    void istreambuf::
+    close ()
+    {
+      if (is_open ())
+      {
+        is_ = nullptr;
+      }
+    }
+
+    istreambuf::int_type istreambuf::
+    underflow ()
+    {
+      int_type r (traits_type::eof ());
+
+      if (is_open ())
+      {
+        if (gptr () < egptr () || load ())
+          r = traits_type::to_int_type (*gptr ());
+      }
+
+      return r;
+    }
+
+    bool istreambuf::
+    load ()
+    {
+      // Note that the first call to this function may be with h_ == 0 (see
+      // open() for details). In which case we just need to verify there is
+      // no junk after the compressed content.
+      //
+      bool r; // True if the get area now contains data.
+
+      if (h_ == 0)
+        r = false; // EOF
+      else
+      {
+        // Note: next() may just buffer the data.
+        //
+        do
+        {
+          // Note that on the first call we may already have some data in the
+          // input buffer (leftover header data).
+          //
+          if (h_ > d_.in)
+          {
+            pair<size_t, bool> p (read (d_.ib + d_.in, h_ - d_.in));
+
+            d_.in += p.first;
+
+            if (p.second && d_.in != h_)
+              throw invalid_argument ("incomplete LZ4 compressed content");
+          }
+
+          h_ = d_.next (); // Clears d_.in.
+
+        } while (d_.on == 0 && h_ != 0);
+
+        setg (d_.ob, d_.ob, d_.ob + d_.on);
+        off_ += d_.on;
+        r = (d_.on != 0);
+      }
+
+      // If we don't expect any more compressed content and we were asked to
+      // end the underlying input stream, then verify there is no more input.
+      //
+      if (h_ == 0 && end_)
+      {
+        end_ = false; // Only verify once.
+
+        if (d_.in != 0 ||
+            (!is_->eof () &&
+             is_->good () &&
+             is_->peek () != istream::traits_type::eof ()))
+          throw invalid_argument ("junk after LZ4 compressed content");
+      }
+
+      return r;
+    }
+
+    // ostream
+    //
+
+    void ostreambuf::
+    write (char* b, std::size_t n)
+    {
+      os_->write (b, static_cast<streamsize> (n));
+    }
+
+    void ostreambuf::
+    open (std::ostream& os,
+          int level,
+          int block_id,
+          optional<std::uint64_t> content_size)
+    {
+      assert (os.exceptions () == (std::ostream::badbit |
+                                   std::ostream::failbit));
+
+      os_ = &os;
+
+      // Determine required buffer capacities.
+      //
+      c_.begin (level, block_id, content_size);
+
+      ib_.reset ((c_.ib = new char[c_.ic]));
+      ob_.reset ((c_.ob = new char[c_.oc]));
+
+      setp (c_.ib, c_.ib + c_.ic - 1); // Keep space for overflow's char.
+      end_ = false;
+    }
+
+    void ostreambuf::
+    close ()
+    {
+      if (is_open ())
+      {
+        if (!end_)
+          save ();
+
+        os_ = nullptr;
+      }
+    }
+
+    ostreambuf::
+    ~ostreambuf ()
+    {
+      close ();
+    }
+
+    ostreambuf::int_type ostreambuf::
+    overflow (int_type c)
+    {
+      int_type r (traits_type::eof ());
+
+      if (is_open () && c != traits_type::eof ())
+      {
+        // Store last character in the space we reserved in open(). Note
+        // that pbump() doesn't do any checks.
+        //
+        *pptr () = traits_type::to_char_type (c);
+        pbump (1);
+
+        save ();
+        r = c;
+      }
+
+      return r;
+    }
+
+    void ostreambuf::
+    save ()
+    {
+      c_.in = pptr () - pbase ();
+      off_ += c_.in;
+
+      // We assume this is the end if the input buffer is not full.
+      //
+      end_ = (c_.in != c_.ic);
+      c_.next (end_);
+
+      if (c_.on != 0) // next() may just buffer the data.
+        write (c_.ob, c_.on);
+
+      setp (c_.ib, c_.ib + c_.ic - 1);
+    }
+
+    streamsize ostreambuf::
+    xsputn (const char_type* s, streamsize sn)
+    {
+      if (!is_open () || end_)
+        return 0;
+
+      // To avoid further 'signed/unsigned comparison' compiler warnings.
+      //
+      size_t n (static_cast<size_t> (sn));
+
+      // The plan is to keep copying the data into the input buffer and
+      // calling save() (our compressor API currently has no way of avoiding
+      // the copy).
+      //
+      while (n != 0)
+      {
+        // Amount of free space in the buffer (including the extra byte
+        // we've reserved).
+        //
+        size_t an (epptr () - pptr () + 1);
+
+        size_t m (n > an ? an : n); // Number of bytes to copy this round.
+        memcpy (pptr (), s, m);
+        pbump (static_cast<int> (m));
+
+        if (n < an)
+          break; // All fitted with at least 1 byte left.
+
+        save ();
+
+        s += m;
+        n -= m;
+      }
+
+      return sn;
+    }
+  }
+}
diff --git a/libbutl/lz4-stream.hxx b/libbutl/lz4-stream.hxx
new file mode 100644
index 0000000..b11c0a2
--- /dev/null
+++ b/libbutl/lz4-stream.hxx
@@ -0,0 +1,280 @@
+// file      : libbutl/lz4-stream.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <memory>  // unique_ptr
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <utility> // move()
+#include <istream>
+#include <ostream>
+#include <cassert>
+
+#include <libbutl/lz4.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/bufstreambuf.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  namespace lz4
+  {
+    // istream
+    //
+
+    class LIBBUTL_SYMEXPORT istreambuf: public bufstreambuf
+    {
+    public:
+      optional<std::uint64_t>
+      open (std::istream&, bool end); // See istream::open() for the result.
+
+      bool
+      is_open () const {return is_ != nullptr;}
+
+      void
+      close ();
+
+    public:
+      using base = bufstreambuf;
+
+      // basic_streambuf input interface.
+      //
+    public:
+      virtual int_type
+      underflow () override;
+
+      // Direct access to the get area. Use with caution.
+      //
+      using base::gptr;
+      using base::egptr;
+      using base::gbump;
+
+      // Return the (logical) position of the next byte to be read.
+      //
+      using base::tellg;
+
+    private:
+      std::pair<std::size_t, bool>
+      read (char*, std::size_t);
+
+      bool
+      load ();
+
+    private:
+      std::istream* is_ = nullptr; // Non-null while open (see is_open()).
+      bool end_;                   // NOTE(review): uninitialized until open()?
+      decompressor d_;
+      std::unique_ptr<char[]> ib_; // Decompressor input buffer.
+      std::unique_ptr<char[]> ob_; // Decompressor output buffer.
+      std::size_t h_;              // Decompressor next chunk hint.
+    };
+
+    // Typical usage:
+    //
+    //   try
+    //   {
+    //     ifdstream ifs (..., fdopen_mode::binary, ifdstream::badbit);
+    //     lz4::istream izs (ifs, true /* end */);
+    //     ... // Read from izs.
+    //   }
+    //   catch (const invalid_argument& e)
+    //   {
+    //     ... // Invalid compressed content, call e.what() for description.
+    //   }
+    //   catch (/* ifdstream exceptions */)
+    //   {
+    //     ...
+    //   }
+    //
+    // See class decompressor for details on semantics and exceptions thrown.
+    //
+    // @@ TODO: get rid of badbit-only requirement.
+    // @@ TODO: re-opening support (will need decompressor reset).
+    //
+    class LIBBUTL_SYMEXPORT istream: public std::istream
+    {
+    public:
+      explicit
+      istream (iostate e = badbit | failbit)
+        : std::istream (&buf_)
+      {
+        assert (e & badbit); // badbit must always be among the exceptions.
+        exceptions (e);
+      }
+
+      // The underlying input stream is expected to throw on badbit but not
+      // failbit. If end is true, then on reaching the end of compressed data
+      // verify there is no more input.
+      //
+      // Note that this implementation does not support handling streams of
+      // compressed contents (end is false) that may include individual
+      // contents that uncompress to 0 bytes (see istreambuf::open()
+      // implementation for details).
+      //
+      istream (std::istream& is, bool end, iostate e = badbit | failbit)
+        : istream (e)
+      {
+        open (is, end);
+      }
+
+      // Return decompressed content size, if available.
+      //
+      optional<std::uint64_t>
+      open (std::istream& is, bool end)
+      {
+        return buf_.open (is, end);
+      }
+
+      bool
+      is_open () const
+      {
+        return buf_.is_open ();
+      }
+
+      // Signal that no further uncompressed input will be read.
+      //
+      void
+      close ()
+      {
+        return buf_.close ();
+      }
+
+    private:
+      istreambuf buf_;
+    };
+
+    // ostream
+    //
+
+    class LIBBUTL_SYMEXPORT ostreambuf: public bufstreambuf
+    {
+    public:
+      void
+      open (std::ostream&,
+            int compression_level,
+            int block_size_id,
+            optional<std::uint64_t> content_size);
+
+      bool
+      is_open () const {return os_ != nullptr;}
+
+      void
+      close ();
+
+      virtual
+      ~ostreambuf () override; // Calls close().
+
+    public:
+      using base = bufstreambuf;
+
+      // basic_streambuf output interface.
+      //
+      // Note that syncing the input buffer before the end doesn't make much
+      // sense (it will just get buffered in the compressor). In fact, it can
+      // break our single-shot compression arrangement (for compatibility with
+      // the lz4 utility). Thus we inherit noop sync() from the base.
+      //
+    public:
+      virtual int_type
+      overflow (int_type) override;
+
+      virtual std::streamsize
+      xsputn (const char_type*, std::streamsize) override;
+
+      // Return the (logical) position of the next byte to be written.
+      //
+      using base::tellp;
+
+    private:
+      void
+      write (char*, std::size_t);
+
+      void
+      save ();
+
+    private:
+      std::ostream* os_ = nullptr; // Non-null while open (see is_open()).
+      bool end_;                   // Set by save() on a non-full input buffer.
+      compressor c_;
+      std::unique_ptr<char[]> ib_; // Compressor input buffer.
+      std::unique_ptr<char[]> ob_; // Compressor output buffer.
+    };
+
+    // Typical usage:
+    //
+    //   try
+    //   {
+    //     ofdstream ofs (..., fdopen_mode::binary);
+    //     lz4::ostream ozs (ofs, 9, 4 /* 64KB */, nullopt /* content_size */);
+    //
+    //     ... // Write to ozs.
+    //
+    //     ozs.close ();
+    //     ofs.close ();
+    //   }
+    //   catch (/* ofdstream exceptions */)
+    //   {
+    //     ...
+    //   }
+    //
+    // See class compressor for details on semantics and exceptions thrown.
+    //
+    // @@ TODO: re-opening support (will need compressor reset).
+    //
+    class LIBBUTL_SYMEXPORT ostream: public std::ostream
+    {
+    public:
+      explicit
+      ostream (iostate e = badbit | failbit)
+        : std::ostream (&buf_)
+      {
+        assert (e & badbit); // badbit must always be among the exceptions.
+        exceptions (e);
+      }
+
+      // The underlying output stream is expected to throw on badbit or
+      // failbit.
+      //
+      // See compress() for the description of the compression level, block
+      // size and content size arguments.
+      //
+      ostream (std::ostream& os,
+               int compression_level,
+               int block_size_id,
+               optional<std::uint64_t> content_size,
+               iostate e = badbit | failbit)
+        : ostream (e)
+      {
+        open (os, compression_level, block_size_id, content_size);
+      }
+
+      void
+      open (std::ostream& os,
+            int compression_level,
+            int block_size_id,
+            optional<std::uint64_t> content_size)
+      {
+        buf_.open (os, compression_level, block_size_id, content_size);
+      }
+
+      bool
+      is_open () const
+      {
+        return buf_.is_open ();
+      }
+
+      // Signal that no further uncompressed output will be written.
+      //
+      void
+      close ()
+      {
+        return buf_.close (); // Forwards to ostreambuf::close().
+      }
+
+    private:
+      ostreambuf buf_;
+    };
+  }
+}
diff --git a/libbutl/lz4.c b/libbutl/lz4.c
new file mode 100644
index 0000000..3f0e430
--- /dev/null
+++ b/libbutl/lz4.c
@@ -0,0 +1,2495 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-present, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+#  define LZ4_HEAPMODE 0
+#endif
+
+/*
+ * LZ4_ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
+
+
+/*-************************************
+*  CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets whose assembly generation depends on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
+#  if defined(__GNUC__) && \
+  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define LZ4_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+#    define LZ4_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
+#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+
+/*-************************************
+*  Dependency
+**************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#  define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
+#include "lz4.h"
+/* see also "memory routines" below */
+
+
+/*-************************************
+*  Compiler Options
+**************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
+#  include <intrin.h>               /* only present in VS2005+ */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#endif  /* _MSC_VER */
+
+#ifndef LZ4_FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define LZ4_FORCE_INLINE static __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define LZ4_FORCE_INLINE static inline
+#      endif
+#    else
+#      define LZ4_FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
+#  undef LZ4_FORCE_INLINE
+#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
+#else
+#  define LZ4_FORCE_O2
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
+#else
+#  define expect(expr,value)    (expr)
+#endif
+
+#ifndef likely
+#define likely(expr)     expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
+#define unlikely(expr)   expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST  /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
+
+/*-************************************
+*  Memory routines
+**************************************/
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void  LZ4_free(void* p);
+# define ALLOC(s)          LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p)        LZ4_free(p)
+#else
+# include <stdlib.h>   /* malloc, calloc, free */
+# define ALLOC(s)          malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p)        free(p)
+#endif
+
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT(p,v,s)   memset((p),(v),(s))
+
+
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+   static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                          \
+        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+            fprintf(stderr, __FILE__ ": ");           \
+            fprintf(stderr, __VA_ARGS__);             \
+            fprintf(stderr, " \n");                   \
+    }   }
+#else
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+    return ((size_t)ptr & (alignment -1)) == 0;   /* mask test : only meaningful when alignment is a power of 2 */
+}
+
+
+/*-************************************
+*  Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+  typedef U64    reg_t;   /* 64-bits in x32 mode */
+#else
+  typedef size_t reg_t;   /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
+static unsigned LZ4_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];   /* 1 when the low-order byte of `u` is stored first in memory, 0 otherwise */
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* method 2 (direct access) : lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* method 1 : __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign;
+
+static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
+static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
+
+#else  /* method 0 (also used when LZ4_FORCE_MEMORY_ACCESS is undefined) : safe and portable access using memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)
+{
+    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static reg_t LZ4_read_ARCH(const void* memPtr)
+{
+    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+    LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+static void LZ4_write32(void* memPtr, U32 value)
+{
+    LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read16(memPtr);   /* native byte order is already little endian */
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));   /* p[0] is the low byte, p[1] the high byte */
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if (LZ4_isLittleEndian()) {
+        LZ4_write16(memPtr, value);   /* native byte order is already little endian */
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;          /* low byte first */
+        p[1] = (BYTE)(value>>8);      /* then the high byte */
+    }
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);   /* do-while : always copies at least 8 bytes, then 8-byte steps until d >= e */
+}
+
+static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+#    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && !defined(__clang__)
+     /* On aarch64, we disable this optimization for clang because on certain
+      * mobile chipsets, performance is reduced with clang. For information
+      * refer to https://github.com/lz4/lz4/pull/707 */
+#    define LZ4_FAST_DEC_LOOP 1
+#  else
+#    define LZ4_FAST_DEC_LOOP 0
+#  endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    assert(srcPtr + offset == dstPtr);   /* source starts exactly `offset` bytes before destination */
+    if (offset < 8) {   /* source and destination overlap within the first 8 bytes */
+        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];   /* table-driven adjustment of srcPtr, indexed by offset */
+        LZ4_memcpy(dstPtr+4, srcPtr, 4);
+        srcPtr -= dec64table[offset];   /* second table-driven adjustment before the bulk copy */
+        dstPtr += 8;
+    } else {   /* offset >= 8 : the two 8-byte ranges do not overlap */
+        LZ4_memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }
+
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);   /* may write beyond dstEnd (see LZ4_wildCopy8) */
+}
+
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);   /* do-while : always copies at least 32 bytes */
+}
+
+/* LZ4_memcpy_using_offset()  presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    BYTE v[8];   /* 8-byte repetition of the source pattern, built for offsets 1, 2 and 4 */
+
+    assert(dstEnd >= dstPtr + MINMATCH);
+
+    switch(offset) {
+    case 1:   /* one byte repeated 8 times */
+        MEM_INIT(v, *srcPtr, 8);
+        break;
+    case 2:   /* 2-byte pattern repeated 4 times */
+        LZ4_memcpy(v, srcPtr, 2);
+        LZ4_memcpy(&v[2], srcPtr, 2);
+        LZ4_memcpy(&v[4], v, 4);
+        break;
+    case 4:   /* 4-byte pattern repeated twice */
+        LZ4_memcpy(v, srcPtr, 4);
+        LZ4_memcpy(&v[4], srcPtr, 4);
+        break;
+    default:   /* every other offset is delegated to the generic routine */
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
+    }
+
+    LZ4_memcpy(dstPtr, v, 8);   /* first 8 bytes are written unconditionally */
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {   /* whole 8-byte stores : the last one may run past dstEnd */
+        LZ4_memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
+#endif
+
+
+/*-************************************
+*  Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (reg_t val)
+{
+    assert(val != 0);   /* LZ4_count() only passes a non-zero XOR difference */
+    if (LZ4_isLittleEndian()) {   /* first byte in memory is the low-order byte : count trailing zero bits, then /8 */
+        if (sizeof(val) == 8) {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+            return (unsigned)_tzcnt_u64(val) >> 3;
+#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64(&r, (U64)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
+#       else
+            /* software fallback, used when no bit-scan intrinsic was selected above */
+            const U64 m = 0x0101010101010101ULL;
+            val ^= val - 1;
+            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
+#       endif
+        } else /* 32 bits */ {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
+#       else
+            const U32 m = 0x01010101;
+            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+#       endif
+        }
+    } else   /* Big Endian CPU : first byte in memory is the high-order byte, so count leading zero bits, then /8 */ {
+        if (sizeof(val)==8) {
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
+#       else
+#if 1
+            /* this method is probably faster,
+             * but adds a 128 bytes lookup table */
+            static const unsigned char ctz7_tab[128] = {
+                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+            };
+            U64 const mask = 0x0101010101010101ULL;
+            U64 const t = (((val >> 8) - mask) | val) & mask;
+            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+            /* this method doesn't consume memory space like the previous one,
+             * but it contains several branches,
+             * that may end up slowing execution */
+            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
+            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+            Note that this code path is never triggered in 32-bits mode. */
+            unsigned r;
+            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#endif
+#       endif
+        } else /* 32 bits */ {
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clz((U32)val) >> 3;
+#       else
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+            return (unsigned)val ^ 3;
+#       endif
+        }
+    }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;   /* the result is measured from here */
+
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {   /* a whole word can still be read before pInLimit */
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);   /* mismatch in the first word : pIn == pStart, so this is the whole count */
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {   /* keep comparing one word at a time */
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    /* fewer than STEPSIZE bytes remain : compare 4 bytes (8-byte words only), then 2, then 1 */
+    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+*  Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+
+/*-************************************
+*  Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ *                   being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
+ *                   content is in a separate context, pointed to by
+ *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ *                   entries in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+
+/*-************************************
+*  Local Utils
+**************************************/
+/* Library version as a single integer (value of LZ4_VERSION_NUMBER). */
+int LZ4_versionNumber (void)
+{
+    return LZ4_VERSION_NUMBER;
+}
+/* Library version as a string (value of LZ4_VERSION_STRING). */
+const char* LZ4_versionString(void)
+{
+    return LZ4_VERSION_STRING;
+}
+/* Function form of the LZ4_COMPRESSBOUND() macro, evaluated for `isize`. */
+int LZ4_compressBound(int isize)
+{
+    return LZ4_COMPRESSBOUND(isize);
+}
+/* Returns LZ4_STREAMSIZE (presumably the byte size a caller must reserve for
+ * an externally allocated compression state -- confirm against lz4.h). */
+int LZ4_sizeofState(void)
+{
+    return LZ4_STREAMSIZE;
+}
+
+
+/*-************************************
+*  Internal Definitions used in Tests
+**************************************/
+/* Give the prototypes below C linkage when this file is compiled as C++. */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* Prototype only; the definition is not part of this section of the file. */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+/* Prototype only; the definition is not part of this section of the file. */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*-******************************
+*  Compression functions
+********************************/
+/* LZ4_hash4() :
+ * Multiplicative hash of a 32-bit sequence.
+ * The byU16 table gets one more hash bit than the other table types.
+ */
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+{
+    int const hashBits = (tableType == byU16) ? (LZ4_HASHLOG+1) : LZ4_HASHLOG;
+    return ((sequence * 2654435761U) >> ((MINMATCH*8) - hashBits));
+}
+
+/* LZ4_hash5() :
+ * Multiplicative hash of a 64-bit sequence. Three bytes are shifted out
+ * before multiplying (direction and prime depend on host endianness), and
+ * the top hashLog bits of the product are kept.
+ */
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    const U64 prime5bytes = 889523592379ULL;
+    const U64 prime8bytes = 11400714785074694791ULL;
+    const U64 product = LZ4_isLittleEndian()
+                      ? ((sequence << 24) * prime5bytes)
+                      : ((sequence >> 24) * prime8bytes);
+    return (U32)(product >> (64 - hashLog));
+}
+
+/* LZ4_hashPosition() :
+ * Hash of the bytes at `p`. Uses LZ4_hash5() when registers are 8 bytes wide
+ * and the table is not byU16; uses LZ4_hash4() on a 32-bit read otherwise.
+ */
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+{
+    if ((sizeof(reg_t)==8) && (tableType != byU16)) {
+        return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+    } else {
+        return LZ4_hash4(LZ4_read32(p), tableType);
+    }
+}
+
+/* LZ4_clearHash() :
+ * Resets slot `h` of the hash table (NULL for byPtr, 0 for byU32/byU16).
+ * Any other table type is a programming error and trips an assert.
+ */
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byPtr) {
+        ((const BYTE**)tableBase)[h] = NULL;
+        return;
+    }
+    if (tableType == byU32) {
+        ((U32*)tableBase)[h] = 0;
+        return;
+    }
+    if (tableType == byU16) {
+        ((U16*)tableBase)[h] = 0;
+        return;
+    }
+    /* clearedTable, or an unknown value : illegal! */
+    assert(0);
+}
+
+/* LZ4_putIndexOnHash() :
+ * Stores index `idx` in slot `h`. Only the index-based tables (byU32, byU16)
+ * are accepted; every other table type trips an assert.
+ */
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+{
+    if (tableType == byU32) {
+        ((U32*)tableBase)[h] = idx;
+    } else if (tableType == byU16) {
+        assert(idx < 65536);   /* must fit in a 16-bit slot */
+        ((U16*)tableBase)[h] = (U16)idx;
+    } else {
+        /* clearedTable, byPtr, or an unknown value : illegal! */
+        assert(0);
+    }
+}
+
+/* LZ4_putPositionOnHash() :
+ * Records position `p` in slot `h`: as a raw pointer for byPtr, or as the
+ * offset (p - srcBase) truncated to the slot width for byU32 / byU16.
+ */
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                            void* tableBase, tableType_t const tableType,
+                                            const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byU16:
+        ((U16*)tableBase)[h] = (U16)(p-srcBase);
+        break;
+    case byU32:
+        ((U32*)tableBase)[h] = (U32)(p-srcBase);
+        break;
+    case byPtr:
+        ((const BYTE**)tableBase)[h] = p;
+        break;
+    case clearedTable:   /* illegal! */
+        assert(0);
+        break;
+    }
+}
+
+/* LZ4_putPosition() :
+ * Hashes the bytes at `p` and records `p` in the matching table slot.
+ */
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    LZ4_putPositionOnHash(p, LZ4_hashPosition(p, tableType), tableBase, tableType, srcBase);
+}
+
+/* LZ4_getIndexOnHash() :
+ * Reads the index stored in slot `h` of an index-based hash table.
+ * The caller turns it into a position with base+index or dictBase+index.
+ * - tableType must be byU32 or byU16; anything else asserts and yields 0.
+ * - h must already be within the bounds of the table.
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    switch (tableType)
+    {
+    case byU32:
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+        return ((const U32*) tableBase)[h];
+    case byU16:
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+        return ((const U16*) tableBase)[h];
+    default:
+        break;
+    }
+    /* forbidden case */
+    assert(0);
+    return 0;
+}
+
+/* LZ4_getPositionOnHash() :
+ * Returns the position recorded in slot `h`: the stored pointer for byPtr,
+ * or srcBase plus the stored index for the index-based tables.
+ */
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) {
+        return ((const BYTE* const*) tableBase)[h];
+    }
+    if (tableType == byU32) {
+        return srcBase + ((const U32*) tableBase)[h];
+    }
+    /* every remaining table type is read as 16-bit indexes, so a value is always returned */
+    return srcBase + ((const U16*) tableBase)[h];
+}
+
+/* LZ4_getPosition() :
+ * Hashes the bytes at `p` and returns the position currently recorded
+ * in the matching table slot.
+ */
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
+{
+    return LZ4_getPositionOnHash(LZ4_hashPosition(p, tableType), tableBase, tableType, srcBase);
+}
+
+/* LZ4_prepareTable() :
+ * Makes the hash table of `cctx` usable for a new, independent compression
+ * of `inputSize` bytes using `tableType`, then drops any dictionary history.
+ * A table marked clearedTable is used as is; otherwise it is either wiped
+ * or kept, depending on the conditions below.
+ */
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+                 const int inputSize,
+                 const tableType_t tableType)
+{
+    tableType_t const previousType = (tableType_t)cctx->tableType;
+
+    /* An unused table is guaranteed to be zeroed out, hence safe in any mode.
+     * A used one must be checked : keep it as is, or reset it. */
+    if (previousType != clearedTable) {
+        int mustReset;
+        assert(inputSize >= 0);
+        mustReset = (previousType != tableType)
+                 || ((tableType == byU16) && (cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU))
+                 || ((tableType == byU32) && (cctx->currentOffset > 1 GB))
+                 || (tableType == byPtr)
+                 || (inputSize >= 4 KB);
+        if (mustReset) {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = (U32)clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Inserting a gap pushes every older entry more than LZ4_DISTANCE_MAX
+     * back, which is faster than compressing without one. Starting from
+     * currentOffset == 0 is faster still, so that case is left untouched.
+     */
+    if ((tableType == byU32) && (cctx->currentOffset != 0)) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Last step : forget any history */
+    cctx->dictCtx = NULL;
+    cctx->dictionary = NULL;
+    cctx->dictSize = 0;
+}
+
+/** LZ4_compress_generic_validated() :
+ *  Core block compressor; inlined, to ensure branches are decided at compilation time.
+ *  Presumed already validated at this stage:
+ *  - source != NULL
+ *  - inputSize > 0
+ *
+ *  cctx            : stream state; its hashTable, currentOffset, dictSize, dictCtx
+ *                    and tableType fields are read and updated here.
+ *  source/inputSize: bytes to compress.
+ *  dest            : output buffer; maxOutputSize is only enforced when
+ *                    outputDirective is limitedOutput or fillOutput.
+ *  inputConsumed   : written only when outputDirective == fillOutput
+ *                    (number of source bytes actually encoded).
+ *  tableType       : hash table layout; byPtr is only supported with noDict.
+ *  dictDirective / dictIssue : how preceding content is located and filtered.
+ *  acceleration    : must be >= 1; scales the initial search step.
+ *
+ *  @return : number of bytes written into dest, or 0 when
+ *            - outputDirective == fillOutput and maxOutputSize < 1,
+ *            - tableType == byU16 and inputSize >= LZ4_64Klimit,
+ *            - outputDirective == limitedOutput and the output does not fit.
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    int result;
+    const BYTE* ip = (const BYTE*) source;
+
+    U32 const startIndex = cctx->currentOffset;
+    const BYTE* base = (const BYTE*) source - startIndex;
+    const BYTE* lowLimit;
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+    const U32 dictSize =
+        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
+
+    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictIssue == dictSmall */
+    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
+                            dictionary + dictSize - dictCtx->currentOffset :
+                            dictionary + dictSize - startIndex;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 offset = 0;
+    U32 forwardH;
+
+    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+    assert(ip != NULL);
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
+    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
+    assert(acceleration >= 1);
+
+    /* lowest address a match may extend back to within the current segment */
+    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+    /* Update context state */
+    if (dictDirective == usingDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
+    }
+    cctx->currentOffset += (U32)inputSize;
+    cctx->tableType = (U32)tableType;
+
+    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+        const BYTE* filledIp;
+
+        /* Find a match */
+        if (tableType == byPtr) {
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                /* the step grows by one every (1 << LZ4_skipTrigger) failed probes */
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+            } while ( (match+LZ4_DISTANCE_MAX < ip)
+                   || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+        } else {   /* byU32, byU16 */
+
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                U32 const current = (U32)(forwardIp - base);
+                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+                assert(matchIndex <= current);
+                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+                ip = forwardIp;
+                forwardIp += step;
+                /* the step grows by one every (1 << LZ4_skipTrigger) failed probes */
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                if (dictDirective == usingDictCtx) {
+                    if (matchIndex < startIndex) {
+                        /* there was no match, try the dictionary */
+                        assert(tableType == byU32);
+                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                        match = dictBase + matchIndex;
+                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else if (dictDirective==usingExtDict) {
+                    if (matchIndex < startIndex) {
+                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
+                        assert(startIndex - matchIndex >= MINMATCH);
+                        match = dictBase + matchIndex;
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else {   /* single continuous memory segment */
+                    match = base + matchIndex;
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
+                assert(matchIndex < current);
+                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
+
+                if (LZ4_read32(match) == LZ4_read32(ip)) {
+                    if (maybe_extMem) offset = current - matchIndex;
+                    break;   /* match found */
+                }
+
+            } while(1);
+        }
+
+        /* Catch up : extend the match backwards while the preceding bytes are equal */
+        filledIp = ip;
+        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        /* Encode Literals */
+        {   unsigned const litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+            if ((outputDirective == fillOutput) &&
+                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+                op--;
+                goto _last_literals;
+            }
+            if (litLength >= RUN_MASK) {
+                int len = (int)(litLength - RUN_MASK);
+                *token = (RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy8(op, anchor, op+litLength);
+            op+=litLength;
+            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
+        }
+
+_next_match:
+        /* at this stage, the following variables must be correctly set :
+         * - ip : at start of LZ operation
+         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
+         * - offset : if maybe_extMem==1 (constant)
+         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+         */
+
+        if ((outputDirective == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+            /* the match was too close to the end, rewind and go to last literals */
+            op = token;
+            goto _last_literals;
+        }
+
+        /* Encode Offset */
+        if (maybe_extMem) {   /* static test */
+            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+            LZ4_writeLE16(op, (U16)offset); op+=2;
+        } else  {
+            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
+            assert(ip-match <= LZ4_DISTANCE_MAX);
+            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+        }
+
+        /* Encode MatchLength */
+        {   unsigned matchCode;
+
+            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+              && (lowLimit==dictionary) /* match within extDict */ ) {
+                const BYTE* limit = ip + (dictEnd-match);
+                assert(dictEnd > match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += (size_t)matchCode + MINMATCH;
+                if (ip==limit) {
+                    /* the match reached the end of the dictionary : keep counting from the start of source */
+                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
+                    matchCode += more;
+                    ip += more;
+                }
+                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
+            } else {
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += (size_t)matchCode + MINMATCH;
+                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
+            }
+
+            if ((outputDirective) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+                if (outputDirective == fillOutput) {
+                    /* Match description too long : reduce it */
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+                    ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
+                    matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
+                } else {
+                    assert(outputDirective == limitedOutput);
+                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+                }
+            }
+            if (matchCode >= ML_MASK) {
+                *token += ML_MASK;
+                matchCode -= ML_MASK;
+                /* emit the 255-valued length bytes four at a time */
+                LZ4_write32(op, 0xFFFFFFFF);
+                while (matchCode >= 4*255) {
+                    op+=4;
+                    LZ4_write32(op, 0xFFFFFFFF);
+                    matchCode -= 4*255;
+                }
+                op += matchCode / 255;
+                *op++ = (BYTE)(matchCode % 255);
+            } else
+                *token += (BYTE)(matchCode);
+        }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip >= mflimitPlusOne) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+        /* Test next position */
+        if (tableType == byPtr) {
+
+            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+            if ( (match+LZ4_DISTANCE_MAX >= ip)
+              && (LZ4_read32(match) == LZ4_read32(ip)) )
+            { token=op++; *token=0; goto _next_match; }
+
+        } else {   /* byU32, byU16 */
+
+            U32 const h = LZ4_hashPosition(ip, tableType);
+            U32 const current = (U32)(ip-base);
+            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if (dictDirective == usingDictCtx) {
+                if (matchIndex < startIndex) {
+                    /* there was no match, try the dictionary */
+                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                    matchIndex += dictDelta;
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;  /* required for match length counter */
+                }
+            } else if (dictDirective==usingExtDict) {
+                if (matchIndex < startIndex) {
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;   /* required for match length counter */
+                }
+            } else {   /* single memory segment */
+                match = base + matchIndex;
+            }
+            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+                token=op++;
+                *token=0;
+                if (maybe_extMem) offset = current - matchIndex;
+                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+                goto _next_match;
+            }
+        }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRun = (size_t)(iend - anchor);
+        if ( (outputDirective) &&  /* Check output buffer overflow */
+            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+            if (outputDirective == fillOutput) {
+                /* adapt lastRun to fill 'dst' */
+                assert(olimit >= op);
+                lastRun  = (size_t)(olimit-op) - 1/*token*/;
+                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
+            } else {
+                assert(outputDirective == limitedOutput);
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        LZ4_memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;
+        op += lastRun;
+    }
+
+    /* report how much of the input was actually encoded */
+    if (outputDirective == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+    return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, so that the directive arguments are resolved at compilation time.
+ *  Rejects unsupported sizes, emits the one-byte empty block for srcSize == 0
+ *  (src may then be NULL), and hands every other input to
+ *  LZ4_compress_generic_validated().
+ *  @return : number of bytes written into dst, or 0 on failure */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    /* the unsigned comparison also catches negative sizes */
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;
+
+    if (srcSize != 0) {
+        /* regular case : real input to compress */
+        assert(src != NULL);
+        return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                    inputConsumed,
+                    dstCapacity, outputDirective,
+                    tableType, dictDirective, dictIssue, acceleration);
+    }
+
+    /* empty input : the whole block is a single zero token */
+    if ((outputDirective != notLimited) && (dstCapacity <= 0)) return 0;   /* nowhere to write the token */
+    DEBUGLOG(5, "Generating an empty block");
+    assert(outputDirective == notLimited || dstCapacity >= 1);
+    assert(dst != NULL);
+    dst[0] = 0;
+    if (outputDirective == fillOutput) {
+        assert (inputConsumed != NULL);
+        *inputConsumed = 0;
+    }
+    return 1;
+}
+
+
+/* LZ4_compress_fast_extState() :
+ * One-shot compression using a caller-provided state buffer, which is
+ * (re)initialized here through LZ4_initStream().
+ * `acceleration` is clamped to [1, LZ4_ACCELERATION_MAX], values below 1
+ * being replaced by LZ4_ACCELERATION_DEFAULT.
+ * Output bound checks are skipped when maxOutputSize is at least
+ * LZ4_compressBound(inputSize).
+ */
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        /* dest is large enough for any outcome : no bound checks */
+        if (inputSize >= LZ4_64Klimit) {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+    }
+
+    /* dest may be too small : bound-checked variant */
+    if (inputSize >= LZ4_64Klimit) {
+        const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+    }
+    return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * Same job as LZ4_compress_fast_extState(), minus the costly state
+ * initialization : the hash table is only prepared through LZ4_prepareTable().
+ *
+ * This is only safe when the state buffer is already known to be correctly
+ * initialized (see the comment on LZ4_resetStream_fast() in lz4.h for what
+ * "correctly initialized" means).
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {
+        /* dst is large enough for any outcome : no bound checks */
+        if (srcSize >= LZ4_64Klimit) {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+        LZ4_prepareTable(ctx, srcSize, byU16);
+        /* a kept (non-reset) table may hold stale entries : filter them with dictSmall */
+        if (ctx->currentOffset == 0) {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        }
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, byU16, noDict, dictSmall, acceleration);
+    }
+
+    /* dst may be too small : bound-checked variant */
+    if (srcSize >= LZ4_64Klimit) {
+        const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        LZ4_prepareTable(ctx, srcSize, tableType);
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+    }
+    LZ4_prepareTable(ctx, srcSize, byU16);
+    if (ctx->currentOffset == 0) {
+        return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+    }
+    return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, dictSmall, acceleration);
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    int result;   /* value handed back by LZ4_compress_fast_extState() */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned; explicit cast, as in LZ4_compress_destSize(), so this also builds as C++ */
+    if (ctxPtr == NULL) return 0;   /* the temporary state could not be allocated */
+#else
+    LZ4_stream_t ctx;
+    LZ4_stream_t* const ctxPtr = &ctx;
+#endif
+    /* one-shot compression : the state is temporary and only lives for the duration of this call */
+    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+#if (LZ4_HEAPMODE)
+    FREEMEM(ctxPtr);
+#endif
+    return result;
+}
+
+
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
+{   int const acceleration = 1;   /* LZ4_compress_default() is LZ4_compress_fast() with acceleration pinned to 1 */
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, acceleration);
+}
+
+
+/* Warning : this function leaves the stream in an unclean/broken state.
+ * The same state must be reset before it is used again with a _fastReset() or
+ * _continue() call. */
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    void* const s = LZ4_initStream(state, sizeof (*state));
+    assert(s != NULL); (void)s;
+
+    /* destination can hold the worst case : compression success is guaranteed, take the regular path */
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr))
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    /* otherwise use fillOutput mode; srcSizePtr is forwarded to LZ4_compress_generic() (other callers pass NULL there) */
+    if (*srcSizePtr < LZ4_64Klimit)
+        return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+    {   tableType_t const wideTable = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, wideTable, noDict, noDictIssue, 1);
+    }
+}
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    int cSize;   /* value handed back by LZ4_compress_destSize_extState() */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* stream = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (stream == NULL) return 0;
+#else
+    LZ4_stream_t localStream;
+    LZ4_stream_t* stream = &localStream;
+#endif
+    /* no preparation needed here : the callee initializes the state itself */
+    cSize = LZ4_compress_destSize_extState(stream, src, dst, srcSizePtr, targetDstSize);
+#if (LZ4_HEAPMODE)
+    FREEMEM(stream);
+#endif
+    return cSize;
+}
+
+
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* const newStream = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* compilation fails here when LZ4_STREAMSIZE is too small */
+    DEBUGLOG(4, "LZ4_createStream %p", newStream);
+    /* a failed allocation is returned as NULL, untouched; otherwise the state is initialized first */
+    if (newStream != NULL) LZ4_initStream(newStream, sizeof(*newStream));
+    return newStream;
+}
+
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char lead; LZ4_stream_t stream; } aligned_probe;   /* padding after `lead` exposes the alignment */
+    return sizeof(aligned_probe) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* alignment test effectively disabled */
+#endif
+}
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    if ( (buffer == NULL)                       /* no buffer */
+      || (size < sizeof(LZ4_stream_t))          /* buffer too small */
+      || (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) ) { return NULL; }   /* buffer misaligned */
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));   /* only the internal state gets zeroed */
+    return (LZ4_stream_t*)buffer;
+}
+
+/* LZ4_resetStream() is now deprecated :
+ * LZ4_initStream() is more general and should be preferred */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{   size_t const stateSize = sizeof(LZ4_stream_t_internal);   /* only the internal state is cleared */
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+    MEM_INIT(LZ4_stream, 0, stateSize);
+}
+
+void LZ4_resetStream_fast(LZ4_stream_t* ctx)
+{   LZ4_prepareTable(&ctx->internal_donotuse, 0, byU32);   /* no full re-initialization : the table is only prepared, for size 0 and byU32 */
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    if (LZ4_stream == NULL) { return 0; }   /* freeing NULL is supported : nothing to do */
+    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
+    FREEMEM(LZ4_stream);
+    return 0;   /* 0 is returned in every case */
+}
+
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* const cctx = &LZ4_dict->internal_donotuse;
+    const tableType_t hashMode = byU32;
+    const BYTE* const dictEnd = (const BYTE*)dictionary + dictSize;
+    const BYTE* cursor = (const BYTE*)dictionary;
+    const BYTE* base;
+
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* A full reset of the context is required here :
+     * merely continuing it with prepareTable()
+     * would leave a risk of generating overflowing matchIndex
+     * when compressing with this dictionary */
+    LZ4_resetStream(LZ4_dict);
+
+    /* The offset always advances by 64 KB : a longer dict gets
+     * truncated to its last 64k, and a shorter one still advances
+     * by a whole window length. This provides the guarantee that
+     * there are only valid offsets in the window, which allows an optimization
+     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+    cctx->currentOffset += 64 KB;
+
+    /* fewer than HASH_UNIT bytes : no position can be referenced, report 0 */
+    if (dictSize < (int)HASH_UNIT) return 0;
+
+    if ((dictEnd - cursor) > 64 KB) cursor = dictEnd - 64 KB;   /* keep the last 64 KB only */
+    /* base is chosen so that dictEnd sits at distance currentOffset from it */
+    base = dictEnd - cctx->currentOffset;
+    cctx->dictionary = cursor;
+    cctx->dictSize = (U32)(dictEnd - cursor);
+    cctx->tableType = (U32)hashMode;
+
+    /* reference one position out of every 3, while at least HASH_UNIT bytes remain before dictEnd */
+    for ( ; cursor <= dictEnd-HASH_UNIT; cursor += 3) {
+        LZ4_putPosition(cursor, cctx->hashTable, hashMode, base);
+    }
+
+    return (int)cctx->dictSize;
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
+    LZ4_stream_t_internal* const wctx = &workingStream->internal_donotuse;
+    const LZ4_stream_t_internal* dictCtx = NULL;
+    if (dictionaryStream != NULL) dictCtx = &dictionaryStream->internal_donotuse;
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    /* With a current offset of zero, the external dictionary context
+     * would never be looked into, since there is no value a table
+     * entry can take that indicates a miss. The offset is therefore
+     * bumped to something non-zero whenever a dictionary stream is
+     * provided (even one that turns out to be empty).
+     */
+    if ((dictCtx != NULL) && (wctx->currentOffset == 0)) {
+        wctx->currentOffset = 64 KB;
+    }
+
+    /* An empty dictionary is not actually attached.
+     */
+    if ((dictCtx != NULL) && (dictCtx->dictSize == 0)) {
+        dictCtx = NULL;
+    }
+    wctx->dictCtx = dictCtx;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+    assert(nextSize >= 0);
+    /* up to 0x80000000 there is no potential ptrdiff_t overflow (32-bits mode) : leave everything as is */
+    if (LZ4_dict->currentOffset + (unsigned)nextSize <= 0x80000000) return;
+    {   U32 const shift = LZ4_dict->currentOffset - 64 KB;   /* amount removed from every index */
+        const BYTE* const oldDictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int slot;
+        DEBUGLOG(4, "LZ4_renormDictT");
+        /* rescale hash table : entries smaller than the shift are cleared instead */
+        for (slot=0; slot<LZ4_HASH_SIZE_U32; slot++) {
+            LZ4_dict->hashTable[slot] = (LZ4_dict->hashTable[slot] < shift) ? 0 : (LZ4_dict->hashTable[slot] - shift);
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = oldDictEnd - LZ4_dict->dictSize;   /* same end address, at most the last 64 KB */
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
+{   /* Streaming compression of one block : picks prefix, external-dictionary or dictCtx mode from the stream state, then returns the result of LZ4_compress_generic(). */
+    const tableType_t tableType = byU32;
+    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
+    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;   /* taken before renormalization, which preserves dictionary + dictSize */
+
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+
+    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;   /* values below 1 select the default */
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;   /* values above the maximum are capped */
+
+    /* invalidate tiny dictionaries (unless the input directly follows them) */
+    if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow : true for sizes 1 to 3, a size of 0 wraps around and fails the test */
+      && (dictEnd != (const BYTE*)source) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+        streamPtr->dictSize = 0;
+        streamPtr->dictionary = (const BYTE*)source;
+        dictEnd = (const BYTE*)source;   /* dictEnd == source from here on, so the prefix-mode branch below gets selected */
+    }
+
+    /* Check overlapping input/dictionary space :
+     * when the end of the input lands inside the dictionary, only the part located after the input is kept */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;   /* never more than one 64 KB window */
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;   /* remainders below 4 bytes are dropped entirely */
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source) {
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))   /* dictionary shorter than both the window and the current offset */
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
+        else
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
+    }
+
+    /* external dictionary mode */
+    {   int result;
+        if (streamPtr->dictCtx) {
+            /* We depend here on the fact that dictCtx'es (produced by
+             * LZ4_loadDict) guarantee that their tables contain no references
+             * to offsets between dictCtx->currentOffset - 64 KB and
+             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+             * to use noDictIssue even when the dict isn't a full 64 KB.
+             */
+            if (inputSize > 4 KB) {
+                /* For compressing large blobs, it is faster to pay the setup
+                 * cost to copy the dictionary's tables into the active context,
+                 * so that the compression loop is only looking into one table.
+                 * Note that the copy covers the whole internal state of the dictCtx, not just its tables.
+                 */
+                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+            }
+        } else {
+            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {   /* same small-dictionary test as in prefix mode */
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            }
+        }
+        streamPtr->dictionary = (const BYTE*)source;   /* the block just compressed is recorded as the dictionary for the next call */
+        streamPtr->dictSize = (U32)inputSize;
+        return result;
+    }
+}
+
+
+/* Hidden debug entry point : always compresses in external dictionary mode, to force-test it */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
+{
+    LZ4_stream_t_internal* const cctx = &LZ4_dict->internal_donotuse;
+    int cSize;
+
+    LZ4_renormDictT(cctx, srcSize);   /* rescale indexes first when they get too large */
+
+    /* dictSmall is only requested for a dictionary shorter than both 64 KB and currentOffset */
+    if ((cctx->dictSize >= 64 KB) || (cctx->dictSize >= cctx->currentOffset))
+        cSize = LZ4_compress_generic(cctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    else
+        cSize = LZ4_compress_generic(cctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+
+    /* the block just compressed is recorded as the dictionary */
+    cctx->dictionary = (const BYTE*)source;
+    cctx->dictSize = (U32)srcSize;
+    return cSize;
+}
+
+
+/*! LZ4_saveDict() :
+ *  When the previously compressed data block is not guaranteed to stay available at its memory location,
+ *  this function copies it into a safer place (char* safeBuffer).
+ *  Note : calling LZ4_loadDict() afterwards is not needed,
+ *         the dictionary is immediately usable, so LZ4_compress_fast_continue() can be called right away.
+ *  Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* const cctx = &LZ4_dict->internal_donotuse;
+    const BYTE* const historyEnd = cctx->dictionary + cctx->dictSize;
+
+    /* clamp the request : never more than 64 KB, never more than what the stream currently references */
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; }
+    if ((U32)dictSize > cctx->dictSize) { dictSize = (int)cctx->dictSize; }
+
+    if (safeBuffer == NULL) { assert(dictSize == 0); }   /* a NULL buffer is only acceptable when nothing gets copied */
+    if (dictSize > 0) { memmove(safeBuffer, historyEnd - dictSize, dictSize); }
+
+    /* from now on, the stream references the saved copy */
+    cctx->dictionary = (const BYTE*)safeBuffer;
+    cctx->dictSize = (U32)dictSize;
+    return dictSize;
+}
+
+
+
+/*-*******************************
+ *  Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;   /* what bounds decoding; the decoder below tags the endOnInputSize paths as LZ4_decompress_safe() and the others as LZ4_decompress_fast() */
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;   /* whether decoding may end anywhere within the block */
+
+#undef MIN   /* drop any earlier definition before providing the local one */
+#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )   /* smaller of two values; each argument may be evaluated more than once */
+
+/* Decodes a variable-length literal or match length.
+ *
+ * ip - in/out pointer to the input position; advanced past every byte consumed.
+ * lencheck - end ip.  An error is reported once ip advances >= lencheck.
+ * loop_check - when set, test ip >= lencheck inside the loop body; failure reports loop_error.
+ * initial_check - when set, test ip >= lencheck before the loop starts; failure reports initial_error.
+ * error (output) - error code.  Only written on failure, so it should be set to `ok` (0) before the call.
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+                     int loop_check, int initial_check,
+                     variable_length_error* error)
+{
+    U32 total = 0;     /* running sum; NOTE(review): a U32 can wrap on an extremely long run of 255 bytes - confirm the callers' checks cover it */
+    U32 lastByte;
+    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+        *error = initial_error;
+        return 0;
+    }
+    for (;;) {
+        lastByte = *(*ip)++;
+        total += lastByte;
+        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+            *error = loop_error;
+            return total;
+        }
+        if (lastByte != 255) break;   /* only a byte of 255 announces a further byte */
+    }
+
+    return total;
+}
+
+/*! LZ4_decompress_generic() :
+ *  This generic decompression function covers all use cases.
+ *  It shall be instantiated several times, using different sets of directives.
+ *  Note that it is important for performance that this function really get inlined,
+ *  in order to remove useless branches during compilation optimization.
+ */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+                 const char* const src,
+                 char* const dst,
+                 int srcSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
+
+                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
+                 earlyEnd_directive partialDecoding,  /* full, partial */
+                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    if ((src == NULL) || (outputSize < 0)) { return -1; }
+
+    {   const BYTE* ip = (const BYTE*) src;
+        const BYTE* const iend = ip + srcSize;
+
+        BYTE* op = (BYTE*) dst;
+        BYTE* const oend = op + outputSize;
+        BYTE* cpy;
+
+        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+        const int safeDecode = (endOnInput==endOnInputSize);
+        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+        /* Set up the "end" pointers for the shortcut. */
+        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+        const BYTE* match;
+        size_t offset;
+        unsigned token;
+        size_t length;
+
+
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+        /* Special cases */
+        assert(lowPrefix <= op);
+        if ((endOnInput) && (unlikely(outputSize==0))) {
+            /* Empty output buffer */
+            if (partialDecoding) return 0;
+            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+        }
+        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
+
+	/* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+            DEBUGLOG(6, "skip fast decode loop");
+            goto safe_decode;
+        }
+
+        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+            if (endOnInput) { assert(ip < iend); }
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+                /* copy literals */
+                cpy = op+length;
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+                    LZ4_wildCopy32(op, ip, cpy);
+                } else {   /* LZ4_decompress_fast() */
+                    if (cpy>oend-8) { goto safe_literal_copy; }
+                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                                                 * it doesn't know input length, and only relies on end-of-block properties */
+                }
+                ip += length; op = cpy;
+            } else {
+                cpy = op+length;
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+                    /* We don't need to check oend, since we check it once for each loop below */
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+                    LZ4_memcpy(op, ip, 16);
+                } else {  /* LZ4_decompress_fast() */
+                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                     * it doesn't know input length, and relies on end-of-block properties */
+                    LZ4_memcpy(op, ip, 8);
+                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
+                }
+                ip += length; op = cpy;
+            }
+
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
+            assert(match <= op);
+
+            /* get matchlength */
+            length = token & ML_MASK;
+
+            if (length == ML_MASK) {
+                variable_length_error error = ok;
+                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+                if (error != ok) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+            } else {
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+
+                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+                    if (offset >= 8) {
+                        assert(match >= lowPrefix);
+                        assert(match <= op);
+                        assert(op + 18 <= oend);
+
+                        LZ4_memcpy(op, match, 8);
+                        LZ4_memcpy(op+8, match+8, 8);
+                        LZ4_memcpy(op+16, match+16, 2);
+                        op += length;
+                        continue;
+            }   }   }
+
+            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) {
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
+                    } else {
+                        goto _output_error;  /* end-of-block condition violated */
+                }   }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) { *op++ = *copyFrom++; }
+                    } else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
+            }
+
+            /* copy match within block */
+            cpy = op + length;
+
+            assert((op <= oend) && (oend-op >= 32));
+            if (unlikely(offset<16)) {
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
+            } else {
+                LZ4_wildCopy32(op, match, cpy);
+            }
+
+            op = cpy;   /* wildcopy correction */
+        }
+    safe_decode:
+#endif
+
+        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* A two-stage shortcut for the most common case:
+             * 1) If the literal length is 0..14, and there is enough space,
+             * enter the shortcut and copy 16 bytes on behalf of the literals
+             * (in the fast mode, only 8 bytes can be safely copied this way).
+             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+             * manner; but we ensure that there's enough space in the output for
+             * those 18 bytes earlier, upon entering the shortcut (in other words,
+             * there is a combined check for both stages).
+             */
+            if ( (endOnInput ? length != RUN_MASK : length <= 8)
+                /* strictly "less than" on input, to re-enter the loop with at least one byte */
+              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+                /* Copy the literals */
+                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+                op += length; ip += length;
+
+                /* The second stage: prepare for match copying, decode full info.
+                 * If it doesn't work out, the info won't be wasted. */
+                length = token & ML_MASK; /* match length */
+                offset = LZ4_readLE16(ip); ip += 2;
+                match = op - offset;
+                assert(match <= op); /* check overflow */
+
+                /* Do not deal with overlapping matches. */
+                if ( (length != ML_MASK)
+                  && (offset >= 8)
+                  && (dict==withPrefix64k || match >= lowPrefix) ) {
+                    /* Copy the match. */
+                    LZ4_memcpy(op + 0, match + 0, 8);
+                    LZ4_memcpy(op + 8, match + 8, 8);
+                    LZ4_memcpy(op +16, match +16, 2);
+                    op += length + MINMATCH;
+                    /* Both stages worked, load the next token. */
+                    continue;
+                }
+
+                /* The second stage didn't work out, but the info is ready.
+                 * Propel it right to the point of match copying. */
+                goto _copy_match;
+            }
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+            }
+
+            /* copy literals */
+            cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+        safe_literal_copy:
+#endif
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+            {
+                /* We've either hit the input parsing restriction or the output parsing restriction.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+                 */
+                if (partialDecoding) {
+                    /* Since we are partial decoding we may be in this block because of the output parsing
+                     * restriction, which is not valid since the output buffer is allowed to be undersized.
+                     */
+                    assert(endOnInput);
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
+                     */
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
+                     */
+                    if (cpy > oend) {
+                        cpy = oend;
+                        assert(op<=oend);
+                        length = (size_t)(oend-op);
+                    }
+                } else {
+                    /* We must be on the last sequence because of the parsing limitations so check
+                     * that we exactly regenerate the original size (must be exact when !endOnInput).
+                     */
+                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+                     /* We must be on the last sequence (or invalid) because of the parsing limitations
+                      * so check that we exactly consume the input and don't overrun the output buffer.
+                      */
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+                        DEBUGLOG(6, "should have been last run of literals")
+                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+                        goto _output_error;
+                    }
+                }
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
+                ip += length;
+                op += length;
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
+                 */
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+                    break;
+                }
+            } else {
+                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                ip += length; op = cpy;
+            }
+
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
+
+            /* get matchlength */
+            length = token & ML_MASK;
+
+    _copy_match:
+            if (length == ML_MASK) {
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+              if (error != ok) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+            }
+            length += MINMATCH;
+
+#if LZ4_FAST_DEC_LOOP
+        safe_match_copy:
+#endif
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                    else goto _output_error;   /* doesn't respect parsing restriction */
+                }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) *op++ = *copyFrom++;
+                    } else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
+            }
+            assert(match >= lowPrefix);
+
+            /* copy match within block */
+            cpy = op + length;
+
+            /* partialDecoding : may end anywhere within the block */
+            assert(op<=oend);
+            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                size_t const mlen = MIN(length, (size_t)(oend-op));
+                const BYTE* const matchEnd = match + mlen;
+                BYTE* const copyEnd = op + mlen;
+                if (matchEnd > op) {   /* overlap copy */
+                    while (op < copyEnd) { *op++ = *match++; }
+                } else {
+                    LZ4_memcpy(op, match, mlen);
+                }
+                op = copyEnd;
+                if (op == oend) { break; }
+                continue;
+            }
+
+            if (unlikely(offset<8)) {
+                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
+                op[0] = match[0];
+                op[1] = match[1];
+                op[2] = match[2];
+                op[3] = match[3];
+                match += inc32table[offset];
+                LZ4_memcpy(op+4, match, 4);
+                match -= dec64table[offset];
+            } else {
+                LZ4_memcpy(op, match, 8);
+                match += 8;
+            }
+            op += 8;
+
+            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+                if (op < oCopyLimit) {
+                    LZ4_wildCopy8(op, match, oCopyLimit);
+                    match += oCopyLimit - op;
+                    op = oCopyLimit;
+                }
+                while (op < cpy) { *op++ = *match++; }
+            } else {
+                LZ4_memcpy(op, match, 8);
+                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
+            }
+            op = cpy;   /* wildcopy correction */
+        }
+
+        /* end of decoding */
+        if (endOnInput) {
+            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
+           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
+       } else {
+           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+       }
+
+        /* Overflow error detected */
+    _output_error:
+        return (int) (-(((const char*)ip)-src))-1;
+    }
+}
+
+
+/*===== Instantiate the API decoding functions. =====*/
+
+LZ4_FORCE_O2 /* Full-block decode in endOnInputSize mode, without any dictionary. */
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest;   /* low prefix is dest itself */
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, decode_full_block, noDict, prefixStart, NULL, 0);
+}
+
+LZ4_FORCE_O2 /* partial_decode mode; output is capped at min(targetOutputSize, dstCapacity). */
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    /* Effective output limit: the smaller of the requested size and the buffer capacity. */
+    int const outLimit = (targetOutputSize < dstCapacity) ? targetOutputSize : dstCapacity;
+    return LZ4_decompress_generic(src, dst, compressedSize, outLimit,
+                                  endOnInputSize, partial_decode, noDict, (BYTE*)dst, NULL, 0);
+}
+
+LZ4_FORCE_O2 /* Full-block decode in endOnOutputSize mode (driven by originalSize), withPrefix64k. */
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - 64 KB;   /* low prefix is 64 KB below dest */
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, withPrefix64k, prefixStart, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2 /* An obsolete API function that is nevertheless exported. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - 64 KB;   /* low prefix is 64 KB below dest */
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k, prefixStart, NULL, 0);
+}
+
+/* Obsolete API function, the companion of LZ4_decompress_safe_withPrefix64k(). */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* LZ4_decompress_fast() doesn't validate match offsets, so it serves well with any prefixed dictionary. */
+    int const result = LZ4_decompress_fast(source, dest, originalSize);
+    return result;
+}
+
+LZ4_FORCE_O2 /* noDict-mode decode whose low prefix starts prefixSize bytes before dest. */
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict, prefixStart, NULL, 0);
+}
+
+LZ4_FORCE_O2 /* usingExtDict-mode decode: low prefix is dest, dictionary is dictStart/dictSize. */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, extDict, dictSize);
+}
+
+LZ4_FORCE_O2 /* endOnOutputSize counterpart of the usingExtDict-mode decode. */
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    const BYTE* const extDict = (const BYTE*)dictStart;
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, extDict, dictSize);
+}
+
+/* "Double dictionary" decoding (useful e.g. with ring buffers): the first part of
+ * the dictionary is given as the prefix located right before dest, the second part
+ * is supplied through dictStart + dictSize. Each of these routines is used exactly
+ * once, in LZ4_decompress_*_continue(). */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict, prefixStart, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_INLINE /* endOnOutputSize variant: prefix right before dest plus an external dictionary. */
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    BYTE* const prefixStart = (BYTE*)dest - prefixSize;
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict, prefixStart, (const BYTE*)dictStart, dictSize);
+}
+
+/*===== streaming decompression functions =====*/
+
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)   /* state comes from ALLOC_AND_ZERO() */
+{
+    /* A compilation error here means LZ4_STREAMDECODESIZE is too small for the internal state. */
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));
+    return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)   /* always returns 0 */
+{
+    if (LZ4_stream != NULL) {   /* freeing NULL is supported and does nothing */
+        FREEMEM(LZ4_stream);
+    }
+    return 0;
+}
+
+/*! LZ4_setStreamDecode() :
+ *  Tells the streaming decoder where its dictionary is located.
+ *  Not needed when previously decoded data still sits where it was decoded.
+ *  A size of 0 is allowed (same effect as no dictionary).
+ * @return : 1 if OK, 0 if error (the code below always returns 1)
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* const state = &LZ4_streamDecode->internal_donotuse;
+    state->externalDict = NULL;   /* external dictionary fields are cleared */
+    state->extDictSize  = 0;
+    state->prefixEnd    = (const BYTE*) dictionary + dictSize;   /* the dictionary is recorded as the prefix */
+    state->prefixSize   = (size_t) dictSize;
+    return 1;
+}
+
+/*! LZ4_decoderRingBufferSize() :
+ *  For the (optional) case where streaming decompression uses a ring buffer,
+ *  gives the smallest ring buffer size
+ *  that works with any source honouring the maxBlockSize condition.
+ *  Note : with a ring buffer,
+ *  blocks are presumed to be decompressed one right after another.
+ *  Once too little room is left for the next block (remainingSize < maxBlockSize),
+ *  decoding restarts at the beginning of the ring buffer.
+ *  Values of maxBlockSize below 16 are treated as 16.
+ * @return : minimum ring buffer size, or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    int const negative = (maxBlockSize < 0);
+    int const tooLarge = (maxBlockSize > LZ4_MAX_INPUT_SIZE);
+    if (negative || tooLarge) { return 0; }   /* invalid maxBlockSize */
+    return LZ4_DECODER_RING_BUFFER_SIZE((maxBlockSize < 16) ? 16 : maxBlockSize);
+}
+
+/*
+*_continue() :
+    Streaming-mode decoders : they allow decompressing a sequence of blocks.
+    Blocks decoded earlier must remain available at the memory position where they were decoded.
+    When that is not possible, save the relevant part of the decoded data into a safe buffer,
+    and report where it stands using LZ4_setStreamDecode()
+*/
+LZ4_FORCE_O2
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* const sd = &LZ4_streamDecode->internal_donotuse;
+    BYTE* const out = (BYTE*)dest;
+    int ret;   /* decoder result; a value <= 0 is returned as-is, before prefixSize/prefixEnd are updated */
+
+    if (sd->prefixSize == 0) {
+        /* Very first call: there is no dictionary yet. */
+        assert(sd->extDictSize == 0);
+        ret = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  = out + ret;
+        sd->prefixSize = (size_t)ret;
+    } else if (sd->prefixEnd == out) {
+        /* Output directly follows the prefix: the current segment keeps rolling. */
+        if (sd->prefixSize >= 64 KB - 1) {
+            ret = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        } else if (sd->extDictSize == 0) {
+            ret = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, sd->prefixSize);
+        } else {
+            ret = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                 sd->prefixSize, sd->externalDict, sd->extDictSize);
+        }
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  += ret;
+        sd->prefixSize += (size_t)ret;
+    } else {
+        /* Buffer wrapped around or another buffer is used: the former prefix becomes the external dictionary. */
+        sd->extDictSize  = sd->prefixSize;
+        sd->externalDict = sd->prefixEnd - sd->extDictSize;
+        ret = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                               sd->externalDict, sd->extDictSize);
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  = out + ret;
+        sd->prefixSize = (size_t)ret;
+    }
+    return ret;
+}
+
+LZ4_FORCE_O2 /* Streaming decode driven by originalSize; the prefix is advanced by originalSize on success. */
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* const sd = &LZ4_streamDecode->internal_donotuse;
+    BYTE* const out = (BYTE*)dest;
+    int ret;
+    assert(originalSize >= 0);
+
+    if (sd->prefixSize == 0) {   /* no dictionary yet */
+        assert(sd->extDictSize == 0);
+        ret = LZ4_decompress_fast(source, dest, originalSize);
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  = out + originalSize;
+        sd->prefixSize = (size_t)originalSize;
+    } else if (sd->prefixEnd == out) {   /* output directly follows the prefix */
+        int const prefixOnly = (sd->prefixSize >= 64 KB - 1) || (sd->extDictSize == 0);
+        if (prefixOnly) {
+            ret = LZ4_decompress_fast(source, dest, originalSize);
+        } else {
+            ret = LZ4_decompress_fast_doubleDict(source, dest, originalSize, sd->prefixSize, sd->externalDict, sd->extDictSize);
+        }
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  += originalSize;
+        sd->prefixSize += (size_t)originalSize;
+    } else {   /* output is elsewhere: the former prefix becomes the external dictionary */
+        sd->extDictSize  = sd->prefixSize;
+        sd->externalDict = sd->prefixEnd - sd->extDictSize;
+        ret = LZ4_decompress_fast_extDict(source, dest, originalSize, sd->externalDict, sd->extDictSize);
+        if (ret <= 0) { return ret; }
+        sd->prefixEnd  = out + originalSize;
+        sd->prefixSize = (size_t)originalSize;
+    }
+    return ret;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    if (dictSize == 0) {   /* no dictionary at all */
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+    }
+    if (dictStart+dictSize != dest) {   /* dictionary does not end at dest: external-dictionary decode */
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
+    }
+    /* The dictionary ends exactly where dest begins, so it is used as a prefix. */
+    if (dictSize >= 64 KB - 1) { return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    int const needsExtDict = (dictSize != 0) && (dictStart+dictSize != dest);
+    if (!needsExtDict) { return LZ4_decompress_fast(source, dest, originalSize); }   /* no dictionary, or it ends at dest */
+    assert(dictSize >= 0);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) {
+    int const result = LZ4_compress_default(source, dest, inputSize, maxOutputSize);   /* plain forwarding */
+    return result;
+}
+int LZ4_compress(const char* src, char* dest, int srcSize) {
+    int const dstCapacity = LZ4_compressBound(srcSize);   /* capacity is taken from LZ4_compressBound() */
+    return LZ4_compress_default(src, dest, srcSize, dstCapacity);
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) {
+    int const acceleration = 1;   /* this obsolete entry point always passes 1 */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, acceleration);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) {
+    int const dstCapacity = LZ4_compressBound(srcSize);   /* capacity is taken from LZ4_compressBound() */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) {
+    int const acceleration = 1;   /* this obsolete entry point always passes 1 */
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, acceleration);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) {
+    int const dstCapacity = LZ4_compressBound(inputSize);   /* capacity is taken from LZ4_compressBound() */
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, dstCapacity, 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) {
+    int const result = LZ4_decompress_fast(source, dest, outputSize);   /* deprecated name, plain forwarding */
+    return result;
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) {
+    int const result = LZ4_decompress_safe(source, dest, isize, maxOutputSize);   /* deprecated name, plain forwarding */
+    return result;
+}
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { return (int)(LZ4_STREAMSIZE); }   /* LZ4_STREAMSIZE converted to int */
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)   /* always returns 0 */
+{
+    LZ4_resetStream((LZ4_stream_t*)state);   /* state is handled as an LZ4_stream_t */
+    (void)inputBuffer;                       /* accepted but not used */
+    return 0;
+}
+
+void* LZ4_create (char* inputBuffer) {   /* inputBuffer is accepted but not used */
+    void* const stream = LZ4_createStream();
+    (void)inputBuffer;
+    return stream;
+}
+
+char* LZ4_slideInputBuffer (void* state)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)state;
+    return (char *)(uptrval)stream->internal_donotuse.dictionary;   /* via uptrval: avoids the const char * -> char * conversion warning */
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/libbutl/lz4.cxx b/libbutl/lz4.cxx
new file mode 100644
index 0000000..2db7af2
--- /dev/null
+++ b/libbutl/lz4.cxx
@@ -0,0 +1,555 @@
+// file      : libbutl/lz4.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <libbutl/lz4.hxx>
+
+// This careful macro dance makes sure that all the LZ4 C API functions are
+// made static while making sure we include the headers in the same way as the
+// implementation files that we include below.
+//
+#define LZ4LIB_VISIBILITY static
+#define LZ4_STATIC_LINKING_ONLY
+#define LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS
+#include "lz4.h"
+#include "lz4hc.h"
+
+#define LZ4FLIB_VISIBILITY static
+#define LZ4F_STATIC_LINKING_ONLY
+#define LZ4F_PUBLISH_STATIC_FUNCTIONS
+#define LZ4F_DISABLE_DEPRECATE_WARNINGS
+#include "lz4frame.h"
+
+#include <new>       // bad_alloc
+#include <memory>    // unique_ptr
+#include <cstring>   // memcpy()
+#include <cassert>
+#include <stdexcept> // invalid_argument, logic_error
+
+#include <libbutl/utility.hxx> // eos()
+
+#if 0
+#include <libbutl/lz4-stream.hxx>
+#endif
+
+using namespace std;
+
+namespace butl
+{
+  namespace lz4
+  {
+    static inline size_t
+    block_size (LZ4F_blockSizeID_t id)
+    {
+      // Capacity in bytes for the explicit size ids and 0 for anything else.
+      size_t kb (id == LZ4F_max64KB  ? 64   : id == LZ4F_max256KB ? 256 :
+                 id == LZ4F_max1MB   ? 1024 : id == LZ4F_max4MB   ? 4096 : 0);
+      return kb * 1024;
+    }
+
+    [[noreturn]] static void
+    throw_exception (LZ4F_errorCodes c) // Translate an LZ4F error code into a C++ exception.
+    {
+      using i = invalid_argument; // Used for all the errors with a message below.
+
+      switch (c)
+      {
+      case LZ4F_ERROR_GENERIC:                     throw i ("generic LZ4 error");
+      case LZ4F_ERROR_maxBlockSize_invalid:        throw i ("invalid LZ4 block size");
+      case LZ4F_ERROR_blockMode_invalid:           throw i ("invalid LZ4 block mode");
+      case LZ4F_ERROR_contentChecksumFlag_invalid: throw i ("invalid LZ4 content checksum flag");
+      case LZ4F_ERROR_compressionLevel_invalid:    throw i ("invalid LZ4 compression level");
+      case LZ4F_ERROR_headerVersion_wrong:         throw i ("wrong LZ4 header version");
+      case LZ4F_ERROR_blockChecksum_invalid:       throw i ("invalid LZ4 block checksum");
+      case LZ4F_ERROR_reservedFlag_set:            throw i ("reserved LZ4 flag set");
+      case LZ4F_ERROR_srcSize_tooLarge:            throw i ("LZ4 input too large");
+      case LZ4F_ERROR_dstMaxSize_tooSmall:         throw i ("LZ4 output too small");
+      case LZ4F_ERROR_frameHeader_incomplete:      throw i ("incomplete LZ4 frame header");
+      case LZ4F_ERROR_frameType_unknown:           throw i ("unknown LZ4 frame type");
+      case LZ4F_ERROR_frameSize_wrong:             throw i ("wrong LZ4 frame size");
+      case LZ4F_ERROR_decompressionFailed:         throw i ("invalid LZ4 compressed content");
+      case LZ4F_ERROR_headerChecksum_invalid:      throw i ("invalid LZ4 header checksum");
+      case LZ4F_ERROR_contentChecksum_invalid:     throw i ("invalid LZ4 content checksum");
+
+      case LZ4F_ERROR_allocation_failed:           throw bad_alloc ();
+
+      // These seem to be programming errors.
+      //
+      case LZ4F_ERROR_srcPtr_wrong:                 // NULL pointer.
+      case LZ4F_ERROR_frameDecoding_alreadyStarted: // Incorrect call seq.
+
+      // We should never get these.
+      //
+      case LZ4F_OK_NoError:
+      case LZ4F_ERROR_maxCode:
+      case _LZ4F_dummy_error_enum_for_c89_never_used:
+        break;
+      }
+
+      assert (false); // Only the codes that break out of the switch get here.
+      throw logic_error (LZ4F_getErrorName ((LZ4F_errorCode_t)(-c))); // Reached if assert() is compiled out.
+    }
+
+    // Same as above but for an erroneous result of an LZ4F_*() function call.
+    //
+    [[noreturn]] static inline void
+    throw_exception (size_t r)
+    {
+      throw_exception (LZ4F_errorCodes (LZ4F_getErrorCode (r)));
+    }
+
+    // compression
+    //
+
+    compressor::
+    ~compressor ()
+    {
+      // Free the compression context unless there is none.
+      LZ4F_cctx* c (static_cast<LZ4F_cctx*> (ctx_));
+      if (c == nullptr) return;
+      LZ4F_errorCode_t r (LZ4F_freeCompressionContext (c));
+      assert (!LZ4F_isError (r));
+    }
+
+    inline void compressor::
+    init_preferences (void* vp) const
+    {
+      // Fill in the LZ4F_preferences_t object passed as void* based on our
+      // level, block id, and content size (0 if the latter is not specified).
+      LZ4F_preferences_t& p (*static_cast<LZ4F_preferences_t*> (vp));
+      auto& fi (p.frameInfo);
+      p.autoFlush = 1;
+      p.favorDecSpeed = 0;
+      p.compressionLevel = level_;
+      fi.blockMode = LZ4F_blockLinked;
+      fi.blockSizeID = static_cast<LZ4F_blockSizeID_t> (block_id_);
+      fi.blockChecksumFlag = LZ4F_noBlockChecksum;
+      fi.contentChecksumFlag = LZ4F_contentChecksumEnabled;
+      fi.contentSize = content_size_ ? static_cast<unsigned long long> (*content_size_) : 0;
+    }
+
+    void compressor::
+    begin (int level,
+           int block_id,
+           optional<uint64_t> content_size)
+    {
+      assert (block_id >= 4 && block_id <= 7);
+
+      content_size_ = content_size;
+      block_id_ = block_id;
+      level_ = level;
+
+      LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
+      init_preferences (&prefs);
+
+      // Work out the input (ic) and output (oc) buffer capacities.
+      //
+      // For binary compatibility with the lz4 utility, content that fits
+      // into a single block has to be compressed with one *_compressFrame()
+      // call rather than with *_compressBegin()/*_compressUpdate(), and the
+      // output capacity for that call comes from *_compressFrameBound(), not
+      // from *_compressBound(). At this point (the input buffer is not yet
+      // filled) we cannot tell which of the two cases we will end up in.
+      //
+      // With autoFlush=1, however, *Bound() < *FrameBound(), so always going
+      // with the latter only costs a little extra space. As a bonus, it lets
+      // us call *Begin() followed by *Update() without flushing the buffer
+      // in between (a conclusion drawn from studying the implementation of
+      // the *Bound() functions).
+      //
+      // If content_size is known, then much smaller buffers suffice for
+      // small inputs. The catch is that we must later verify the caller told
+      // us the truth (otherwise we may fail with a strange error such as
+      // insufficient output buffer space).
+      //
+      const size_t bs (block_size (prefs.frameInfo.blockSizeID));
+
+      // Use the block size unless the (known) content is smaller. The
+      // nuance in the latter case: one extra byte is needed in order to
+      // detect EOF.
+      //
+      ic = (content_size_ && *content_size_ < bs
+            ? static_cast<size_t> (*content_size_) + 1
+            : bs);
+
+      oc = LZ4F_compressFrameBound (ic, &prefs);
+
+      begin_ = true;
+    }
+
+    void compressor::
+    next (bool end) // Compress the in bytes at ib into ob, setting on; end marks the last chunk.
+    {
+      LZ4F_cctx* ctx; // Set on every path that reaches the *Update()/*End() calls below.
+
+      // Unlike the decompression case below, compression cannot fail due to
+      // invalid content. So any LZ4F_*() function failure is either due to a
+      // programming bug or argument inconsistencies (e.g., content size does
+      // not match actual).
+
+      if (begin_) // First call after begin().
+      {
+        begin_ = false;
+
+        LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
+        init_preferences (&prefs);
+
+        // If we've allocated smaller buffers based on content_size_, then
+        // verify the input size matches what's promised.
+        //
+        // Note also that LZ4F_compressFrame() does not fail if it doesn't
+        // match, instead replacing it with the actual value.
+        //
+        size_t bs (block_size (prefs.frameInfo.blockSizeID));
+        if (content_size_ && *content_size_ < bs)
+        {
+          if (!end || in != *content_size_)
+            throw_exception (LZ4F_ERROR_frameSize_wrong);
+        }
+
+        // Must be < for lz4 compatibility (see EOF nuance above for the
+        // likely reason).
+        //
+        if (end && in < bs)
+        {
+          on = LZ4F_compressFrame (ob, oc, ib, in, &prefs); // Entire content as a single frame.
+          if (LZ4F_isError (on))
+            throw_exception (on);
+
+          in = 0; // All consumed.
+          return;
+        }
+        else
+        {
+          if (LZ4F_isError (LZ4F_createCompressionContext (&ctx, LZ4F_VERSION)))
+            throw bad_alloc ();
+
+          ctx_ = ctx;
+
+          // Write the header.
+          //
+          on = LZ4F_compressBegin (ctx, ob, oc, &prefs);
+          if (LZ4F_isError (on))
+            throw_exception (on);
+
+          // Fall through.
+        }
+      }
+      else
+      {
+        ctx = static_cast<LZ4F_cctx*> (ctx_);
+        on = 0; // Nothing in the output buffer yet.
+      }
+
+      size_t n;
+
+      if (in != 0)
+      {
+        n = LZ4F_compressUpdate (ctx, ob + on, oc - on, ib, in, nullptr); // Append after what is already in ob.
+        if (LZ4F_isError (n))
+          throw_exception (n);
+
+        in = 0; // All consumed.
+        on += n;
+      }
+
+      // Write the end marker.
+      //
+      if (end)
+      {
+        // Note that this call also verifies specified and actual content
+        // sizes match.
+        //
+        n = LZ4F_compressEnd (ctx, ob + on, oc - on, nullptr);
+        if (LZ4F_isError (n))
+          throw_exception (n);
+
+        on += n;
+      }
+    }
+
+    uint64_t
+    compress (ofdstream& os, ifdstream& is, // Compress is into os returning the number of bytes written.
+              int level,
+              int block_id,
+              optional<uint64_t> content_size)
+    {
+#if 0 // Disabled alternative implementation.
+      char buf[1024 * 3 + 7];
+      ostream cos (os, level, block_id, content_size);
+
+      for (bool e (false); !e; )
+      {
+        e = eof (is.read (buf, sizeof (buf)));
+        cos.write (buf, is.gcount ());
+        //for (streamsize i (0), n (is.gcount ()); i != n; ++i)
+        //  cos.put (buf[i]);
+      }
+
+      cos.close ();
+      return content_size ? *content_size : 0;
+#else
+      compressor c;
+
+      // Input/output buffer guards.
+      //
+      unique_ptr<char[]> ibg;
+      unique_ptr<char[]> obg;
+
+      // First determine required buffer capacities.
+      //
+      c.begin (level, block_id, content_size);
+
+      ibg.reset ((c.ib = new char[c.ic]));
+      obg.reset ((c.ob = new char[c.oc]));
+
+      // Read into the input buffer updating the eof flag.
+      //
+      // Note that we could try to do direct fd read/write but that would
+      // complicate things quite a bit (error handling, stream state, etc).
+      //
+      bool eof (false);
+      auto read = [&is, &c, &eof] ()
+      {
+        eof = butl::eof (is.read (c.ib, c.ic));
+        c.in = static_cast<size_t> (is.gcount ());
+      };
+
+      // Write from the output buffer updating the total written.
+      //
+      uint64_t ot (0);
+      auto write = [&os, &c, &ot] ()
+      {
+        os.write (c.ob, static_cast<streamsize> (c.on));
+        ot += c.on;
+      };
+
+      // Keep reading, compressing, and writing chunks of content.
+      //
+      while (!eof)
+      {
+        read ();
+
+        c.next (eof); // The chunk that hit EOF is the last one.
+
+        if (c.on != 0) // next() may just buffer the data.
+          write ();
+      }
+
+      return ot; // Total number of bytes written to os.
+#endif
+    }
+
+    // decompression
+    //
+
+    static_assert (sizeof (decompressor::hb) == LZ4F_HEADER_SIZE_MAX,
+                   "LZ4 header size mismatch");
+
+    decompressor::
+    ~decompressor ()
+    {
+      // Free the decompression context unless there is none.
+      LZ4F_dctx* c (static_cast<LZ4F_dctx*> (ctx_));
+      if (c == nullptr) return;
+      LZ4F_errorCode_t r (LZ4F_freeDecompressionContext (c));
+      assert (!LZ4F_isError (r));
+    }
+
+    size_t decompressor::
+    begin (optional<uint64_t>* content_size)
+    {
+      // Create the decompression context and immediately store it in ctx_.
+      //
+      LZ4F_dctx* dc;
+      if (LZ4F_isError (LZ4F_createDecompressionContext (&dc, LZ4F_VERSION)))
+        throw bad_alloc ();
+      ctx_ = dc;
+
+      // Parse the frame header in hb: on input n is the number of bytes
+      // available (hn), on output -- the end of the header as signalled by
+      // LZ4F_getFrameInfo(). The result is the input hint.
+      //
+      LZ4F_frameInfo_t info = LZ4F_INIT_FRAMEINFO;
+      size_t n (hn);
+      const size_t hint (LZ4F_getFrameInfo (dc, &info, hb, &n));
+      if (LZ4F_isError (hint))
+        throw_exception (hint);
+
+      // Report the content size, if requested (0 is reported as absent).
+      //
+      if (content_size != nullptr)
+      {
+        if (info.contentSize == 0)
+          *content_size = nullopt;
+        else
+          *content_size = static_cast<uint64_t> (info.contentSize);
+      }
+
+      // The output buffer capacity is the block size while the input one is
+      // the compressed bound plus the block header size. LZ4F_decompress() is
+      // expected never to hint for more than that.
+      //
+      oc = block_size (info.blockSizeID);
+      ic = LZ4F_compressBound (oc, nullptr) + LZ4F_BLOCK_HEADER_SIZE;
+      assert (hint <= ic);
+
+      // Move whatever is left in the header buffer over to its beginning.
+      //
+      hn -= n;
+      memmove (hb, hb + n, hn);
+      return hint;
+    }
+
+    size_t decompressor::
+    next ()
+    {
+      LZ4F_dctx* dc (static_cast<LZ4F_dctx*> (ctx_));
+
+      // Both sizes are passed by pointer: on starts out as the output buffer
+      // capacity and n -- as the amount of input available.
+      //
+      // Note that LZ4F_decompress() also verifies that the specified and
+      // actual content sizes match (similar to compression).
+      //
+      on = oc;
+      size_t n (in);
+      const size_t hint (LZ4F_decompress (dc, ob, &on, ib, &n, nullptr));
+      if (LZ4F_isError (hint))
+        throw_exception (hint);
+
+      assert (n == in && hint <= ic); // Expected to consume what it asked for.
+      in = 0;                         // All consumed.
+      return hint;
+    }
+
+    uint64_t
+    decompress (ofdstream& os, ifdstream& is) // Decompress is into os returning the number of bytes written.
+    {
+      // Write the specified number of bytes from the output buffer updating
+      // the total written.
+      //
+      uint64_t ot (0);
+      auto write = [&os, &ot] (char* b, size_t n)
+      {
+        os.write (b, static_cast<streamsize> (n));
+        ot += n;
+      };
+
+#if 0 // Disabled alternative implementation.
+      char buf[1024 * 3 + 7];
+      istream dis (is, true, istream::badbit);
+
+      for (bool e (false); !e; )
+      {
+        e = eof (dis.read (buf, sizeof (buf)));
+        write (buf, static_cast<size_t> (dis.gcount ()));
+      }
+#else
+      // Read into the specified buffer returning the number of bytes read and
+      // updating the eof flag.
+      //
+      bool eof (false);
+      auto read = [&is, &eof] (char* b, size_t c) -> size_t
+      {
+        size_t n (0);
+        do
+        {
+          eof = butl::eof (is.read (b + n, c - n));
+          n += static_cast<size_t> (is.gcount ());
+        }
+        while (!eof && n != c); // Keep going until c bytes are read or EOF is hit.
+
+        return n;
+      };
+
+      decompressor d;
+
+      // Input/output buffer guards.
+      //
+      unique_ptr<char[]> ibg;
+      unique_ptr<char[]> obg;
+
+      size_t h; // Input hint.
+
+      // First read in the header and allocate the buffers.
+      //
+      // What if we hit EOF here? And could begin() return 0? Turns out the
+      // answer to both questions is yes: 0-byte content compresses to 15
+      // bytes (with or without content size; 1-byte -- to 20/28 bytes). We
+      // can ignore EOF here since an attempt to read more will result in
+      // another EOF. And code below is prepared to handle 0 initial hint.
+      //
+      // @@ We could end up leaving some of the input content from the
+      //    header in the input buffer which the caller will have no way
+      //    of using/detecting.
+      //
+      d.hn = read (d.hb, sizeof (d.hb));
+      h = d.begin (); // Also sets the buffer capacities (d.ic, d.oc).
+
+      ibg.reset ((d.ib = new char[d.ic]));
+      obg.reset ((d.ob = new char[d.oc]));
+
+      // Copy over whatever is left in the header buffer and read up to
+      // the hinted size.
+      //
+      memcpy (d.ib, d.hb, (d.in = d.hn));
+
+      if (h > d.in)
+        d.in += read (d.ib + d.in, h - d.in);
+
+      // Keep decompressing, writing, and reading chunks of compressed
+      // content.
+      //
+      while (h != 0)
+      {
+        h = d.next ();
+
+        if (d.on != 0) // next() may just buffer the data.
+          write (d.ob, d.on);
+
+        if (h != 0)
+        {
+          if (eof) // More input is hinted but there is none left.
+            throw invalid_argument ("incomplete LZ4 compressed content");
+
+          d.in = read (d.ib, h);
+        }
+      }
+#endif
+
+      return ot; // Total number of bytes written to os.
+    }
+  }
+}
+
+// Include the implementation into our translation unit. Let's keep it last
+// since the implementation defines a bunch of macros.
+//
+#if defined(__clang__) || defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+// This header is only included in the implementation so we can include it
+// here instead of the above.
+//
+#define XXH_PRIVATE_API // Makes API static and includes xxhash.c.
+#include "xxhash.h"
+
+// Clang targeting MSVC prior to version 10 has difficulty with _tzcnt_u64()
+// (see Clang bug 47099 for a potentially related issue). Including relevant
+// headers (<immintrin.h>, <intrin.h>) does not appear to help. So for now we
+// just disable the use of _tzcnt_u64().
+//
+#if defined(_MSC_VER) && defined(__clang__) && __clang_major__ < 10
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+// Note that the order of inclusion is important (see *_SRC_INCLUDED macros).
+//
+extern "C"
+{
+#include "lz4.c"
+#include "lz4hc.c"
+#include "lz4frame.c"
+}
diff --git a/libbutl/lz4.h b/libbutl/lz4.h
new file mode 100644
index 0000000..7ab1e48
--- /dev/null
+++ b/libbutl/lz4.h
@@ -0,0 +1,774 @@
+/*
+ *  LZ4 - Fast LZ compression algorithm
+ *  Header File
+ *  Copyright (C) 2011-present, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to user.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
+
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes block's compressed size, and maximum bound of decompressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.
+
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  Embedding metadata is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+*  Export parameters
+*****************************************************************/
+/*
+*  LZ4_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_VISIBILITY :
+*  Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY   /* may be pre-defined by the build to override the default below */
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY   /* empty: compiler does not report itself as GCC >= 4 */
+#  endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)   /* building a Windows DLL: export the API */
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)   /* importing the API from a DLL */
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else   /* neither DLL macro is set to 1: only the visibility decoration applies */
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*------   Version   ------*/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)  /* single comparable integer: 10903 for 1.9.3 */
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE  /* token sequence MAJOR.MINOR.RELEASE; meant to be stringized below */
+#define LZ4_QUOTE(str) #str  /* stringizes its argument exactly as written (no macro expansion) */
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)  /* extra macro level so the argument is expanded before it is stringized */
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)  /* string literal "1.9.3" for the values above */
+
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */
+
+
+/*-************************************
+*  Tuning parameter
+**************************************/
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage may improve speed, thanks to better cache locality.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
+
+
+/*-************************************
+*  Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *  It also runs faster, so it's a recommended setting.
+ *  If the function cannot compress 'src' into a more limited 'dst' budget,
+ *  compression stops *immediately*, and the function result is zero.
+ *  In which case, 'dst' content is undefined (invalid).
+ *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ *                or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ *  compressedSize : is the exact complete size of the compressed block.
+ *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ *          it will never write outside 'dst' buffer, nor read outside 'source' buffer,
+ *          even if the compressed block is maliciously modified to order the decoder to do these actions.
+ *          In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)  /* worst-case output size; 0 if isize is negative or > LZ4_MAX_INPUT_SIZE; evaluates isize more than once, so pass no side effects */
+
+/*! LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ *  Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'targetDstSize'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fill 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ *           so dstCapacity is kind of redundant.
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ *           it could write more bytes, though only up to dstCapacity.
+ *           Some "margin" used to be required for this operation to work properly.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+*  Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
+
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  A same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for successful decoding.
+ *  Dictionaries are useful for better compression of small data (KB range).
+ *  While LZ4 accepts any input as a dictionary,
+ *  results are generally better when using Zstandard's Dictionary Builder.
+ *  Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ *           Each block has precise boundaries.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ *           Make sure that buffers are separated, by at least one byte.
+ *           This construction ensures that each block only depends on previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
+ *  save it into a safer place (char* safeBuffer).
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+*  Streaming Decompression Functions
+*  Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  creation / destruction of streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ *  Use this function to start decompression of a new stream of blocks.
+ *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ *  at which stage it resumes from beginning of ring buffer.
+ *  When setting such a ring buffer for streaming decompression,
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ *  Decompression functions only accept one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
+ *
+ *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly same update rule (block boundaries at same positions),
+ *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ *  Whenever these conditions are not possible,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ *  These decoding functions work the same as
+ *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ *  Performance tip : Decompression speed can be substantially increased
+ *                    when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+
+/*^*************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS   /* opt-in: give the experimental functions the same decoration as the stable API */
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else   /* default: no export/visibility decoration on the experimental functions */
+#define LZ4LIB_STATIC_API
+#endif
+
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ *  A variant of LZ4_compress_fast_extState().
+ *
+ *  Using this variant avoids an expensive initialization step.
+ *  It is only safe to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ *  From a high level, the difference is that
+ *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ *  This is an experimental API that allows
+ *  efficient use of a static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDict() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly constrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ *                                                  |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Note that it is a compile-time constant, so all compressions will apply this limit.
+ *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *   so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *   in which case, the return code will be 0 (zero).
+ *   The caller must be ready for these cases to happen,
+ *   and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)  /* margin in bytes: compressedSize/256 (via >> 8) plus a fixed 32 */
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif   /* LZ4_STATIC_3504398509 */
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ *  Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)   /* log2 of the entry count: 2^LZ4_MEMORY_USAGE bytes divided into 4-byte entries */
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)   /* hash table size in bytes */
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* number of LZ4_u32 entries; required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>   /* exact-width integer types for the aliases below */
+  typedef  int8_t  LZ4_i8;
+  typedef uint8_t  LZ4_byte;
+  typedef uint16_t LZ4_u16;
+  typedef uint32_t LZ4_u32;
+#else   /* neither C++ nor C99+: fall back to the basic integer types */
+  typedef   signed char  LZ4_i8;
+  typedef unsigned char  LZ4_byte;
+  typedef unsigned short LZ4_u16;   /* assumes short is 16 bits wide -- TODO confirm for the target */
+  typedef unsigned int   LZ4_u32;   /* assumes int is 32 bits wide -- TODO confirm for the target */
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;   /* typedef first, so the struct can refer to itself by this name (see dictCtx) */
+struct LZ4_stream_t_internal {
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 32-bit slots; 16 KB with the default LZ4_MEMORY_USAGE of 14 */
+    LZ4_u32 currentOffset;   /* NOTE(review): presumably the running position within the stream -- confirm in lz4.c */
+    LZ4_u32 tableType;   /* NOTE(review): presumably records how hashTable entries are encoded -- confirm in lz4.c */
+    const LZ4_byte* dictionary;   /* NOTE(review): presumably the dictionary/history bytes, cf. LZ4_loadDict() -- confirm */
+    const LZ4_stream_t_internal* dictCtx;   /* NOTE(review): presumably the stream attached via LZ4_attach_dictionary() -- confirm */
+    LZ4_u32 dictSize;   /* NOTE(review): presumably the byte length of `dictionary` -- confirm */
+};
+
+typedef struct {
+    const LZ4_byte* externalDict;   /* NOTE(review): presumably history not contiguous with the output, e.g. set by LZ4_setStreamDecode() -- confirm in lz4.c */
+    size_t extDictSize;   /* NOTE(review): presumably the byte length of externalDict -- confirm */
+    const LZ4_byte* prefixEnd;   /* NOTE(review): presumably one past the last previously decoded byte -- confirm */
+    size_t prefixSize;   /* NOTE(review): presumably the byte length of decoded data ending at prefixEnd -- confirm */
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ *  Do not use below internal definitions directly !
+ *  Declare or allocate an LZ4_stream_t instead.
+ *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ *  The structure definition can be convenient for static allocation
+ *  (on stack, or as part of larger structure).
+ *  Init this structure with LZ4_initStream() before first use.
+ *  note : only use this definition in association with static linking !
+ *  this definition is not API/ABI safe, and may change in future versions.
+ */
+#define LZ4_STREAMSIZE       16416  /* static size, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))   /* the same size expressed as a number of void* elements */
+union LZ4_stream_u {
+    void* table[LZ4_STREAMSIZE_VOIDP];         /* reserves the storage, with pointer alignment */
+    LZ4_stream_t_internal internal_donotuse;   /* the actual state; do not access directly */
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not respected.
+ *         In which case, the function will @return NULL.
+ *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ *  Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
+ *  information structure to track an LZ4 stream during decompression.
+ *  Initialize this structure using LZ4_setStreamDecode() before first use.
+ *  note : only use in association with static linking !
+ *         this definition is not API/ABI safe,
+ *         and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )   /* 4 elements, or 6 when pointers are 16 bytes wide */
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))   /* the same size expressed in bytes */
+union LZ4_streamDecode_u {
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];   /* reserves the storage */
+    LZ4_streamDecode_t_internal internal_donotuse;        /* the actual state; do not access directly */
+} ;   /* previously typedef'd to LZ4_streamDecode_t */
+
+
+
+/*-************************************
+*  Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+ *
+ *  Deprecated functions make the compiler generate a warning when invoked.
+ *  This is meant to invite users to update their source code.
+ *  Should deprecation warnings be a problem, it is generally possible to disable them,
+ *  typically with -Wno-deprecated-declarations for gcc
+ *  or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ *  before including the header file.
+ */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
+#else
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  else
+#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    define LZ4_DEPRECATED(message)   /* disabled */
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ *  These functions used to be faster than LZ4_decompress_safe(),
+ *  but this is no longer the case. They are now slower.
+ *  This is because LZ4_decompress_fast() doesn't know the input size,
+ *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ *  The last remaining LZ4_decompress_fast() specificity is that
+ *  it can decompress a block without knowing its compressed size.
+ *  Such functionality can be achieved in a more secure manner
+ *  by employing LZ4_decompress_safe_partial().
+ *
+ *  Parameters:
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           The function expects to finish at block's end exactly.
+ *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ *         These issues never happen if input (compressed) data is correct.
+ *         But they may happen if input data is invalid (error or intentional tampering).
+ *         As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is done with LZ4_initStream(), or LZ4_resetStream().
+ *  Consider switching to LZ4_initStream(),
+ *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+
+#endif /* LZ4_H_98237428734687 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/libbutl/lz4.hxx b/libbutl/lz4.hxx
new file mode 100644
index 0000000..7886788
--- /dev/null
+++ b/libbutl/lz4.hxx
@@ -0,0 +1,205 @@
+// file      : libbutl/lz4.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+
+#include <libbutl/export.hxx>
+
+namespace butl
+{
+  namespace lz4
+  {
+    // Read the content from the input stream, compress it using the specified
+    // compression level and block size, and write the compressed content to
+    // the output stream. If content size is specified, then include it into
+    // the compressed content header. Return the compressed content size.
+    //
+    // This function may throw std::bad_alloc as well as exceptions thrown by
+    // fdstream read/write functions. It may also throw std::invalid_argument
+    // in case of argument inconsistencies (e.g., content size does not match
+    // actual) with what() returning the error description. The input stream
+    // is expected to throw on badbit (but not failbit). The output stream is
+    // expected to throw on badbit or failbit.
+    //
+    // The output and most likely the input streams must be in the binary
+    // mode.
+    //
+    // Valid values for the compression level are between 1 (fastest) and 12
+    // (best compression level) though, practically, after 9 returns are
+    // diminished.
+    //
+    // Valid block sizes and their IDs:
+    //
+    // 4:   64KB
+    // 5:  256KB
+    // 6:    1MB
+    // 7:    4MB
+    //
+    // Note that due to the underlying API limitations, 0 content size is
+    // treated as absent and it's therefore impossible to compress 0-byte
+    // content with content size.
+    //
+    // This function produces compressed content identical to:
+    //
+    // lz4 -z -<compression_level> -B<block_size_id> -BD [--content-size]
+    //
+    LIBBUTL_SYMEXPORT std::uint64_t
+    compress (ofdstream&,
+              ifdstream&,
+              int compression_level,
+              int block_size_id,
+              optional<std::uint64_t> content_size);
+
+    // Low-level iterative compression API.
+    //
+    // This API may throw std::bad_alloc in case of memory allocation errors
+    // and std::invalid_argument in case of argument inconsistencies (e.g.,
+    // content size does not match actual) with what() returning the error
+    // description.
+    //
+    // See the implementation of the compress() function above for usage
+    // example.
+    //
+    // @@ TODO: reset support.
+    //
+    struct LIBBUTL_SYMEXPORT compressor
+    {
+      // Buffer, current size (part filled with data), and capacity. Note
+      // that none of these members are initialized by the constructor.
+      char* ib; std::size_t in, ic; // Input.
+      char* ob; std::size_t on, oc; // Output.
+
+      // As a first step call begin(). This function sets the required input
+      // and output buffer capacities (ic, oc).
+      //
+      // The caller normally allocates the input and output buffers and fills
+      // the input buffer.
+      //
+      void
+      begin (int compression_level,
+             int block_size_id,
+             optional<std::uint64_t> content_size);
+
+      // Then call next() to compress the next chunk of input passing true on
+      // reaching EOF. Note that the input buffer should be filled to capacity
+      // unless end is true and the output buffer must be flushed before each
+      // subsequent call to next().
+      //
+      void
+      next (bool end);
+
+      // Not copyable or movable.
+      //
+      compressor (const compressor&) = delete;
+      compressor (compressor&&) = delete;
+      compressor& operator= (const compressor&) = delete;
+      compressor& operator= (compressor&&) = delete;
+
+      // Implementation details. The constructor only sets ctx_ to NULL; the
+      // destructor is defined out of line.
+      compressor (): ctx_ (nullptr) {}
+      ~compressor ();
+
+    public:
+      void
+      init_preferences (void*) const;
+
+      void* ctx_;    // Opaque context handle; NULL after construction.
+      int level_;    // NOTE(review): level_, block_id_, and content_size_
+      int block_id_; // presumably cache the begin() arguments; confirm.
+      optional<std::uint64_t> content_size_;
+      bool begin_;   // Not initialized by the constructor.
+    };
+
+
+    // Read the compressed content from the input stream, decompress it, and
+    // write the decompressed content to the output stream. Return the
+    // decompressed content size.
+    //
+    // This function may throw std::bad_alloc as well as exceptions thrown by
+    // fdstream read/write functions. It may also throw std::invalid_argument
+    // if the compressed content is invalid with what() returning the error
+    // description. The input stream is expected to throw on badbit but not
+    // failbit. The output stream is expected to throw on badbit or failbit.
+    //
+    // The input and most likely the output streams must be in the binary
+    // mode.
+    //
+    // Note that this function does not require the input stream to reach EOF
+    // at the end of compressed content. So if you have this requirement, you
+    // will need to enforce it yourself.
+    //
+    LIBBUTL_SYMEXPORT std::uint64_t
+    decompress (ofdstream&, ifdstream&);
+
+    // Low-level iterative decompression API.
+    //
+    // This API may throw std::bad_alloc in case of memory allocation errors
+    // and std::invalid_argument if the compressed content is invalid with
+    // what() returning the error description.
+    //
+    // See the implementation of the decompress() function above for usage
+    // example.
+    //
+    // The LZ4F_*() decompression functions return a hint of how much data
+    // they want on the next call. So the plan is to allocate the input
+    // buffer large enough to hold anything that can be asked for and then
+    // fill it in in the asked chunks. This way we avoid having to shift the
+    // unread data around.
+    //
+    // @@ TODO: reset support.
+    //
+    struct LIBBUTL_SYMEXPORT decompressor
+    {
+      // Buffer, current size (part filled with data), and capacity. The
+      // header buffer is embedded and has a fixed capacity of 19 bytes.
+      char  hb[19]; std::size_t hn    ; // Header.
+      char* ib;     std::size_t in, ic; // Input.
+      char* ob;     std::size_t on, oc; // Output.
+
+      // As a first step, fill in the header buffer and call begin(). This
+      // function sets the required input and output buffer capacities (ic,
+      // oc) and the number of bytes left in the header buffer (hn) and
+      // returns the number of bytes expected by the following call to next().
+      // If content_size is not NULL, then it is set to the decompressed
+      // content size, if available.
+      //
+      // The caller normally allocates the input and output buffers, copies
+      // remaining header buffer data over to the input buffer, and then fills
+      // in the remainder of the input buffer up to what's expected by the
+      // call to next().
+      //
+      std::size_t
+      begin (optional<std::uint64_t>* content_size = nullptr);
+
+      // Then call next() to decompress the next chunk of input. This function
+      // returns the number of bytes expected by the following call to next()
+      // or 0 if no further input is expected. Note that the output buffer
+      // must be flushed before each subsequent call to next().
+      //
+      std::size_t
+      next ();
+
+      // Not copyable or movable.
+      //
+      decompressor (const decompressor&) = delete;
+      decompressor (decompressor&&) = delete;
+      decompressor& operator= (const decompressor&) = delete;
+      decompressor& operator= (decompressor&&) = delete;
+
+      // Implementation details. The constructor zeroes hn, in, on, and ctx_;
+      // ib, ic, ob, and oc are left uninitialized.
+      decompressor (): hn (0), in (0), on (0), ctx_ (nullptr) {}
+      ~decompressor ();
+
+    public:
+      void* ctx_; // Opaque context handle; NULL after construction.
+    };
+  }
+}
diff --git a/libbutl/lz4frame.c b/libbutl/lz4frame.c
new file mode 100644
index 0000000..0db8c1e
--- /dev/null
+++ b/libbutl/lz4frame.c
@@ -0,0 +1,1899 @@
+/*
+ * LZ4 auto-framing library
+ * Copyright (C) 2011-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following disclaimer
+ *   in the documentation and/or other materials provided with the
+ *   distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+ * in full conformance with specification v1.6.1 .
+ * This library relies upon memory management capabilities (malloc, free)
+ * provided either by <stdlib.h>,
+ * or redirected towards another library of user's choice
+ * (see Memory Routines below).
+ */
+
+
+/*-************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4F_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4F_HEAPMODE
+#  define LZ4F_HEAPMODE 0
+#endif
+
+
+/*-************************************
+*  Memory routines
+**************************************/
+/*
+ * User may redirect invocations of
+ * malloc(), calloc() and free()
+ * towards another library or solution of their choice
+ * by modifying below section.
+ */
+#ifndef LZ4_SRC_INCLUDED   /* avoid redefinition when sources are coalesced */
+#  include <stdlib.h>   /* malloc, calloc, free */
+#  define ALLOC(s)          malloc(s)
+#  define ALLOC_AND_ZERO(s) calloc(1,(s))
+#  define FREEMEM(p)        free(p)
+#endif
+
+#include <string.h>   /* memset, memcpy, memmove */
+#ifndef LZ4_SRC_INCLUDED  /* avoid redefinition when sources are coalesced */
+#  define MEM_INIT(p,v,s)   memset((p),(v),(s))
+#endif
+
+
+/*-************************************
+*  Library declarations
+**************************************/
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#define LZ4_STATIC_LINKING_ONLY
+#include "lz4.h"
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/*-************************************
+*  Debug
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4F_STATIC_ASSERT(c)    { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
+
+
+/*-************************************
+*  Basic Types
+**************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/* Portable (unoptimized) little-endian 32-bit load; byte-wise access avoids endianness and alignment issues. */
+static U32 LZ4F_readLE32 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U32 result = 0;
+    unsigned idx;
+    for (idx = 0; idx < 4; idx++)
+        result |= (U32)bytes[idx] << (8 * idx);
+    return result;
+}
+
+static void LZ4F_writeLE32 (void* dst, U32 value32)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned idx;
+    /* emit the four bytes of value32 from least to most significant */
+    for (idx = 0; idx < 4; idx++)
+        out[idx] = (BYTE)(value32 >> (8 * idx));
+}
+
+static U64 LZ4F_readLE64 (const void* src)
+{
+    const BYTE* const bytes = (const BYTE*)src;
+    U64 result = 0;
+    unsigned idx;
+
+    /* Assemble the value one byte at a time, least significant byte first,
+     * so the result does not depend on host byte order or on the alignment
+     * of src. Each byte is widened to U64 before it is shifted. */
+    for (idx = 0; idx < 8; idx++)
+        result |= (U64)bytes[idx] << (8 * idx);
+    return result;
+}
+
+static void LZ4F_writeLE64 (void* dst, U64 value64)
+{
+    BYTE* const out = (BYTE*)dst;
+    unsigned idx;
+
+    /* Store the eight bytes of value64 from least to most significant.
+     * Writing byte by byte keeps the stored layout little-endian on any
+     * host and places no alignment requirement on dst. */
+    for (idx = 0; idx < 8; idx++) {
+        out[idx] = (BYTE)(value64 >> (8 * idx));
+    }
+}
+
+
+/*-************************************
+*  Constants
+**************************************/
+#ifndef LZ4_SRC_INCLUDED   /* avoid double definition */
+#  define KB *(1<<10)
+#  define MB *(1<<20)
+#  define GB *(1<<30)
+#endif
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
+#define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB
+
+static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN;   /*  7 */
+static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX;   /* 19 */
+static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE;  /* block header : size, and compress flag */
+static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE;  /* block footer : checksum (optional) */
+
+
+/*-************************************
+*  Structures and local types
+**************************************/
+typedef struct LZ4F_cctx_s   /* state of one frame-compression context */
+{
+    LZ4F_preferences_t prefs;
+    U32    version;
+    U32    cStage;
+    const LZ4F_CDict* cdict;   /* NOTE(review): presumably NULL when no dictionary is used -- confirm */
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    BYTE*  tmpBuff;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    U64    totalInSize;
+    XXH32_state_t xxh;         /* NOTE(review): presumably the running content checksum -- confirm */
+    void*  lz4CtxPtr;
+    U16    lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+    U16    lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+} LZ4F_cctx_t;
+
+
+/*-************************************
+*  Error management
+**************************************/
+#define LZ4F_GENERATE_STRING(STRING) #STRING,
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };
+
+
+unsigned LZ4F_isError(LZ4F_errorCode_t code)
+{
+    return (code > (LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode));
+}
+
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    /* an error code, once negated, indexes LZ4F_errorStrings; any other value gets a generic name */
+    if (!LZ4F_isError(code)) return "Unspecified error code";
+    return LZ4F_errorStrings[-(int)(code)];
+}
+
+LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult)   /* map a function result back to its LZ4F_errorCodes value */
+{
+    return LZ4F_isError(functionResult) ? (LZ4F_errorCodes)(-(ptrdiff_t)functionResult)
+                                        : LZ4F_OK_NoError;
+}
+
+static LZ4F_errorCode_t err0r(LZ4F_errorCodes code)
+{
+    ptrdiff_t const negated = -(ptrdiff_t)code;   /* errors are returned as negated enum values */
+    LZ4F_STATIC_ASSERT(sizeof(size_t) <= sizeof(ptrdiff_t));   /* a compilation error here means ptrdiff_t is not large enough */
+    return (LZ4F_errorCode_t)negated;
+}
+
+unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; }   /* reports the LZ4F_VERSION value this file was compiled with */
+
+int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; }   /* reports the LZ4HC_CLEVEL_MAX constant */
+
+size_t LZ4F_getBlockSize(unsigned blockSizeID)
+{
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+
+    if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    if (blockSizeID < LZ4F_max64KB || blockSizeID > LZ4F_max4MB)
+        return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+    blockSizeID -= LZ4F_max64KB;
+    return blockSizes[blockSizeID];
+}
+
+/*-************************************
+*  Private functions
+**************************************/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+
+static BYTE LZ4F_headerChecksum (const void* header, size_t length)
+{
+    /* one-byte checksum: bits 8..15 of the XXH32 hash (seed 0) of the header bytes */
+    return (BYTE)((U32)XXH32(header, length, 0) >> 8);
+}
+
+
+/*-************************************
+*  Simple-pass compression functions
+**************************************/
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID,
+                                           const size_t srcSize)
+{
+    /* Walk the IDs upward from 64 KB (each step is a 4x larger block) and
+     * stop at the first one that holds srcSize; never go past the request. */
+    LZ4F_blockSizeID_t candidate;
+    size_t capacity = 64 KB;
+    for (candidate = LZ4F_max64KB; candidate < requestedBSID;
+         candidate = (LZ4F_blockSizeID_t)((int)candidate + 1), capacity <<= 2) {
+        if (capacity >= srcSize) return candidate;
+    }
+    return requestedBSID;
+}
+
+/*! LZ4F_compressBound_internal() :
+ *  Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
+ *  preferencesPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario (block and content checksums enabled).
+ * @return is always the same for a srcSize and preferencesPtr, so it can be relied upon to size reusable buffers.
+ *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ */
+static size_t LZ4F_compressBound_internal(size_t srcSize,
+                                    const LZ4F_preferences_t* preferencesPtr,
+                                          size_t alreadyBuffered)
+{
+    LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES;
+    prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;   /* worst case */
+    prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled;   /* worst case */
+    {   const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+        U32 const flush = prefsPtr->autoFlush | (srcSize==0);   /* srcSize==0 is treated the same as autoFlush */
+        LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
+        size_t const blockSize = LZ4F_getBlockSize(blockID);   /* NOTE(review): an error code returned for an invalid ID is not checked here */
+        size_t const maxBuffered = blockSize - 1;
+        size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered);   /* at most blockSize-1 buffered bytes are counted */
+        size_t const maxSrcSize = srcSize + bufferedSize;
+        unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize);
+        size_t const partialBlockSize = maxSrcSize & (blockSize-1);   /* remainder; the mask form assumes blockSize is a power of two */
+        size_t const lastBlockSize = flush ? partialBlockSize : 0;   /* the partial block is only counted when flushing */
+        unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
+
+        size_t const blockCRCSize = BFSize * prefsPtr->frameInfo.blockChecksumFlag;   /* per-block footer, 0 when block checksums are off */
+        size_t const frameEnd = BHSize + (prefsPtr->frameInfo.contentChecksumFlag*BFSize);   /* BHSize bytes, plus BFSize when the content checksum is on */
+
+        return ((BHSize + blockCRCSize) * nbBlocks) +
+               (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
+    }
+}
+
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    /* Worst-case size of a complete frame holding srcSize bytes: the largest
+     * possible header plus the bound for the blocks and the frame end. */
+    LZ4F_preferences_t prefs;
+    size_t const headerSize = maxFHSize;      /* max header size, including optional fields */
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;   /* work on a copy; the caller's structure is not modified */
+    else MEM_INIT(&prefs, 0, sizeof(prefs));             /* NULL means all-zero preferences */
+    prefs.autoFlush = 1;   /* makes the bound include the trailing partial block */
+    return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);
+}
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress srcBuffer using a dictionary, in a single step.
+ *  cdict can be NULL, in which case, no dictionary is used.
+ *  dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide a dictID, so it's not recommended.
+ * @return : number of bytes written into dstBuffer,
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+                                     void* dstBuffer, size_t dstCapacity,
+                               const void* srcBuffer, size_t srcSize,
+                               const LZ4F_CDict* cdict,
+                               const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefs;
+    LZ4F_compressOptions_t options;
+    BYTE* const dstStart = (BYTE*) dstBuffer;
+    BYTE* dstPtr = dstStart;   /* write cursor, advanced after each stage */
+    BYTE* const dstEnd = dstStart + dstCapacity;
+
+    if (preferencesPtr!=NULL)
+        prefs = *preferencesPtr;   /* work on a copy; the caller's structure is not modified */
+    else
+        MEM_INIT(&prefs, 0, sizeof(prefs));
+    if (prefs.frameInfo.contentSize != 0)
+        prefs.frameInfo.contentSize = (U64)srcSize;   /* auto-correct content size if selected (!=0) */
+
+    prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);   /* shrink the block size to fit the input */
+    prefs.autoFlush = 1;
+    if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+        prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* only one block => no need for inter-block link */
+
+    MEM_INIT(&options, 0, sizeof(options));
+    options.stableSrc = 1;
+
+    if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs))  /* condition to guarantee success */
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+
+    { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs);  /* write header */
+      if (LZ4F_isError(headerSize)) return headerSize;
+      dstPtr += headerSize;   /* header size */ }
+
+    assert(dstEnd >= dstPtr);
+    { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, (size_t)(dstEnd-dstPtr), srcBuffer, srcSize, &options);
+      if (LZ4F_isError(cSize)) return cSize;
+      dstPtr += cSize; }
+
+    assert(dstEnd >= dstPtr);
+    { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, (size_t)(dstEnd-dstPtr), &options);   /* flush last block, and generate suffix */
+      if (LZ4F_isError(tailSize)) return tailSize;
+      dstPtr += tailSize; }
+
+    assert(dstEnd >= dstPtr);   /* the finished frame must not extend past dstBuffer */
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame, in a single step, without dictionary (cdict is passed as NULL).
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                    const void* srcBuffer, size_t srcSize,
+                    const LZ4F_preferences_t* preferencesPtr)
+{
+    size_t result;
+#if (LZ4F_HEAPMODE)
+    LZ4F_cctx_t *cctxPtr;   /* heap mode : the context is created here and freed before returning */
+    result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION);
+    if (LZ4F_isError(result)) return result;
+#else
+    LZ4F_cctx_t cctx;       /* stack mode : the frame context is a local variable */
+    LZ4_stream_t lz4ctx;    /* local stream state, only wired in for levels below LZ4HC_CLEVEL_MIN */
+    LZ4F_cctx_t *cctxPtr = &cctx;
+
+    DEBUGLOG(4, "LZ4F_compressFrame");
+    MEM_INIT(&cctx, 0, sizeof(cctx));
+    cctx.version = LZ4F_VERSION;
+    cctx.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+    if (preferencesPtr == NULL ||
+        preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN)
+    {
+        LZ4_initStream(&lz4ctx, sizeof(lz4ctx));
+        cctxPtr->lz4CtxPtr = &lz4ctx;
+        cctxPtr->lz4CtxAlloc = 1;   /* marks a fast-type state as present, so LZ4F_compressBegin_usingCDict() does not allocate (nor free) one */
+        cctxPtr->lz4CtxState = 1;
+    }
+#endif
+
+    result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                           srcBuffer, srcSize,
+                                           NULL, preferencesPtr);
+
+#if (LZ4F_HEAPMODE)
+    LZ4F_freeCompressionContext(cctxPtr);
+#else
+    if (preferencesPtr != NULL &&
+        preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN)
+    {
+        FREEMEM(cctxPtr->lz4CtxPtr);   /* HC levels : the state was heap-allocated by LZ4F_compressBegin_usingCDict(), release it here */
+    }
+#endif
+    return result;
+}
+
+
+/*-***************************************************
+*   Dictionary compression
+*****************************************************/
+
+struct LZ4F_CDict_s {
+    void* dictContent;       /* private copy of the dictionary bytes (at most the last 64 KB), see LZ4F_createCDict() */
+    LZ4_stream_t* fastCtx;   /* stream pre-loaded with dictContent through LZ4_loadDict() */
+    LZ4_streamHC_t* HCCtx;   /* HC stream pre-loaded with dictContent through LZ4_loadDictHC() */
+}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */
+
+/*! LZ4F_createCDict() :
+ *  Builds a digested dictionary out of `dictBuffer`, so that a dictionary shared by many messages / blocks is loaded only once.
+ *  Only the last 64 KB of `dictBuffer` are retained when `dictSize` is larger than that.
+ *  The content is copied into the CDict and loaded into both a fast and an HC stream; `dictBuffer` can be released afterwards.
+ *  The resulting LZ4F_CDict is only read during compression, hence it can be shared by multiple threads concurrently.
+ * @return : digested dictionary for compression, or NULL if failed (any allocation failure) */
+LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
+{
+    LZ4F_CDict* const newDict = (LZ4F_CDict*) ALLOC(sizeof(*newDict));
+    const char* content = (const char*)dictBuffer;
+    DEBUGLOG(4, "LZ4F_createCDict");
+    if (newDict == NULL) return NULL;
+    if (dictSize > 64 KB) {   /* oversized dictionary : skip ahead so that exactly the trailing 64 KB remain */
+        content += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    newDict->dictContent = ALLOC(dictSize);
+    newDict->fastCtx = LZ4_createStream();
+    newDict->HCCtx = LZ4_createStreamHC();
+    if (newDict->dictContent == NULL || newDict->fastCtx == NULL || newDict->HCCtx == NULL) {
+        LZ4F_freeCDict(newDict);   /* all three members are assigned at this point, so a full release is fine */
+        return NULL;
+    }
+    memcpy(newDict->dictContent, content, dictSize);
+    LZ4_loadDict (newDict->fastCtx, (const char*)newDict->dictContent, (int)dictSize);
+    LZ4_setCompressionLevel(newDict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_loadDictHC(newDict->HCCtx, (const char*)newDict->dictContent, (int)dictSize);
+    return newDict;
+}
+
+void LZ4F_freeCDict(LZ4F_CDict* cdict)   /* releases the dictionary copy, both streams, then the CDict itself */
+{
+    if (cdict != NULL) {   /* NULL is accepted and ignored, like free() */
+        FREEMEM(cdict->dictContent);
+        LZ4_freeStream(cdict->fastCtx);
+        LZ4_freeStreamHC(cdict->HCCtx);
+        FREEMEM(cdict);
+}   }
+
+
+/*-*********************************
+*  Advanced compression functions
+***********************************/
+
+/*! LZ4F_createCompressionContext() :
+ *  The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ *  This is achieved using LZ4F_createCompressionContext(), which takes as argument the address of a context pointer and a version.
+ *  The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries.
+ *  On success, *LZ4F_compressionContextPtr receives a zero-initialized context; on allocation failure it is set to NULL.
+ *  If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+ *  Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t));
+    if (cctxPtr==NULL) {  /* failed allocation */
+        *LZ4F_compressionContextPtr = NULL;   /* same contract as LZ4F_createDecompressionContext() : never leave a stale pointer behind */
+        return err0r(LZ4F_ERROR_allocation_failed);
+    }
+
+    cctxPtr->version = version;
+    cctxPtr->cStage = 0;   /* Next stage : init stream */
+    *LZ4F_compressionContextPtr = cctxPtr;
+    return LZ4F_OK_NoError;
+}
+
+
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctxPtr)
+{   /* Releases the context together with its stream state and tmp buffer; always reports LZ4F_OK_NoError. */
+    if (cctxPtr == NULL) return LZ4F_OK_NoError;   /* support free on NULL */
+
+    FREEMEM(cctxPtr->lz4CtxPtr);  /* note: LZ4_streamHC_t and LZ4_stream_t are simple POD types */
+    FREEMEM(cctxPtr->tmpBuff);
+    FREEMEM(cctxPtr);
+
+    return LZ4F_OK_NoError;
+}
+
+
+/**
+ * Readies the underlying LZ4 or LZ4HC stream for a fresh compression :
+ * the state is reset (when needed) and the dictionary, if any, is attached.
+ *
+ * Callers invoke it once per independent compression stream : at frame
+ * start in blockLinked mode, or ahead of every block in blockIndependent
+ * mode.
+ */
+static void LZ4F_initStream(void* ctx,
+                            const LZ4F_CDict* cdict,
+                            int level,
+                            LZ4F_blockMode_t blockMode) {
+    if (level >= LZ4HC_CLEVEL_MIN) {   /* HC stream : always reset, then attach */
+        LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ctx;
+        LZ4_resetStreamHC_fast(hcStream, level);
+        LZ4_attach_HC_dictionary(hcStream, cdict ? cdict->HCCtx : NULL);
+        return;
+    }
+
+    {   LZ4_stream_t* const fastStream = (LZ4_stream_t*)ctx;
+        /* A reset is only required ahead of LZ4_compress_fast_continue(),
+         * i.e. with a dictionary or with linked blocks. The one-shot
+         * entry points reset the state themselves, so doing it here as
+         * well would be wasted work. */
+        if (cdict != NULL || blockMode == LZ4F_blockLinked)
+            LZ4_resetStream_fast(fastStream);
+        LZ4_attach_dictionary(fastStream, cdict ? cdict->fastCtx : NULL);
+}   }
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  init streaming compression (stream state, tmp buffer, checksum) and writes frame header into dstBuffer.
+ *  dstCapacity must be >= maxFHSize (largest possible header), else dstMaxSize_tooSmall is returned; cdict and preferencesPtr can be NULL.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_CDict* cdict,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefNull;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* headerStart;
+
+    if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    MEM_INIT(&prefNull, 0, sizeof(prefNull));
+    if (preferencesPtr == NULL) preferencesPtr = &prefNull;   /* NULL => all-zero preferences */
+    cctxPtr->prefs = *preferencesPtr;   /* the context keeps its own copy for the whole frame */
+
+    /* Ctx Management */
+    {   U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;   /* 1 : fast state, 2 : HC state */
+        if (cctxPtr->lz4CtxAlloc < ctxTypeID) {   /* allocated state (if any) is of a lower type than required : replace it */
+            FREEMEM(cctxPtr->lz4CtxPtr);
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+                cctxPtr->lz4CtxPtr = LZ4_createStream();
+            } else {
+                cctxPtr->lz4CtxPtr = LZ4_createStreamHC();
+            }
+            if (cctxPtr->lz4CtxPtr == NULL)
+                return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->lz4CtxAlloc = ctxTypeID;
+            cctxPtr->lz4CtxState = ctxTypeID;
+        } else if (cctxPtr->lz4CtxState != ctxTypeID) {
+            /* otherwise, a sufficient buffer is allocated, but we need to
+             * reset it to the correct context type */
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+                LZ4_initStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr, sizeof (LZ4_stream_t));
+            } else {
+                LZ4_initStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t));
+                LZ4_setCompressionLevel((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+            }
+            cctxPtr->lz4CtxState = ctxTypeID;
+        }
+    }
+
+    /* Buffer Management */
+    if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
+        cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+    {   size_t const requiredBuffSize = preferencesPtr->autoFlush ?
+                ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 64 KB : 0) :  /* only needs past data up to window size */
+                cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 128 KB : 0);
+
+        if (cctxPtr->maxBufferSize < requiredBuffSize) {   /* current tmp buffer is too small : replace it */
+            cctxPtr->maxBufferSize = 0;   /* stays 0 if the allocation below fails */
+            FREEMEM(cctxPtr->tmpBuff);
+            cctxPtr->tmpBuff = (BYTE*)ALLOC_AND_ZERO(requiredBuffSize);
+            if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            cctxPtr->maxBufferSize = requiredBuffSize;
+    }   }
+    cctxPtr->tmpIn = cctxPtr->tmpBuff;   /* buffered input starts at the beginning of the tmp buffer */
+    cctxPtr->tmpInSize = 0;
+    (void)XXH32_reset(&(cctxPtr->xxh), 0);   /* content checksum state, seed 0 */
+
+    /* context init */
+    cctxPtr->cdict = cdict;
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
+        /* frame init only for blockLinked : blockIndependent will be init at each block */
+        LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
+    }
+    if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
+        LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
+    }
+
+    /* Magic Number */
+    LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+    dstPtr += 4;
+    headerStart = dstPtr;   /* the header checksum covers the bytes from here (magic number excluded) */
+
+    /* FLG Byte */
+    *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
+        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
+        + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
+        + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
+        +  (cctxPtr->prefs.frameInfo.dictID > 0) );
+    /* BD Byte */
+    *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+    /* Optional Frame content size field */
+    if (cctxPtr->prefs.frameInfo.contentSize) {
+        LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize);
+        dstPtr += 8;
+        cctxPtr->totalInSize = 0;   /* running input total, compared against contentSize in LZ4F_compressEnd() */
+    }
+    /* Optional dictionary ID field */
+    if (cctxPtr->prefs.frameInfo.dictID) {
+        LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID);
+        dstPtr += 4;
+    }
+    /* Header CRC Byte */
+    *dstPtr = LZ4F_headerChecksum(headerStart, (size_t)(dstPtr - headerStart));
+    dstPtr++;
+
+    cctxPtr->cStage = 1;   /* header written, now request input data block */
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressBegin() :
+ *  init streaming compression and writes frame header into dstBuffer.
+ *  Same as LZ4F_compressBegin_usingCDict() with a NULL cdict (no dictionary), hence the same dstCapacity requirement.
+ *  preferencesPtr can be NULL, in which case default parameters are selected.
+ * @return : number of bytes written into dstBuffer for the header
+ *        or an error code (can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
+                          void* dstBuffer, size_t dstCapacity,
+                          const LZ4F_preferences_t* preferencesPtr)
+{
+    return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                         NULL, preferencesPtr);
+}
+
+
+/*  LZ4F_compressBound() :
+ * @return minimum capacity of dstBuffer for a given srcSize to handle worst case scenario.
+ *  preferencesPtr is optional : when it is NULL, or when autoFlush is not set, the bound is requested with (size_t)-1
+ *  as the already-buffered amount; with autoFlush set, it is requested with 0.
+ *  This function cannot fail. */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    int const flushesEachCall = (preferencesPtr != NULL) && preferencesPtr->autoFlush;
+    /* third argument of the internal bound : amount of input already sitting in the tmp buffer */
+    size_t const alreadyBuffered = flushesEachCall ? 0 : (size_t)-1;
+    return LZ4F_compressBound_internal(srcSize, preferencesPtr, alreadyBuffered);
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict);   /* common signature of the 4 block compressors returned by LZ4F_selectCompression(); a result of 0 is treated as failure by LZ4F_makeBlock() */
+
+
+/*! LZ4F_makeBlock():
+ *  Emits one block : 4-byte little-endian size field, then the payload (compressed, or raw + "uncompressed" flag
+ *  when `compress` reports 0), then an optional XXH32 of the payload. @return : total nb of bytes written into dst.
+ *  assumption : dst buffer capacity is >= BHSize + srcSize + crcSize */
+static size_t LZ4F_makeBlock(void* dst,
+                       const void* src, size_t srcSize,
+                             compressFunc_t compress, void* lz4ctx, int level,
+                       const LZ4F_CDict* cdict,
+                             LZ4F_blockChecksum_t crcFlag)
+{
+    BYTE* const header = (BYTE*)dst;
+    BYTE* const payload = header + BHSize;
+    U32 blockSize = (U32)compress(lz4ctx, (const char*)src, (char*)payload,
+                                  (int)(srcSize), (int)(srcSize-1),   /* output budget is srcSize-1 */
+                                  level, cdict);
+    if (blockSize != 0) {
+        LZ4F_writeLE32(header, blockSize);
+    } else {  /* compression failed */
+        DEBUGLOG(5, "LZ4F_makeBlock: compression failed, creating a raw block (size %u)", (U32)srcSize);
+        blockSize = (U32)srcSize;
+        LZ4F_writeLE32(header, blockSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
+        memcpy(payload, src, srcSize);
+    }
+    if (crcFlag) {   /* checksum of the payload exactly as stored, written right after it */
+        LZ4F_writeLE32(payload + blockSize, XXH32(payload, blockSize, 0));
+    }
+    return BHSize + blockSize + ((U32)crcFlag)*BFSize;
+}
+
+
+static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* Fast-mode compressor for independent blocks : the stream is (re)initialized on every call. */
+    int accel = 1;
+    if (level < 0) accel = -level + 1;   /* negative levels translate into a larger acceleration factor */
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict == NULL)   /* no dictionary : one-shot entry point */
+        return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, accel);
+    /* a dictionary has been attached : go through the streaming entry point */
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, accel);
+}
+
+static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* Fast-mode compressor for linked blocks : keeps using the stream as it stands. */
+    int const accel = (level >= 0) ? 1 : -level + 1;   /* negative levels translate into a larger acceleration factor */
+    (void)cdict; /* init once at beginning of frame */
+    return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, accel);
+}
+
+static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* HC compressor for independent blocks : the stream is (re)initialized on every call. */
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
+    if (cdict == NULL) {   /* no dictionary : one-shot entry point */
+        return LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level);
+    }
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+}
+
+static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
+{   /* HC compressor for linked blocks : keeps using the stream as it stands, without any (re)initialization. */
+    (void)level; (void)cdict; /* init once at beginning of frame (done in LZ4F_compressBegin_usingCDict() for blockLinked) */
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
+}
+
+static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level)
+{   /* Picks the block compressor matching the (fast | HC) x (independent | linked) combination. */
+    int const independent = (blockMode == LZ4F_blockIndependent);
+    if (level >= LZ4HC_CLEVEL_MIN) {   /* HC family */
+        return independent ? LZ4F_compressBlockHC : LZ4F_compressBlockHC_continue;
+    }
+    /* fast family */
+    return independent ? LZ4F_compressBlock : LZ4F_compressBlock_continue;
+}
+
+static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
+{   /* Has the stream (fast or HC, per compression level) save up to 64 KB at the start of tmpBuff; forwards the saver's result. */
+    if (cctxPtr->prefs.compressionLevel >= LZ4HC_CLEVEL_MIN)
+        return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+    return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus;   /* within LZ4F_compressUpdate() : where the most recently compressed block was read from (notDone : no block compressed in this call) */
+
+/*! LZ4F_compressUpdate() :
+ *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary (after LZ4F_compressBegin(), else an error is returned).
+ *  dstCapacity MUST be >= LZ4F_compressBound(srcSize, preferencesPtr), else dstMaxSize_tooSmall is returned.
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered.
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
+                           void* dstBuffer, size_t dstCapacity,
+                     const void* srcBuffer, size_t srcSize,
+                     const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_compressOptions_t cOptionsNull;
+    size_t const blockSize = cctxPtr->maxBlockSize;
+    const BYTE* srcPtr = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcPtr + srcSize;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    LZ4F_lastBlockStatus lastBlockCompressed = notDone;
+    compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize);
+
+    if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);   /* cStage is set to 1 once the frame header has been written */
+    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize))
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    MEM_INIT(&cOptionsNull, 0, sizeof(cOptionsNull));
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;   /* NULL => all-zero options */
+
+    /* complete tmp buffer */
+    if (cctxPtr->tmpInSize > 0) {   /* some data already within tmp buffer */
+        size_t const sizeToCopy = blockSize - cctxPtr->tmpInSize;   /* room left before the buffered block is full */
+        if (sizeToCopy > srcSize) {
+            /* add src to tmpIn buffer */
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+            srcPtr = srcEnd;
+            cctxPtr->tmpInSize += srcSize;
+            /* still needs some CRC */
+        } else {
+            /* complete tmpIn block and then compress it */
+            lastBlockCompressed = fromTmpBuffer;
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+            srcPtr += sizeToCopy;
+
+            dstPtr += LZ4F_makeBlock(dstPtr,
+                                     cctxPtr->tmpIn, blockSize,
+                                     compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                     cctxPtr->cdict,
+                                     cctxPtr->prefs.frameInfo.blockChecksumFlag);
+
+            if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;   /* linked blocks : move past the block just compressed instead of overwriting it */
+            cctxPtr->tmpInSize = 0;
+        }
+    }
+
+    while ((size_t)(srcEnd - srcPtr) >= blockSize) {
+        /* compress full blocks */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, blockSize,
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr += blockSize;
+    }
+
+    if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
+        /* compress remaining input < blockSize */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, (size_t)(srcEnd - srcPtr),
+                                 compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
+        srcPtr  = srcEnd;
+    }
+
+    /* preserve dictionary if necessary */
+    if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) {
+        if (compressOptionsPtr->stableSrc) {
+            cctxPtr->tmpIn = cctxPtr->tmpBuff;   /* nothing is saved in this case : tmpIn simply restarts at the beginning of tmpBuff */
+        } else {
+            int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+            if (realDictSize==0) return err0r(LZ4F_ERROR_GENERIC);
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;   /* new input gets buffered right after the saved dictionary */
+        }
+    }
+
+    /* keep tmpIn within limits */
+    if (!(cctxPtr->prefs.autoFlush) &&
+        (cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize))   /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */
+    {
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+    }
+
+    /* some input data left, necessarily < blockSize */
+    if (srcPtr < srcEnd) {
+        /* fill tmp buffer */
+        size_t const sizeToCopy = (size_t)(srcEnd - srcPtr);
+        memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+        cctxPtr->tmpInSize = sizeToCopy;
+    }
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+        (void)XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);   /* content checksum covers the original (uncompressed) input */
+
+    cctxPtr->totalInSize += srcSize;
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_flush() :
+ *  When compressed data must be sent immediately, without waiting for a block to be filled,
+ *  invoke LZ4F_flush(), which will immediately compress any remaining data stored within LZ4F_cctx.
+ *  The result of the function is the number of bytes written into dstBuffer.
+ *  It can be zero, this means there was no data left within LZ4F_cctx.
+ *  The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ *  LZ4F_compressOptions_t* is optional. NULL is a valid argument.
+ */
+size_t LZ4F_flush(LZ4F_cctx* cctxPtr,
+                  void* dstBuffer, size_t dstCapacity,
+            const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    compressFunc_t compress;
+
+    if (cctxPtr->tmpInSize == 0) return 0;   /* nothing to flush */
+    if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);   /* cStage is 1 between frame header and LZ4F_compressEnd() */
+    if (dstCapacity < (cctxPtr->tmpInSize + BHSize + BFSize))
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    (void)compressOptionsPtr;   /* not yet useful */
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* compress tmp buffer */
+    dstPtr += LZ4F_makeBlock(dstPtr,
+                             cctxPtr->tmpIn, cctxPtr->tmpInSize,
+                             compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
+                             cctxPtr->cdict,
+                             cctxPtr->prefs.frameInfo.blockChecksumFlag);
+    assert(((void)"flush overflows dstBuffer!", (size_t)(dstPtr - dstStart) <= dstCapacity));
+
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked)
+        cctxPtr->tmpIn += cctxPtr->tmpInSize;   /* linked blocks : move past the data just compressed instead of overwriting it */
+    cctxPtr->tmpInSize = 0;
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) {  /* necessarily LZ4F_blockLinked */
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;   /* new input gets buffered right after the saved dictionary */
+    }
+
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*! LZ4F_compressEnd() :
+ *  When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ *  It will flush whatever data remained within compressionContext (like LZ4F_flush())
+ *  but also properly finalize the frame, with an endMark and an (optional) checksum.
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return: the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ *       or an error code if it fails (can be tested using LZ4F_isError()), e.g. frameSize_wrong when a declared contentSize doesn't match the input total
+ *  The context can then be used again to compress a new frame, starting with LZ4F_compressBegin().
+ */
+size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr,
+                        void* dstBuffer, size_t dstCapacity,
+                  const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+
+    size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
+    DEBUGLOG(5,"LZ4F_compressEnd: dstCapacity=%u", (unsigned)dstCapacity);
+    if (LZ4F_isError(flushSize)) return flushSize;
+    dstPtr += flushSize;
+
+    assert(flushSize <= dstCapacity);
+    dstCapacity -= flushSize;   /* from here on : capacity remaining after the flushed block */
+
+    if (dstCapacity < 4) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    LZ4F_writeLE32(dstPtr, 0);
+    dstPtr += 4;   /* endMark */
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
+        U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
+        if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);   /* 4 (endMark, already written) + 4 (checksum) */
+        DEBUGLOG(5,"Writing 32-bit content checksum");
+        LZ4F_writeLE32(dstPtr, xxh);
+        dstPtr+=4;   /* content Checksum */
+    }
+
+    cctxPtr->cStage = 0;   /* state is now re-usable (with identical preferences) */
+    cctxPtr->maxBufferSize = 0;  /* reuse HC context */
+
+    if (cctxPtr->prefs.frameInfo.contentSize) {   /* a content size was declared in the header : it must match what was actually consumed */
+        if (cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize)
+            return err0r(LZ4F_ERROR_frameSize_wrong);
+    }
+
+    return (size_t)(dstPtr - dstStart);
+}
+
+
+/*-***************************************************
+*   Frame Decompression
+*****************************************************/
+
+typedef enum {
+    dstage_getFrameHeader=0, dstage_storeFrameHeader,   /* =0 : state of a fresh (zeroed) or reset context; store : entered when the frame header arrives incomplete */
+    dstage_init,
+    dstage_getBlockHeader, dstage_storeBlockHeader,
+    dstage_copyDirect, dstage_getBlockChecksum,
+    dstage_getCBlock, dstage_storeCBlock,
+    dstage_flushOut,
+    dstage_getSuffix, dstage_storeSuffix,
+    dstage_getSFrameSize, dstage_storeSFrameSize,   /* entered from LZ4F_decodeHeader() when a skippable-frame magic number is found */
+    dstage_skipSkippable
+} dStage_t;   /* decoder stages, stored in LZ4F_dctx_s::dStage */
+
+struct LZ4F_dctx_s {
+    LZ4F_frameInfo_t frameInfo;   /* filled by LZ4F_decodeHeader() */
+    U32    version;               /* versionNumber given to LZ4F_createDecompressionContext() */
+    dStage_t dStage;              /* current decoder stage; also what LZ4F_freeDecompressionContext() returns */
+    U64    frameRemainingSize;
+    size_t maxBlockSize;          /* derived from the header's blockSizeID */
+    size_t maxBufferSize;
+    BYTE*  tmpIn;                 /* released by LZ4F_freeDecompressionContext() */
+    size_t tmpInSize;             /* nb of bytes currently stored (e.g. partial header bytes) */
+    size_t tmpInTarget;           /* nb of bytes wanted before moving on (e.g. full header size) */
+    BYTE*  tmpOutBuffer;          /* released by LZ4F_freeDecompressionContext() */
+    const BYTE* dict;             /* cleared by LZ4F_resetDecompressionContext() */
+    size_t dictSize;              /* cleared by LZ4F_resetDecompressionContext() */
+    BYTE*  tmpOut;
+    size_t tmpOutSize;
+    size_t tmpOutStart;
+    XXH32_state_t xxh;
+    XXH32_state_t blockChecksum;
+    BYTE   header[LZ4F_HEADER_SIZE_MAX];   /* staging area where an incomplete frame header is accumulated */
+};  /* typedef'd to LZ4F_dctx in lz4frame.h */
+
+
+/*! LZ4F_createDecompressionContext() :
+ *  Allocates a zero-initialized decompression context, which will track all decompression operations,
+ *  records versionNumber in it, and hands it back through *LZ4F_decompressionContextPtr
+ *  (which receives NULL when the allocation fails).
+ *  Object can later be released using LZ4F_freeDecompressionContext().
+ * @return : if != 0, there was an error during context creation. */
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx* const newCtx = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx));
+
+    *LZ4F_decompressionContextPtr = newCtx;   /* NULL in case of failed allocation */
+    if (newCtx != NULL) {
+        newCtx->version = versionNumber;
+        return LZ4F_OK_NoError;
+    }
+
+    return err0r(LZ4F_ERROR_allocation_failed);
+}
+
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx)
+{   /* Releases dctx and its two buffers; the value returned is the decoder stage the context was in (LZ4F_OK_NoError for NULL). */
+    LZ4F_errorCode_t stageAtRelease;
+    if (dctx == NULL) return LZ4F_OK_NoError;   /* can accept NULL input, like free() */
+
+    stageAtRelease = (LZ4F_errorCode_t)dctx->dStage;   /* must be read before the memory is released */
+    FREEMEM(dctx->tmpIn);
+    FREEMEM(dctx->tmpOutBuffer);
+    FREEMEM(dctx);
+    return stageAtRelease;
+}
+
+
+/*==---   Streaming Decompression operations   ---==*/
+
+void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
+{   /* Puts the decoder back to "expecting a frame header" and forgets any dictionary reference; buffers are left as they are. */
+    dctx->dictSize = 0;
+    dctx->dict = NULL;
+    dctx->dStage = dstage_getFrameHeader;
+}
+
+
+/*! LZ4F_decodeHeader() :
+ *  Parses a frame header and records the outcome into dctx.
+ *  input   : `src` must point at the very first byte of a frame (magic number).
+ *  output  : fills dctx->frameInfo and moves dctx->dStage to the stage to run next.
+ *            No buffer is allocated here : LZ4F_decompress() does it in dstage_init.
+ *  @return : nb of bytes consumed from src (necessarily <= srcSize)
+ *            or an error code (testable with LZ4F_isError())
+ */
+static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize)
+{
+    const BYTE* const hdr = (const BYTE*)src;
+    unsigned bModeBit, bChecksumFlag, cSizeFlag, cChecksumFlag, dictFlag, bSizeID;
+    size_t fhSize;
+
+    DEBUGLOG(5, "LZ4F_decodeHeader");
+    /* nothing can be decoded below the minimal frame header size */
+    if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);
+    MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
+
+    /* skippable frames : only the frame type is known here, the size field is read by later stages */
+    if ((LZ4F_readLE32(hdr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
+        int const srcIsHeaderBuffer = (src == (void*)(dctx->header));
+        dctx->frameInfo.frameType = LZ4F_skippableFrame;
+        if (!srcIsHeaderBuffer) {
+            dctx->dStage = dstage_getSFrameSize;
+            return 4;   /* consume the magic number only */
+        }
+        dctx->tmpInSize = srcSize;   /* input already sits in dctx->header : keep accumulating there */
+        dctx->tmpInTarget = 8;
+        dctx->dStage = dstage_storeSFrameSize;
+        return srcSize;
+    }
+
+    /* regular frame : control magic number */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (LZ4F_readLE32(hdr) != LZ4F_MAGICNUMBER) {
+        DEBUGLOG(4, "frame header error : unknown magic number");
+        return err0r(LZ4F_ERROR_frameType_unknown);
+    }
+#endif
+    dctx->frameInfo.frameType = LZ4F_frame;
+
+    /* FLG byte : version(7-6) blockMode(5) blockChecksum(4) contentSize(3) contentChecksum(2) reserved(1) dictID(0) */
+    {   U32 const flg = hdr[4];
+        dictFlag      = flg & _1BIT;
+        cChecksumFlag = (flg>>2) & _1BIT;
+        cSizeFlag     = (flg>>3) & _1BIT;
+        bChecksumFlag = (flg>>4) & _1BIT;
+        bModeBit      = (flg>>5) & _1BIT;
+        if ((flg>>1) & _1BIT) return err0r(LZ4F_ERROR_reservedFlag_set);            /* reserved bit is checked first */
+        if (((flg>>6) & _2BITS) != 1) return err0r(LZ4F_ERROR_headerVersion_wrong);  /* 1 is the only supported version */
+    }
+
+    /* optional fields enlarge the header : content size (8 bytes), dictID (4 bytes) */
+    fhSize = minFHSize;
+    if (cSizeFlag) fhSize += 8;
+    if (dictFlag) fhSize += 4;
+
+    if (srcSize < fhSize) {
+        /* header is truncated : stash what is available into dctx->header and wait for the rest */
+        if (hdr != dctx->header)
+            memcpy(dctx->header, hdr, srcSize);
+        dctx->dStage = dstage_storeFrameHeader;
+        dctx->tmpInTarget = fhSize;
+        dctx->tmpInSize = srcSize;
+        return srcSize;
+    }
+
+    /* BD byte : reserved(7) blockSizeID(6-4) reserved(3-0) */
+    {   U32 const bd = hdr[5];
+        bSizeID = (bd>>4) & _3BITS;
+        if ((bd>>7) & _1BIT) return err0r(LZ4F_ERROR_reservedFlag_set);
+        if (bSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid);   /* 4-7 only supported values for the time being */
+        if (bd & _4BITS) return err0r(LZ4F_ERROR_reservedFlag_set);
+    }
+
+    /* header checksum covers everything between the magic number and the checksum byte itself */
+    assert(fhSize > 5);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    {   BYTE const computedHC = LZ4F_headerChecksum(hdr+4, fhSize-5);
+        if (computedHC != hdr[fhSize-1])
+            return err0r(LZ4F_ERROR_headerChecksum_invalid);
+    }
+#endif
+
+    /* header is valid : commit decoded parameters into dctx */
+    dctx->maxBlockSize = LZ4F_getBlockSize(bSizeID);
+    dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)bSizeID;
+    dctx->frameInfo.blockMode = (LZ4F_blockMode_t)bModeBit;
+    dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)bChecksumFlag;
+    dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)cChecksumFlag;
+    if (cSizeFlag) {
+        dctx->frameInfo.contentSize = LZ4F_readLE64(hdr+6);
+        dctx->frameRemainingSize = dctx->frameInfo.contentSize;
+    }
+    if (dictFlag)
+        dctx->frameInfo.dictID = LZ4F_readLE32(hdr + fhSize - 5);
+
+    dctx->dStage = dstage_init;
+    return fhSize;
+}
+
+
+/*! LZ4F_headerSize() :
+ *  Inspects only the first bytes at `src` to tell how long the frame header is.
+ * @return : header size in bytes, or an error code which can be tested using LZ4F_isError()
+ */
+size_t LZ4F_headerSize(const void* src, size_t srcSize)
+{
+    U32 magic;
+    if (src == NULL)
+        return err0r(LZ4F_ERROR_srcPtr_wrong);
+    if (srcSize < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH)   /* minimal srcSize to determine header size */
+        return err0r(LZ4F_ERROR_frameHeader_incomplete);
+
+    magic = LZ4F_readLE32(src);
+    if ((magic & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START)
+        return 8;   /* skippable frame : magic number + frame size field */
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (magic != LZ4F_MAGICNUMBER)   /* neither skippable nor a regular frame */
+        return err0r(LZ4F_ERROR_frameType_unknown);
+#endif
+
+    /* regular frame : optional fields are announced by the FLG byte */
+    {   BYTE const flg = ((const BYTE*)src)[4];
+        size_t hSize = minFHSize;
+        if ((flg>>3) & _1BIT) hSize += 8;   /* content size field */
+        if (flg & _1BIT) hSize += 4;        /* dictID field */
+        return hSize;
+    }
+}
+
+/*! LZ4F_getFrameInfo() :
+ *  Extracts frame parameters (max blockSize, frame checksum, etc.) into *frameInfoPtr.
+ *  Usage is optional. Objective is to provide relevant information for allocation purposes.
+ *  Two situations are supported :
+ *   - Beginning of a new frame : the header is decoded from `srcBuffer`, which starts the decoding process.
+ *     *srcSizePtr must cover the complete frame header (providing more input is allowed).
+ *   - Header already decoded : no input is read, parameters are copied from dctx.
+ *  A header only partially received by LZ4F_decompress() is refused (LZ4F_ERROR_frameDecoding_alreadyStarted).
+ *  On return, *srcSizePtr holds the number of bytes consumed; decompression must resume from (srcBuffer + *srcSizePtr).
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError()
+ *  note 1 : in case of error, *srcSizePtr is set to 0.
+ *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                   LZ4F_frameInfo_t* frameInfoPtr,
+                             const void* srcBuffer, size_t* srcSizePtr)
+{
+    LZ4F_STATIC_ASSERT(dstage_getFrameHeader < dstage_storeFrameHeader);
+    if (dctx->dStage > dstage_storeFrameHeader) {
+        /* header already decoded : answer from dctx; the empty LZ4F_decompress() call only yields the next hint */
+        size_t noOutput = 0, noInput = 0;
+        *srcSizePtr = 0;
+        *frameInfoPtr = dctx->frameInfo;
+        return LZ4F_decompress(dctx, NULL, &noOutput, NULL, &noInput, NULL);
+    }
+    if (dctx->dStage == dstage_storeFrameHeader) {
+        /* frame decoding already started, in the middle of header => automatic fail */
+        *srcSizePtr = 0;
+        return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted);
+    }
+
+    {   size_t const available = *srcSizePtr;   /* frame not started yet : decode header straight from srcBuffer */
+        size_t const hSize = LZ4F_headerSize(srcBuffer, available);
+        size_t result;
+        if (LZ4F_isError(hSize)) { *srcSizePtr = 0; return hSize; }
+        if (available < hSize) {
+            *srcSizePtr = 0;
+            return err0r(LZ4F_ERROR_frameHeader_incomplete);
+        }
+        result = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
+        if (LZ4F_isError(result)) {
+            *srcSizePtr = 0;
+        } else {
+            *srcSizePtr = result;   /* bytes consumed by the header */
+            result = BHSize;        /* hint : block header size */
+        }
+        *frameInfoPtr = dctx->frameInfo;   /* copied on error too */
+        return result;
+    }
+}
+
+
+/* LZ4F_updateDict() :
+ * records the dstSize bytes just written at dstPtr as history (dctx->dict, dctx->dictSize) for following blocks;
+ * only used for LZ4F_blockLinked mode. Condition : dstPtr != NULL
+ */
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+                      const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+                      unsigned withinTmp)   /* !=0 when called from the flushOut stage, i.e. the bytes were copied out of dctx->tmpOut */
+{
+    assert(dstPtr != NULL);
+    if (dctx->dictSize==0) {
+        dctx->dict = (const BYTE*)dstPtr;   /* priority to prefix mode */
+    }
+    assert(dctx->dict != NULL);
+
+    if (dctx->dict + dctx->dictSize == dstPtr) {  /* prefix mode, everything within dstBuffer */
+        dctx->dictSize += dstSize;   /* new data directly follows the history : just extend it */
+        return;
+    }
+
+    assert(dstPtr >= dstBufferStart);
+    if ((size_t)(dstPtr - dstBufferStart) + dstSize >= 64 KB) {  /* history in dstBuffer becomes large enough to become dictionary */
+        dctx->dict = (const BYTE*)dstBufferStart;
+        dctx->dictSize = (size_t)(dstPtr - dstBufferStart) + dstSize;
+        return;
+    }
+
+    assert(dstSize < 64 KB);   /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+    /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOutBuffer */
+    assert(dctx->tmpOutBuffer != NULL);
+
+    if (withinTmp && (dctx->dict == dctx->tmpOutBuffer)) {   /* continue history within tmpOutBuffer */
+        /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+        assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
+        dctx->dictSize += dstSize;
+        return;
+    }
+
+    if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
+        size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);   /* room available in front of tmpOut */
+        size_t copySize = 64 KB - dctx->tmpOutSize;   /* old history still needed so that history + tmpOut content reaches 64 KB */
+        const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;   /* end of history, excluding bytes already flushed from tmpOut */
+        if (dctx->tmpOutSize > 64 KB) copySize = 0;   /* also protects against the unsigned underflow just above */
+        if (copySize > preserveSize) copySize = preserveSize;
+
+        memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize;
+        return;
+    }
+
+    if (dctx->dict == dctx->tmpOutBuffer) {    /* copy dst into tmp to complete dict */
+        if (dctx->dictSize + dstSize > dctx->maxBufferSize) {  /* tmp buffer not large enough */
+            size_t const preserveSize = 64 KB - dstSize;   /* cannot underflow : dstSize < 64 KB is asserted above */
+            memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+            dctx->dictSize = preserveSize;
+        }
+        memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize);
+        dctx->dictSize += dstSize;
+        return;
+    }
+
+    /* join dict & dest into tmp */
+    {   size_t preserveSize = 64 KB - dstSize;
+        if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;   /* cannot keep more history than exists */
+        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
+        memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+        dctx->dict = dctx->tmpOutBuffer;
+        dctx->dictSize = preserveSize + dstSize;
+    }
+}
+
+
+
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate compressed data in srcBuffer.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ *  into dstBuffer of capacity *dstSizePtr.
+ *
+ *  The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ *  The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ *  If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ *  Remaining data will have to be presented again in a subsequent invocation.
+ *
+ *  The function result is a hint of the best srcSize to use for next call to LZ4F_decompress.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ *  Note that this is just a hint, and it's always possible to provide any srcSize value.
+ *  When a frame is fully decoded, @return will be 0.
+ *  If decompression failed, @return is an error code which can be tested using LZ4F_isError(); *srcSizePtr and *dstSizePtr are then left at 0.
+ */
+size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    LZ4F_decompressOptions_t optionsNull;
+    const BYTE* const srcStart = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcStart + *srcSizePtr;
+    const BYTE* srcPtr = srcStart;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* const dstEnd = dstStart ? dstStart + *dstSizePtr : NULL;
+    BYTE* dstPtr = dstStart;
+    const BYTE* selectedIn = NULL;   /* where the current header / block / checksum is read from : srcBuffer or an internal buffer */
+    unsigned doAnotherStage = 1;     /* cleared by a stage when the loop must stop and hand control back to the caller */
+    size_t nextSrcSizeHint = 1;      /* default hint, returned when a stage stops without setting its own */
+
+
+    DEBUGLOG(5, "LZ4F_decompress : %p,%u => %p,%u",
+            srcBuffer, (unsigned)*srcSizePtr, dstBuffer, (unsigned)*dstSizePtr);
+    if (dstBuffer == NULL) assert(*dstSizePtr == 0);
+    MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
+    if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+    *srcSizePtr = 0;   /* sizes were captured into srcEnd / dstEnd above; early error returns report nothing consumed / produced */
+    *dstSizePtr = 0;
+    assert(dctx != NULL);
+
+    /* behaves as a state machine */
+
+    while (doAnotherStage) {
+
+        switch(dctx->dStage)
+        {
+
+        case dstage_getFrameHeader:
+            DEBUGLOG(6, "dstage_getFrameHeader");
+            if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
+                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr));  /* will update dStage appropriately */
+                if (LZ4F_isError(hSize)) return hSize;
+                srcPtr += hSize;
+                break;
+            }
+            dctx->tmpInSize = 0;
+            if (srcEnd-srcPtr == 0) return minFHSize;   /* 0-size input */
+            dctx->tmpInTarget = minFHSize;   /* minimum size to decode header */
+            dctx->dStage = dstage_storeFrameHeader;
+            /* fall-through */
+
+        case dstage_storeFrameHeader:
+            DEBUGLOG(6, "dstage_storeFrameHeader");
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr));
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+            }
+            if (dctx->tmpInSize < dctx->tmpInTarget) {
+                nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
+                doAnotherStage = 0;   /* not enough src data, ask for some more */
+                break;
+            }
+            {   size_t const hSize = LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget);  /* will update dStage appropriately; may raise tmpInTarget and stay in this stage */
+                if (LZ4F_isError(hSize)) return hSize;
+            }
+            break;
+
+        case dstage_init:
+            DEBUGLOG(6, "dstage_init");
+            if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0);
+            /* internal buffers allocation */
+            {   size_t const bufferNeeded = dctx->maxBlockSize
+                    + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) ? 128 KB : 0);
+                if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
+                    dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
+                    FREEMEM(dctx->tmpIn);
+                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + BFSize /* block checksum */);
+                    if (dctx->tmpIn == NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    FREEMEM(dctx->tmpOutBuffer);
+                    dctx->tmpOutBuffer= (BYTE*)ALLOC(bufferNeeded);
+                    if (dctx->tmpOutBuffer== NULL)
+                        return err0r(LZ4F_ERROR_allocation_failed);
+                    dctx->maxBufferSize = bufferNeeded;
+            }   }
+            dctx->tmpInSize = 0;
+            dctx->tmpInTarget = 0;
+            dctx->tmpOut = dctx->tmpOutBuffer;
+            dctx->tmpOutStart = 0;
+            dctx->tmpOutSize = 0;
+
+            dctx->dStage = dstage_getBlockHeader;
+            /* fall-through */
+
+        case dstage_getBlockHeader:
+            if ((size_t)(srcEnd - srcPtr) >= BHSize) {
+                selectedIn = srcPtr;
+                srcPtr += BHSize;
+            } else {
+                /* not enough input to read cBlockSize field */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeBlockHeader;
+            }
+
+            if (dctx->dStage == dstage_storeBlockHeader)   /* can be skipped; the case label below lets a later call enter the guarded block directly */
+        case dstage_storeBlockHeader:
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = BHSize - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+
+                if (dctx->tmpInSize < BHSize) {   /* not enough input for cBlockSize */
+                    nextSrcSizeHint = BHSize - dctx->tmpInSize;
+                    doAnotherStage  = 0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeBlockHeader) */
+
+        /* decode block header */
+            {   U32 const blockHeader = LZ4F_readLE32(selectedIn);
+                size_t const nextCBlockSize = blockHeader & 0x7FFFFFFFU;   /* high bit is tested separately below (LZ4F_BLOCKUNCOMPRESSED_FLAG) */
+                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize;
+                if (blockHeader==0) {  /* frameEnd signal, no more block */
+                    DEBUGLOG(5, "end of frame");
+                    dctx->dStage = dstage_getSuffix;
+                    break;
+                }
+                if (nextCBlockSize > dctx->maxBlockSize) {
+                    return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+                }
+                if (blockHeader & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+                    /* next block is uncompressed */
+                    dctx->tmpInTarget = nextCBlockSize;
+                    DEBUGLOG(5, "next block is uncompressed (size %u)", (U32)nextCBlockSize);
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        (void)XXH32_reset(&dctx->blockChecksum, 0);
+                    }
+                    dctx->dStage = dstage_copyDirect;
+                    break;
+                }
+                /* next block is a compressed block */
+                dctx->tmpInTarget = nextCBlockSize + crcSize;
+                dctx->dStage = dstage_getCBlock;
+                if (dstPtr==dstEnd || srcPtr==srcEnd) {
+                    nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
+                    doAnotherStage = 0;
+                }
+                break;
+            }
+
+        case dstage_copyDirect:   /* uncompressed block */
+            DEBUGLOG(6, "dstage_copyDirect");
+            {   size_t sizeToCopy;
+                if (dstPtr == NULL) {
+                    sizeToCopy = 0;
+                } else {
+                    size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr));
+                    sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
+                    memcpy(dstPtr, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                    }
+                    if (dctx->frameInfo.contentChecksumFlag)
+                        (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                    if (dctx->frameInfo.contentSize)
+                        dctx->frameRemainingSize -= sizeToCopy;
+
+                    /* history management (linked blocks only)*/
+                    if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                        LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0);
+                }   }
+
+                srcPtr += sizeToCopy;
+                dstPtr += sizeToCopy;
+                if (sizeToCopy == dctx->tmpInTarget) {   /* all done */
+                    if (dctx->frameInfo.blockChecksumFlag) {
+                        dctx->tmpInSize = 0;
+                        dctx->dStage = dstage_getBlockChecksum;
+                    } else
+                        dctx->dStage = dstage_getBlockHeader;  /* new block */
+                    break;
+                }
+                dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
+            }
+            nextSrcSizeHint = dctx->tmpInTarget +
+                            +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0)   /* NOTE(review): the leading '+' is a redundant unary plus, harmless */
+                            + BHSize /* next header size */;
+            doAnotherStage = 0;
+            break;
+
+        /* check block checksum for recently transferred uncompressed block */
+        case dstage_getBlockChecksum:
+            DEBUGLOG(6, "dstage_getBlockChecksum");
+            {   const void* crcSrc;
+                if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) {   /* 4 : presumably the block checksum size (BFSize) - TODO confirm */
+                    crcSrc = srcPtr;
+                    srcPtr += 4;
+                } else {
+                    size_t const stillToCopy = 4 - dctx->tmpInSize;
+                    size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr));
+                    memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                    dctx->tmpInSize += sizeToCopy;
+                    srcPtr += sizeToCopy;
+                    if (dctx->tmpInSize < 4) {  /* all input consumed */
+                        doAnotherStage = 0;   /* nextSrcSizeHint keeps whatever value it had on reaching this stage */
+                        break;
+                    }
+                    crcSrc = dctx->header;
+                }
+                {   U32 const readCRC = LZ4F_readLE32(crcSrc);
+                    U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                    DEBUGLOG(6, "compare block checksum");
+                    if (readCRC != calcCRC) {
+                        DEBUGLOG(4, "incorrect block checksum: %08X != %08X",
+                                readCRC, calcCRC);
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+                    }
+#else
+                    (void)readCRC;
+                    (void)calcCRC;
+#endif
+            }   }
+            dctx->dStage = dstage_getBlockHeader;  /* new block */
+            break;
+
+        case dstage_getCBlock:
+            DEBUGLOG(6, "dstage_getCBlock");
+            if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) {
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeCBlock;
+                break;
+            }
+            /* input large enough to read full block directly */
+            selectedIn = srcPtr;
+            srcPtr += dctx->tmpInTarget;
+
+            if (0)  /* always jump over next block : it is only entered through its case label */
+        case dstage_storeCBlock:
+            {   size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize;
+                size_t const inputLeft = (size_t)(srcEnd-srcPtr);
+                size_t const sizeToCopy = MIN(wantedData, inputLeft);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                dctx->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize)
+                                    + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+                                    + BHSize /* next header size */;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }
+
+            /* At this stage, input is large enough to decode a block */
+            if (dctx->frameInfo.blockChecksumFlag) {
+                dctx->tmpInTarget -= 4;   /* strip the trailing block checksum (4 : presumably BFSize) : tmpInTarget is now the compressed size only */
+                assert(selectedIn != NULL);  /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
+                {   U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
+                    U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                    if (readBlockCrc != calcBlockCrc)
+                        return err0r(LZ4F_ERROR_blockChecksum_invalid);
+#else
+                    (void)readBlockCrc;
+                    (void)calcBlockCrc;
+#endif
+            }   }
+
+            if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
+                const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                assert(dstPtr != NULL);
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                /* enough capacity in `dst` to decompress directly there */
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dstPtr,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
+                if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC);   /* decompression failed; NOTE(review): the tmpOut path below reports LZ4F_ERROR_decompressionFailed for the same failure */
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dstPtr, (size_t)decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode==LZ4F_blockLinked) {
+                    LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0);
+                }
+
+                dstPtr += decodedSize;
+                dctx->dStage = dstage_getBlockHeader;
+                break;
+            }
+
+            /* not enough place into dst : decode into tmpOut */
+            /* ensure enough place for tmpOut */
+            if (dctx->frameInfo.blockMode == LZ4F_blockLinked) {
+                if (dctx->dict == dctx->tmpOutBuffer) {
+                    if (dctx->dictSize > 128 KB) {
+                        memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB);   /* keep only the last 64 KB of history, moved to the buffer start */
+                        dctx->dictSize = 64 KB;
+                    }
+                    dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize;
+                } else {  /* dict not within tmp */
+                    size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
+                    dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
+            }   }
+
+            /* Decode block */
+            {   const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dctx->tmpOut,
+                        (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
+                        dict, (int)dictSize);
+                if (decodedSize < 0)  /* decompression failed */
+                    return err0r(LZ4F_ERROR_decompressionFailed);
+                if (dctx->frameInfo.contentChecksumFlag)
+                    XXH32_update(&(dctx->xxh), dctx->tmpOut, (size_t)decodedSize);
+                if (dctx->frameInfo.contentSize)
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
+                dctx->tmpOutSize = (size_t)decodedSize;
+                dctx->tmpOutStart = 0;
+                dctx->dStage = dstage_flushOut;
+            }
+            /* fall-through */
+
+        case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
+            DEBUGLOG(6, "dstage_flushOut");
+            if (dstPtr != NULL) {
+                size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr));
+                memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
+
+                /* dictionary management */
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
+
+                dctx->tmpOutStart += sizeToCopy;
+                dstPtr += sizeToCopy;
+            }
+            if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */
+                dctx->dStage = dstage_getBlockHeader;  /* get next block */
+                break;
+            }
+            /* could not flush everything : stop there, just request a block header */
+            doAnotherStage = 0;
+            nextSrcSizeHint = BHSize;
+            break;
+
+        case dstage_getSuffix:
+            if (dctx->frameRemainingSize)
+                return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
+            if (!dctx->frameInfo.contentChecksumFlag) {  /* no checksum, frame is completed */
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+            if ((srcEnd - srcPtr) < 4) {  /* not enough size for entire CRC */
+                dctx->tmpInSize = 0;
+                dctx->dStage = dstage_storeSuffix;
+            } else {
+                selectedIn = srcPtr;
+                srcPtr += 4;
+            }
+
+            if (dctx->dStage == dstage_storeSuffix)   /* can be skipped */
+        case dstage_storeSuffix:
+            {   size_t const remainingInput = (size_t)(srcEnd - srcPtr);
+                size_t const wantedData = 4 - dctx->tmpInSize;
+                size_t const sizeToCopy = MIN(wantedData, remainingInput);
+                memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */
+                    nextSrcSizeHint = 4 - dctx->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctx->tmpIn;
+            }   /* if (dctx->dStage == dstage_storeSuffix) */
+
+        /* case dstage_checkSuffix: */   /* no direct entry, avoid initialization risks */
+            {   U32 const readCRC = LZ4F_readLE32(selectedIn);
+                U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+                if (readCRC != resultCRC)
+                    return err0r(LZ4F_ERROR_contentChecksum_invalid);
+#else
+                (void)readCRC;
+                (void)resultCRC;
+#endif
+                nextSrcSizeHint = 0;
+                LZ4F_resetDecompressionContext(dctx);
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getSFrameSize:
+            if ((srcEnd - srcPtr) >= 4) {
+                selectedIn = srcPtr;
+                srcPtr += 4;
+            } else {
+                /* not enough input to read the skippable frame size field */
+                dctx->tmpInSize = 4;   /* the size field is accumulated at dctx->header + 4 */
+                dctx->tmpInTarget = 8;
+                dctx->dStage = dstage_storeSFrameSize;
+            }
+
+            if (dctx->dStage == dstage_storeSFrameSize)
+        case dstage_storeSFrameSize:
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+                                             (size_t)(srcEnd - srcPtr) );
+                memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctx->tmpInSize += sizeToCopy;
+                if (dctx->tmpInSize < dctx->tmpInTarget) {
+                    /* not enough input to get full sBlockSize; wait for more */
+                    nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctx->header + 4;
+            }   /* if (dctx->dStage == dstage_storeSFrameSize) */
+
+        /* case dstage_decodeSFrameSize: */   /* no direct entry */
+            {   size_t const SFrameSize = LZ4F_readLE32(selectedIn);
+                dctx->frameInfo.contentSize = SFrameSize;
+                dctx->tmpInTarget = SFrameSize;   /* nb of bytes still to skip */
+                dctx->dStage = dstage_skipSkippable;
+                break;
+            }
+
+        case dstage_skipSkippable:
+            {   size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr));
+                srcPtr += skipSize;
+                dctx->tmpInTarget -= skipSize;
+                doAnotherStage = 0;
+                nextSrcSizeHint = dctx->tmpInTarget;
+                if (nextSrcSizeHint) break;  /* still more to skip */
+                /* frame fully skipped : prepare context for a new frame */
+                LZ4F_resetDecompressionContext(dctx);
+                break;
+            }
+        }   /* switch (dctx->dStage) */
+    }   /* while (doAnotherStage) */
+
+    /* preserve history within tmp whenever necessary */
+    LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2);
+    if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked)  /* next block will use up to 64KB from previous ones */
+      && (dctx->dict != dctx->tmpOutBuffer)             /* dictionary is not already within tmp */
+      && (dctx->dict != NULL)                           /* dictionary exists */
+      && (!decompressOptionsPtr->stableDst)             /* cannot rely on dst data to remain there for next call */
+      && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) )  /* valid stages : [init ... getSuffix[ */
+    {
+        if (dctx->dStage == dstage_flushOut) {   /* tmpOut still holds unflushed data : history must be placed in front of it */
+            size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);
+            size_t copySize = 64 KB - dctx->tmpOutSize;
+            const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
+            if (dctx->tmpOutSize > 64 KB) copySize = 0;
+            if (copySize > preserveSize) copySize = preserveSize;
+            assert(dctx->tmpOutBuffer != NULL);
+
+            memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = preserveSize + dctx->tmpOutStart;
+        } else {   /* tmpOut is free : save the last (up to) 64 KB of history at the start of tmpOutBuffer */
+            const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize;
+            size_t const newDictSize = MIN(dctx->dictSize, 64 KB);
+
+            memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+            dctx->dict = dctx->tmpOutBuffer;
+            dctx->dictSize = newDictSize;
+            dctx->tmpOut = dctx->tmpOutBuffer + newDictSize;
+        }
+    }
+
+    *srcSizePtr = (size_t)(srcPtr - srcStart);   /* report what was actually consumed / produced */
+    *dstSizePtr = (size_t)(dstPtr - dstStart);
+    return nextSrcSizeHint;
+}
+
+/*! LZ4F_decompress_usingDict() :
+ *  LZ4F_decompress() variant decoding a frame with a predefined dictionary.
+ *  `dict` is referenced "in place" (no copy, no preprocessing), and is only registered while dctx has not gone past dstage_init.
+ *  It must remain accessible throughout the entire frame decoding.
+ */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    int const dictStillSettable = (dctx->dStage <= dstage_init);
+    if (dictStillSettable) {
+        dctx->dictSize = dictSize;
+        dctx->dict = (const BYTE*)dict;
+    }
+    /* from here on, identical to a plain LZ4F_decompress() call */
+    return LZ4F_decompress(dctx, dstBuffer, dstSizePtr, srcBuffer, srcSizePtr, decompressOptionsPtr);
+}
diff --git a/libbutl/lz4frame.h b/libbutl/lz4frame.h
new file mode 100644
index 0000000..4573317
--- /dev/null
+++ b/libbutl/lz4frame.h
@@ -0,0 +1,623 @@
+/*
+   LZ4 auto-framing library
+   Header File
+   Copyright (C) 2011-2017, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API able to create and decode LZ4 frames
+ * conformant with specification v1.6.1 in doc/lz4_Frame_format.md .
+ * Generated frames are compatible with `lz4` CLI.
+ *
+ * LZ4F also offers streaming capabilities.
+ *
+ * lz4.h is not required when using lz4frame.h,
+ * except to extract common constants such as LZ4_VERSION_NUMBER.
+ * */
+
+#ifndef LZ4F_H_09782039843
+#define LZ4F_H_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ---   Dependency   --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md).
+  lz4frame.h provides frame compression functions that take care
+  of encoding standard metadata alongside LZ4-compressed blocks.
+*/
+
+/*-***************************************************************
+ *  Compiler specifics
+ *****************************************************************/
+/*  LZ4_DLL_EXPORT :
+ *  Enable exporting of functions when building a Windows DLL
+ *  LZ4FLIB_VISIBILITY :
+ *  Control library symbols visibility.
+ */
+#ifndef LZ4FLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4FLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4FLIB_VISIBILITY
+#  endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4FLIB_API __declspec(dllexport) LZ4FLIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4FLIB_API __declspec(dllimport) LZ4FLIB_VISIBILITY
+#else
+#  define LZ4FLIB_API LZ4FLIB_VISIBILITY
+#endif
+
+#ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4F_DEPRECATE(x) x
+#else
+#  if defined(_MSC_VER)
+#    define LZ4F_DEPRECATE(x) x   /* __declspec(deprecated) x - only works with C++ */
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6))
+#    define LZ4F_DEPRECATE(x) x __attribute__((deprecated))
+#  else
+#    define LZ4F_DEPRECATE(x) x   /* no deprecation warning for this compiler */
+#  endif
+#endif
+
+
+/*-************************************
+ *  Error management
+ **************************************/
+typedef size_t LZ4F_errorCode_t;
+
+LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);   /**< tells when a function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /**< return error code string; for debugging */
+
+
+/*-************************************
+ *  Frame compression types
+ ************************************* */
+/* #define LZ4F_ENABLE_OBSOLETE_ENUMS   // uncomment to enable obsolete enums */
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+#  define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x
+#else
+#  define LZ4F_OBSOLETE_ENUM(x)
+#endif
+
+/* LZ4F_blockSizeID_t : identifier of the maximum block size of a frame.
+ * The larger the block size, the (slightly) better the compression ratio, though there are diminishing returns.
+ * Larger blocks also increase memory usage on both compression and decompression sides.
+ */
+typedef enum {
+    LZ4F_default=0,     /* 0 == default, as noted on LZ4F_frameInfo_t.blockSizeID */
+    LZ4F_max64KB=4,     /* values 4..7 are explicit; do not renumber */
+    LZ4F_max256KB=5,
+    LZ4F_max1MB=6,
+    LZ4F_max4MB=7
+    LZ4F_OBSOLETE_ENUM(max64KB)     /* un-prefixed aliases : expand to nothing unless LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+    LZ4F_OBSOLETE_ENUM(max256KB)
+    LZ4F_OBSOLETE_ENUM(max1MB)
+    LZ4F_OBSOLETE_ENUM(max4MB)
+} LZ4F_blockSizeID_t;
+
+/* LZ4F_blockMode_t : selects between linked and independent blocks.
+ * Linked blocks sharply reduce inefficiencies when using small blocks : they compress better.
+ * However, some LZ4 decoders are only compatible with independent blocks. */
+typedef enum {
+    LZ4F_blockLinked=0,       /* 0 == default, as noted on LZ4F_frameInfo_t.blockMode */
+    LZ4F_blockIndependent     /* implicitly 1 */
+    LZ4F_OBSOLETE_ENUM(blockLinked)        /* un-prefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+    LZ4F_OBSOLETE_ENUM(blockIndependent)   /* un-prefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+} LZ4F_blockMode_t;
+
+typedef enum {   /* LZ4F_contentChecksum_t : value type of LZ4F_frameInfo_t.contentChecksumFlag */
+    LZ4F_noContentChecksum=0,      /* 0 : disabled (default) */
+    LZ4F_contentChecksumEnabled    /* 1 : frame terminated with 32-bit checksum of decompressed data */
+    LZ4F_OBSOLETE_ENUM(noContentChecksum)        /* un-prefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+    LZ4F_OBSOLETE_ENUM(contentChecksumEnabled)   /* un-prefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+} LZ4F_contentChecksum_t;
+
+typedef enum {   /* LZ4F_blockChecksum_t : value type of LZ4F_frameInfo_t.blockChecksumFlag */
+    LZ4F_noBlockChecksum=0,      /* 0 : disabled (default) */
+    LZ4F_blockChecksumEnabled    /* 1 : each block followed by a checksum of block's compressed data */
+} LZ4F_blockChecksum_t;
+
+typedef enum {   /* LZ4F_frameType_t : value type of the read-only field LZ4F_frameInfo_t.frameType */
+    LZ4F_frame=0,          /* value used by LZ4F_INIT_FRAMEINFO */
+    LZ4F_skippableFrame    /* implicitly 1 */
+    LZ4F_OBSOLETE_ENUM(skippableFrame)    /* un-prefixed alias, only when LZ4F_ENABLE_OBSOLETE_ENUMS is defined */
+} LZ4F_frameType_t;
+
+#ifdef LZ4F_ENABLE_OBSOLETE_ENUMS
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+/*! LZ4F_frameInfo_t :
+ *  Makes it possible to set or read frame parameters.
+ *  The structure must first be initialized to 0, using memset() or LZ4F_INIT_FRAMEINFO,
+ *  which sets all parameters to their default.
+ *  It's then possible to selectively update some parameters. */
+typedef struct {
+  LZ4F_blockSizeID_t     blockSizeID;         /* LZ4F_max64KB, LZ4F_max256KB, LZ4F_max1MB, LZ4F_max4MB; 0 == default */
+  LZ4F_blockMode_t       blockMode;           /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */
+  LZ4F_frameType_t       frameType;           /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+  unsigned long long     contentSize;         /* Size of uncompressed content ; 0 == unknown */
+  unsigned               dictID;              /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */
+  LZ4F_blockChecksum_t   blockChecksumFlag;   /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
+} LZ4F_frameInfo_t;
+
+#define LZ4F_INIT_FRAMEINFO   { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum }    /* v1.8.3+ */
+
+/*! LZ4F_preferences_t :
+ *  Makes it possible to supply advanced compression instructions to the streaming interface.
+ *  The structure must first be initialized to 0, using memset() or LZ4F_INIT_PREFERENCES,
+ *  which sets all parameters to their default.
+ *  All reserved fields must be set to zero. */
+typedef struct {
+  LZ4F_frameInfo_t frameInfo;   /* frame parameters, see LZ4F_frameInfo_t */
+  int      compressionLevel;    /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */
+  unsigned autoFlush;           /* 1: always flush; reduces usage of internal buffers */
+  unsigned favorDecSpeed;       /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */  /* v1.8.2+ */
+  unsigned reserved[3];         /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+#define LZ4F_INIT_PREFERENCES   { LZ4F_INIT_FRAMEINFO, 0, 0u, 0u, { 0u, 0u, 0u } }    /* v1.8.3+ */
+
+
+/*-*********************************
+*  Simple compression function
+***********************************/
+
+LZ4FLIB_API int LZ4F_compressionLevel_max(void);   /* v1.8.0+ */
+
+/*! LZ4F_compressFrameBound() :
+ *  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+ *  Note : this result is only usable with LZ4F_compressFrame().
+ *         It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
+ */
+LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+/*! LZ4F_compressFrame() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                                const void* srcBuffer, size_t srcSize,
+                                const LZ4F_preferences_t* preferencesPtr);
+
+
+/*-***********************************
+*  Advanced compression functions
+*************************************/
+typedef struct LZ4F_cctx_s LZ4F_cctx;   /* incomplete type */
+typedef LZ4F_cctx* LZ4F_compressionContext_t;   /* for compatibility with previous API version */
+
+typedef struct {   /* LZ4F_compressOptions_t : optional `cOptPtr` argument of LZ4F_compressUpdate(), LZ4F_flush() and LZ4F_compressEnd() */
+  unsigned stableSrc;    /* 1 == src content will remain present on future calls to LZ4F_compressUpdate(); skip copying src content within tmp buffer */
+  unsigned reserved[3];  /* NOTE(review): presumably must be zero, like the reserved fields of LZ4F_preferences_t - confirm */
+} LZ4F_compressOptions_t;
+
+/*---   Resource Management   ---*/
+
+#define LZ4F_VERSION 100    /* This number can be used to check for an incompatible API breaking change */
+LZ4FLIB_API unsigned LZ4F_getVersion(void);
+
+/*! LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL.
+ * The function will provide a pointer to a fully allocated LZ4F_cctx object.
+ * If @return != zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
+
+
+/*----    Compression    ----*/
+
+#define LZ4F_HEADER_SIZE_MIN  7   /* LZ4 Frame header size can vary, depending on selected parameters */
+#define LZ4F_HEADER_SIZE_MAX 19   /* upper bound of the frame header size, in bytes */
+
+/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
+#define LZ4F_BLOCK_HEADER_SIZE 4
+
+/* Size in bytes of a block checksum footer in little-endian format. */
+#define LZ4F_BLOCK_CHECKSUM_SIZE 4
+
+/* Size in bytes of the content checksum. */
+#define LZ4F_CONTENT_CHECKSUM_SIZE 4
+
+/*! LZ4F_compressBegin() :
+ *  will write the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * @return : number of bytes written into dstBuffer for the header
+ *           or an error code (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+                                      void* dstBuffer, size_t dstCapacity,
+                                      const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressBound() :
+ *  Provides minimum dstCapacity required to guarantee success of
+ *  LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario.
+ *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead.
+ *  Note that the result is only valid for a single invocation of LZ4F_compressUpdate().
+ *  When invoking LZ4F_compressUpdate() multiple times,
+ *  if the output buffer is gradually filled up instead of emptied and re-used from its start,
+ *  one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound().
+ * @return is always the same for a srcSize and prefsPtr.
+ *  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+ *  tech details :
+ * @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ *  It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
+ * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+ */
+LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+
+/*! LZ4F_compressUpdate() :
+ *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ *  Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ *  This value is provided by LZ4F_compressBound().
+ *  If this condition is not respected, LZ4F_compressUpdate() will fail (result is an errorCode).
+ *  LZ4F_compressUpdate() doesn't guarantee error recovery.
+ *  When an error occurs, compression context must be freed or resized.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
+ * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ */
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+                                       void* dstBuffer, size_t dstCapacity,
+                                 const void* srcBuffer, size_t srcSize,
+                                 const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_flush() :
+ *  When data must be generated and sent immediately, without waiting for a block to be completely filled,
+ *  it's possible to call LZ4F_flush(). It will immediately compress any data buffered within cctx.
+ * `dstCapacity` must be large enough to ensure the operation will be successful.
+ * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
+ * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ */
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* cOptPtr);
+
+/*! LZ4F_compressEnd() :
+ *  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ *  It will flush whatever data remained within `cctx` (like LZ4F_flush())
+ *  and properly finalize the frame, with an endMark and a checksum.
+ * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
+ * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
+ *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
+ *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ */
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+                                    void* dstBuffer, size_t dstCapacity,
+                              const LZ4F_compressOptions_t* cOptPtr);
+
+
+/*-*********************************
+*  Decompression functions
+***********************************/
+typedef struct LZ4F_dctx_s LZ4F_dctx;   /* incomplete type */
+typedef LZ4F_dctx* LZ4F_decompressionContext_t;   /* compatibility with previous API versions */
+
+typedef struct {
+  unsigned stableDst;    /* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */
+  unsigned reserved[3];  /* must be set to zero for forward compatibility */
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+/*! LZ4F_createDecompressionContext() :
+ *  Create an LZ4F_dctx object, to track all decompression operations.
+ *  The version provided MUST be LZ4F_VERSION.
+ *  The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+ *  The result is an errorCode, which can be tested using LZ4F_isError().
+ *  dctx memory can be released using LZ4F_freeDecompressionContext();
+ *  Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+ *  That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
+LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
+
+
+/*-***********************************
+*  Streaming decompression functions
+*************************************/
+
+#define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5
+
+/*! LZ4F_headerSize() : v1.9.0+
+ *  Provide the header size of a frame starting at `src`.
+ * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+ *  which is enough to decode the header length.
+ * @return : size of frame header
+ *           or an error code, which can be tested using LZ4F_isError()
+ *  note : Frame header size is variable, but is guaranteed to be
+ *         >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+ */
+LZ4FLIB_API size_t LZ4F_headerSize(const void* src, size_t srcSize);
+
+/*! LZ4F_getFrameInfo() :
+ *  This function extracts frame parameters (max blockSize, dictID, etc.).
+ *  Its usage is optional: user can call LZ4F_decompress() directly.
+ *
+ *  Extracted information will fill an existing LZ4F_frameInfo_t structure.
+ *  This can be useful for allocation and dictionary identification purposes.
+ *
+ *  LZ4F_getFrameInfo() can work in the following situations :
+ *
+ *  1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+ *     It will decode header from `srcBuffer`,
+ *     consuming the header and starting the decoding process.
+ *
+ *     Input size must be large enough to contain the full frame header.
+ *     Frame header size can be known beforehand by LZ4F_headerSize().
+ *     Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+ *     and <= LZ4F_HEADER_SIZE_MAX bytes.
+ *     Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+ *     It's allowed to provide more input data than the header size,
+ *     LZ4F_getFrameInfo() will only consume the header.
+ *
+ *     If input size is not large enough,
+ *     aka if it's smaller than header size,
+ *     function will fail and return an error code.
+ *
+ *  2) After decoding has been started,
+ *     it's possible to invoke LZ4F_getFrameInfo() anytime
+ *     to extract already decoded frame parameters stored within dctx.
+ *
+ *     Note that, if decoding has barely started,
+ *     and not yet read enough information to decode the header,
+ *     LZ4F_getFrameInfo() will fail.
+ *
+ *  The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+ *  LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+ *  and when decoding the header has been successful.
+ *  Decompression must then resume from (srcBuffer + *srcSizePtr).
+ *
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *           or an error code which can be tested using LZ4F_isError().
+ *  note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
+ *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
+ */
+LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                     LZ4F_frameInfo_t* frameInfoPtr,
+                                     const void* srcBuffer, size_t* srcSizePtr);
+
+/*! LZ4F_decompress() :
+ *  Call this function repetitively to regenerate data compressed in `srcBuffer`.
+ *
+ *  The function requires a valid dctx state.
+ *  It will read up to *srcSizePtr bytes from srcBuffer,
+ *  and decompress data into dstBuffer, of capacity *dstSizePtr.
+ *
+ *  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+ *  The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+ *
+ *  The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
+ *  Unconsumed source data must be presented again in subsequent invocations.
+ *
+ * `dstBuffer` can freely change between each consecutive function invocation.
+ * `dstBuffer` content will be overwritten.
+ *
+ * @return : a hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
+ *  This is just a hint though, it's always possible to provide any srcSize.
+ *
+ *  When a frame is fully decoded, @return will be 0 (no more data expected).
+ *  When provided with more bytes than necessary to decode a frame,
+ *  LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
+ *
+ *  If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
+ *  After a decompression error, the `dctx` context is not resumable.
+ *  Use LZ4F_resetDecompressionContext() to return to clean state.
+ *
+ *  After a frame is fully decoded, dctx can be used again to decompress another frame.
+ */
+LZ4FLIB_API size_t LZ4F_decompress(LZ4F_dctx* dctx,
+                                   void* dstBuffer, size_t* dstSizePtr,
+                                   const void* srcBuffer, size_t* srcSizePtr,
+                                   const LZ4F_decompressOptions_t* dOptPtr);
+
+
+/*! LZ4F_resetDecompressionContext() : added in v1.8.0
+ *  In case of an error, the context is left in "undefined" state.
+ *  In which case, it's necessary to reset it, before re-using it.
+ *  This method can also be used to abruptly stop any unfinished decompression,
+ *  and start a new one using same context resources. */
+LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   /* always successful */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* LZ4F_H_09782039843 */
+
+#if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843)
+#define LZ4F_H_STATIC_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* These declarations are not stable and may change in the future.
+ * They are therefore only safe to depend on
+ * when the caller is statically linked against the library.
+ * To access their declarations, define LZ4F_STATIC_LINKING_ONLY.
+ *
+ * By default, these symbols aren't published into shared/dynamic libraries.
+ * You can override this behavior and force them to be published
+ * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS.
+ * Use at your own risk.
+ */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+# define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+# define LZ4FLIB_STATIC_API
+#endif
+
+
+/* ---   Error List   --- */
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) \
+        ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) \
+        ITEM(ERROR_blockMode_invalid) \
+        ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) \
+        ITEM(ERROR_blockChecksum_invalid) \
+        ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) \
+        ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) \
+        ITEM(ERROR_frameType_unknown) \
+        ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) \
+        ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_frameDecoding_alreadyStarted) \
+        ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,   /* maps one LZ4F_LIST_ERRORS item to the enumerator LZ4F_<item>, followed by a comma */
+
+/* enum list is exposed, to handle specific errors; the final dummy enumerator follows the comma emitted after the last generated item */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM)
+              _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes;
+
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(unsigned);
+
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
+
+/* A Dictionary is useful for the compression of small messages (KB range).
+ * It dramatically improves compression efficiency.
+ *
+ * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
+ * Best results are generally achieved by using Zstandard's Dictionary Builder
+ * to generate a high-quality dictionary from a set of samples.
+ *
+ * Loading a dictionary has a cost, since it involves construction of tables.
+ * The Bulk processing dictionary API makes it possible to share this cost
+ * over an arbitrary number of compression jobs, even concurrently,
+ * markedly improving compression latency for these cases.
+ *
+ * The same dictionary will have to be used on the decompression side
+ * for decoding to be successful.
+ * To help identify the correct dictionary at decoding stage,
+ * the frame header allows optional embedding of a dictID field.
+ */
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4F_createCDict() :
+ *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
+ *  LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4F_compressFrame_usingCDict() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ *  cctx must point to a context created by LZ4F_createCompressionContext().
+ *  If cdict==NULL, compress without a dictionary.
+ *  dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  If this condition is not respected, function will fail (@return an errorCode).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code (which can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctxPtr,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */
diff --git a/libbutl/lz4hc.c b/libbutl/lz4hc.c
new file mode 100644
index 0000000..77c9f43
--- /dev/null
+++ b/libbutl/lz4hc.c
@@ -0,0 +1,1615 @@
+/*
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2017, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/lz4/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+
+
+/* *************************************
+*  Tuning Parameter
+***************************************/
+
+/*! LZ4HC_HEAPMODE :
+ *  Selects how the default compression function allocates its workspace memory :
+ *  on the stack (0 : fastest), or on the heap (1 : requires malloc()).
+ *  Since the workspace is rather large, heap mode is recommended.
+ */
+#ifndef LZ4HC_HEAPMODE
+#  define LZ4HC_HEAPMODE 1   /* default when not already defined : heap mode */
+#endif
+
+
+/*===    Dependency    ===*/
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "lz4hc.h"
+
+
+/*===   Common definitions   ===*/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#define LZ4_COMMONDEFS_ONLY
+#ifndef LZ4_SRC_INCLUDED
+#include "lz4.c"   /* LZ4_count, constants, mem */
+#endif
+
+
+/*===   Enums   ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;   /* usingDictCtxHc : the match finder also searches hc4->dictCtx */
+
+
+/*===   Constants   ===*/
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)   /* longest match length whose (length - MINMATCH) is still below ML_MASK, i.e. encodable in the token alone */
+#define LZ4_OPT_NUM   (1<<12)
+
+
+/*===   Macros   ===*/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )   /* note : the selected argument is evaluated twice */
+#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )   /* note : the selected argument is evaluated twice */
+#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))   /* multiply, then shift right by (MINMATCH*8)-LZ4HC_HASH_LOG bits */
+#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent ; requires a variable named chainTable in scope */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster : the slot index is pos truncated to 16 bits */
+/* Expands to three address-of arguments, making explicit which variables LZ4HC_encodeSequence() receives by pointer and updates */
+#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
+
+static U32 LZ4HC_hashPtr(const void* ptr) { U32 const sequence = LZ4_read32(ptr); return HASH_FUNCTION(sequence); }   /* hash of the 32-bit value read at ptr */
+
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
+{   /* reset both search tables ; the two fills are independent of each other */
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));   /* chain table : filled with 0xFF */
+    MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));        /* hash table : filled with 0 */
+}
+
+static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{   /* Re-anchor the context on `start`, continuing the index space after the previous segment */
+    uptrval newOffset = (uptrval)(hc4->end - hc4->base);   /* index reached by the previous segment */
+    if (newOffset > 1 GB) {   /* indexes grew too large : wipe the tables and restart from 0 */
+        LZ4HC_clearTables(hc4); newOffset = 0;
+    }
+    newOffset += 64 KB;   /* fixed gap added before the first new index */
+    {   U32 const firstIndex = (U32) newOffset;
+        const BYTE* const virtualBase = start - newOffset;   /* chosen so that virtualBase + firstIndex == start */
+        hc4->base = virtualBase; hc4->dictBase = virtualBase;
+        hc4->end = start;
+        hc4->nextToUpdate = firstIndex;
+        hc4->dictLimit = firstIndex; hc4->lowLimit = firstIndex;
+    }
+}
+
+
+/* Insert every position from hc4->nextToUpdate up to ip (excluded) into the hash table and the chain table */
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const hashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx;
+
+    for (idx = hc4->nextToUpdate; idx < target; idx++) {
+        U32* const bucket = hashTable + LZ4HC_hashPtr(base+idx);
+        size_t const rawDelta = idx - *bucket;   /* distance to the previous position stored for this hash */
+        /* deltas are stored as U16 : cap them at LZ4_DISTANCE_MAX */
+        size_t const delta = (rawDelta > LZ4_DISTANCE_MAX) ? LZ4_DISTANCE_MAX : rawDelta;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;
+        *bucket = idx;   /* this position becomes the newest entry of its bucket */
+    }
+
+    hc4->nextToUpdate = target;
+}
+
+/** LZ4HC_countBack() :
+ * @return : negative value (or 0), nb of common bytes found immediately before ip and match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
+{
+    int const min = (int)MAX(iMin - ip, mMin - match);   /* backward limit (<= 0) : neither pointer may go below its own minimum */
+    int back;
+    assert(min <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+    for (back = 0; back > min; back--) {
+        if (ip[back-1] != match[back-1]) break;   /* first mismatch when walking backward */
+    }
+    return back;
+}
+
+#if defined(_MSC_VER)
+#  define LZ4HC_rotl32(x,r) _rotl(x,r)   /* rotate the 32-bit unsigned value x left by r bits */
+#else   /* arguments are parenthesized and shift counts masked, so expression arguments and r==0 are safe ; x is evaluated twice */
+#  define LZ4HC_rotl32(x,r) (((x) << ((r) & 31)) | ((x) >> ((32 - (r)) & 31)))
+#endif
+
+
+static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+{   /* rotate `pattern` left by (rotate modulo sizeof(pattern)) whole bytes ; a zero rotation returns it untouched */
+    size_t const nbBytes = rotate & (sizeof(pattern) - 1);
+    size_t const nbBits = nbBytes << 3;
+    return (nbBits != 0) ? LZ4HC_rotl32(pattern, (int)nbBits) : pattern;
+}
+
+/* LZ4HC_countPattern() :
+ * counts the bytes, from ip up to iEnd (excluded), that keep repeating pattern32 ; pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+    const BYTE* const iStart = ip;
+    reg_t const pattern = (sizeof(pattern)==8) ?   /* when reg_t is 8 bytes wide, duplicate the 32-bit sample into both halves */
+        (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
+
+    while (likely(ip < iEnd-(sizeof(pattern)-1))) {   /* word-at-a-time phase : a full reg_t can still be read before iEnd */
+        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+        if (!diff) { ip+=sizeof(pattern); continue; }   /* the whole word matches the pattern */
+        ip += LZ4_NbCommonBytes(diff);   /* mismatch inside this word : presumably adds the number of leading bytes still matching */
+        return (unsigned)(ip - iStart);
+    }
+
+    if (LZ4_isLittleEndian()) {   /* byte-at-a-time tail : compare against successive pattern bytes, in memory order */
+        reg_t patternByte = pattern;
+        while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+            ip++; patternByte >>= 8;
+        }
+    } else {  /* big endian : the first byte in memory is the most significant one */
+        U32 bitOffset = (sizeof(pattern)*8) - 8;
+        while (ip < iEnd) {
+            BYTE const byte = (BYTE)(pattern >> bitOffset);
+            if (*ip != byte) break;
+            ip ++; bitOffset -= 8;
+        }
+    }
+
+    return (unsigned)(ip - iStart);   /* nb of bytes that followed the pattern */
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * counts how many bytes immediately before ip (never going below iLow) keep repeating `pattern`.
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!), read using natural platform endianness */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+    const BYTE* const iStart = ip;
+    const BYTE* patternTail = (const BYTE*)(&pattern) + 3;   /* last byte of the pattern as stored in memory : works for any endianness */
+
+    /* step 1 : walk backward 4 bytes at a time while whole copies of the pattern are found */
+    while (likely(ip >= iLow+4)) {
+        if (LZ4_read32(ip-4) != pattern) break;
+        ip -= 4;
+    }
+    /* step 2 : finish byte by byte, consuming the pattern from its last byte */
+    for ( ; likely(ip>iLow) && (ip[-1] == *patternTail); ip--, patternTail--) {}
+
+    return (unsigned)(iStart - ip);
+}
+
+/* LZ4HC_protectDictEnd() :
+ * A match starting within the last 3 bytes of the dictionary cannot be used directly,
+ * since reading the 4-byte MINMATCH from there would run past the dictionary end.
+ * @returns non-zero if the match index is okay (i.e. not within those last 3 bytes). */
+static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+{
+    U32 const gapToDictEnd = (U32)((dictLimit - 1) - matchIndex);   /* wraps to a large value when matchIndex >= dictLimit */
+    return (gapToDictEnd >= 3);
+}
+
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;   /* state of the repeated-pattern test on the 4 bytes at ip : not done yet / negative / positive */
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;   /* favorDecompressionSpeed : the match finder ignores candidates closer than 8 bytes */
+
+LZ4_FORCE_INLINE int   /* @return : length of the longest match found, or the incoming `longest` when nothing longer was found */
+LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_CCtx_internal* hc4,          /* search context ; its tables are updated up to ip before searching */
+    const BYTE* const ip,              /* current position : candidates must share its first 4 bytes */
+    const BYTE* const iLowLimit,       /* a match may be extended backward, but not below this position */
+    const BYTE* const iHighLimit,      /* a match may not extend forward beyond this position */
+    int longest,                       /* length to beat : only strictly longer matches are recorded */
+    const BYTE** matchpos,             /* out : match position (may be a virtual address derived from base) */
+    const BYTE** startpos,             /* out : start of the matched segment in the input (ip + back) */
+    const int maxNbAttempts,           /* budget : maximum number of candidates examined */
+    const int patternAnalysis,         /* non-zero : enable the repeated-pattern shortcut */
+    const int chainSwap,               /* non-zero : allow hopping to a chain position with a larger step */
+    const dictCtx_directive dict,      /* usingDictCtxHc : also search hc4->dictCtx */
+    const HCfavor_e favorDecSpeed)     /* non-zero : ignore candidates closer than 8 bytes */
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const U32 ipIndex = (U32)(ip - base);
+    const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;   /* candidates below this index are not considered */
+    const BYTE* const dictBase = hc4->dictBase;
+    int const lookBackLength = (int)(ip-iLowLimit);   /* how far a match may be extended backward ; 0 means forward-only search */
+    int nbAttempts = maxNbAttempts;
+    U32 matchChainPos = 0;
+    U32 const pattern = LZ4_read32(ip);   /* the 4 bytes every candidate must start with */
+    U32 matchIndex;
+    repeat_state_e repeat = rep_untested;
+    size_t srcPatternLength = 0;
+
+    DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+                matchIndex, lowestMatchIndex);
+
+    while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
+        int matchLength=0;
+        nbAttempts--;
+        assert(matchIndex < ipIndex);
+        if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+            /* do nothing */
+        } else if (matchIndex >= dictLimit) {   /* within current Prefix */
+            const BYTE* const matchPtr = base + matchIndex;
+            assert(matchPtr >= lowPrefixPtr);
+            assert(matchPtr < ip);
+            assert(longest >= 1);
+            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {   /* quick filter : compare the 2 bytes around the position a longer match would have to reach */
+                if (LZ4_read32(matchPtr) == pattern) {
+                    int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+                    matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    matchLength -= back;   /* back is <= 0 : this adds the backward extension */
+                    if (matchLength > longest) {
+                        longest = matchLength;
+                        *matchpos = matchPtr + back;
+                        *startpos = ip + back;
+            }   }   }
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
+            const BYTE* const matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == pattern) {
+                const BYTE* const dictStart = dictBase + hc4->lowLimit;
+                int back = 0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);   /* input position corresponding to the end of the dictionary segment */
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))   /* reached the dictionary end : continue counting from the start of the prefix */
+                    matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+                matchLength -= back;
+                if (matchLength > longest) {
+                    longest = matchLength;
+                    *matchpos = base + matchIndex + back;   /* virtual pos, relative to ip, to retrieve offset */
+                    *startpos = ip + back;
+        }   }   }
+
+        if (chainSwap && matchLength==longest) {    /* better match => select a better chain */
+            assert(lookBackLength==0);   /* search forward only */
+            if (matchIndex + (U32)longest <= ipIndex) {
+                int const kTrigger = 4;
+                U32 distanceToNextMatch = 1;
+                int const end = longest - MINMATCH + 1;
+                int step = 1;
+                int accel = 1 << kTrigger;
+                int pos;
+                for (pos = 0; pos < end; pos += step) {   /* look, inside the match, for the position whose chain delta is the largest */
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
+                    step = (accel++ >> kTrigger);   /* the step grows while no better position is found */
+                    if (candidateDist > distanceToNextMatch) {
+                        distanceToNextMatch = candidateDist;
+                        matchChainPos = (U32)pos;
+                        accel = 1 << kTrigger;
+                    }
+                }
+                if (distanceToNextMatch > 1) {
+                    if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
+                    matchIndex -= distanceToNextMatch;
+                    continue;
+        }   }   }
+
+        {   U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+            if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+                U32 const matchCandidateIdx = matchIndex-1;
+                /* may be a repeated pattern */
+                if (repeat == rep_untested) {
+                    if ( ((pattern & 0xFFFF) == (pattern >> 16))
+                      &  ((pattern & 0xFF)   == (pattern >> 24)) ) {   /* both tests hold only when the 4 bytes of pattern are all equal */
+                        repeat = rep_confirmed;
+                        srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+                    } else {
+                        repeat = rep_not;
+                }   }
+                if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
+                  && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
+                    const int extDict = matchCandidateIdx < dictLimit;
+                    const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
+                    if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
+                        const BYTE* const dictStart = dictBase + hc4->lowLimit;
+                        const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
+                        size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
+                        if (extDict && matchPtr + forwardPatternLength == iLimit) {   /* pattern runs to the dictionary end : keep counting in the prefix */
+                            U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
+                            forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
+                        }
+                        {   const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
+                            size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+                            size_t currentSegmentLength;
+                            if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {   /* pattern runs back to the prefix start : keep counting backward in the dictionary */
+                                U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+                                backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
+                            }
+                            /* Limit backLength not go further than lowestMatchIndex */
+                            backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+                            assert(matchCandidateIdx - backLength >= lowestMatchIndex);
+                            currentSegmentLength = backLength + forwardPatternLength;
+                            /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
+                            if ( (currentSegmentLength >= srcPatternLength)   /* current pattern segment large enough to contain full srcPatternLength */
+                              && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+                                U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
+                                if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
+                                    matchIndex = newMatchIndex;
+                                else {
+                                    /* Can only happen if started in the prefix */
+                                    assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+                                    matchIndex = dictLimit;
+                                }
+                            } else {
+                                U32 const newMatchIndex = matchCandidateIdx - (U32)backLength;   /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+                                if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
+                                    assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+                                    matchIndex = dictLimit;
+                                } else {
+                                    matchIndex = newMatchIndex;
+                                    if (lookBackLength==0) {  /* no back possible */
+                                        size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+                                        if ((size_t)longest < maxML) {
+                                            assert(base + matchIndex != ip);
+                                            if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
+                                            assert(maxML < 2 GB);
+                                            longest = (int)maxML;
+                                            *matchpos = base + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
+                                            *startpos = ip;
+                                        }
+                                        {   U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+                                            if (distToNextPattern > matchIndex) break;  /* avoid overflow */
+                                            matchIndex -= distToNextPattern;
+                        }   }   }   }   }
+                        continue;
+                }   }
+        }   }   /* PA optimization */
+
+        /* follow current chain */
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);
+
+    }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) */
+
+    if ( dict == usingDictCtxHc   /* attempts left and window not full : continue the search inside the attached dictionary context */
+      && nbAttempts > 0
+      && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
+        size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
+        U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+        assert(dictEndOffset <= 1 GB);
+        matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;   /* translate the dictCtx index into this context's index space */
+        while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+            const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
+
+            if (LZ4_read32(matchPtr) == pattern) {
+                int mlt;
+                int back = 0;
+                const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+                mlt -= back;
+                if (mlt > longest) {
+                    longest = mlt;
+                    *matchpos = base + matchIndex + back;
+                    *startpos = ip + back;
+            }   }
+
+            {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);   /* follow the dictionary's own chain, keeping both indexes in step */
+                dictMatchIndex -= nextOffset;
+                matchIndex -= nextOffset;
+    }   }   }
+
+    return longest;
+}
+
+LZ4_FORCE_INLINE
+int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* its index tables get updated */
+                                 const BYTE* const ip, const BYTE* const iLimit,
+                                 const BYTE** matchpos,
+                                 const int maxNbAttempts,
+                                 const int patternAnalysis,
+                                 const dictCtx_directive dict)
+{   /* Forward-only search : iLowLimit is set to ip, so the wider-match finder cannot start a match before ip */
+    const BYTE* ignoredStart = ip;   /* receives *startpos, which stays equal to ip here */
+    int const minLengthToBeat = MINMATCH-1;   /* hence any match of at least MINMATCH bytes qualifies */
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip /*iLowLimit*/, iLimit, minLengthToBeat,
+                                        matchpos, &ignoredStart, maxNbAttempts, patternAnalysis,
+                                        0 /*chainSwap*/, dict, favorCompressionRatio);
+}
+
+/* LZ4HC_encodeSequence() :
+ * Writes one sequence : token, literal length, literals, 2-byte offset, then match length.
+ * @return : 0 if ok, 1 if buffer issue detected (*_op may then have been advanced already) */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** _ip,          /* in/out : current input position ; advanced by matchLength on success */
+    BYTE** _op,                /* in/out : output write position */
+    const BYTE** _anchor,      /* in/out : start of the pending literals ; set to the new *_ip on success */
+    int matchLength,
+    const BYTE* const match,   /* match position ; the offset written is (*_ip - match) */
+    limitedOutput_directive limit,   /* non-zero : check the remaining output room against oend */
+    BYTE* oend)
+{
+#define ip      (*_ip)      /* local aliases, so the body reads as if it owned the caller's variables ; undefined again after the function */
+#define op      (*_op)
+#define anchor  (*_anchor)
+
+    size_t length;
+    BYTE* const token = op++;   /* reserve the token byte ; it is filled in below */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+    static const BYTE* start = NULL;
+    static U32 totalCost = 0;
+    U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+    U32 const ll = (U32)(ip - anchor);
+    U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+    U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+    if (start==NULL) start = anchor;  /* only works for single segment */
+    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
+                pos,
+                (U32)(ip - anchor), matchLength, (U32)(ip-match),
+                cost, totalCost);
+    totalCost += cost;
+#endif
+
+    /* Encode Literal length */
+    length = (size_t)(ip - anchor);
+    LZ4_STATIC_ASSERT(notLimited == 0);   /* the `if (limit && ...)` tests below rely on notLimited being 0 */
+    /* Check output limit */
+    if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+                (int)length, (int)(oend - op));
+        return 1;
+    }
+    if (length >= RUN_MASK) {   /* literal length does not fit in the token : emit extra length bytes */
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *op++ = 255;
+        *op++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
+
+    /* Copy Literals */
+    LZ4_wildCopy8(op, anchor, op + length);
+    op += length;
+
+    /* Encode Offset */
+    assert( (ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
+    LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+    /* Encode MatchLength */
+    assert(matchLength >= MINMATCH);
+    length = (size_t)matchLength - MINMATCH;   /* the stored value is the length in excess of MINMATCH */
+    if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+        DEBUGLOG(6, "Not enough room to write match length");
+        return 1;   /* Check output limit */
+    }
+    if (length >= ML_MASK) {   /* match length does not fit in the token : emit extra length bytes */
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+        if (length >= 255) { length -= 255; *op++ = 255; }
+        *op++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    ip += matchLength;
+    anchor = ip;
+
+    return 0;
+}
+#undef ip
+#undef op
+#undef anchor
+
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (   /* hash-chain parser ; @return : nb of bytes written into dest, or 0 on failure */
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int* srcSizePtr,            /* in : input size ; out : nb of input bytes consumed (reset to 0 at start) */
+    int const maxOutputSize,
+    int maxNbAttempts,          /* search budget handed to the match finders */
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    const int inputSize = *srcSizePtr;
+    const int patternAnalysis = (maxNbAttempts > 128);   /* levels 9+ */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;   /* start of the literals not yet emitted */
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;   /* no match search starts beyond this position */
+    const BYTE* const matchlimit = (iend - LASTLITERALS);   /* matches may not extend beyond this position */
+
+    BYTE* optr = (BYTE*) dest;   /* output position saved before each sequence ; restored in _dest_overflow */
+    BYTE* op = (BYTE*) dest;
+    BYTE* oend = op + maxOutputSize;
+
+    int   ml0, ml, ml2, ml3;
+    const BYTE* start0;
+    const BYTE* ref0;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
+
+    /* init */
+    *srcSizePtr = 0;
+    if (limit == fillOutput) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
+    if (inputSize < LZ4_minLength) goto _last_literals;             /* Input too small, no compression (all literals) */
+
+    /* Main Loop */
+    while (ip <= mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+        if (ml<MINMATCH) { ip++; continue; }   /* no match at this position : try the next byte */
+
+        /* saved, in case we would skip too much */
+        start0 = ip; ref0 = ref; ml0 = ml;
+
+_Search2:
+        if (ip+ml <= mflimit) {   /* look for a longer match overlapping the end of the current one */
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
+            ml2 = ml;
+        }
+
+        if (ml2 == ml) { /* No better match => encode ML1 */
+            optr = op;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+            continue;
+        }
+
+        if (start0 < ip) {   /* first match was skipped at least once */
+            if (start2 < ip + ml0) {  /* squeezing ML1 between ML0(original ML1) and ML2 */
+                ip = start0; ref = ref0; ml = ml0;  /* restore initial ML1 */
+        }   }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3) {  /* First Match too small : removed */
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /* At this stage, we have :
+        *  ml2 > ml1, and
+        *  ip1+3 <= ip2 (usually < ip1+ml1) */
+        if ((start2 - ip) < OPTIMAL_ML) {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0) {   /* shift the start of match 2 forward so that it begins where the shortened match 1 ends */
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 <= mflimit) {   /* look for a third match, longer than match 2 and overlapping its end */
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
+            ml3 = ml2;
+        }
+
+        if (ml3 == ml2) {  /* No better match => encode ML1 and ML2 */
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            optr = op;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+            ip = start2;
+            optr = op;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
+                ml  = ml2;   /* _dest_overflow works on ml and ref : make them describe the sequence that failed */
+                ref = ref2;
+                goto _dest_overflow;
+            }
+            continue;
+        }
+
+        if (start3 < ip+ml+3) {  /* Not enough space for match 2 : remove it */
+            if (start3 >= (ip+ml)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+                if (start2 < ip+ml) {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH) {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                optr = op;
+                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches;
+        * let's write the first one ML1.
+        * ip & ref are known; Now decide ml.
+        */
+        if (start2 < ip+ml) {
+            if ((start2 - ip) < OPTIMAL_ML) {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0) {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            } else {
+                ml = (int)(start2 - ip);
+            }
+        }
+        optr = op;
+        if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+
+        /* ML2 becomes ML1 */
+        ip = start2; ref = ref2; ml = ml2;
+
+        /* ML3 becomes ML2 */
+        start2 = start3; ref2 = ref3; ml2 = ml3;
+
+        /* let's find a new ML3 */
+        goto _Search3;
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;   /* nb of extra bytes needed to encode the literal length */
+        size_t const totalSize = 1 + llAdd + lastRunSize;
+        if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;   /* not enough room : compression fails */
+            /* adapt lastRunSize to fill 'dest' */
+            lastRunSize  = (size_t)(oend - op) - 1 /*token*/;
+            llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+            lastRunSize -= llAdd;
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+        ip = anchor + lastRunSize;  /* can be != iend if limit==fillOutput */
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip) - source);   /* nb of input bytes actually consumed */
+    return (int) (((char*)op)-dest);
+
+_dest_overflow:
+    if (limit == fillOutput) {   /* fillOutput : try to squeeze in a shortened last sequence, then finish with literals */
+        /* Assumption : ip, anchor, ml and ref must be set correctly */
+        size_t const ll = (size_t)(ip - anchor);
+        size_t const ll_addbytes = (ll + 240) / 255;
+        size_t const ll_totalCost = 1 + ll_addbytes + ll;
+        BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+        DEBUGLOG(6, "Last sequence overflowing");
+        op = optr;  /* restore correct out pointer */
+        if (op + ll_totalCost <= maxLitPos) {
+            /* ll validated; now adjust match length */
+            size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+            size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+            assert(maxMlSize < INT_MAX); assert(ml >= 0);
+            if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
+            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
+                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
+        }   }
+        goto _last_literals;
+    }
+    /* compression failed */
+    return 0;
+}
+
+
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+    const char* const source, char* dst,
+    int* srcSizePtr, int dstCapacity,
+    int const nbSearches, size_t sufficient_len,
+    const limitedOutput_directive limit, int const fullUpdate,
+    const dictCtx_directive dict,
+    const HCfavor_e favorDecSpeed);
+
+
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    /* per-level tuning : parser family, number of search attempts, target match length */
+    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+    typedef struct {
+        lz4hc_strat_e strat;
+        int nbSearches;
+        U32 targetLength;
+    } cParams_t;
+    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+        { lz4hc,     2, 16 },  /* 0, unused */
+        { lz4hc,     2, 16 },  /* 1, unused */
+        { lz4hc,     2, 16 },  /* 2, unused */
+        { lz4hc,     4, 16 },  /* 3 */
+        { lz4hc,     8, 16 },  /* 4 */
+        { lz4hc,    16, 16 },  /* 5 */
+        { lz4hc,    32, 16 },  /* 6 */
+        { lz4hc,    64, 16 },  /* 7 */
+        { lz4hc,   128, 16 },  /* 8 */
+        { lz4hc,   256, 16 },  /* 9 */
+        { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+        { lz4opt,  512,128 },  /*11 */
+        { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+    };
+
+    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+                ctx, src, *srcSizePtr, limit);
+
+    if (limit == fillOutput && dstCapacity < 1) return 0;   /* no room to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;    /* input size too large or negative */
+
+    ctx->end += *srcSizePtr;
+    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
+    if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;   /* cap to the last table entry */
+    {   cParams_t const params = clTable[cLevel];
+        int cSize;
+        if (params.strat != lz4hc) {
+            HCfavor_e const favorMode = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+            assert(params.strat == lz4opt);
+            cSize = LZ4HC_compress_optimal(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                params.nbSearches, params.targetLength, limit,
+                                cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
+                                dict, favorMode);
+        } else {
+            cSize = LZ4HC_compress_hashChain(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                params.nbSearches, limit, dict);
+        }
+        if (cSize <= 0) ctx->dirty = 1;   /* a failed run marks the context as dirty */
+        return cSize;
+    }
+}
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
+
+/* Compress one block when no dictionary context is attached to `ctx` :
+ * forwards every argument to LZ4HC_compress_generic_internal() in noDictCtx mode
+ * and returns whatever that routine returns. */
+static int
+LZ4HC_compress_generic_noDictCtx (LZ4HC_CCtx_internal* const ctx,
+                                  const char* const src, char* const dst,
+                                  int* const srcSizePtr, int const dstCapacity,
+                                  int cLevel, limitedOutput_directive limit)
+{
+    int cSize;
+    assert(ctx->dictCtx == NULL);   /* caller must have detached any dictCtx */
+    cSize = LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
+    return cSize;
+}
+
+/* Compress one block while a dictionary context is attached to `ctx`.
+ * `position` is (ctx->end - ctx->base) - ctx->lowLimit; depending on its value the
+ * dictCtx is dropped, copied into `ctx`, or used alongside `ctx`. */
+static int
+LZ4HC_compress_generic_dictCtx (LZ4HC_CCtx_internal* const ctx,
+                                const char* const src, char* const dst,
+                                int* const srcSizePtr, int const dstCapacity,
+                                int cLevel, limitedOutput_directive limit)
+{
+    const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit;
+    assert(ctx->dictCtx != NULL);
+    if (position >= 64 KB) {
+        /* detach the dictCtx and continue without it */
+        ctx->dictCtx = NULL;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+    if (position == 0 && *srcSizePtr > 4 KB) {
+        /* overwrite ctx with a copy of the dictCtx, then register its content as extDict */
+        memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
+        LZ4HC_setExternalDict(ctx, (const BYTE *)src);
+        ctx->compressionLevel = (short)cLevel;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
+}
+
+/* Common entry point of the HC compression variants in this file :
+ * routes the block to the dictCtx path when a dictionary context
+ * is attached to `ctx`, and to the noDictCtx path otherwise. */
+static int
+LZ4HC_compress_generic (LZ4HC_CCtx_internal* const ctx,
+                        const char* const src, char* const dst,
+                        int* const srcSizePtr, int const dstCapacity,
+                        int cLevel, limitedOutput_directive limit)
+{
+    if (ctx->dictCtx != NULL) {
+        /* a dictionary context is attached */
+        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    }
+
+    /* plain case : nothing attached */
+    return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+}
+
+
+int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }   /* size in bytes of an LZ4_streamHC_t */
+
+static size_t LZ4_streamHC_t_alignment(void)
+{   /* value handed to LZ4_isAligned(); 1 unless LZ4_ALIGN_TEST is enabled */
+#if !LZ4_ALIGN_TEST
+    return 1;  /* effectively disabled */
+#else
+    typedef struct { char pad; LZ4_streamHC_t t; } probe_t;
+    return sizeof(probe_t) - sizeof(LZ4_streamHC_t);
+#endif
+}
+
+/* state is presumed correctly initialized,
+ * in which case its size and alignment have already been validated */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)state;
+    LZ4HC_CCtx_internal* const ctx = &hcStream->internal_donotuse;
+    limitedOutput_directive outMode;   /* limitedOutput only when dst is smaller than LZ4_compressBound(srcSize) */
+    if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
+    LZ4_resetStreamHC_fast(hcStream, compressionLevel);
+    LZ4HC_init_internal (ctx, (const BYTE*)src);
+    outMode = (dstCapacity < LZ4_compressBound(srcSize)) ? limitedOutput : notLimited;
+    return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, outMode);
+}
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{   /* fully initialize `state`, then compress through the fastReset variant */
+    LZ4_streamHC_t* const hcStream = LZ4_initStreamHC(state, sizeof(LZ4_streamHC_t));
+    if (hcStream == NULL) return 0;   /* init failure (NULL or misaligned state) */
+    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{   /* one-shot compression with a temporary state : obtained via ALLOC when LZ4HC_HEAPMODE==1, a local variable otherwise */
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    LZ4_streamHC_t* const hcState = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+#else
+    LZ4_streamHC_t localState;
+    LZ4_streamHC_t* const hcState = &localState;
+#endif
+    int const compressedSize = LZ4_compress_HC_extStateHC(hcState, src, dst, srcSize, dstCapacity, compressionLevel);
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    FREEMEM(hcState);
+#endif
+    return compressedSize;
+}
+
+/* `state` is presumed large enough, i.e. >= sizeof(LZ4_streamHC_t) */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+    LZ4_streamHC_t* const hcStream = LZ4_initStreamHC(state, sizeof(LZ4_streamHC_t));
+    if (hcStream == NULL) return 0;   /* init failure */
+    LZ4HC_init_internal(&hcStream->internal_donotuse, (const BYTE*) source);
+    LZ4_setCompressionLevel(hcStream, cLevel);
+    return LZ4HC_compress_generic(&hcStream->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);   /* fillOutput mode */
+}
+
+
+
+/**************************************
+*  Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void)
+{   /* returns a stream from ALLOC_AND_ZERO set to the default level, or NULL when allocation fails */
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+    if (hcStream != NULL) {
+        LZ4_setCompressionLevel(hcStream, LZ4HC_CLEVEL_DEFAULT);
+    }
+    return hcStream;
+}
+
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
+    /* always returns 0; a NULL pointer is accepted and ignored */
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+    if (LZ4_streamHCPtr != NULL) { FREEMEM(LZ4_streamHCPtr); }
+    return 0;
+}
+
+
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
+{
+    LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)buffer;
+    /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+    /* reject a buffer that is missing, too small, or misaligned */
+    if ( (buffer == NULL)
+      || (size < sizeof(LZ4_streamHC_t))
+      || !LZ4_isAligned(buffer, LZ4_streamHC_t_alignment()) )
+        return NULL;
+    /* zero the internal state, then apply the default compression level */
+    MEM_INIT(&hcStream->internal_donotuse, 0, sizeof(hcStream->internal_donotuse));
+    LZ4_setCompressionLevel(hcStream, LZ4HC_CLEVEL_DEFAULT);
+    return hcStream;
+}
+
+/* just a stub : full re-initialization followed by a level update */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    (void)LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(LZ4_streamHC_t));   /* return value is not checked */
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{   LZ4HC_CCtx_internal* const hc = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    if (!hc->dirty) {
+        /* preserve end - base : can trigger clearTable's threshold */
+        hc->end -= (uptrval)hc->base;
+        hc->base = NULL;
+        hc->dictCtx = NULL;
+    } else {   /* state flagged dirty : full re-initialization */
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+    }
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{   int level = compressionLevel;   /* sanitized below : <1 => default, >max => max */
+    DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    if (level < 1) level = LZ4HC_CLEVEL_DEFAULT;
+    if (level > LZ4HC_CLEVEL_MAX) level = LZ4HC_CLEVEL_MAX;
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)level;
+}
+
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
+{   /* any non-zero `favor` is stored as 1, zero as 0 */
+    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = favor ? 1 : 0;
+}
+
+/* LZ4_loadDictHC() : re-initializes the stream on `dictionary` (only its last 64 KB are kept) and returns the retained size.
+ * LZ4_streamHCPtr is presumed properly initialized */
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
+              const char* dictionary, int dictSize)
+{
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
+    assert(LZ4_streamHCPtr != NULL);
+    if (dictSize > 64 KB) {   /* oversized dictionary : skip its beginning, keep the trailing 64 KB */
+        dictionary += (size_t)dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    /* need a full initialization, there are bad side-effects when using resetFast() */
+    {   int const cLevel = ctxPtr->compressionLevel;   /* saved : LZ4_initStreamHC() resets the level to the default */
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+        LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+    }
+    LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;   /* the whole retained dictionary counts as already-seen input */
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* only done when at least 4 bytes are available */
+    return dictSize;
+}
+
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
+    working_stream->internal_donotuse.dictCtx = (dictionary_stream == NULL) ? NULL : &dictionary_stream->internal_donotuse;   /* NULL clears dictCtx */
+}
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
+{   /* turns the current prefix [dictLimit, end) into the external dictionary and restarts the prefix at newBlock */
+    DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+    if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;   /* extDict begins where the old prefix began */
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);   /* extDict stops at the current end index */
+    ctxPtr->dictBase  = ctxPtr->base;   /* extDict indexes keep resolving against the old base */
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;   /* chosen so that base + dictLimit == newBlock */
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+
+    /* cannot reference an extDict and a dictCtx at the same time */
+    ctxPtr->dictCtx = NULL;
+}
+
+static int
+LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+                                 const char* src, char* dst,
+                                 int* srcSizePtr, int dstCapacity,
+                                 limitedOutput_directive limit)
+{   /* streaming step : updates the window bookkeeping for `src`, then compresses it at the stream's stored level */
+    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+                LZ4_streamHCPtr, src, *srcSizePtr, limit);
+    assert(ctxPtr != NULL);
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {   /* index space nearly exhausted : reload the last (up to) 64 KB as a dictionary */
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+        LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)src != ctxPtr->end)   /* non-contiguous input : previous data becomes the external dictionary */
+        LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
+        const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);   /* dictionary now starts after the overlapped part */
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;   /* fewer than 4 bytes left : drop the dictionary */
+    }   }
+
+    return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    /* limitedOutput only when dst is smaller than LZ4_compressBound(srcSize) */
+    limitedOutput_directive const outMode =
+        (dstCapacity < LZ4_compressBound(srcSize)) ? limitedOutput : notLimited;
+    return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, outMode);
+}
+
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
+{   int const cSize = LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput);   /* streaming call in fillOutput mode */
+    return cSize;
+}
+
+
+
+/* LZ4_saveDictHC :
+ * save history content (at most 64 KB, taken from the end of the current prefix)
+ * into a user-provided buffer
+ * which is then used to continue compression.
+ * @return : saved dictionary size (0 when dictSize < 4) */
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+    LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+    int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+    assert(prefixSize >= 0);
+    if (dictSize > 64 KB) dictSize = 64 KB;
+    if (dictSize < 4) dictSize = 0;
+    if (dictSize > prefixSize) dictSize = prefixSize;   /* cannot save more than the prefix holds */
+    if (safeBuffer == NULL) assert(dictSize == 0);
+    if (dictSize > 0)
+        memmove(safeBuffer, streamPtr->end - dictSize, dictSize);   /* memmove : source and destination may overlap */
+    {   U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);   /* current end index, kept unchanged by the rebasing below */
+        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+        streamPtr->base = streamPtr->end - endIndex;   /* so that end - base still equals endIndex */
+        streamPtr->dictLimit = endIndex - (U32)dictSize;
+        streamPtr->lowLimit = endIndex - (U32)dictSize;   /* lowLimit == dictLimit : no external dictionary remains */
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+            streamPtr->nextToUpdate = streamPtr->dictLimit;
+    }
+    return dictSize;
+}
+
+
+/***************************************************
+*  Deprecated Functions
+***************************************************/
+
+/* These functions currently generate deprecation warnings */
+
+/* Wrappers for deprecated compression functions : each forwards to its current LZ4_compress_HC* equivalent; variants without a level argument pass 0 (replaced by the default level downstream), variants without a capacity argument pass LZ4_compressBound(srcSize) */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }   /* returns the LZ4_STREAMHCSIZE constant */
+
+/* `state` is presumed large enough to hold an LZ4_streamHC_t.
+ * @return : 0 on success, 1 when stream initialization fails */
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+    LZ4_streamHC_t* const hcStream = LZ4_initStreamHC(state, sizeof(LZ4_streamHC_t));
+    int const initFailed = (hcStream == NULL);
+    if (!initFailed) LZ4HC_init_internal (&hcStream->internal_donotuse, (const BYTE*)inputBuffer);
+    return initFailed;
+}
+
+void* LZ4_createHC (const char* inputBuffer)
+{   /* allocates a stream and hands it, with `inputBuffer`, to LZ4HC_init_internal(); NULL when allocation fails */
+    LZ4_streamHC_t* const hcStream = LZ4_createStreamHC();
+    if (hcStream != NULL)
+        LZ4HC_init_internal (&hcStream->internal_donotuse, (const BYTE*)inputBuffer);
+    return hcStream;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    /* always returns 0; a NULL pointer is accepted and ignored */
+    if (LZ4HC_Data != NULL) { FREEMEM(LZ4HC_Data); }
+    return 0;
+}
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
+{   LZ4HC_CCtx_internal* const hc = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    return LZ4HC_compress_generic (hc, src, dst, &srcSize, 0, cLevel, notLimited);   /* notLimited mode : capacity argument is 0 */
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
+{   LZ4HC_CCtx_internal* const hc = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    return LZ4HC_compress_generic (hc, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);   /* limitedOutput mode with the caller's capacity */
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{   LZ4_streamHC_t* const hcStream = (LZ4_streamHC_t*)LZ4HC_Data;
+    LZ4HC_CCtx_internal* const hc = &hcStream->internal_donotuse;
+    const BYTE* const windowStart = hc->base + hc->lowLimit;   /* captured before the reset clears base */
+    LZ4_resetStreamHC_fast(hcStream, hc->compressionLevel);
+    /* go through uptrval to drop const without a conversion warning */
+    return (char *)(uptrval)windowStart;
+}
+
+
+/* ================================================
+ *  LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
+ * ===============================================*/
+typedef struct {   /* one entry per relative position examined by the optimal parser */
+    int price;   /* price in bytes recorded for this position */
+    int off;   /* match offset; set to 0 for literal entries */
+    int mlen;   /* match length; 1 means literal */
+    int litlen;   /* literal run length recorded with this entry */
+} LZ4HC_optimal_t;
+
+/* price in bytes : the literals themselves, plus extra length bytes once litlen reaches RUN_MASK */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+    int const runMask = (int)RUN_MASK;
+    int extraLenBytes = 0;
+    assert(litlen >= 0);
+    if (litlen >= runMask) extraLenBytes = 1 + ((litlen - runMask) / 255);
+    return litlen + extraLenBytes;
+}
+
+
+/* requires mlen >= MINMATCH.
+ * price in bytes : token + 16-bit offset + literals + extra match-length bytes */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+    int const longMatchThreshold = (int)(ML_MASK+MINMATCH);
+    int total;
+    assert(litlen >= 0);
+    assert(mlen >= MINMATCH);
+
+    total = 1 + 2 + LZ4HC_literalsPrice(litlen);   /* token + 16-bit offset + literals */
+    if (mlen >= longMatchThreshold)
+        total += 1 + ((mlen - longMatchThreshold) / 255);
+    return total;
+}
+
+
+typedef struct {   /* result of LZ4HC_FindLongerMatch(); {0,0} when no match is reported */
+    int off;   /* distance from the searched position back to the match */
+    int len;   /* match length */
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+                      const BYTE* ip, const BYTE* const iHighLimit,
+                      int minLen, int nbSearches,
+                      const dictCtx_directive dict,
+                      const HCfavor_e favorDecSpeed)
+{   /* looks at `ip` for a match strictly longer than minLen; returns {0,0} when none is found */
+    LZ4HC_match_t match = { 0 , 0 };
+    const BYTE* matchPtr = NULL;
+    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+     * but this won't be the case here, as we define iLowLimit==ip,
+     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+    int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+    if (matchLength <= minLen) return match;   /* nothing longer than minLen : report "no match" */
+    if (favorDecSpeed) {
+        if ((matchLength>18) & (matchLength<=36)) matchLength=18;   /* favor shortcut */
+    }
+    match.len = matchLength;
+    match.off = (int)(ip-matchPtr);   /* distance from ip back to the match start */
+    return match;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
+                                    const char* const source,
+                                    char* dst,
+                                    int* srcSizePtr,
+                                    int dstCapacity,
+                                    int const nbSearches,
+                                    size_t sufficient_len,
+                                    const limitedOutput_directive limit,
+                                    int const fullUpdate,
+                                    const dictCtx_directive dict,
+                                    const HCfavor_e favorDecSpeed)
+{
+    int retval = 0;
+#define TRAILING_LITERALS 3
+#ifdef LZ4HC_HEAPMODE
+    LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
+#else
+    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* ~64 KB, which is a bit large for stack... */
+#endif
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+    BYTE* op = (BYTE*) dst;
+    BYTE* opSaved = (BYTE*) dst;
+    BYTE* oend = op + dstCapacity;
+    int ovml = MINMATCH;  /* overflow - last sequence */
+    const BYTE* ovref = NULL;
+
+    /* init */
+#ifdef LZ4HC_HEAPMODE
+    if (opt == NULL) goto _return_label;
+#endif
+    DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
+    *srcSizePtr = 0;
+    if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+    /* Main Loop */
+    while (ip <= mflimit) {
+         int const llen = (int)(ip - anchor);
+         int best_mlen, best_off;
+         int cur, last_match_pos = 0;
+
+         LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+         if (firstMatch.len==0) { ip++; continue; }
+
+         if ((size_t)firstMatch.len > sufficient_len) {
+             /* good enough solution : immediate encoding */
+             int const firstML = firstMatch.len;
+             const BYTE* const matchPos = ip - firstMatch.off;
+             opSaved = op;
+             if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) {  /* updates ip, op and anchor */
+                 ovml = firstML;
+                 ovref = matchPos;
+                 goto _dest_overflow;
+             }
+             continue;
+         }
+
+         /* set prices for first positions (literals) */
+         {   int rPos;
+             for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+                 int const cost = LZ4HC_literalsPrice(llen + rPos);
+                 opt[rPos].mlen = 1;
+                 opt[rPos].off = 0;
+                 opt[rPos].litlen = llen + rPos;
+                 opt[rPos].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             rPos, cost, opt[rPos].litlen);
+         }   }
+         /* set prices using initial match */
+         {   int mlen = MINMATCH;
+             int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
+             int const offset = firstMatch.off;
+             assert(matchML < LZ4_OPT_NUM);
+             for ( ; mlen <= matchML ; mlen++) {
+                 int const cost = LZ4HC_sequencePrice(llen, mlen);
+                 opt[mlen].mlen = mlen;
+                 opt[mlen].off = offset;
+                 opt[mlen].litlen = llen;
+                 opt[mlen].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+                             mlen, cost, mlen);
+         }   }
+         last_match_pos = firstMatch.len;
+         {   int addLit;
+             for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                 opt[last_match_pos+addLit].mlen = 1; /* literal */
+                 opt[last_match_pos+addLit].off = 0;
+                 opt[last_match_pos+addLit].litlen = addLit;
+                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+         }   }
+
+         /* check further positions */
+         for (cur = 1; cur < last_match_pos; cur++) {
+             const BYTE* const curPtr = ip + cur;
+             LZ4HC_match_t newMatch;
+
+             if (curPtr > mflimit) break;
+             DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+                     cur, opt[cur].price, opt[cur+1].price, cur+1);
+             if (fullUpdate) {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if ( (opt[cur+1].price <= opt[cur].price)
+                   /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+                   && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+                     continue;
+             } else {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if (opt[cur+1].price <= opt[cur].price) continue;
+             }
+
+             DEBUGLOG(7, "search at rPos:%u", cur);
+             if (fullUpdate)
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+             else
+                 /* only test matches of minimum length; slightly faster, but misses a few bytes */
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+             if (!newMatch.len) continue;
+
+             if ( ((size_t)newMatch.len > sufficient_len)
+               || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+                 /* immediate encoding */
+                 best_mlen = newMatch.len;
+                 best_off = newMatch.off;
+                 last_match_pos = cur + 1;
+                 goto encode;
+             }
+
+             /* before match : set price with literals at beginning */
+             {   int const baseLitlen = opt[cur].litlen;
+                 int litlen;
+                 for (litlen = 1; litlen < MINMATCH; litlen++) {
+                     int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+                     int const pos = cur + litlen;
+                     if (price < opt[pos].price) {
+                         opt[pos].mlen = 1; /* literal */
+                         opt[pos].off = 0;
+                         opt[pos].litlen = baseLitlen+litlen;
+                         opt[pos].price = price;
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+                                     pos, price, opt[pos].litlen);
+             }   }   }
+
+             /* set prices using match at position = cur */
+             {   int const matchML = newMatch.len;
+                 int ml = MINMATCH;
+
+                 assert(cur + newMatch.len < LZ4_OPT_NUM);
+                 for ( ; ml <= matchML ; ml++) {
+                     int const pos = cur + ml;
+                     int const offset = newMatch.off;
+                     int price;
+                     int ll;
+                     DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+                                 pos, last_match_pos);
+                     if (opt[cur].mlen == 1) {
+                         ll = opt[cur].litlen;
+                         price = ((cur > ll) ? opt[cur - ll].price : 0)
+                               + LZ4HC_sequencePrice(ll, ml);
+                     } else {
+                         ll = 0;
+                         price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+                     }
+
+                    assert((U32)favorDecSpeed <= 1);
+                     if (pos > last_match_pos+TRAILING_LITERALS
+                      || price <= opt[pos].price - (int)favorDecSpeed) {
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+                                     pos, price, ml);
+                         assert(pos < LZ4_OPT_NUM);
+                         if ( (ml == matchML)  /* last pos of last match */
+                           && (last_match_pos < pos) )
+                             last_match_pos = pos;
+                         opt[pos].mlen = ml;
+                         opt[pos].off = offset;
+                         opt[pos].litlen = ll;
+                         opt[pos].price = price;
+             }   }   }
+             /* complete following positions with literals */
+             {   int addLit;
+                 for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                     opt[last_match_pos+addLit].mlen = 1; /* literal */
+                     opt[last_match_pos+addLit].off = 0;
+                     opt[last_match_pos+addLit].litlen = addLit;
+                     opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                     DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+             }   }
+         }  /* for (cur = 1; cur <= last_match_pos; cur++) */
+
+         assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
+         best_mlen = opt[last_match_pos].mlen;
+         best_off = opt[last_match_pos].off;
+         cur = last_match_pos - best_mlen;
+
+encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+         assert(cur < LZ4_OPT_NUM);
+         assert(last_match_pos >= 1);  /* == 1 when only one candidate */
+         DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+         {   int candidate_pos = cur;
+             int selected_matchLength = best_mlen;
+             int selected_offset = best_off;
+             while (1) {  /* from end to beginning */
+                 int const next_matchLength = opt[candidate_pos].mlen;  /* can be 1, means literal */
+                 int const next_offset = opt[candidate_pos].off;
+                 DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+                 opt[candidate_pos].mlen = selected_matchLength;
+                 opt[candidate_pos].off = selected_offset;
+                 selected_matchLength = next_matchLength;
+                 selected_offset = next_offset;
+                 if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+                 assert(next_matchLength > 0);  /* can be 1, means literal */
+                 candidate_pos -= next_matchLength;
+         }   }
+
+         /* encode all recorded sequences in order */
+         {   int rPos = 0;  /* relative position (to ip) */
+             while (rPos < last_match_pos) {
+                 int const ml = opt[rPos].mlen;
+                 int const offset = opt[rPos].off;
+                 if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
+                 rPos += ml;
+                 assert(ml >= MINMATCH);
+                 assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
+                 opSaved = op;
+                 if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) {  /* updates ip, op and anchor */
+                     ovml = ml;
+                     ovref = ip - offset;
+                     goto _dest_overflow;
+         }   }   }
+     }  /* while (ip <= mflimit) */
+
+_last_literals:
+     /* Encode Last Literals */
+     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+         size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+         size_t const totalSize = 1 + llAdd + lastRunSize;
+         if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
+         if (limit && (op + totalSize > oend)) {
+             if (limit == limitedOutput) { /* Check output limit */
+                retval = 0;
+                goto _return_label;
+             }
+             /* adapt lastRunSize to fill 'dst' */
+             lastRunSize  = (size_t)(oend - op) - 1 /*token*/;
+             llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+             lastRunSize -= llAdd;
+         }
+         DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+         ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+         if (lastRunSize >= RUN_MASK) {
+             size_t accumulator = lastRunSize - RUN_MASK;
+             *op++ = (RUN_MASK << ML_BITS);
+             for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+             *op++ = (BYTE) accumulator;
+         } else {
+             *op++ = (BYTE)(lastRunSize << ML_BITS);
+         }
+         memcpy(op, anchor, lastRunSize);
+         op += lastRunSize;
+     }
+
+     /* End */
+     *srcSizePtr = (int) (((const char*)ip) - source);
+     retval = (int) ((char*)op-dst);
+     goto _return_label;
+
+_dest_overflow:
+if (limit == fillOutput) {
+     /* Assumption : ip, anchor, ovml and ovref must be set correctly */
+     size_t const ll = (size_t)(ip - anchor);
+     size_t const ll_addbytes = (ll + 240) / 255;
+     size_t const ll_totalCost = 1 + ll_addbytes + ll;
+     BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+     DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
+     op = opSaved;  /* restore correct out pointer */
+     if (op + ll_totalCost <= maxLitPos) {
+         /* ll validated; now adjust match length */
+         size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+         size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+         assert(maxMlSize < INT_MAX); assert(ovml >= 0);
+         if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
+         if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
+             DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
+             DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
+             LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+             DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
+     }   }
+     goto _last_literals;
+}
+_return_label:
+#ifdef LZ4HC_HEAPMODE
+     FREEMEM(opt);
+#endif
+     return retval;
+}
diff --git a/libbutl/lz4hc.h b/libbutl/lz4hc.h
new file mode 100644
index 0000000..3d441fb
--- /dev/null
+++ b/libbutl/lz4hc.h
@@ -0,0 +1,413 @@
+/*
+   LZ4 HC - High Compression Mode of LZ4
+   Header File
+   Copyright (C) 2011-2017, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/lz4/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef LZ4_HC_H_19834876238432
+#define LZ4_HC_H_19834876238432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
+#include "lz4.h"   /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
+
+
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN         3
+#define LZ4HC_CLEVEL_DEFAULT     9
+#define LZ4HC_CLEVEL_OPT_MIN    10
+#define LZ4HC_CLEVEL_MAX        12
+
+
+/*-************************************
+ *  Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ *  Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ *  Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ *  Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ *                      Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+
+/* Note :
+ *   Decompression functions are provided within "lz4.h" (BSD license)
+ */
+
+
+/*! LZ4_compress_HC_extStateHC() :
+ *  Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ *  Memory segment must be aligned on 8-byte boundaries (which a normal malloc() should do properly).
+ */
+LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ *  Will compress as much data as possible from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+                                  const char* src, char* dst,
+                                        int* srcSizePtr, int targetDstSize,
+                                        int compressionLevel);
+
+
+/*-************************************
+ *  Streaming Compression
+ *  Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ *  These functions create and release memory for LZ4 HC streaming state.
+ *  Newly created states are automatically initialized.
+ *  The same state can be used multiple times consecutively,
+ *  starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+
+/*
+  These functions compress data in successive blocks of any size,
+  using previous blocks as dictionary, to improve compression ratio.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller than 64 KB.
+  Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+  Before starting compression, state must be allocated and properly initialized.
+  LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+  Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+  or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+  LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+  which is automatically the case when state is created using LZ4_createStreamHC().
+
+  After reset, a first "fictional block" can be designated as initial dictionary,
+  using LZ4_loadDictHC() (Optional).
+
+  Invoke LZ4_compress_HC_continue() to compress each successive block.
+  The number of blocks is unlimited.
+  Previous input blocks, including initial dictionary when present,
+  must remain accessible and unmodified during compression.
+
+  It's allowed to update compression level anytime between blocks,
+  using LZ4_setCompressionLevel() (experimental).
+
+  'dst' buffer should be sized to handle worst case scenarios
+  (see LZ4_compressBound(), it ensures compression success).
+  In case of failure, the API does not guarantee recovery,
+  so the state _must_ be reset.
+  To ensure compression success
+  whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+  consider using LZ4_compress_HC_continue_destSize().
+
+  Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+  it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+  After completing a streaming compression,
+  it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+  just by resetting it, using LZ4_resetStreamHC_fast().
+*/
+
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel);   /* v1.9.0+ */
+LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+                                   const char* src, char* dst,
+                                         int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ *  Similar to LZ4_compress_HC_continue(),
+ *  but will read as much data as possible from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
+ *           Note that this function may not consume the entire input.
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+                                           const char* src, char* dst,
+                                                 int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+
+
+/*^**********************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***********************************************/
+
+/*-******************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;   /* private HC compressor state; embedded in LZ4_streamHC_u as `internal_donotuse` */
+struct LZ4HC_CCtx_internal
+{
+    LZ4_u32   hashTable[LZ4HC_HASHTABLESIZE];   /* LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG) 32-bit slots */
+    LZ4_u16   chainTable[LZ4HC_MAXD];           /* LZ4HC_MAXD (1 << LZ4HC_DICTIONARY_LOGSIZE) 16-bit slots */
+    const LZ4_byte* end;       /* next block here to continue on current prefix */
+    const LZ4_byte* base;      /* All index relative to this position */
+    const LZ4_byte* dictBase;  /* alternate base for extDict */
+    LZ4_u32   dictLimit;       /* below that point, need extDict */
+    LZ4_u32   lowLimit;        /* below that point, no more dict */
+    LZ4_u32   nextToUpdate;    /* index from which to continue dictionary update */
+    short     compressionLevel;  /* presumably the level chosen via LZ4_resetStreamHC_fast() / LZ4_setCompressionLevel() -- confirm in lz4hc.c */
+    LZ4_i8    favorDecSpeed;   /* favor decompression speed if this flag set,
+                                  otherwise, favor compression ratio */
+    LZ4_i8    dirty;           /* stream has to be fully reset if this flag is set */
+    const LZ4HC_CCtx_internal* dictCtx;  /* NOTE(review): looks like the dictionary stream referenced in-place by LZ4_attach_HC_dictionary() -- confirm in lz4hc.c */
+};
+
+
+/* Do not use these definitions directly !
+ * Declare or allocate an LZ4_streamHC_t instead.
+ */
+#define LZ4_STREAMHCSIZE       262200  /* static size, for inter-version compatibility */
+#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
+union LZ4_streamHC_u {   /* storage type behind LZ4_streamHC_t; declare an LZ4_streamHC_t rather than using the members directly */
+    void* table[LZ4_STREAMHCSIZE_VOIDP];     /* LZ4_STREAMHCSIZE / sizeof(void*) pointers: makes the union at least that large, with pointer alignment */
+    LZ4HC_CCtx_internal internal_donotuse;   /* the compressor state itself; private, as the name says */
+}; /* previously typedef'd to LZ4_streamHC_t */
+
+/* LZ4_streamHC_t :
+ * This structure allows static allocation of LZ4 HC streaming state.
+ * This can be used to allocate statically, on stack, or as part of a larger structure.
+ *
+ * Such state **must** be initialized using LZ4_initStreamHC() before first use.
+ *
+ * Note that invoking LZ4_initStreamHC() is not required when
+ * the state was created using LZ4_createStreamHC() (which is recommended).
+ * Using the normal builder, a newly created state is automatically initialized.
+ *
+ * Static allocation shall only be used in combination with static linking.
+ */
+
+/* LZ4_initStreamHC() : v1.9.0+
+ * Required before first use of a statically allocated LZ4_streamHC_t.
+ * Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size);
+
+
+/*-************************************
+*  Deprecated Functions
+**************************************/
+/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
+
+/* deprecated compression functions */
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC               (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2              (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue               (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
+ * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
+ * which is now the recommended function to start a new stream of blocks,
+ * but cannot be used to initialize a memory segment containing arbitrary garbage data.
+ *
+ * It is recommended to switch to LZ4_initStreamHC().
+ * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
+ */
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!!     STATIC LINKING ONLY     !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY   /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+#define LZ4_STATIC_LINKING_ONLY   /* LZ4LIB_STATIC_API */
+#include "lz4.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ *  It's possible to change compression level
+ *  between successive invocations of LZ4_compress_HC_continue*()
+ *  for dynamic adaptation.
+ */
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ *  Opt. Parser will favor decompression speed over compression ratio.
+ *  Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
+ */
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
+ *  When an LZ4_streamHC_t is known to be in an internally coherent state,
+ *  it can often be prepared for a new compression with almost no work, only
+ *  sometimes falling back to the full, expensive reset that is always required
+ *  when the stream is in an indeterminate state (i.e., the reset performed by
+ *  LZ4_resetStreamHC()).
+ *
+ *  LZ4_streamHCs are guaranteed to be in a valid state when:
+ *  - returned from LZ4_createStreamHC()
+ *  - reset by LZ4_resetStreamHC()
+ *  - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ *  - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ *  - the stream was in a valid state and was then used in any compression call
+ *    that returned success
+ *  - the stream was in an indeterminate state and was used in a compression
+ *    call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ *    returned success
+ *
+ *  Note:
+ *  A stream that was last used in a compression call that returned an error
+ *  may be passed to this function. However, it will be fully reset, which will
+ *  clear any existing history and settings from the context.
+ */
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ *  A variant of LZ4_compress_HC_extStateHC().
+ *
+ *  Using this variant avoids an expensive initialization step. It is only safe
+ *  to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ *  "correctly initialized"). From a high level, the difference is that this
+ *  function initializes the provided state with a call to
+ *  LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ *  call to LZ4_resetStreamHC().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+    void* state,
+    const char* src, char* dst,
+    int srcSize, int dstCapacity,
+    int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ *  This is an experimental API that allows for the efficient use of a
+ *  static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ *  working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionary stream pointer may be NULL, in which
+ *  case any existing dictionary stream is unset.
+ *
+ *  A dictionary should only be attached to a stream without any history (i.e.,
+ *  a stream that has just been reset).
+ *
+ *  The dictionary will remain attached to the working stream only for the
+ *  current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ *  dictionary context association from the working stream. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the lifetime of the stream session.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
+          LZ4_streamHC_t *working_stream,
+    const LZ4_streamHC_t *dictionary_stream);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* LZ4_HC_SLO_098092834 */
+#endif   /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 9514bbd..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -1,39 +1,10 @@
 // file      : libbutl/manifest-parser.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-parser.mxx>
-#endif
+#include <libbutl/manifest-parser.hxx>
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#include <vector>
-#include <cstdint>
-#include <utility>
-#include <stdexcept>
-
-#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#endif
-
-#endif
+#include <cassert>
 
 using namespace std;
 
@@ -177,41 +148,136 @@ namespace butl
   {
     using iterator = string::const_iterator;
 
-    auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+    // Parse the value differently depending on whether it is multi-line or
+    // not.
+    //
+    if (v.find ('\n') == string::npos) // Single-line.
+    {
+      auto space = [] (char c) {return c == ' ' || c == '\t';};
 
-    iterator i (v.begin ());
-    iterator e (v.end ());
+      iterator i (v.begin ());
+      iterator e (v.end ());
 
-    string r;
-    size_t n (0);
-    for (char c; i != e && (c = *i) != ';'; ++i)
-    {
-      // Unescape ';' character.
+      string r;
+      size_t n (0);
+      for (char c; i != e && (c = *i) != ';'; ++i)
+      {
+        // Unescape ';' and '\' characters.
+        //
+        if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+          c = *++i;
+
+        r += c;
+
+        if (!space (c))
+          n = r.size ();
+      }
+
+      // Strip the value trailing spaces.
       //
-      if (c == '\\' && i + 1 != e && *(i + 1) == ';')
-        c = *++i;
+      if (r.size () != n)
+        r.resize (n);
 
-      r += c;
+      // Find beginning of a comment (i).
+      //
+      if (i != e)
+      {
+        // Skip spaces.
+        //
+        for (++i; i != e && space (*i); ++i);
+      }
 
-      if (!space (c))
-        n = r.size ();
+      return make_pair (move (r), string (i, e));
     }
+    else // Multi-line.
+    {
+      string r;
+      string c;
 
-    // Strip the value trailing spaces.
-    //
-    if (r.size () != n)
-      r.resize (n);
+      // Parse the value lines until the comment separator is encountered or
+      // the end of the value is reached. Add these lines to the resulting
+      // value, unescaping them if required.
+      //
+      // Note that we only need to unescape lines which have the '\+;' form.
+      //
+      auto i (v.begin ());
+      auto e (v.end ());
 
-    // Find beginning of a comment (i).
-    //
-    if (i != e)
-    {
-      // Skip spaces.
+      while (i != e)
+      {
+        // Find the end of the line and while at it the first non-backslash
+        // character.
+        //
+        auto le (i);
+        auto nb (e);
+        for (; le != e && *le != '\n'; ++le)
+        {
+          if (nb == e && *le != '\\')
+            nb = le;
+        }
+
+        // If the value end is not reached then position to the beginning of
+        // the next line and to the end of the value otherwise.
+        //
+        auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+        // If the first non-backslash character is ';' and it is the last
+        // character on the line, then this is either the comment separator or
+        // an escape sequence.
+        //
+        if (nb != e && *nb == ';' && nb + 1 == le)
+        {
+          // If ';' is the first (and thus the only) character on the line,
+          // then this is the comment separator and we bail out from this
+          // loop. Note that in this case we need to trim the trailing newline
+          // (but only one) from the resulting value since it is considered as
+          // a part of the separator.
+          //
+          if (nb == i)
+          {
+            if (!r.empty ())
+            {
+              assert (r.back () == '\n');
+              r.pop_back ();
+            }
+
+            next ();
+            break;
+          }
+          //
+          // Otherwise, this is an escape sequence, so unescape it. For that
+          // just take the rightmost half of the string:
+          //
+          // \;     -> ;
+          // \\;    -> \;
+          // \\\;   -> \;
+          // \\\\;  -> \\;
+          // \\\\\; -> \\;
+          //
+          else
+            i += (le - i) / 2;
+        }
+
+        // Add the line to the resulting value together with the trailing
+        // newline, if present.
+        //
+        r.append (i, le);
+
+        if (le != e)
+          r += '\n';
+
+        next ();
+      }
+
+      // If we haven't reached the end of the value then it means we've
+      // encountered the comment separator. In this case save the remaining
+      // value part as a comment.
       //
-      for (++i; i != e && space (*i); ++i);
-    }
+      if (i != e)
+        c = string (i, e);
 
-    return make_pair (move (r), string (i, e));
+      return make_pair (move (r), move (c));
+    }
   }
 
   void manifest_parser::
@@ -251,7 +317,8 @@ namespace butl
     string& v (r.value);
     string::size_type n (0); // Size of last non-space character (simple mode).
 
-    // Detect the multi-line mode introductor.
+    // Detect the old-fashioned multi-line mode introducer (like in
+    // 'foo:\<newline>').
     //
     bool ml (false);
     if (c == '\\')
@@ -266,11 +333,46 @@ namespace butl
         ml = true;
       }
       else if (eos (p))
+      {
+        c = p;     // Set to EOF.
         ml = true;
+      }
       else
         unget (c);
     }
 
+    // Detect the new-fashioned multi-line mode introducer (like in
+    // 'foo:<newline>\<newline>').
+    //
+    if (!ml && c == '\n')
+    {
+      get ();
+      xchar p1 (peek ());
+
+      if (p1 == '\\')
+      {
+        get ();
+        xchar p2 (peek ());
+
+        if (p2 == '\n')
+        {
+          get (); // Newline is not part of the value so skip it.
+          c = peek ();
+          ml = true;
+        }
+        else if (eos (p2))
+        {
+          c = p2;    // Set to EOF.
+          ml = true;
+        }
+        else
+          unget (p1);  // Unget '\\'. Note: '\n' will be ungot below.
+      }
+
+      if (!ml)
+        unget (c); // Unget '\n'.
+    }
+
     // Multi-line value starts from the line that follows the name.
     //
     if (ml)
@@ -281,7 +383,7 @@ namespace butl
 
     // The nl flag signals that the preceding character was a "special
     // newline", that is, a newline that was part of the milti-line mode
-    // introductor or an escape sequence.
+    // introducer or an escape sequence.
     //
     for (bool nl (ml); !eos (c); c = peek ())
     {
@@ -299,7 +401,7 @@ namespace butl
       //
       // The first block handles the special sequence that starts with
       // a special newline. In multi-line mode, this is an "immediate
-      // termination" where we "use" the newline from the introductor.
+      // termination" where we "use" the newline from the introducer.
       // Note also that in the simple mode the special sequence can
       // only start with a special (i.e., escaped) newline.
       //
@@ -472,11 +574,21 @@ namespace butl
   static inline string
   format (const string& n, uint64_t l, uint64_t c, const string& d)
   {
-    ostringstream os;
+    using std::to_string;
+
+    string r;
     if (!n.empty ())
-      os << n << ':';
-    os << l << ':' << c << ": error: " << d;
-    return os.str ();
+    {
+      r += n;
+      r += ':';
+    }
+
+    r += to_string (l);
+    r += ':';
+    r += to_string (c);
+    r += ": error: ";
+    r += d;
+    return r;
   }
 
   manifest_parsing::
diff --git a/libbutl/manifest-parser.mxx b/libbutl/manifest-parser.hxx
index 77addff..601fb2d 100644
--- a/libbutl/manifest-parser.mxx
+++ b/libbutl/manifest-parser.hxx
@@ -1,13 +1,8 @@
-// file      : libbutl/manifest-parser.mxx -*- C++ -*-
+// file      : libbutl/manifest-parser.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <iosfwd>
@@ -15,30 +10,15 @@
 #include <utility>    // pair, move()
 #include <stdexcept>  // runtime_error
 #include <functional>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utf8;
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/char-scanner.mxx>
-#include <libbutl/manifest-types.mxx>
-#endif
+
+#include <libbutl/utf8.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/char-scanner.hxx>
+#include <libbutl/manifest-types.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error
   {
@@ -57,7 +37,7 @@ LIBBUTL_MODEXPORT namespace butl
   };
 
   class LIBBUTL_SYMEXPORT manifest_parser:
-    protected char_scanner<utf8_validator>
+    protected char_scanner<utf8_validator, 2>
   {
   public:
     // The filter, if specified, is called by next() prior to returning the
@@ -103,7 +83,7 @@ LIBBUTL_MODEXPORT namespace butl
     split_comment (const std::string&);
 
   private:
-    using base = char_scanner<utf8_validator>;
+    using base = char_scanner<utf8_validator, 2>;
 
     void
     parse_next (manifest_name_value&);
diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx
index e38d5f4..1232e9c 100644
--- a/libbutl/manifest-rewriter.cxx
+++ b/libbutl/manifest-rewriter.cxx
@@ -1,41 +1,15 @@
 // file      : libbutl/manifest-rewriter.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-rewriter.mxx>
-#endif
+#include <libbutl/manifest-rewriter.hxx>
 
-#include <cassert>
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
+#include <cassert>
 #include <cstdint> // uint64_t
 #include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_rewriter;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.manifest_types;
-#endif
-
-import butl.utility;             // utf8_length()
-import butl.manifest_serializer;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/manifest-serializer.mxx>
-#endif
+
+#include <libbutl/utility.hxx>             // utf8_length()
+#include <libbutl/manifest-serializer.hxx>
 
 using namespace std;
 
@@ -64,7 +38,7 @@ namespace butl
       // Temporary move the descriptor into the stream.
       //
       ifdstream is (move (fd));
-      fdbuf& buf (static_cast<fdbuf&> (*is.rdbuf ()));
+      fdstreambuf& buf (static_cast<fdstreambuf&> (*is.rdbuf ()));
 
       // Read suffix.
       //
@@ -99,8 +73,6 @@ namespace butl
 
     if (!nv.value.empty ())
     {
-      os << ' ';
-
       manifest_serializer s (os, path_.string (), long_lines_);
 
       // Note that the name can be surrounded with the ASCII whitespace
@@ -112,7 +84,7 @@ namespace butl
       //
       s.write_value (nv.value,
                      static_cast<size_t> (nv.colon_pos - nv.start_pos) -
-                     (nv.name.size () - utf8_length (nv.name)) + 2);
+                     (nv.name.size () - utf8_length (nv.name)) + 1);
     }
 
     os << suffix;
@@ -144,15 +116,13 @@ namespace butl
 
     if (!nv.value.empty ())
     {
-      os << ' ';
-
       // Note that the name can be surrounded with the ASCII whitespace
       // characters and the start_pos refers to the first character in the
       // line.
       //
       s.write_value (nv.value,
                      static_cast<size_t> (nv.colon_pos - nv.start_pos) -
-                     (nv.name.size () - n) + 2);
+                     (nv.name.size () - n) + 1);
     }
 
     os << suffix;
diff --git a/libbutl/manifest-rewriter.mxx b/libbutl/manifest-rewriter.hxx
index 907c990..02a533a 100644
--- a/libbutl/manifest-rewriter.mxx
+++ b/libbutl/manifest-rewriter.hxx
@@ -1,33 +1,15 @@
-// file      : libbutl/manifest-rewriter.mxx -*- C++ -*-
+// file      : libbutl/manifest-rewriter.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_rewriter;
-#ifdef __cpp_lib_modules_ts
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.manifest_types;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/manifest-types.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/manifest-types.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Rewrite a hand-written manifest file preserving formatting, comments,
   // etc., of the unaffected parts. The general workflow is as follows:
diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx
index 6a26a15..26699e0 100644
--- a/libbutl/manifest-serializer.cxx
+++ b/libbutl/manifest-serializer.cxx
@@ -1,41 +1,13 @@
 // file      : libbutl/manifest-serializer.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-serializer.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <stdexcept>
+#include <libbutl/manifest-serializer.hxx>
 
 #include <ostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_serializer;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.manifest_types;
-#endif
+#include <cassert>
 
-import butl.utf8;
-import butl.utility;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utf8.hxx>
+#include <libbutl/utility.hxx>
 
 using namespace std;
 
@@ -95,10 +67,7 @@ namespace butl
         os_ << ':';
 
         if (!v.empty ())
-        {
-          os_ << ' ';
-          write_value (v, l + 2);
-        }
+          write_value (v, l + 1);
 
         os_ << endl;
         break;
@@ -132,22 +101,89 @@ namespace butl
   merge_comment (const string& value, const string& comment)
   {
     string r;
-    for (char c: value)
+
+    // Merge the value and comment differently depending on whether either of
+    // them is multi-line or not.
+    //
+    if (value.find ('\n') == string::npos && // Single-line.
+        comment.find ('\n') == string::npos)
     {
-      // Escape ';' character.
-      //
-      if (c == ';')
-        r += '\\';
+      for (char c: value)
+      {
+        // Escape ';' and '\' characters.
+        //
+        if (c == ';' || c == '\\')
+          r += '\\';
 
-      r += c;
-    }
+        r += c;
+      }
 
-    // Add the comment.
-    //
-    if (!comment.empty ())
+      // Add the comment.
+      //
+      if (!comment.empty ())
+      {
+        r += "; ";
+        r += comment;
+      }
+    }
+    else // Multi-line.
     {
-      r += "; ";
-      r += comment;
+      // Parse the value lines and add them to the resulting value, escaping
+      // them if required.
+      //
+      // Note that we only need to escape lines which have the '\*;' form.
+      //
+      for (auto i (value.begin ()), e (value.end ()); i != e; )
+      {
+        // Find the end of the line and while at it the first non-backslash
+        // character.
+        //
+        auto le (i);
+        auto nb (e);
+        for (; le != e && *le != '\n'; ++le)
+        {
+          if (nb == e && *le != '\\')
+            nb = le;
+        }
+
+        // If the first non-backslash character is ';' and it is the last
+        // character on the line, then we need to escape the line characters.
+        // Note that we only escape ';' if it is the only character on the
+        // line. Otherwise, we only escape backslashes doubling the number of
+        // them from the left:
+        //
+        // ;   -> \;
+        // \;  -> \\;
+        // \\; -> \\\\;
+        // \\\; -> \\\\\\;
+        //
+        if (nb != e && *nb == ';' && nb + 1 == le)
+          r.append (nb == i ? 1 : nb - i, '\\');
+
+        // Add the line to the resulting value together with the trailing
+        // newline, if present.
+        //
+        r.append (i, le);
+
+        if (le != e)
+          r += '\n';
+
+        // If the value end is not reached then position to the beginning of
+        // the next line and to the end of the value otherwise.
+        //
+        i = (le != e ? le + 1 : e);
+      }
+
+      // Append the comment, if present.
+      //
+      if (!comment.empty ())
+      {
+        if (!r.empty ())
+          r += '\n';
+
+        r += ";\n";
+        r += comment;
+      }
     }
 
     return r;
@@ -301,6 +337,8 @@ namespace butl
   void manifest_serializer::
   write_value (const string& v, size_t cl)
   {
+    assert (!v.empty ());
+
     // Consider both \r and \n characters as line separators, and the
     // \r\n characters sequence as a single line separator.
     //
@@ -319,11 +357,17 @@ namespace butl
     // readability, still allowing the user to easily copy the value which
     // seems to be the main reason for using the flag.
     //
-    if (cl > 39 || nl () != string::npos ||
-        v.front () == ' ' || v.front () == '\t' ||
-        v.back () == ' ' || v.back () == '\t')
+    if (cl + 1 > 39           || // '+ 1' for the space after the colon.
+        nl () != string::npos ||
+        v.front () == ' '     ||
+        v.front () == '\t'    ||
+        v.back () == ' '      ||
+        v.back () == '\t')
     {
-      os_ << "\\" << endl; // Multi-line mode introductor.
+      if (multiline_v2_)
+        os_ << endl;
+
+      os_ << "\\" << endl; // Multi-line mode introducer.
 
       // Chunk the value into fragments separated by newlines.
       //
@@ -346,7 +390,10 @@ namespace butl
       os_ << endl << "\\"; // Multi-line mode terminator.
     }
     else
-      write_value (v.c_str (), v.size (), cl);
+    {
+      os_ << ' ';
+      write_value (v.c_str (), v.size (), cl + 1);
+    }
   }
 
   // manifest_serialization
diff --git a/libbutl/manifest-serializer.mxx b/libbutl/manifest-serializer.hxx
index b73c255..2159901 100644
--- a/libbutl/manifest-serializer.mxx
+++ b/libbutl/manifest-serializer.hxx
@@ -1,37 +1,20 @@
-// file      : libbutl/manifest-serializer.mxx -*- C++ -*-
+// file      : libbutl/manifest-serializer.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <iosfwd>
 #include <cstddef>    // size_t
 #include <stdexcept>  // runtime_error
 #include <functional>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.manifest_serializer;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.manifest_types;
-#else
-#include <libbutl/manifest-types.mxx>
-#endif
+#include <libbutl/manifest-types.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   class LIBBUTL_SYMEXPORT manifest_serialization: public std::runtime_error
   {
@@ -62,14 +45,19 @@ LIBBUTL_MODEXPORT namespace butl
     // Unless long_lines is true, break lines in values (including multi-line)
     // so that their length does not exceed 78 codepoints (including '\n').
     //
+    // Note that the multiline_v2 flag is temporary and should not be used
+    // except by the implementation for testing.
+    //
     manifest_serializer (std::ostream& os,
                          const std::string& name,
                          bool long_lines = false,
-                         std::function<filter_function> filter = {})
+                         std::function<filter_function> filter = {},
+                         bool multiline_v2 = false)
       : os_ (os),
         name_ (name),
         long_lines_ (long_lines),
-        filter_ (std::move (filter))
+        filter_ (std::move (filter)),
+        multiline_v2_ (multiline_v2)
     {
     }
 
@@ -113,10 +101,12 @@ LIBBUTL_MODEXPORT namespace butl
     size_t
     write_name (const std::string&);
 
-    // Write a value assuming the current line already has the specified
-    // codepoint offset. If the resulting line length would be too large then
-    // the multi-line representation will be used. It is assumed that the
-    // name, followed by the colon, is already written.
+    // Write a non-empty value assuming the current line already has the
+    // specified codepoint offset. If the resulting line length would be too
+    // large then the multi-line representation will be used. For the
+    // single-line representation the space character is written before the
+    // value. It is assumed that the name, followed by the colon, is already
+    // written.
     //
     void
     write_value (const std::string&, std::size_t offset);
@@ -138,6 +128,7 @@ LIBBUTL_MODEXPORT namespace butl
     const std::string name_;
     bool long_lines_;
     const std::function<filter_function> filter_;
+    bool multiline_v2_;
   };
 
   // Serialize a manifest to a stream adding the leading format version pair
diff --git a/libbutl/manifest-types.mxx b/libbutl/manifest-types.hxx
index 93f6fc6..23318f0 100644
--- a/libbutl/manifest-types.mxx
+++ b/libbutl/manifest-types.hxx
@@ -1,30 +1,14 @@
-// file      : libbutl/manifest-types.mxx -*- C++ -*-
+// file      : libbutl/manifest-types.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#include <cstdint>   // uint64_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.manifest_types;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#else
-#endif
+#include <cstdint> // uint64_t
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   class manifest_name_value
   {
diff --git a/libbutl/mingw-condition_variable.hxx b/libbutl/mingw-condition_variable.hxx
new file mode 100644
index 0000000..965f533
--- /dev/null
+++ b/libbutl/mingw-condition_variable.hxx
@@ -0,0 +1,275 @@
+/**
+* std::condition_variable implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
+#define LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+#  error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+#  error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <condition_variable> // Use std::cv_status, if available.
+
+#include <cassert>
+#include <chrono>
+#include <system_error>
+
+#include <synchapi.h>
+
+#include <libbutl/mingw-mutex.hxx>
+#include <libbutl/mingw-shared_mutex.hxx>
+
+namespace mingw_stdthread
+{
+#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
+  enum class cv_status { no_timeout, timeout };
+#else
+  using std::cv_status;
+#endif
+
+  //  Native condition variable-based implementation.
+  //
+  class condition_variable
+  {
+    static constexpr DWORD kInfinite = 0xffffffffl; // Used by untimed wait().
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+    CONDITION_VARIABLE cvariable_ = CONDITION_VARIABLE_INIT;
+#pragma GCC diagnostic pop
+
+    friend class condition_variable_any;
+
+    bool wait_unique (mutex * pmutex, DWORD time)
+    {
+      BOOL success = SleepConditionVariableSRW(native_handle(),
+                                               pmutex->native_handle(),
+                                               time,
+//    Flags argument. This overload serves exclusively owned (unique_lock)
+//  locks, so the value passed must differ from
+//  CONDITION_VARIABLE_LOCKMODE_SHARED. Microsoft's Dev Center does not specify
+//  that constant's value (it is only known to be convertible to ULONG), so
+//  instead of a static_assert we logically negate it to get a distinct value.
+                                               !CONDITION_VARIABLE_LOCKMODE_SHARED);
+      return success;
+    }
+    bool wait_impl (unique_lock<mutex> & lock, DWORD time)
+    {
+      mutex * pmutex = lock.release(); // Disassociate without unlocking.
+      bool success = wait_unique(pmutex, time);
+      lock = unique_lock<mutex>(*pmutex, adopt_lock); // Hand ownership back.
+      return success;
+    }
+public:
+    using native_handle_type = PCONDITION_VARIABLE;
+    native_handle_type native_handle ()
+    {
+      return &cvariable_;
+    }
+
+    condition_variable () = default;
+    ~condition_variable () = default;
+
+    condition_variable (const condition_variable &) = delete;
+    condition_variable & operator= (const condition_variable &) = delete;
+
+    void notify_one () noexcept
+    {
+      WakeConditionVariable(&cvariable_);
+    }
+
+    void notify_all () noexcept
+    {
+      WakeAllConditionVariable(&cvariable_);
+    }
+
+    void wait (unique_lock<mutex> & lock)
+    {
+      wait_impl(lock, kInfinite);
+    }
+
+    template<class Predicate>
+    void wait (unique_lock<mutex> & lock, Predicate pred)
+    {
+      while (!pred()) // Re-check the predicate after every wakeup.
+        wait(lock);
+    }
+
+    template <class Rep, class Period>
+    cv_status wait_for(unique_lock<mutex>& lock,
+                       const std::chrono::duration<Rep, Period>& rel_time)
+    {
+      using namespace std::chrono;
+      auto timeout = duration_cast<milliseconds>(rel_time).count(); // Whole ms.
+      DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
+      bool result = wait_impl(lock, waittime) || (timeout >= kInfinite); // A clamped wait never reports timeout.
+      return result ? cv_status::no_timeout : cv_status::timeout;
+    }
+
+    template <class Rep, class Period, class Predicate>
+    bool wait_for(unique_lock<mutex>& lock,
+                  const std::chrono::duration<Rep, Period>& rel_time,
+                  Predicate pred)
+    {
+#if __cplusplus >= 201703L // std::chrono::ceil() is available from C++17.
+      using steady_duration = typename std::chrono::steady_clock::duration;
+      return wait_until(lock,
+                        std::chrono::steady_clock::now() +
+                        std::chrono::ceil<steady_duration> (rel_time),
+                        std::move(pred));
+#else
+      return wait_until(lock,
+                        std::chrono::steady_clock::now() + rel_time,
+                        std::move(pred));
+#endif
+    }
+    template <class Clock, class Duration>
+    cv_status wait_until (unique_lock<mutex>& lock,
+                          const std::chrono::time_point<Clock,Duration>& abs_time)
+    {
+      return wait_for(lock, abs_time - Clock::now()); // Remaining time until abs_time.
+    }
+    template <class Clock, class Duration, class Predicate>
+    bool wait_until  (unique_lock<mutex>& lock,
+                      const std::chrono::time_point<Clock, Duration>& abs_time,
+                      Predicate pred)
+    {
+        while (!pred())
+        {
+          if (wait_until(lock, abs_time) == cv_status::timeout)
+          {
+            return pred(); // Deadline reached: report the predicate's final state.
+          }
+        }
+        return true;
+    }
+  };
+
+  class condition_variable_any
+  {
+    static constexpr DWORD kInfinite = 0xffffffffl; // Used by untimed wait().
+
+    condition_variable internal_cv_ {};
+    mutex internal_mutex_ {}; // Serializes generic waiters with notify_*().
+
+    // Generic lock types: sleep on internal_mutex_, not on the caller's lock.
+    template<class L>
+    bool wait_impl (L & lock, DWORD time)
+    {
+      unique_lock<decltype(internal_mutex_)> internal_lock(internal_mutex_);
+      lock.unlock();
+      bool success = internal_cv_.wait_impl(internal_lock, time);
+      internal_lock.unlock(); // Drop it before relocking: another waiter may
+      lock.lock();            // hold `lock` while blocked on internal_mutex_.
+      return success;
+    }
+    // If the lock happens to be called on a native Windows mutex, skip any
+    // extra contention.
+    inline bool wait_impl (unique_lock<mutex> & lock, DWORD time)
+    {
+        return internal_cv_.wait_impl(lock, time);
+    }
+    bool wait_impl (unique_lock<shared_mutex> & lock, DWORD time)
+    {
+      shared_mutex * pmutex = lock.release(); // Disassociate without unlocking.
+      bool success = internal_cv_.wait_unique(pmutex, time);
+      lock = unique_lock<shared_mutex>(*pmutex, adopt_lock); // Hand ownership back.
+      return success;
+    }
+    bool wait_impl (shared_lock<shared_mutex> & lock, DWORD time)
+    {
+      shared_mutex * pmutex = lock.release(); // Disassociate without unlocking.
+      BOOL success = SleepConditionVariableSRW(native_handle(),
+                                               pmutex->native_handle(), time,
+                                               CONDITION_VARIABLE_LOCKMODE_SHARED);
+      lock = shared_lock<shared_mutex>(*pmutex, adopt_lock); // Hand ownership back.
+      return success;
+    }
+  public:
+    using native_handle_type = typename condition_variable::native_handle_type;
+
+    native_handle_type native_handle ()
+    {
+      return internal_cv_.native_handle();
+    }
+
+    void notify_one () noexcept
+    {
+      lock_guard<mutex> guard(internal_mutex_); // Cannot overtake a generic waiter
+      internal_cv_.notify_one();                // between its unlock and its sleep.
+    }
+
+    void notify_all () noexcept
+    {
+      lock_guard<mutex> guard(internal_mutex_); // Cannot overtake a generic waiter
+      internal_cv_.notify_all();                // between its unlock and its sleep.
+    }
+
+    condition_variable_any () = default;
+    ~condition_variable_any () = default;
+
+    template<class L>
+    void wait (L & lock)
+    {
+      wait_impl(lock, kInfinite);
+    }
+
+    template<class L, class Predicate>
+    void wait (L & lock, Predicate pred)
+    {
+      while (!pred()) // Re-check the predicate after every wakeup.
+        wait(lock);
+    }
+
+    template <class L, class Rep, class Period>
+    cv_status wait_for(L& lock, const std::chrono::duration<Rep,Period>& period)
+    {
+      using namespace std::chrono;
+      auto timeout = duration_cast<milliseconds>(period).count(); // Whole ms.
+      DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
+      bool result = wait_impl(lock, waittime) || (timeout >= kInfinite); // A clamped wait never reports timeout.
+      return result ? cv_status::no_timeout : cv_status::timeout;
+    }
+
+    template <class L, class Rep, class Period, class Predicate>
+    bool wait_for(L& lock, const std::chrono::duration<Rep, Period>& period,
+                  Predicate pred)
+    {
+      return wait_until(lock, std::chrono::steady_clock::now() + period,
+                        std::move(pred));
+    }
+    template <class L, class Clock, class Duration>
+    cv_status wait_until (L& lock,
+                          const std::chrono::time_point<Clock,Duration>& abs_time)
+    {
+      return wait_for(lock, abs_time - Clock::now()); // Remaining time until abs_time.
+    }
+    template <class L, class Clock, class Duration, class Predicate>
+    bool wait_until  (L& lock,
+                      const std::chrono::time_point<Clock, Duration>& abs_time,
+                      Predicate pred)
+    {
+      while (!pred())
+        if (wait_until(lock, abs_time) == cv_status::timeout)
+          return pred(); // Deadline reached: report the predicate's final state.
+      return true;
+    }
+  };
+}
+
+#endif // LIBBUTL_MINGW_CONDITION_VARIABLE_HXX
diff --git a/libbutl/mingw-invoke.hxx b/libbutl/mingw-invoke.hxx
new file mode 100644
index 0000000..65810e7
--- /dev/null
+++ b/libbutl/mingw-invoke.hxx
@@ -0,0 +1,109 @@
+/**
+* Lightweight std::invoke() implementation for C++11 and C++14
+*
+* Copyright (c) 2018-2019 by Nathaniel J. McClatchey, San Jose, CA, United States
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_INVOKE_HXX
+#define LIBBUTL_MINGW_INVOKE_HXX
+
+#include <type_traits>  //  For std::result_of, etc.
+#include <utility>      //  For std::forward
+#include <functional>   //  For std::reference_wrapper
+
+namespace mingw_stdthread
+{
+  namespace detail
+  {
+    // For compatibility, implement std::invoke for C++11 and C++14.
+    //
+    template<bool PMemFunc, bool PMemData>
+    struct Invoker // Primary template: call f directly as a callable object.
+    {
+      template<class F, class... Args>
+      inline static typename std::result_of<F(Args...)>::type invoke (F&& f, Args&&... args)
+      {
+        return std::forward<F>(f)(std::forward<Args>(args)...);
+      }
+    };
+    template<bool>
+    struct InvokerHelper; // Parameter: is the object argument of (or derived from) the member's class?
+
+    template<>
+    struct InvokerHelper<false> // No: the argument has to be unwrapped first.
+    {
+      template<class T1> // Anything dereferenceable, such as a pointer.
+      inline static auto get (T1&& t1) -> decltype(*std::forward<T1>(t1))
+      {
+        return *std::forward<T1>(t1);
+      }
+
+      template<class T1> // std::reference_wrapper: return the wrapped reference.
+      inline static auto get (const std::reference_wrapper<T1>& t1) -> decltype(t1.get())
+      {
+        return t1.get();
+      }
+    };
+
+    template<>
+    struct InvokerHelper<true> // Yes: forward the object unchanged.
+    {
+      template<class T1>
+      inline static auto get (T1&& t1) -> decltype(std::forward<T1>(t1))
+      {
+        return std::forward<T1>(t1);
+      }
+    };
+
+    template<>
+    struct Invoker<true, false> // f is a pointer to member function.
+    {
+      template<class T, class F, class T1, class... Args> // t1 is the object; args go to the member call.
+      inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->  \
+        decltype((InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...))
+      {
+        return (InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...);
+      }
+    };
+
+    template<>
+    struct Invoker<false, true> // f is a pointer to data member, given exactly one argument.
+    {
+      template<class T, class F, class T1, class... Args> // t1 is the object; args is not used.
+      inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->  \
+        decltype(InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f)
+      {
+        return InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f;
+      }
+    };
+
+    template<class F, class... Args>
+    struct InvokeResult // Selects the Invoker matching F's kind and forwards the call to it.
+    {
+      typedef Invoker<std::is_member_function_pointer<typename std::remove_reference<F>::type>::value,
+                      std::is_member_object_pointer<typename std::remove_reference<F>::type>::value &&
+                      (sizeof...(Args) == 1)> invoker; // Data-member form only with a single (object) argument.
+      inline static auto invoke (F&& f, Args&&... args) -> decltype(invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...))
+      {
+        return invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...);
+      }
+    };
+
+    template<class F, class...Args> // Entry point; dispatches through InvokeResult.
+    auto invoke (F&& f, Args&&... args) -> decltype(InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...))
+    {
+      return InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...);
+    }
+  }
+}
+
+#endif // LIBBUTL_MINGW_INVOKE_HXX
diff --git a/libbutl/mingw-mutex.hxx b/libbutl/mingw-mutex.hxx
new file mode 100644
index 0000000..d297786
--- /dev/null
+++ b/libbutl/mingw-mutex.hxx
@@ -0,0 +1,210 @@
+/**
+* std::mutex et al implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_MUTEX_HXX
+#define LIBBUTL_MINGW_MUTEX_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+#  error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+#  error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <chrono>
+#include <system_error>
+#include <atomic>
+
+#include <mutex>
+
+#include <synchapi.h>       //  For InitializeCriticalSection, etc.
+#include <errhandlingapi.h> //  For GetLastError
+#include <handleapi.h>
+
+namespace mingw_stdthread
+{
+  // To make this namespace equivalent to the thread-related subset of std,
+  // pull in the classes and class templates supplied by std but not by this
+  // implementation.
+  //
+  using std::lock_guard;
+  using std::unique_lock;
+  using std::adopt_lock_t;
+  using std::defer_lock_t;
+  using std::try_to_lock_t;
+  using std::adopt_lock;
+  using std::defer_lock;
+  using std::try_to_lock;
+
+  // Mutex implemented in terms of the Win32 CRITICAL_SECTION API.
+  class recursive_mutex
+  {
+  public:
+    using native_handle_type = LPCRITICAL_SECTION;
+
+    // The critical section is initialized on construction and deleted on
+    // destruction.
+    //
+    recursive_mutex () noexcept: mHandle () {InitializeCriticalSection (&mHandle);}
+    ~recursive_mutex () noexcept {DeleteCriticalSection (&mHandle);}
+
+    // Non-copyable.
+    //
+    recursive_mutex (const recursive_mutex&) = delete;
+    recursive_mutex& operator= (const recursive_mutex&) = delete;
+
+    void lock () {EnterCriticalSection (&mHandle);}
+    void unlock () {LeaveCriticalSection (&mHandle);}
+
+    // Return true if the critical section has been entered (non-blocking).
+    //
+    bool try_lock () {return TryEnterCriticalSection (&mHandle) != 0;}
+
+    native_handle_type native_handle () {return &mHandle;}
+
+  private:
+    CRITICAL_SECTION mHandle;
+  };
+
+  // Slim Reader-Writer (SRW)-based implementation that requires Windows 7.
+  //
+  class mutex
+  {
+  public:
+    using native_handle_type = PSRWLOCK;
+
+    // Note: the diagnostic pragmas presumably suppress a warning triggered
+    // by the SRWLOCK_INIT expansion (zero used as a null pointer constant).
+    //
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+    constexpr mutex () noexcept: mHandle (SRWLOCK_INIT) {}
+#pragma GCC diagnostic pop
+
+    // Non-copyable.
+    //
+    mutex (const mutex&) = delete;
+    mutex& operator= (const mutex&) = delete;
+
+    // Acquire/release the SRW lock in the exclusive mode.
+    //
+    void lock () {AcquireSRWLockExclusive (&mHandle);}
+    void unlock () {ReleaseSRWLockExclusive (&mHandle);}
+
+    // Note: the TryAcquireSRW*() functions are a Windows 7 feature.
+    //
+    bool try_lock () {return TryAcquireSRWLockExclusive (&mHandle) != 0;}
+
+    native_handle_type native_handle () {return &mHandle;}
+  protected:
+    SRWLOCK mHandle;
+  };
+
+  class recursive_timed_mutex
+  {
+    static constexpr DWORD kWaitAbandoned = 0x00000080l;
+    static constexpr DWORD kWaitObject0 = 0x00000000l;
+    static constexpr DWORD kInfinite = 0xffffffffl;
+    inline bool try_lock_internal (DWORD ms) noexcept // Wait for the mutex for up to ms milliseconds.
+    {
+      DWORD ret = WaitForSingleObject(mHandle, ms);
+
+      /*
+        @@ TODO
+#ifndef NDEBUG
+      if (ret == kWaitAbandoned)
+      {
+        using namespace std;
+        fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
+        terminate();
+      }
+#endif
+      */
+
+      return (ret == kWaitObject0) || (ret == kWaitAbandoned); // An abandoned mutex is also treated as acquired.
+    }
+  protected:
+    HANDLE mHandle;
+  public:
+    typedef HANDLE native_handle_type;
+    native_handle_type native_handle() const {return mHandle;}
+    recursive_timed_mutex(const recursive_timed_mutex&) = delete;
+    recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;
+    recursive_timed_mutex(): mHandle(CreateMutex(NULL, FALSE, NULL)) {}
+    ~recursive_timed_mutex()
+    {
+      CloseHandle(mHandle);
+    }
+    void lock()
+    {
+      DWORD ret = WaitForSingleObject(mHandle, kInfinite);
+
+      /*
+        @@ TODO
+
+//    If (ret == WAIT_ABANDONED), then the thread that held ownership was
+//  terminated. Behavior is undefined, but Windows will pass ownership to this
+//  thread.
+#ifndef NDEBUG
+        if (ret == kWaitAbandoned)
+        {
+            using namespace std;
+            fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
+            terminate();
+        }
+#endif
+      */
+
+      if ((ret != kWaitObject0) && (ret != kWaitAbandoned))
+      {
+        throw std::system_error(GetLastError(), std::system_category());
+      }
+    }
+    void unlock()
+    {
+      if (!ReleaseMutex(mHandle))
+        throw std::system_error(GetLastError(), std::system_category());
+    }
+    bool try_lock()
+    {
+      return try_lock_internal(0);
+    }
+    template <class Rep, class Period>
+    bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
+    {
+      // Wait in steps (one wait must stay below kInfinite ms), then make a final
+      // non-blocking attempt so that a non-positive duration still tries once.
+      auto timeout = std::chrono::duration_cast<std::chrono::milliseconds>(dur).count();
+      while (timeout > 0)
+      {
+        constexpr auto kMaxStep = static_cast<decltype(timeout)>(kInfinite-1);
+        auto step = (timeout < kMaxStep) ? timeout : kMaxStep;
+        if (try_lock_internal(static_cast<DWORD>(step))) return true;
+        timeout -= step;
+      }
+      return try_lock_internal(0);
+    }
+    template <class Clock, class Duration>
+    bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
+    {
+      return try_lock_for(timeout_time - Clock::now());
+    }
+  };
+
+  typedef recursive_timed_mutex timed_mutex;
+}
+
+#endif // LIBBUTL_MINGW_MUTEX_HXX
diff --git a/libbutl/mingw-shared_mutex.hxx b/libbutl/mingw-shared_mutex.hxx
new file mode 100644
index 0000000..aacbaf8
--- /dev/null
+++ b/libbutl/mingw-shared_mutex.hxx
@@ -0,0 +1,124 @@
+/**
+* std::shared_mutex et al implementation for MinGW-w64
+*
+* Copyright (c) 2017 by Nathaniel J. McClatchey, Athens OH, United States
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_SHARED_MUTEX_HXX
+#define LIBBUTL_MINGW_SHARED_MUTEX_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+#  error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+#  error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <cassert>
+//  For descriptive errors.
+#include <system_error>
+//  For timing in shared_timed_mutex.
+#include <chrono>
+#include <limits>
+
+#include <shared_mutex> // shared_lock
+
+//  For defer_lock_t, adopt_lock_t, and try_to_lock_t
+#include <libbutl/mingw-mutex.hxx>
+
+#include <synchapi.h>
+
+namespace mingw_stdthread
+{
+  using std::shared_lock;
+
+  class condition_variable_any;
+
+  // Slim Reader-Writer (SRW)-based implementation that requires Windows 7.
+  //
+  class shared_mutex : mutex // Private base: only the names listed below are re-exposed.
+  {
+    friend class condition_variable_any;
+  public:
+    using mutex::native_handle_type;
+    using mutex::lock;       // Exclusive locking is inherited from mutex.
+    using mutex::try_lock;
+    using mutex::unlock;
+    using mutex::native_handle;
+
+    void lock_shared ()
+    {
+      AcquireSRWLockShared(&mHandle); // Shared acquisition of the same SRW lock.
+    }
+
+    void unlock_shared ()
+    {
+      ReleaseSRWLockShared(&mHandle);
+    }
+
+    bool try_lock_shared ()
+    {
+      return TryAcquireSRWLockShared(&mHandle) != 0; // Non-zero means the lock was acquired.
+    }
+  };
+
+  class shared_timed_mutex : shared_mutex
+  {
+    typedef shared_mutex Base;
+  public:
+    using Base::lock;
+    using Base::try_lock;
+    using Base::unlock;
+    using Base::lock_shared;
+    using Base::try_lock_shared;
+    using Base::unlock_shared;
+
+    template< class Clock, class Duration >
+    bool try_lock_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
+    {
+      // Poll until the cutoff as measured by the caller's Clock, not steady_clock.
+      do
+      {
+        if (try_lock()) return true;
+      }
+      while (Clock::now() < cutoff);
+      return false;
+    }
+
+    template< class Rep, class Period >
+    bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
+    {
+      return try_lock_until(std::chrono::steady_clock::now() + rel_time);
+    }
+
+    template< class Clock, class Duration >
+    bool try_lock_shared_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
+    {
+      // Poll until the cutoff as measured by the caller's Clock, not steady_clock.
+      do
+      {
+        if (try_lock_shared()) return true;
+      }
+      while (Clock::now() < cutoff);
+      return false;
+    }
+
+    template< class Rep, class Period >
+    bool try_lock_shared_for (const std::chrono::duration<Rep,Period>& rel_time)
+    {
+      return try_lock_shared_until(std::chrono::steady_clock::now() + rel_time);
+    }
+  };
+}
+
+#endif // LIBBUTL_MINGW_SHARED_MUTEX_HXX
diff --git a/libbutl/mingw-thread.hxx b/libbutl/mingw-thread.hxx
new file mode 100644
index 0000000..66f98aa
--- /dev/null
+++ b/libbutl/mingw-thread.hxx
@@ -0,0 +1,330 @@
+/**
+* std::thread implementation for MinGW-w64
+*
+* Copyright (c) 2013-2016 by Mega Limited, Auckland, New Zealand
+* Copyright (c) 2022 the build2 authors
+*
+* Licensed under the simplified (2-clause) BSD License.
+* You should have received a copy of the license along with this
+* program.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef LIBBUTL_MINGW_THREAD_HXX
+#define LIBBUTL_MINGW_THREAD_HXX
+
+#if !defined(__cplusplus) || (__cplusplus < 201402L)
+#  error C++14 compiler required
+#endif
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0601
+#  error _WIN32_WINNT should be 0x0601 (Windows 7) or greater
+#endif
+
+#include <cstddef>      //  For std::size_t
+#include <cerrno>       //  Detect error type.
+#include <exception>    //  For std::terminate
+#include <system_error> //  For std::system_error
+#include <functional>   //  For std::hash, std::invoke (C++17)
+#include <tuple>        //  For std::tuple
+#include <chrono>       //  For sleep timing.
+#include <memory>       //  For std::unique_ptr
+#include <iosfwd>       //  Stream output for thread ids.
+#include <utility>      //  For std::swap, std::forward
+
+#include <synchapi.h>   //  For WaitForSingleObject
+#include <handleapi.h>  //  For CloseHandle, etc.
+#include <sysinfoapi.h> //  For GetNativeSystemInfo
+#include <processthreadsapi.h>  //  For GetCurrentThreadId
+
+#include <process.h>  //  For _beginthreadex
+
+#if __cplusplus < 201703L
+#  include <libbutl/mingw-invoke.hxx>
+#endif
+
+namespace mingw_stdthread
+{
+  // @@ I think we can get rid of this in C++14 (see std::index_sequence).
+  //
+  namespace detail
+  {
+    template<std::size_t...>
+    struct IntSeq {};
+
+    template<std::size_t N, std::size_t... S>
+    struct GenIntSeq : GenIntSeq<N-1, N-1, S...> { };
+
+    template<std::size_t... S>
+    struct GenIntSeq<0, S...> { typedef IntSeq<S...> type; };
+
+//    Use a template specialization to avoid relying on compiler optimization
+//  when determining the parameter integer sequence.
+    template<class Func, class T, typename... Args>
+    class ThreadFuncCall;
+// We can't define the Call struct in the function - the standard forbids template methods in that case
+    template<class Func, std::size_t... S, typename... Args>
+    class ThreadFuncCall<Func, detail::IntSeq<S...>, Args...>
+    {
+        static_assert(sizeof...(S) == sizeof...(Args), "Args must match.");
+        using Tuple = std::tuple<typename std::decay<Args>::type...>;
+        typename std::decay<Func>::type mFunc;
+        Tuple mArgs;
+
+    public:
+        ThreadFuncCall(Func&& aFunc, Args&&... aArgs)
+          : mFunc(std::forward<Func>(aFunc)),
+            mArgs(std::forward<Args>(aArgs)...)
+        {
+        }
+
+        void callFunc()
+        {
+#if __cplusplus < 201703L
+          detail::invoke(std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
+#else
+          std::invoke (std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
+#endif
+        }
+    };
+
+    // Allow construction of threads without exposing implementation.
+    class ThreadIdTool;
+  }
+
+  class thread
+  {
+  public:
+    class id
+    {
+      DWORD mId = 0;
+      friend class thread;
+      friend class std::hash<id>;
+      friend class detail::ThreadIdTool;
+      explicit id(DWORD aId) noexcept : mId(aId){}
+    public:
+      id () noexcept = default;
+      friend bool operator==(id x, id y) noexcept {return x.mId == y.mId; }
+      friend bool operator!=(id x, id y) noexcept {return x.mId != y.mId; }
+      friend bool operator< (id x, id y) noexcept {return x.mId <  y.mId; }
+      friend bool operator<=(id x, id y) noexcept {return x.mId <= y.mId; }
+      friend bool operator> (id x, id y) noexcept {return x.mId >  y.mId; }
+      friend bool operator>=(id x, id y) noexcept {return x.mId >= y.mId; }
+
+      template<class _CharT, class _Traits>
+      friend std::basic_ostream<_CharT, _Traits>&
+      operator<<(std::basic_ostream<_CharT, _Traits>& __out, id __id)
+      {
+        if (__id.mId == 0)
+        {
+          return __out << "<invalid std::thread::id>";
+        }
+        else
+        {
+          return __out << __id.mId;
+        }
+      }
+    };
+  private:
+    static constexpr HANDLE kInvalidHandle = nullptr;
+    static constexpr DWORD kInfinite = 0xffffffffl;
+    HANDLE mHandle;
+    id mThreadId;
+
+    template <class Call>
+    static unsigned __stdcall threadfunc(void* arg) // Thread entry point passed to _beginthreadex().
+    {
+      std::unique_ptr<Call> call(static_cast<Call*>(arg)); // Take ownership so the call object is freed on return.
+      call->callFunc();
+      return 0;
+    }
+
+    static unsigned int _hardware_concurrency_helper() noexcept
+    {
+      SYSTEM_INFO sysinfo;
+      ::GetNativeSystemInfo(&sysinfo);
+      return sysinfo.dwNumberOfProcessors;
+    }
+  public:
+    typedef HANDLE native_handle_type;
+    id get_id() const noexcept {return mThreadId;}
+    native_handle_type native_handle() const {return mHandle;}
+    thread(): mHandle(kInvalidHandle), mThreadId(){}
+
+    thread(thread&& other) noexcept
+        :mHandle(other.mHandle), mThreadId(other.mThreadId)
+    {
+      other.mHandle = kInvalidHandle;
+      other.mThreadId = id{};
+    }
+
+    thread(const thread &other) = delete;
+
+    template<class Func, typename... Args>
+    explicit thread(Func&& func, Args&&... args) : mHandle(), mThreadId()
+    {
+      // Instead of INVALID_HANDLE_VALUE, _beginthreadex returns 0.
+
+      using ArgSequence = typename detail::GenIntSeq<sizeof...(Args)>::type;
+      using Call = detail::ThreadFuncCall<Func, ArgSequence, Args...>;
+      auto call = new Call(std::forward<Func>(func), std::forward<Args>(args)...); // Freed by threadfunc() once the thread runs.
+      unsigned int id_receiver;
+      auto int_handle = _beginthreadex(NULL, 0, threadfunc<Call>,
+                                       static_cast<LPVOID>(call), 0, &id_receiver);
+      if (int_handle == 0)
+      {
+        mHandle = kInvalidHandle;
+        int errnum = errno; // Save errno before any further call can overwrite it.
+        delete call;        // The thread was not started, so nothing else will free it.
+         //  Note: Should only throw EINVAL, EAGAIN, EACCES
+        throw std::system_error(errnum, std::generic_category());
+      } else {
+        mThreadId.mId = id_receiver;
+        mHandle = reinterpret_cast<HANDLE>(int_handle);
+      }
+    }
+
+    bool joinable() const {return mHandle != kInvalidHandle;}
+
+    //  Note: Due to lack of synchronization, this function has a race
+    //  condition if called concurrently, which leads to undefined
+    //  behavior. The same applies to all other member functions of this
+    //  class, but this one is mentioned explicitly.
+    void join()
+    {
+        using namespace std;
+        if (get_id() == id(GetCurrentThreadId()))
+            throw system_error(make_error_code(errc::resource_deadlock_would_occur));
+        // Not joinable means exactly mHandle == kInvalidHandle, so a separate
+        // no_such_process check on the handle would make this one unreachable.
+        if (!joinable())
+            throw system_error(make_error_code(errc::invalid_argument));
+        WaitForSingleObject(mHandle, kInfinite);
+        CloseHandle(mHandle);
+        mHandle = kInvalidHandle;
+        mThreadId = id{};
+    }
+
+    ~thread()
+    {
+      if (joinable())
+      {
+        // @@ TODO
+        /*
+#ifndef NDEBUG
+        std::printf("Error: Must join() or detach() a thread before \
+destroying it.\n");
+#endif
+        */
+        std::terminate();
+      }
+    }
+    thread& operator=(const thread&) = delete;
+    thread& operator=(thread&& other) noexcept
+    {
+      if (joinable())
+      {
+        // @@ TODO
+        /*
+#ifndef NDEBUG
+        std::printf("Error: Must join() or detach() a thread before \
+moving another thread to it.\n");
+#endif
+        */
+        std::terminate();
+      }
+      swap(other);
+      return *this;
+    }
+    void swap(thread& other) noexcept
+    {
+      std::swap(mHandle, other.mHandle);
+      std::swap(mThreadId.mId, other.mThreadId.mId);
+    }
+
+    static unsigned int hardware_concurrency() noexcept
+    {
+      // @@ TODO: this seems like a bad idea.
+      //
+      /*static*/ unsigned int cached = _hardware_concurrency_helper();
+      return cached;
+    }
+
+    void detach()
+    {
+      // Detaching a thread that is not joinable is an error.
+      //
+      if (mHandle == kInvalidHandle)
+        throw std::system_error(
+          std::make_error_code(std::errc::invalid_argument));
+
+      // Give up our handle and forget the thread's id.
+      //
+      CloseHandle(mHandle);
+      mHandle = kInvalidHandle;
+      mThreadId = id{};
+    }
+  };
+
+  namespace detail
+  {
+    class ThreadIdTool
+    {
+    public:
+      static thread::id make_id (DWORD base_id) noexcept
+      {
+        return thread::id(base_id);
+      }
+    };
+  }
+
+  namespace this_thread
+  {
+    inline thread::id get_id() noexcept
+    {
+      return detail::ThreadIdTool::make_id(GetCurrentThreadId());
+    }
+    inline void yield() noexcept {Sleep(0);}
+    template< class Rep, class Period >
+    void sleep_for( const std::chrono::duration<Rep,Period>& sleep_duration)
+    {
+      using rep = std::chrono::milliseconds::rep;
+      // Sleep in chunks: one Sleep() call is capped below 0xffffffff ms.
+      constexpr DWORD infinite = 0xffffffffl;
+      constexpr rep max_chunk = static_cast<rep>(infinite - 1);
+      rep left = std::chrono::duration_cast<std::chrono::milliseconds>(sleep_duration).count();
+      while (left > 0)
+      {
+        const rep chunk = (left < max_chunk) ? left : max_chunk;
+        Sleep(static_cast<DWORD>(chunk));
+        left -= chunk;
+      }
+    }
+    template <class Clock, class Duration>
+    void sleep_until(const std::chrono::time_point<Clock,Duration>& sleep_time)
+    {
+      sleep_for(sleep_time-Clock::now());
+    }
+  }
+}
+
+namespace std
+{
+  // Specialize hash for this implementation's thread::id, even if the
+  // std::thread::id already has a hash.
+  template<>
+  struct hash<mingw_stdthread::thread::id>
+  {
+    using argument_type = mingw_stdthread::thread::id;
+    using result_type = size_t;
+
+    // The numeric thread id is used directly as the hash value.
+    //
+    result_type operator() (const argument_type& v) const noexcept {return v.mId;}
+  };
+}
+
+#endif // LIBBUTL_MINGW_THREAD_HXX
diff --git a/libbutl/move-only-function.hxx b/libbutl/move-only-function.hxx
new file mode 100644
index 0000000..e5cfe51
--- /dev/null
+++ b/libbutl/move-only-function.hxx
@@ -0,0 +1,177 @@
+// file      : libbutl/move-only-function.hxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#pragma once
+
+#include <utility>
+#include <functional>
+#include <type_traits>
+
+namespace butl
+{
+  // This is a move-only std::function version which is implemented in terms
+  // of std::function. It is similar to C++23 std::move_only_function but
+  // still provides target() (but not target_type()).
+  //
+  template <typename>
+  class move_only_function_ex;
+
+  // Alias butl::move_only_function to std::move_only_function if available
+  // and to move_only_function_ex otherwise.
+  //
+#ifdef __cpp_lib_move_only_function
+  using std::move_only_function;
+#else
+  template <typename F>
+  using move_only_function = move_only_function_ex<F>;
+#endif
+
+  template <typename R, typename... A>
+  class move_only_function_ex<R (A...)>
+  {
+  public:
+    using result_type = R;
+
+    move_only_function_ex () = default;
+    move_only_function_ex (std::nullptr_t) noexcept {}
+
+    // Note: according to the spec we should also disable these if F is not
+    // callable, but that is not easy to do in C++14. Maybe we should do
+    // something for C++17 and later (without this the diagnostics are quite
+    // hairy).
+    //
+    template <typename F>
+    move_only_function_ex (F&& f, typename std::enable_if<!std::is_same<typename std::remove_reference<F>::type, move_only_function_ex>::value>::type* = nullptr)
+    {
+      // Leave f_ empty for a null function/member pointer or an empty
+      // move_only_function_ex; otherwise store the decayed callable wrapped.
+      //
+      if (!null (f)) f_ = wrapper<typename std::decay<F>::type> (std::forward<F> (f));
+    }
+
+    template <typename F>
+    typename std::enable_if<!std::is_same<typename std::remove_reference<F>::type, move_only_function_ex>::value, move_only_function_ex>::type&
+    operator= (F&& f)
+    {
+      move_only_function_ex (std::forward<F> (f)).swap (*this);
+      return *this;
+    }
+
+    move_only_function_ex&
+    operator= (std::nullptr_t) noexcept
+    {
+      f_ = nullptr;
+      return *this;
+    }
+
+    void swap (move_only_function_ex& f) noexcept
+    {
+      f_.swap (f.f_);
+    }
+
+    R operator() (A... args) const
+    {
+      return f_ (std::forward<A> (args)...);
+    }
+
+    explicit operator bool () const noexcept
+    {
+      return static_cast<bool> (f_);
+    }
+
+    template <typename T>
+    T* target() noexcept
+    {
+      wrapper<T>* r (f_.template target<wrapper<T>> ());
+      return r != nullptr ? &r->f : nullptr;
+    }
+
+    template <typename T>
+    const T* target() const noexcept
+    {
+      const wrapper<T>* r (f_.template target<wrapper<T>> ());
+      return r != nullptr ? &r->f : nullptr;
+    }
+
+    move_only_function_ex (move_only_function_ex&&) = default;
+    move_only_function_ex& operator= (move_only_function_ex&&) = default;
+
+    move_only_function_ex (const move_only_function_ex&) = delete;
+    move_only_function_ex& operator= (const move_only_function_ex&) = delete;
+
+  private:
+    template <typename F>
+    struct wrapper // Holds the callable F that is stored inside the std::function member.
+    {
+      struct empty {};
+
+      union // As a union member, f is only constructed/destroyed where done explicitly below.
+      {
+        F f;
+        empty e;
+      };
+
+      explicit wrapper (F&& f_): f (std::move (f_)) {}
+      explicit wrapper (const F& f_): f (f_) {}
+
+      R operator() (A... args)
+      {
+        return f (std::forward<A> (args)...);
+      }
+
+      R operator() (A... args) const
+      {
+        return f (std::forward<A> (args)...);
+      }
+
+      wrapper (wrapper&& w)
+        noexcept (std::is_nothrow_move_constructible<F>::value)
+        : f (std::move (w.f)) {}
+
+      wrapper& operator= (wrapper&&) = delete; // Shouldn't be needed.
+
+      ~wrapper () {f.~F ();} // Explicit destruction since f is a union member.
+
+      // These shouldn't be called (presumably they exist only because
+      // std::function requires a copyable target; to be confirmed).
+      wrapper (const wrapper&) {} // Note: leaves f unconstructed.
+      wrapper& operator= (const wrapper&) {return *this;}
+    };
+
+    template <typename F>                              static bool null (const F&) {return false;}
+    template <typename R1, typename... A1>             static bool null (R1 (*p) (A1...)) {return p == nullptr;}
+    template <typename R1, typename... A1>             static bool null (const move_only_function_ex<R1 (A1...)>& f) {return !f;}
+    template <typename R1, typename C, typename... A1> static bool null (R1 (C::*p) (A1...)) {return p == nullptr;}
+    template <typename R1, typename C, typename... A1> static bool null (R1 (C::*p) (A1...) const) {return p == nullptr;}
+
+    std::function<R (A...)> f_;
+  };
+
+  template <typename R, typename... A>
+  inline bool
+  operator== (const move_only_function_ex<R (A...)>& f, std::nullptr_t) noexcept
+  {
+    return !f;
+  }
+
+  template <typename R, typename... A>
+  inline bool
+  operator== (std::nullptr_t, const move_only_function_ex<R (A...)>& f) noexcept
+  {
+    return !f;
+  }
+
+  template <typename R, typename... A>
+  inline bool
+  operator!= (const move_only_function_ex<R (A...)>& f, std::nullptr_t) noexcept
+  {
+    return static_cast<bool> (f);
+  }
+
+  template <typename R, typename... A>
+  inline bool
+  operator!= (std::nullptr_t, const move_only_function_ex<R (A...)>& f) noexcept
+  {
+    return static_cast<bool> (f);
+  }
+}
diff --git a/libbutl/multi-index.mxx b/libbutl/multi-index.hxx
index d51bdfc..a6754cd 100644
--- a/libbutl/multi-index.mxx
+++ b/libbutl/multi-index.hxx
@@ -1,29 +1,14 @@
-// file      : libbutl/multi-index.mxx -*- C++ -*-
+// file      : libbutl/multi-index.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <utility>    // declval()
 #include <functional> // hash
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.multi_index;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Google the "Emulating Boost.MultiIndex with Standard Containers" blog
   // post for details.
@@ -57,7 +42,7 @@ LIBBUTL_MODEXPORT namespace butl
   };
 }
 
-LIBBUTL_MODEXPORT namespace std
+namespace std
 {
   template <typename T>
   struct hash<butl::map_key<T>>: hash<T>
diff --git a/libbutl/openssl.cxx b/libbutl/openssl.cxx
index 8741b35..f9df2e7 100644
--- a/libbutl/openssl.cxx
+++ b/libbutl/openssl.cxx
@@ -1,35 +1,10 @@
 // file      : libbutl/openssl.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/openssl.mxx>
-#endif
+#include <libbutl/openssl.hxx>
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-
 #include <utility> // move()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.openssl;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
-
-#endif
 
 using namespace std;
 
diff --git a/libbutl/openssl.mxx b/libbutl/openssl.hxx
index 6a0907e..b340f5c 100644
--- a/libbutl/openssl.mxx
+++ b/libbutl/openssl.hxx
@@ -1,41 +1,21 @@
-// file      : libbutl/openssl.mxx -*- C++ -*-
+// file      : libbutl/openssl.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <type_traits>
 
-#include <cstddef> // size_t
-#include <utility> // move(), forward()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.openssl;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.process;      //@@ MOD TODO: should we re-export?
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/process.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
+#include <libbutl/semantic-version.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Perform a crypto operation using the openssl(1) program. Throw
   // process_error and io_error (both derive from system_error) in case of
@@ -100,6 +80,23 @@ LIBBUTL_MODEXPORT namespace butl
   //    department (that were apparently fixed in 1.0.2). To work around these
   //    bugs pass user-supplied options first.
   //
+  struct openssl_info
+  {
+    // Note that the program name can be used by the caller to properly
+    // interpret the version.
+    //
+    // The name/version examples:
+    //
+    // OpenSSL  3.0.0
+    // OpenSSL  1.1.1l
+    // LibreSSL 2.8.3
+    //
+    // The `l` component above ends up in semantic_version::build.
+    //
+    std::string name;
+    semantic_version version;
+  };
+
   class LIBBUTL_SYMEXPORT openssl: public process
   {
   public:
@@ -133,6 +130,22 @@ LIBBUTL_MODEXPORT namespace butl
              const std::string& command,
              A&&... options);
 
+    // Run `openssl version` command and try to parse and return the
+    // information it prints to stdout. Return nullopt if the process hasn't
+    // terminated successfully or stdout parsing has failed. Throw
+    // process_error and io_error in case of errors.
+    //
+    template <typename E>
+    static optional<openssl_info>
+    info (E&& err, const process_env&);
+
+    template <typename C,
+              typename E>
+    static optional<openssl_info>
+    info (const C&,
+          E&& err,
+          const process_env&);
+
   private:
     template <typename T>
     struct is_other
diff --git a/libbutl/openssl.ixx b/libbutl/openssl.ixx
index c685b65..db2fbcd 100644
--- a/libbutl/openssl.ixx
+++ b/libbutl/openssl.ixx
@@ -1,7 +1,10 @@
 // file      : libbutl/openssl.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // forward()
+
+namespace butl
 {
   template <typename I,
             typename O,
@@ -23,4 +26,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                  std::forward<A> (options)...)
   {
   }
+
+  template <typename E>
+  inline optional<openssl_info> openssl::
+  info (E&& err, const process_env& env)
+  {
+    return info ([] (const char* [], std::size_t) {},
+                 std::forward<E> (err),
+                 env);
+  }
 }
diff --git a/libbutl/openssl.txx b/libbutl/openssl.txx
index 3a2c579..f55432d 100644
--- a/libbutl/openssl.txx
+++ b/libbutl/openssl.txx
@@ -1,7 +1,10 @@
 // file      : libbutl/openssl.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // forward()
+
+namespace butl
 {
   template <typename I>
   typename std::enable_if<openssl::is_other<I>::value, I>::type openssl::
@@ -47,4 +50,67 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
     // Note: leaving this scope closes any open ends of the pipes in io_data.
   }
+
+  template <typename C,
+            typename E>
+  optional<openssl_info> openssl::
+  info (const C& cmdc, E&& err, const process_env& env)
+  {
+    using namespace std;
+
+    // Run the `openssl version` command.
+    //
+    openssl os (cmdc,
+                nullfd, fdstream_mode::text, forward<E> (err),
+                env,
+                "version");
+
+    // Read the command's stdout and wait for its completion. Bail out if the
+    // command didn't terminate successfully or stdout contains no data.
+    //
+    string s;
+    if (!getline (os.in, s))
+      return nullopt;
+
+    os.in.close ();
+
+    if (!os.wait ())
+      return nullopt;
+
+    // Parse the version string.
+    //
+    // Note that there is some variety in the version representations:
+    //
+    // OpenSSL 3.0.0 7 sep 2021 (Library: OpenSSL 3.0.0 7 sep 2021)
+    // OpenSSL 1.1.1l  FIPS 24 Aug 2021
+    // LibreSSL 2.8.3
+    //
+    // We will only consider the first two space separated components as the
+    // program name and version. We will also assume that there are no leading
+    // spaces and the version is delimited from the program name with a single
+    // space character.
+    //
+    size_t e (s.find (' '));
+
+    // Bail out if there is no version present in the string or the program
+    // name is empty.
+    //
+    if (e == string::npos || e == 0)
+      return nullopt;
+
+    string nm (s, 0, e);
+
+    size_t b (e + 1);    // The beginning of the version.
+    e = s.find (' ', b); // The end of the version.
+
+    optional<semantic_version> ver (
+      parse_semantic_version (string (s, b, e != string::npos ? e - b : e),
+                              semantic_version::allow_build,
+                              "" /* build_separators */));
+
+    if (!ver)
+      return nullopt;
+
+    return openssl_info {move (nm), move (*ver)};
+  }
 }
diff --git a/libbutl/optional.mxx b/libbutl/optional.hxx
index d32e14b..f22189b 100644
--- a/libbutl/optional.mxx
+++ b/libbutl/optional.hxx
@@ -1,11 +1,7 @@
-// file      : libbutl/optional.mxx -*- C++ -*-
+// file      : libbutl/optional.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
 // Note: the Clang check must come before GCC since it also defines __GNUC__.
 //
@@ -54,7 +50,6 @@
 #  endif
 #endif
 
-#ifndef __cpp_lib_modules_ts
 #ifdef LIBBUTL_STD_OPTIONAL
 #  include <optional>
 #else
@@ -62,31 +57,19 @@
 #  include <functional>  // hash
 #  include <type_traits> // is_*
 #endif
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.optional;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
 #ifdef LIBBUTL_STD_OPTIONAL
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
-  template <typename T>
-  using optional = std::optional<T>;
-
+  using std::optional;
   using std::nullopt_t;
   using std::nullopt;
 }
 #else
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Simple optional class template while waiting for std::optional.
   //
@@ -125,10 +108,16 @@ LIBBUTL_MODEXPORT namespace butl
 #if (!defined(_MSC_VER) || _MSC_VER > 1900) &&  \
     (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__))
       constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
-      constexpr optional_data (optional_data&& o):      v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+      constexpr optional_data (optional_data&& o)
+        noexcept (std::is_nothrow_move_constructible<T>::value)
+        : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
 #else
       optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
-      optional_data (optional_data&& o):      v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+      optional_data (optional_data&& o)
+        noexcept (std::is_nothrow_move_constructible<T>::value)
+        : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
 #endif
 
       optional_data& operator= (nullopt_t);
@@ -136,7 +125,11 @@ LIBBUTL_MODEXPORT namespace butl
       optional_data& operator= (T&&);
 
       optional_data& operator= (const optional_data&);
-      optional_data& operator= (optional_data&&);
+
+      optional_data& operator= (optional_data&&)
+        noexcept (std::is_nothrow_move_constructible<T>::value &&
+                  std::is_nothrow_move_assignable<T>::value    &&
+                  std::is_nothrow_destructible<T>::value);
 
       ~optional_data ();
     };
@@ -168,10 +161,16 @@ LIBBUTL_MODEXPORT namespace butl
 #if (!defined(_MSC_VER) || _MSC_VER > 1900) && \
     (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__))
       constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
-      constexpr optional_data (optional_data&& o):      v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+      constexpr optional_data (optional_data&& o)
+        noexcept (std::is_nothrow_move_constructible<T>::value)
+        : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
 #else
       optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);}
-      optional_data (optional_data&& o):      v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
+
+      optional_data (optional_data&& o)
+        noexcept (std::is_nothrow_move_constructible<T>::value)
+        : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));}
 #endif
 
       optional_data& operator= (nullopt_t);
@@ -179,7 +178,12 @@ LIBBUTL_MODEXPORT namespace butl
       optional_data& operator= (T&&);
 
       optional_data& operator= (const optional_data&);
-      optional_data& operator= (optional_data&&);
+
+      // Note: it is trivially destructible and thus is no-throw destructible.
+      //
+      optional_data& operator= (optional_data&&)
+        noexcept (std::is_nothrow_move_constructible<T>::value &&
+                  std::is_nothrow_move_assignable<T>::value);
     };
 
     template <typename T,
@@ -306,6 +310,8 @@ LIBBUTL_MODEXPORT namespace butl
     explicit operator bool () const {return this->v_;}
   };
 
+  // optional ? optional
+  //
   template <typename T>
   inline auto
   operator== (const optional<T>& x, const optional<T>& y)
@@ -335,6 +341,131 @@ LIBBUTL_MODEXPORT namespace butl
   {
     return y < x;
   }
+
+  // optional ? nullopt
+  // nullopt ? optional
+  //
+  template <typename T>
+  inline auto
+  operator== (const optional<T>& x, nullopt_t)
+  {
+    bool px (x);
+    return !px;
+  }
+
+  template <typename T>
+  inline auto
+  operator== (nullopt_t, const optional<T>& y)
+  {
+    bool py (y);
+    return !py;
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const optional<T>& x, nullopt_t y)
+  {
+    return !(x == y);
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (nullopt_t x, const optional<T>& y)
+  {
+    return !(x == y);
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const optional<T>&, nullopt_t)
+  {
+    return false;
+  }
+
+  template <typename T>
+  inline auto
+  operator< (nullopt_t, const optional<T>& y)
+  {
+    bool py (y);
+    return py;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const optional<T>& x, nullopt_t y)
+  {
+    return y < x;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (nullopt_t x, const optional<T>& y)
+  {
+    return y < x;
+  }
+
+  // optional ? T
+  // T ? optional
+  //
+  template <typename T>
+  inline auto
+  operator== (const optional<T>& x, const T& y)
+  {
+    bool px (x);
+    return px && *x == y;
+  }
+
+  template <typename T>
+  inline auto
+  operator== (const T& x, const optional<T>& y)
+  {
+    bool py (y);
+    return py && x == *y;
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const optional<T>& x, const T& y)
+  {
+    return !(x == y);
+  }
+
+  template <typename T>
+  inline auto
+  operator!= (const T& x, const optional<T>& y)
+  {
+    return !(x == y);
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const optional<T>& x, const T& y)
+  {
+    bool px (x);
+    return !px || *x < y;
+  }
+
+  template <typename T>
+  inline auto
+  operator< (const T& x, const optional<T>& y)
+  {
+    bool py (y);
+    return py && x < *y;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const optional<T>& x, const T& y)
+  {
+    return y < x;
+  }
+
+  template <typename T>
+  inline auto
+  operator> (const T& x, const optional<T>& y)
+  {
+    return y < x;
+  }
 }
 
 namespace std
diff --git a/libbutl/optional.ixx b/libbutl/optional.ixx
index e2b552f..fdd0ac5 100644
--- a/libbutl/optional.ixx
+++ b/libbutl/optional.ixx
@@ -77,6 +77,9 @@ namespace butl
     template <typename T>
     inline optional_data<T, false>& optional_data<T, false>::
     operator= (optional_data&& o)
+      noexcept (std::is_nothrow_move_constructible<T>::value &&
+                std::is_nothrow_move_assignable<T>::value    &&
+                std::is_nothrow_destructible<T>::value)
     {
       if (o.v_)
       {
@@ -171,6 +174,8 @@ namespace butl
     template <typename T>
     inline optional_data<T, true>& optional_data<T, true>::
     operator= (optional_data&& o)
+      noexcept (std::is_nothrow_move_constructible<T>::value &&
+                std::is_nothrow_move_assignable<T>::value)
     {
       if (o.v_)
       {
diff --git a/libbutl/pager.cxx b/libbutl/pager.cxx
index 44aa83e..e647948 100644
--- a/libbutl/pager.cxx
+++ b/libbutl/pager.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/pager.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/pager.mxx>
-#endif
+#include <libbutl/pager.hxx>
 
 #include <errno.h> // E*
 
@@ -14,46 +12,20 @@
 #  include <libbutl/win32-utility.hxx>
 #endif
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
-#include <iostream>
-
+#include <cstddef> // size_t
 #include <cstring> // strchr()
 #include <utility> // move()
+
 #ifndef _WIN32
 #  include <chrono>
 #  include <thread> // this_thread::sleep_for()
 #endif
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.pager;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.fdstream;
-#endif
 
-#ifndef _WIN32
-import std.threading;
-#endif
-
-import butl.utility;  // operator<<(ostream, exception), throw_generic_error()
-import butl.optional;
-import butl.fdstream; // fdclose()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>
 
 using namespace std;
 
diff --git a/libbutl/pager.mxx b/libbutl/pager.hxx
index a1f640f..12a6670 100644
--- a/libbutl/pager.mxx
+++ b/libbutl/pager.hxx
@@ -1,36 +1,18 @@
-// file      : libbutl/pager.mxx -*- C++ -*-
+// file      : libbutl/pager.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <iostream>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.pager;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-import butl.fdstream;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Try to run the output through a pager program, such as more or less (no
   // pun intended, less is used by default). If the default pager program is
diff --git a/libbutl/path-io.mxx b/libbutl/path-io.hxx
index 6b6dbcf..a60527d 100644
--- a/libbutl/path-io.mxx
+++ b/libbutl/path-io.hxx
@@ -1,34 +1,16 @@
-// file      : libbutl/path-io.mxx -*- C++ -*-
+// file      : libbutl/path-io.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <ostream>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.path_io;
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (should not be needed).
-import std.io;
-#endif
-import butl.path;
-#else
-#include <libbutl/path.mxx>
-#endif
+#include <libbutl/path.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // This is the default path IO implementation. It is separate to allow
   // custom implementations. For example, we may want to print paths as
diff --git a/libbutl/path-map.mxx b/libbutl/path-map.hxx
index daaf0a4..e3d776a 100644
--- a/libbutl/path-map.mxx
+++ b/libbutl/path-map.hxx
@@ -1,33 +1,16 @@
-// file      : libbutl/path-map.mxx -*- C++ -*-
+// file      : libbutl/path-map.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <algorithm> // min()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.path_map;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.prefix_map;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/prefix-map.mxx>
-#endif
+
+#include <libbutl/path.hxx>
+#include <libbutl/prefix-map.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // prefix_map for filesystem paths
   //
@@ -142,4 +125,12 @@ LIBBUTL_MODEXPORT namespace butl
   template <typename T>
   using dir_path_map =
     prefix_map<dir_path, T, dir_path::traits_type::directory_separator>;
+
+  template <typename T>
+  using path_multimap =
+    prefix_multimap<path, T, path::traits_type::directory_separator>;
+
+  template <typename T>
+  using dir_path_multimap =
+    prefix_multimap<dir_path, T, dir_path::traits_type::directory_separator>;
 }
diff --git a/libbutl/path-pattern.cxx b/libbutl/path-pattern.cxx
index cea5aa7..ed36eb5 100644
--- a/libbutl/path-pattern.cxx
+++ b/libbutl/path-pattern.cxx
@@ -1,41 +1,14 @@
 // file      : libbutl/path-pattern.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/path-pattern.mxx>
-#endif
+#include <libbutl/path-pattern.hxx>
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint>
-#include <cstddef>
 #include <iterator> // reverse_iterator
-
 #include <algorithm> // find()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.path_pattern;
 
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.optional;
-#endif
-
-import butl.utility;    // lcase()[_WIN32]
-import butl.filesystem; // path_search()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/utility.hxx>    // lcase()[_WIN32]
+#include <libbutl/filesystem.hxx> // path_search()
 
 using namespace std;
 
diff --git a/libbutl/path-pattern.mxx b/libbutl/path-pattern.hxx
index 6d9684a..f6e01be 100644
--- a/libbutl/path-pattern.mxx
+++ b/libbutl/path-pattern.hxx
@@ -1,37 +1,20 @@
-// file      : libbutl/path-pattern.mxx -*- C++ -*-
+// file      : libbutl/path-pattern.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
+#include <cassert>
 #include <cstdint>  // uint16_t
 #include <cstddef>  // ptrdiff_t, size_t
 #include <iterator> // input_iterator_tag
-#endif
-
-// Other includes.
-#ifdef __cpp_modules_ts
-export module butl.path_pattern;
-
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
 
-import butl.path;
-import butl.optional;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Wildcard pattern match (aka glob).
   //
diff --git a/libbutl/path-pattern.ixx b/libbutl/path-pattern.ixx
index 71f125c..6fee31e 100644
--- a/libbutl/path-pattern.ixx
+++ b/libbutl/path-pattern.ixx
@@ -3,6 +3,32 @@
 
 namespace butl
 {
+  // path_match_flags
+  //
+  inline path_match_flags operator& (path_match_flags x, path_match_flags y)
+  {
+    return x &= y;
+  }
+
+  inline path_match_flags operator| (path_match_flags x, path_match_flags y)
+  {
+    return x |= y;
+  }
+
+  inline path_match_flags operator&= (path_match_flags& x, path_match_flags y)
+  {
+    return x = static_cast<path_match_flags> (
+      static_cast<std::uint16_t> (x) &
+      static_cast<std::uint16_t> (y));
+  }
+
+  inline path_match_flags operator|= (path_match_flags& x, path_match_flags y)
+  {
+    return x = static_cast<path_match_flags> (
+      static_cast<std::uint16_t> (x) |
+      static_cast<std::uint16_t> (y));
+  }
+
   // path_pattern_iterator
   //
   inline path_pattern_iterator::
diff --git a/libbutl/path.cxx b/libbutl/path.cxx
index 3b04730..909971b 100644
--- a/libbutl/path.cxx
+++ b/libbutl/path.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/path.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/path.mxx>
-#endif
+#include <libbutl/path.hxx>
 
 #ifdef _WIN32
 #  include <libbutl/win32-utility.hxx>
@@ -25,32 +23,13 @@
 #endif
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <utility>
-
 #include <atomic>
 #include <cstring> // strcpy()
-#endif
 
-#ifdef __cpp_modules_ts
-module butl.path;
+#include <libbutl/ft/lang.hxx> // thread_local
 
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-import butl.utility; // throw_*_error()
-import butl.process; // process::current_id()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/utility.hxx> // throw_*_error()
+#include <libbutl/process.hxx> // process::current_id()
 
 #include <libbutl/export.hxx>
 
@@ -78,10 +57,21 @@ namespace butl
   // char
   //
 
+  static
+#ifdef __cpp_thread_local
+  thread_local
+#else
+  __thread
+#endif
+  const path_traits<char>::string_type* current_directory_ = nullptr;
+
   template <>
   LIBBUTL_SYMEXPORT path_traits<char>::string_type path_traits<char>::
   current_directory ()
   {
+    if (const auto* twd = current_directory_)
+      return *twd;
+
 #ifdef _WIN32
     char cwd[_MAX_PATH];
     if (_getcwd (cwd, _MAX_PATH) == 0)
@@ -121,6 +111,20 @@ namespace butl
 #endif
   }
 
+  template <>
+  LIBBUTL_SYMEXPORT const path_traits<char>::string_type* path_traits<char>::
+  thread_current_directory ()
+  {
+    return current_directory_;
+  }
+
+  template <>
+  LIBBUTL_SYMEXPORT void path_traits<char>::
+  thread_current_directory (const string_type* twd)
+  {
+    current_directory_ = twd;
+  }
+
 #ifndef _WIN32
   static const small_vector<string, 4> tmp_vars (
     {"TMPDIR", "TMP", "TEMP", "TEMPDIR"});
@@ -207,8 +211,8 @@ namespace butl
     using std::to_string;
 
     return prefix
-      + "-" + to_string (process::current_id ())
-      + "-" + to_string (temp_name_count++);
+      + '-' + to_string (process::current_id ())
+      + '-' + to_string (temp_name_count++);
   }
 
   template <>
diff --git a/libbutl/path.mxx b/libbutl/path.hxx
index 12479ce..7d8b862 100644
--- a/libbutl/path.mxx
+++ b/libbutl/path.hxx
@@ -1,13 +1,8 @@
-// file      : libbutl/path.mxx -*- C++ -*-
+// file      : libbutl/path.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-#include <cassert>
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <ostream>
 #include <cstddef>    // ptrdiff_t
@@ -21,31 +16,17 @@
 #ifdef _WIN32
 #include <algorithm> // replace()
 #endif
-#endif
 
-// Other includes.
+#include <libbutl/optional.hxx>
+#include <libbutl/small-vector.hxx>
 
-#ifdef __cpp_modules_ts
-export module butl.path;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.optional;
-import butl.small_vector;
-#ifdef _WIN32
-import butl.utility;
-#endif
-#else
-#include <libbutl/optional.mxx>
-#include <libbutl/small-vector.mxx>
 #ifdef _WIN32
-#include <libbutl/utility.mxx> // *case*()
-#endif
+#include <libbutl/utility.hxx> // *case*()
 #endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Wish list/ideas for improvements.
   //
@@ -78,7 +59,7 @@ LIBBUTL_MODEXPORT namespace butl
     string_type path;
 
     explicit
-    invalid_basic_path (const string_type& p): path (p) {}
+    invalid_basic_path (string_type p): path (std::move (p)) {}
     explicit
     invalid_basic_path (const C* p): path (p) {}
     invalid_basic_path (const C* p, size_type n): path (p, n) {}
@@ -392,6 +373,22 @@ LIBBUTL_MODEXPORT namespace butl
         : (p = rfind_separator (s, n - 1)) == nullptr ? s : ++p;
     }
 
+    // Return true if sb is a sub-path of sp (i.e., sp is a prefix). Expects
+    // both paths to be normalized. Note that this function returns true if
+    // the paths are equal. Empty path is considered a prefix of any path.
+    //
+    static bool
+    sub (const C* sb, size_type nb,
+         const C* sp, size_type np);
+
+    // Return true if sp is a super-path of sb (i.e., sb is a suffix). Expects
+    // both paths to be normalized. Note that this function returns true if
+    // the paths are equal. Empty path is considered a suffix of any path.
+    //
+    static bool
+    sup (const C* sp, size_type np,
+         const C* sb, size_type nb);
+
     static int
     compare (string_type const& l,
              string_type const& r,
@@ -454,11 +451,31 @@ LIBBUTL_MODEXPORT namespace butl
     // Get/set current working directory. Throw std::system_error to report
     // underlying OS errors.
     //
+    // The current_directory() accessor (as well as the relevant process
+    // startup functions) have a notion of a "thread working directory" which
+    // is implemented as a thread-specific override that can be added/removed
+    // with thread_current_directory() below.
+    //
+    // Note that the current_directory() modifier always sets the process-wide
+    // working directory.
+    //
+    // See also thread_env().
+    //
     static string_type
     current_directory ();
 
     static void
-    current_directory (string_type const&);
+    current_directory (const string_type&);
+
+    // Get/set thread working directory override. Note that the passed
+    // pointed-to string should be valid (and immutable) for as long as the
+    // override is in effect.
+    //
+    static const string_type*
+    thread_current_directory ();
+
+    static void
+    thread_current_directory (const string_type*);
 
     // Return the user home directory. Throw std::system_error to report
     // underlying OS errors.
@@ -615,18 +632,18 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Constructors.
     //
-    path_data ()
+    path_data () noexcept
         : tsep_ (0) {}
 
-    path_data (string_type&& p, difference_type ts)
+    path_data (string_type&& p, difference_type ts) noexcept
         : path_ (std::move (p)), tsep_ (path_.empty () ? 0 : ts) {}
 
     explicit
-    path_data (string_type&& p)
+    path_data (string_type&& p) noexcept
         : path_ (std::move (p)) { _init (); }
 
     void
-    _init ()
+    _init () noexcept
     {
       size_type n (path_.size ()), i;
 
@@ -654,7 +671,8 @@ LIBBUTL_MODEXPORT namespace butl
       using path_data<C>::path_data;
 
       base_type () = default;
-      base_type (path_data<C>&& d): path_data<C> (std::move (d)) {}
+      base_type (path_data<C>&& d) noexcept
+        : path_data<C> (std::move (d)) {}
     };
 
     using dir_type = basic_path<C, dir_path_kind<C>>;
@@ -892,7 +910,7 @@ LIBBUTL_MODEXPORT namespace butl
     make_leaf ();
 
     // Return the path without the specified directory part. Returns empty
-    // path if the paths are the same. Throws invalid_path if the directory is
+    // path if the paths are the same. Throw invalid_path if the directory is
     // not a prefix of *this. Expects both paths to be normalized.
     //
     basic_path
@@ -910,7 +928,7 @@ LIBBUTL_MODEXPORT namespace butl
     make_directory ();
 
     // Return the directory part of the path without the specified leaf part.
-    // Throws invalid_path if the leaf is not a suffix of *this. Expects both
+    // Throw invalid_path if the leaf is not a suffix of *this. Expects both
     // paths to be normalized.
     //
     dir_type
@@ -946,12 +964,18 @@ LIBBUTL_MODEXPORT namespace butl
     extension_cstring () const;
 
     // Return a path relative to the specified path that is equivalent
-    // to *this. Throws invalid_path if a relative path cannot be derived
+    // to *this. Throw invalid_path if a relative path cannot be derived
     // (e.g., paths are on different drives on Windows).
     //
     basic_path
     relative (basic_path) const;
 
+    // As above but return nullopt rather than throw if a relative path cannot
+    // be derived.
+    //
+    optional<basic_path>
+    try_relative (basic_path) const;
+
     // Iteration over path components.
     //
     // Note that for an absolute POSIX path the first component is empty,
@@ -1108,19 +1132,22 @@ LIBBUTL_MODEXPORT namespace butl
     basic_path&
     canonicalize (char dir_sep = '\0');
 
-    // Normalize the path and return *this. Normalization involves collapsing
-    // the '.' and '..'  directories if possible, collapsing multiple
-    // directory separators, and converting all directory separators to the
-    // canonical form. If cur_empty is true then collapse relative paths
-    // representing the current directory (for example, '.', './', 'foo/..')
-    // to an empty path. Otherwise convert it to the canonical form (./ on
-    // POSIX systems). Note that a non-empty path cannot become an empty one
-    // in the latter case.
+    // Normalize the path and return *this. Throw invalid_path if the
+    // resulting path would be invalid (e.g., /tmp/../..).
+    //
+    // Normalization involves collapsing the '.' and '..' directories if
+    // possible, collapsing multiple directory separators, and converting all
+    // directory separators to the canonical form. If cur_empty is true then
+    // collapse relative paths representing the current directory (for
+    // example, '.', './', 'foo/..') to an empty path. Otherwise convert it
+    // to the canonical form (./ on POSIX systems). Note that a non-empty path
+    // cannot become an empty one in the latter case.
     //
     // If actual is true, then for case-insensitive filesystems obtain the
     // actual spelling of the path. Only an absolute path can be actualized.
     // If a path component does not exist, then its (and all subsequent)
-    // spelling is unchanged. This is a potentially expensive operation.
+    // spelling is unchanged. Throw system_error on all other underlying
+    // filesystem errors. Note that this is a potentially expensive operation.
     // Normally one can assume that "well-known" directories (current, home,
     // etc.) are returned in their actual spelling.
     //
@@ -1275,7 +1302,8 @@ LIBBUTL_MODEXPORT namespace butl
     // Direct initialization without init()/cast().
     //
     explicit
-    basic_path (data_type&& d): base_type (std::move (d)) {}
+    basic_path (data_type&& d) noexcept
+      : base_type (std::move (d)) {}
 
     using base_type::_size;
     using base_type::_init;
@@ -1474,9 +1502,9 @@ LIBBUTL_MODEXPORT namespace butl
     basic_path_name (): // Create empty/NULL path name.
         base (nullptr, &name) {}
 
-    basic_path_name (basic_path_name&&);
+    basic_path_name (basic_path_name&&) noexcept;
     basic_path_name (const basic_path_name&);
-    basic_path_name& operator= (basic_path_name&&);
+    basic_path_name& operator= (basic_path_name&&) noexcept;
     basic_path_name& operator= (const basic_path_name&);
   };
 
@@ -1503,14 +1531,14 @@ LIBBUTL_MODEXPORT namespace butl
 
     basic_path_name_value (): base (&path) {} // Create empty/NULL path name.
 
-    basic_path_name_value (basic_path_name_value&&);
+    basic_path_name_value (basic_path_name_value&&) noexcept;
     basic_path_name_value (const basic_path_name_value&);
-    basic_path_name_value& operator= (basic_path_name_value&&);
+    basic_path_name_value& operator= (basic_path_name_value&&) noexcept;
     basic_path_name_value& operator= (const basic_path_name_value&);
   };
 }
 
-LIBBUTL_MODEXPORT namespace std
+namespace std
 {
   template <typename C, typename K>
   struct hash<butl::basic_path<C, K>>: hash<basic_string<C>>
diff --git a/libbutl/path.ixx b/libbutl/path.ixx
index 9c96cfc..b2fdb6f 100644
--- a/libbutl/path.ixx
+++ b/libbutl/path.ixx
@@ -1,7 +1,7 @@
 // file      : libbutl/path.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   // path_abnormality
   //
@@ -117,6 +117,45 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
     return r;
   }
 
+  template <typename C>
+  inline bool path_traits<C>::
+  sub (const C* s, size_type n,
+       const C* ps, size_type pn)
+  {
+    // The thinking here is that we can use the full string representations
+    // (including the trailing slash in "/").
+    //
+    if (pn == 0)
+      return true;
+
+    // The second condition guards against the /foo-bar vs /foo case.
+    //
+    return n >= pn &&
+      compare (s, pn, ps, pn) == 0 &&
+      (is_separator (ps[pn - 1]) || // p ends with a separator
+       n == pn                   || // *this == p
+       is_separator (s[pn]));       // next char is a separator
+  }
+
+  template <typename C>
+  inline bool path_traits<C>::
+  sup (const C* s, size_type n,
+       const C* ps, size_type pn)
+  {
+    // The thinking here is that we can use the full string representations
+    // (including the trailing slash in "/").
+    //
+    if (pn == 0)
+      return true;
+
+    // The second condition guards against the /foo-bar vs bar case.
+    //
+    return n >= pn &&
+      compare (s + n - pn, pn, ps, pn) == 0 &&
+      (n == pn ||                     // *this == p
+       is_separator (s[n - pn - 1])); // Previous char is a separator.
+  }
+
 #ifdef _WIN32
   template <>
   inline char path_traits<char>::
@@ -230,52 +269,16 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
   inline bool basic_path<C, K>::
   sub (const basic_path& p) const
   {
-    // The thinking here is that we can use the full string representations
-    // (including the trailing slash in "/").
-    //
-    const string_type& ps (p.path_);
-    size_type pn (ps.size ());
-
-    if (pn == 0)
-      return true;
-
-    const string_type& s (this->path_);
-    size_type n (s.size ());
-
-    // The second condition guards against the /foo-bar vs /foo case.
-    //
-    return n >= pn &&
-      traits_type::compare (s.c_str (), pn, ps.c_str (), pn) == 0 &&
-      (traits_type::is_separator (ps.back ()) || // p ends with a separator
-       n == pn                                || // *this == p
-       traits_type::is_separator (s[pn]));       // next char is a separator
+    return traits_type::sub (this->path_.c_str (), this->path_.size (),
+                             p.path_.c_str (), p.path_.size ());
   }
 
   template <typename C, typename K>
   inline bool basic_path<C, K>::
   sup (const basic_path& p) const
   {
-    // The thinking here is that we can use the full string representations
-    // (including the trailing slash in "/").
-    //
-    const string_type& ps (p.path_);
-    size_type pn (ps.size ());
-
-    if (pn == 0)
-      return true;
-
-    const string_type& s (this->path_);
-    size_type n (s.size ());
-
-    // The second condition guards against the /foo-bar vs bar case.
-    //
-    return n >= pn &&
-      traits_type::compare (s.c_str () +  n - pn, pn, ps.c_str (), pn) == 0 &&
-      (n == pn || // *this == p
-       //
-       // Previous char is a separator.
-       //
-       traits_type::is_separator (s[n - pn - 1]));
+    return traits_type::sup (this->path_.c_str (), this->path_.size (),
+                             p.path_.c_str (), p.path_.size ());
   }
 
   template <typename C, typename K>
@@ -779,7 +782,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
   //
   template <typename P>
   inline basic_path_name<P>::
-  basic_path_name (basic_path_name&& p)
+  basic_path_name (basic_path_name&& p) noexcept
       : basic_path_name (p.path, std::move (p.name))
   {
   }
@@ -793,7 +796,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
   template <typename P>
   inline basic_path_name<P>& basic_path_name<P>::
-  operator= (basic_path_name&& p)
+  operator= (basic_path_name&& p) noexcept
   {
     if (this != &p)
     {
@@ -821,7 +824,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
   //
   template <typename P>
   inline basic_path_name_value<P>::
-  basic_path_name_value (basic_path_name_value&& p)
+  basic_path_name_value (basic_path_name_value&& p) noexcept
       : basic_path_name_value (std::move (p.path), std::move (p.name))
   {
   }
@@ -835,7 +838,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
   template <typename P>
   inline basic_path_name_value<P>& basic_path_name_value<P>::
-  operator= (basic_path_name_value&& p)
+  operator= (basic_path_name_value&& p) noexcept
   {
     if (this != &p)
     {
diff --git a/libbutl/path.txx b/libbutl/path.txx
index 45b62bd..60e0f1a 100644
--- a/libbutl/path.txx
+++ b/libbutl/path.txx
@@ -1,7 +1,7 @@
 // file      : libbutl/path.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   template <typename C, typename K>
   basic_path<C, K> basic_path<C, K>::
@@ -103,8 +103,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 #endif
 
   template <typename C, typename K>
-  basic_path<C, K> basic_path<C, K>::
-  relative (basic_path<C, K> d) const
+  optional<basic_path<C, K>> basic_path<C, K>::
+  try_relative (basic_path<C, K> d) const
   {
     dir_type r;
 
@@ -118,12 +118,22 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
       // Roots of the paths do not match.
       //
       if (d.root ())
-        throw invalid_basic_path<C> (this->path_);
+        return nullopt;
     }
 
     return r / leaf (d);
   }
 
+  template <typename C, typename K>
+  basic_path<C, K> basic_path<C, K>::
+  relative (basic_path<C, K> d) const
+  {
+    optional<basic_path<C, K>> rel (try_relative (std::move (d)));
+    if (!rel) // No relative path was found so report this as an error.
+      throw invalid_basic_path<C> (this->path_);
+    return std::move (*rel);
+  }
+
 #ifdef _WIN32
   // Find the actual spelling of a name in the specified dir. If the name is
   // found, append it to the result and return true. Otherwise, return false.
diff --git a/libbutl/prefix-map.mxx b/libbutl/prefix-map.hxx
index 75931da..0895d96 100644
--- a/libbutl/prefix-map.mxx
+++ b/libbutl/prefix-map.hxx
@@ -1,31 +1,16 @@
-// file      : libbutl/prefix-map.mxx -*- C++ -*-
+// file      : libbutl/prefix-map.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <map>
 #include <string>
 #include <utility>   // move()
 #include <algorithm> // min()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.prefix_map;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // A map of hierarchical "paths", e.g., 'foo.bar' or 'foo/bar' with the
   // ability to retrieve a range of entries that have a specific prefix as
@@ -149,6 +134,37 @@ LIBBUTL_MODEXPORT namespace butl
 
     const_iterator
     find_sup (const key_type&) const;
+
+    // As above but additionally evaluate a predicate on each matching entry
+    // returning the one for which it returns true.
+    //
+    template <typename P>
+    iterator
+    find_sup_if (const key_type&, P);
+
+    template <typename P>
+    const_iterator
+    find_sup_if (const key_type&, P) const;
+  };
+
+  template <typename M>
+  struct prefix_multimap_common: prefix_map_common<M>
+  {
+    typedef M map_type;
+    typedef typename map_type::key_type key_type;
+    typedef typename map_type::iterator iterator;
+    typedef typename map_type::const_iterator const_iterator;
+
+    using prefix_map_common<M>::prefix_map_common;
+
+    // Find the most qualified entries that are super-prefixes of the
+    // specified prefix.
+    //
+    std::pair<iterator, iterator>
+    sup_range (const key_type&);
+
+    std::pair<const_iterator, const_iterator>
+    sup_range (const key_type&) const;
   };
 
   template <typename M, typename prefix_map_common<M>::delimiter_type D>
@@ -161,6 +177,16 @@ LIBBUTL_MODEXPORT namespace butl
         : prefix_map_common<M> (std::move (i), D) {}
   };
 
+  template <typename M, typename prefix_map_common<M>::delimiter_type D>
+  struct prefix_multimap_impl: prefix_multimap_common<M>
+  {
+    typedef typename prefix_multimap_common<M>::value_type value_type;
+    // Both constructors pass the delimiter template argument D to the base.
+    prefix_multimap_impl (): prefix_multimap_common<M> (D) {}
+    prefix_multimap_impl (std::initializer_list<value_type> i)
+        : prefix_multimap_common<M> (std::move (i), D) {}
+  };
+
   template <typename K,
             typename T,
             typename compare_prefix<K>::delimiter_type D>
@@ -170,7 +196,7 @@ LIBBUTL_MODEXPORT namespace butl
             typename T,
             typename compare_prefix<K>::delimiter_type D>
   using prefix_multimap =
-    prefix_map_impl<std::multimap<K, T, compare_prefix<K>>, D>;
+    prefix_multimap_impl<std::multimap<K, T, compare_prefix<K>>, D>;
 }
 
 #include <libbutl/prefix-map.txx>
diff --git a/libbutl/prefix-map.txx b/libbutl/prefix-map.txx
index e9a99c9..80664bf 100644
--- a/libbutl/prefix-map.txx
+++ b/libbutl/prefix-map.txx
@@ -1,7 +1,7 @@
 // file      : libbutl/prefix-map.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   template <typename M>
   auto prefix_map_common<M>::
@@ -127,4 +127,128 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
     return i;
 #endif
   }
+
+  template <typename M>
+  template <typename P>
+  auto prefix_map_common<M>::
+  find_sup_if (const key_type& k, P pred) -> iterator
+  {
+#if 0
+    const auto& c (this->key_comp ());
+
+    for (auto i (this->upper_bound (k)), b (this->begin ()); i != b; )
+    {
+      --i;
+      if (c.prefix (i->first, k) && pred (*i))
+        return i;
+    }
+
+    return this->end ();
+#else
+    auto i (this->find (k)), e (this->end ());
+    if (i != e && pred (*i)) // The exact match is accepted.
+      return i;
+
+    // Otherwise retry with each key derived by prefix() from a copy of k.
+    //
+    const auto& c (this->key_comp ());
+    for (key_type p (k); c.prefix (p); )
+    {
+      i = this->find (p);
+      if (i != e && pred (*i))
+        return i;
+    }
+
+    return e; // Not i: it may refer to an entry that pred rejected.
+#endif
+  }
+
+  template <typename M>
+  template <typename P>
+  auto prefix_map_common<M>::
+  find_sup_if (const key_type& k, P pred) const -> const_iterator
+  {
+#if 0
+    const auto& c (this->key_comp ());
+
+    for (auto i (this->upper_bound (k)), b (this->begin ()); i != b; )
+    {
+      --i;
+      if (c.prefix (i->first, k) && pred (*i))
+        return i;
+    }
+
+    return this->end ();
+#else
+    auto i (this->find (k)), e (this->end ());
+    if (i != e && pred (*i)) // The exact match is accepted.
+      return i;
+
+    // Otherwise retry with each key derived by prefix() from a copy of k.
+    //
+    const auto& c (this->key_comp ());
+    for (key_type p (k); c.prefix (p); )
+    {
+      i = this->find (p);
+      if (i != e && pred (*i))
+        return i;
+    }
+
+    return e; // Not i: it may refer to an entry that pred rejected.
+#endif
+  }
+
+  template <typename M>
+  auto prefix_multimap_common<M>::
+  sup_range (const key_type& k) -> std::pair<iterator, iterator>
+  {
+#if 0
+    // TODO (see above).
+#else
+    // Try the exact match first since this requires no copy of the key.
+    //
+    auto r (this->equal_range (k));
+    if (r.first != r.second)
+      return r;
+
+    // Otherwise retry with each key derived by prefix() from a copy of k.
+    //
+    const auto& c (this->key_comp ());
+    for (key_type p (k); c.prefix (p); )
+    {
+      r = this->equal_range (p);
+      if (r.first != r.second)
+        return r;
+    }
+
+    return r; // Empty range (nothing matched).
+#endif
+  }
+
+  template <typename M>
+  auto prefix_multimap_common<M>::
+  sup_range (const key_type& k) const -> std::pair<const_iterator, const_iterator>
+  {
+#if 0
+    // TODO (see above).
+#else
+    // Try the exact match first since this requires no copy of the key.
+    //
+    auto r (this->equal_range (k));
+    if (r.first != r.second)
+      return r;
+
+    // Otherwise retry with each key derived by prefix() from a copy of k.
+    //
+    const auto& c (this->key_comp ());
+    for (key_type p (k); c.prefix (p); )
+    {
+      r = this->equal_range (p);
+      if (r.first != r.second)
+        return r;
+    }
+
+    return r; // Empty range (nothing matched).
+#endif
+  }
 }
diff --git a/libbutl/process-details.hxx b/libbutl/process-details.hxx
index cf7624d..10d5241 100644
--- a/libbutl/process-details.hxx
+++ b/libbutl/process-details.hxx
@@ -3,17 +3,25 @@
 
 #pragma once
 
-#include <libbutl/ft/shared_mutex.hxx>
+#ifdef LIBBUTL_MINGW_STDTHREAD
 
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (dummy std.threading).
-import std.threading;
-#else
-#include <mutex>
-#if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex)
-#  include <shared_mutex>
-#endif
-#endif
+#  include <libbutl/mingw-shared_mutex.hxx>
+
+namespace butl
+{
+  using shared_mutex = mingw_stdthread::shared_mutex;
+  using ulock        = mingw_stdthread::unique_lock<shared_mutex>;
+  using slock        = mingw_stdthread::shared_lock<shared_mutex>;
+}
+
+#else // LIBBUTL_MINGW_STDTHREAD
+
+#  include <libbutl/ft/shared_mutex.hxx>
+
+#  include <mutex>
+#  if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex)
+#    include <shared_mutex>
+#  endif
 
 namespace butl
 {
@@ -41,7 +49,11 @@ namespace butl
   using ulock        = std::unique_lock<shared_mutex>;
   using slock        = ulock;
 #endif
+}
+#endif // LIBBUTL_MINGW_STDTHREAD
 
+namespace butl
+{
   // Mutex that is acquired to make a sequence of operations atomic in regards
   // to child process spawning. Must be aquired for exclusive access for child
   // process startup, and for shared access otherwise. Defined in process.cxx.
diff --git a/libbutl/process-io.cxx b/libbutl/process-io.cxx
index c29bbc0..0be3a77 100644
--- a/libbutl/process-io.cxx
+++ b/libbutl/process-io.cxx
@@ -1,36 +1,11 @@
 // file      : libbutl/process-io.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/process-io.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <ostream>
+#include <libbutl/process-io.hxx>
 
 #include <cstring> // strchr()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.process_io;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.process;
-#endif
 
-import butl.path-io;
-#else
-#include <libbutl/path-io.mxx>
-#endif
+#include <libbutl/path-io.hxx>
 
 using namespace std;
 
diff --git a/libbutl/process-io.mxx b/libbutl/process-io.hxx
index d07a212..29d6d8b 100644
--- a/libbutl/process-io.mxx
+++ b/libbutl/process-io.hxx
@@ -1,32 +1,15 @@
-// file      : libbutl/process-io.mxx -*- C++ -*-
+// file      : libbutl/process-io.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <ostream>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.process_io;
-#ifdef __cpp_lib_modules_ts
-import std.core; //@@ MOD TMP (should not be needed).
-import std.io;
-#endif
-import butl.process;
-#else
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   inline std::ostream&
   operator<< (std::ostream& o, const process_path& p)
diff --git a/libbutl/process-run.cxx b/libbutl/process-run.cxx
index c26c20d..b044ea1 100644
--- a/libbutl/process-run.cxx
+++ b/libbutl/process-run.cxx
@@ -1,35 +1,12 @@
 // file      : libbutl/process-run.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <cstdlib>  // exit()
 #include <iostream> // cerr
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.process;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-#endif
 
-import butl.utility; // operator<<(ostream,exception)
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // operator<<(ostream,exception)
 
 using namespace std;
 
@@ -47,7 +24,7 @@ namespace butl
     try
     {
       return process (pp, cmd,
-                      in, out, err,
+                      move (in), move (out), move (err),
                       cwd != nullptr ? cwd->string ().c_str () : nullptr,
                       envvars);
     }
diff --git a/libbutl/process-run.txx b/libbutl/process-run.txx
index aa1e381..6c903a8 100644
--- a/libbutl/process-run.txx
+++ b/libbutl/process-run.txx
@@ -1,7 +1,9 @@
 // file      : libbutl/process-run.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility> // forward(), index_sequence
+
+namespace butl
 {
   template <typename V>
   void process_env::
@@ -85,21 +87,21 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
   // valid file descriptor.
   //
   inline process::pipe
-  process_stdin (const process::pipe& v)
+  process_stdin (process::pipe v)
   {
     assert (v.in >= 0);
     return v;
   }
 
   inline process::pipe
-  process_stdout (const process::pipe& v)
+  process_stdout (process::pipe v)
   {
     assert (v.out >= 0);
     return v;
   }
 
   inline process::pipe
-  process_stderr (const process::pipe& v)
+  process_stderr (process::pipe v)
   {
     assert (v.out >= 0);
     return v;
@@ -129,13 +131,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
             typename... A,
             typename std::size_t... index>
   process
-  process_start (std::index_sequence<index...>,
-                 const C& cmdc,
-                 I&& in,
-                 O&& out,
-                 E&& err,
-                 const process_env& env,
-                 A&&... args)
+  process_start_impl (std::index_sequence<index...>,
+                      const C& cmdc,
+                      I&& in,
+                      O&& out,
+                      E&& err,
+                      const process_env& env,
+                      A&&... args)
   {
     // Map stdin/stdout/stderr arguments to their integer values, as expected
     // by the process constructor.
@@ -168,7 +170,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
     return process_start (env.cwd,
                           *env.path, cmd.data (),
                           env.vars,
-                          in_i, out_i, err_i);
+                          std::move (in_i),
+                          std::move (out_i),
+                          std::move (err_i));
   }
 
   template <typename C,
@@ -184,13 +188,13 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                           const process_env& env,
                           A&&... args)
   {
-    return process_start (std::index_sequence_for<A...> (),
-                          cmdc,
-                          std::forward<I> (in),
-                          std::forward<O> (out),
-                          std::forward<E> (err),
-                          env,
-                          std::forward<A> (args)...);
+    return process_start_impl (std::index_sequence_for<A...> (),
+                               cmdc,
+                               std::forward<I> (in),
+                               std::forward<O> (out),
+                               std::forward<E> (err),
+                               env,
+                               std::forward<A> (args)...);
   }
 
   template <typename I,
@@ -255,4 +259,45 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
                                  env,
                                  std::forward<A> (args)...);
   }
+
+  template <typename C,
+            typename... A,
+            typename std::size_t... index>
+  void
+  process_print_impl (std::index_sequence<index...>,
+                      const C& cmdc,
+                      const process_env& env,
+                      A&&... args)
+  {
+    // Construct the command line array.
+    //
+    const std::size_t args_size (sizeof... (args));
+
+    small_vector<const char*, args_size + 2> cmd;
+
+    assert (env.path != nullptr);
+    cmd.push_back (env.path->recall_string ());
+    // One string per argument for the wrapper (the array cannot be empty).
+    std::string storage[args_size != 0 ? args_size : 1];
+
+    const char* dummy[] = {
+      nullptr, process_args_as_wrapper (cmd, args, storage[index])... };
+
+    cmd.push_back (dummy[0]); // NULL (and get rid of unused warning).
+    // Pass the array and its size (counts the trailing NULL) to the callback.
+    cmdc (cmd.data (), cmd.size ());
+  }
+
+  template <typename C,
+            typename... A>
+  inline void
+  process_print_callback (const C& cmdc,
+                          const process_env& env,
+                          A&&... args)
+  {
+    process_print_impl (std::index_sequence_for<A...> (), // Index per arg.
+                        cmdc,
+                        env,
+                        std::forward<A> (args)...);
+  }
 }
diff --git a/libbutl/process.cxx b/libbutl/process.cxx
index 6c736c1..1b8da98 100644
--- a/libbutl/process.cxx
+++ b/libbutl/process.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/process.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/process.mxx>
-#endif
+#include <libbutl/process.hxx>
 
 #include <errno.h>
 
@@ -49,6 +47,14 @@
 #  elif defined(__NetBSD__) && __NetBSD__ >= 6
 #    define LIBBUTL_POSIX_SPAWN
 //
+// On OpenBSD posix_spawn() appeared in 5.2 (see the man page for details).
+//
+#  elif defined(__OpenBSD__)
+#    include <sys/param.h> // OpenBSD (yyyymm)
+#    if OpenBSD >= 201211  // 5.2 released on 1 Nov 2012.
+#      define LIBBUTL_POSIX_SPAWN
+#    endif
+//
 // posix_spawn() appeared in Version 3 of the Single UNIX Specification that
 // was implemented in MacOS 10.5 (see the man page for details).
 //
@@ -87,29 +93,20 @@
 #  endif // _MSC_VER
 #endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <chrono>
-#include <cstdint>
-#include <cstddef>
-#include <system_error>
-
 #include <ios>      // ios_base::failure
-#include <cstring>  // strlen(), strchr(), strncmp()
+#include <memory>   // unique_ptr
+#include <cstring>  // strlen(), strchr(), strpbrk(), strncmp()
 #include <utility>  // move()
 #include <ostream>
+#include <cassert>
 
 #ifndef _WIN32
-#include <thread> // this_thread::sleep_for()
+#  include <thread> // this_thread::sleep_for()
 #else
-#include <map>
-#include <ratio>     // milli
-#include <cstdlib>   // __argv[]
-#include <algorithm> // find()
-#endif
+#  include <map>
+#  include <ratio>     // milli
+#  include <cstdlib>   // __argv[]
+#  include <algorithm> // find()
 #endif
 
 #include <libbutl/process-details.hxx>
@@ -119,32 +116,8 @@ namespace butl
   shared_mutex process_spawn_mutex; // Out of module purview.
 }
 
-#ifdef __cpp_modules_ts
-module butl.process;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading; // Clang wants it in purview (see process-details.hxx).
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.vector_view;
-import butl.small_vector;
-#endif
-
-#ifndef _WIN32
-import std.threading;
-#endif
-
-import butl.utility;  // icasecmp()
-import butl.fdstream; // fdopen_null()
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx>  // icasecmp()
+#include <libbutl/fdstream.hxx> // fdopen_null()
 
 using namespace std;
 
@@ -217,7 +190,7 @@ namespace butl
   }
 
   void process::
-  print (ostream& o, const char* const args[], size_t n)
+  print (ostream& o, const char* const* args, size_t n)
   {
     size_t m (0);
     const char* const* p (args);
@@ -253,6 +226,35 @@ namespace butl
     } while (*p != nullptr);
   }
 
+#if defined(LIBBUTL_POSIX_SPAWN) || defined(_WIN32)
+  // Return true if the variable list vs has an entry, either an assignment
+  // (name=value) or an unset (name), for the name given by the first n
+  // characters of v. The list is NULL-terminated and may itself be NULL
+  // (empty). Names are compared case-insensitively on Windows.
+  //
+  static inline bool
+  contains_envvar (const char* const* vs, const char* v, size_t n)
+  {
+    // The list is expected to be short so a linear scan is fine.
+    //
+    for (; vs != nullptr && *vs != nullptr; ++vs)
+    {
+      const char* e (*vs);
+
+#ifdef _WIN32
+      bool m (icasecmp (e, v, n) == 0);
+#else
+      bool m (strncmp (e, v, n) == 0);
+#endif
+
+      if (m && (e[n] == '=' || e[n] == '\0'))
+        return true;
+    }
+
+    return false;
+  }
+#endif
+
 #ifndef _WIN32
 
   static process_path
@@ -260,7 +262,7 @@ namespace butl
   {
     // Note that there is a similar version for Win32.
 
-    typedef path::traits_type traits;
+    using traits = path::traits_type;
 
     size_t fn (strlen (f));
 
@@ -381,10 +383,10 @@ namespace butl
   }
 
   process::
-  process (const process_path& pp, const char* args[],
+  process (const process_path& pp, const char* const* args,
            pipe pin, pipe pout, pipe perr,
            const char* cwd,
-           const char* const* envvars)
+           const char* const* evars)
   {
     int in  (pin.in);
     int out (pout.out);
@@ -452,6 +454,17 @@ namespace butl
     else if (err == -2)
       in_efd.out = open_null ();
 
+    // If there is no user-supplied CWD and we have thread-specific override,
+    // use that instead of defaulting to the process-wide value.
+    //
+    if (cwd == nullptr || *cwd == '\0')
+    {
+      if (const string* twd = path::traits_type::thread_current_directory ())
+        cwd = twd->c_str ();
+    }
+
+    const char* const* tevars (thread_env ());
+
     // The posix_spawn()-based implementation.
     //
 #ifdef LIBBUTL_POSIX_SPAWN
@@ -540,47 +553,45 @@ namespace butl
         fail (r);
 #endif
 
-      // Set/unset environment variables if requested.
+      // Set/unset the child process environment variables if requested.
       //
-      small_vector<const char*, 8> new_env;
+      vector<const char*> new_env;
 
-      if (envvars != nullptr)
+      if (tevars != nullptr || evars != nullptr)
       {
-        for (const char* const* env (environ); *env != nullptr; ++env)
+        // Copy the non-overridden process environment variables into the
+        // child's environment.
+        //
+        for (const char* const* ev (environ); *ev != nullptr; ++ev)
         {
-          // Lookup the existing variable among those that are requested to be
-          // (un)set. If not present, than add it to the child process
-          // environment.
-          //
-          // Note that on POSIX variable names are case-sensitive.
-          //
-          // Alse note that we don't expect the number of variables to (un)set
-          // to be large, so the linear search is OK.
-          //
-          const char* cv (*env);
-          const char* eq (strchr (cv, '='));
-          size_t n (eq != nullptr ? eq - cv : strlen (cv));
-
-          const char* const* ev (envvars);
-          for (; *ev != nullptr; ++ev)
-          {
-            const char* v (*ev);
-            if (strncmp (cv, v, n) == 0 && (v[n] == '=' || v[n] == '\0'))
-              break;
-          }
+          const char* v (*ev);
+          const char* e (strchr (v, '='));
+          size_t n (e != nullptr ? e - v : strlen (v));
 
-          if (*ev == nullptr)
-            new_env.push_back (cv);
+          if (!contains_envvar (tevars, v, n) &&
+              !contains_envvar (evars, v, n))
+            new_env.push_back (v);
         }
 
-        // Copy the environment variables that are requested to be set.
+        // Copy non-overridden variable assignments into the child's
+        // environment.
         //
-        for (const char* const* ev (envvars); *ev != nullptr; ++ev)
+        auto set_vars = [&new_env] (const char* const* vs,
+                                    const char* const* ovs = nullptr)
         {
-          const char* v (*ev);
-          if (strchr (v, '=') != nullptr)
-            new_env.push_back (v);
-        }
+          if (vs != nullptr)
+          {
+            while (const char* v = *vs++)
+            {
+              const char* e (strchr (v, '='));
+              if (e != nullptr && !contains_envvar (ovs, v, e - v))
+                new_env.push_back (v);
+            }
+          }
+        };
+
+        set_vars (tevars, evars);
+        set_vars (evars);
 
         new_env.push_back (nullptr);
       }
@@ -598,9 +609,9 @@ namespace butl
                        &fa,
                        nullptr /* attrp */,
                        const_cast<char* const*> (&args[0]),
-                       envvars != nullptr
-                       ? const_cast<char* const*> (new_env.data ())
-                       : environ);
+                       new_env.empty ()
+                       ? environ
+                       : const_cast<char* const*> (new_env.data ()));
         if (r != 0)
           fail (r);
     } // Release the lock in parent.
@@ -641,6 +652,10 @@ namespace butl
       {
         // Child.
         //
+        // NOTE: make sure not to call anything that may acquire a mutex that
+        //       could be already acquired in another thread, most notably
+        //       malloc(). @@ What about exceptions (all the fail() calls)?
+
         // Duplicate the user-supplied (fd > -1) or the created pipe descriptor
         // to the standard stream descriptor (read end for STDIN_FILENO, write
         // end otherwise). Close the pipe afterwards.
@@ -688,27 +703,38 @@ namespace butl
         if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0)
           fail (true /* child */);
 
-        // Set/unset environment variables if requested.
+        // Set/unset environment variables.
         //
-        if (envvars != nullptr)
+        auto set_vars = [] (const char* const* vs)
         {
-          while (const char* ev = *envvars++)
+          if (vs != nullptr)
           {
-            const char* v (strchr (ev, '='));
-
-            try
+            while (const char* v = *vs++)
             {
-              if (v != nullptr)
-                setenv (string (ev, v - ev), v + 1);
-              else
-                unsetenv (ev);
-            }
-            catch (const system_error& e)
-            {
-              throw process_child_error (e.code ().value ());
+              const char* e (strchr (v, '='));
+
+              try
+              {
+                // @@ TODO: redo without allocation (PATH_MAX?) Maybe
+                //          also using C API to avoid exceptions.
+                //
+                if (e != nullptr)
+                  setenv (string (v, e - v), e + 1);
+                else
+                  unsetenv (v);
+              }
+              catch (const system_error& e)
+              {
+                // @@ Should we assume this cannot throw?
+                //
+                throw process_child_error (e.code ().value ());
+              }
             }
           }
-        }
+        };
+
+        set_vars (tevars);
+        set_vars (evars);
 
         // Try to re-exec after the "text file busy" failure for 450ms.
         //
@@ -741,6 +767,13 @@ namespace butl
   {
     if (handle != 0)
     {
+      // First close any open pipe ends for good measure but ignore any
+      // errors.
+      //
+      out_fd.reset ();
+      in_ofd.reset ();
+      in_efd.reset ();
+
       int es;
       int r (waitpid (handle, &es, 0));
       handle = 0; // We have tried.
@@ -822,6 +855,12 @@ namespace butl
     return getpid ();
   }
 
+  process::handle_type process::
+  current_handle ()
+  {
+    return getpid (); // The handle of the calling process is its pid here.
+  }
+
   // process_exit
   //
   process_exit::
@@ -1274,13 +1313,30 @@ namespace butl
   };
 
   const char* process::
-  quote_argument (const char* a, string& s)
+  quote_argument (const char* a, string& s, bool bat)
   {
-    // On Windows we need to protect values with spaces using quotes.
-    // Since there could be actual quotes in the value, we need to
-    // escape them.
+    // On Windows we need to protect values with spaces using quotes. Since
+    // there could be actual quotes in the value, we need to escape them.
+    //
+    // For batch files we also protect equal (`=`), comma (`,`) and semicolon
+    // (`;`) since otherwise an argument containing any of these will be split
+    // into several as if they were spaces (that is, the parts will appear in
+    // %1 %2, etc., instead of all in %1). This of course could break some
+    // batch files that rely on this semantics (for example, to automatically
+    // handle --foo=bar as --foo bar) but overall seeing a single argument
+    // (albeit quoted) is closer to the behavior of real executables. So we do
+    // this by default and if it becomes a problem we can invent a flag
+    // (probably in process_env) to disable this quoting (and while at it we
+    // may add a flag to disable all quoting since the user may need to quote
+    // some arguments but not others).
     //
-    bool q (*a == '\0' || strchr (a, ' ') != nullptr);
+    // While `()` and `[]` are not special characters, some "subsystems"
+    // (e.g., Cygwin/MSYS2) try to interpret them in certain contexts (e.g.,
+    // relative paths). So we quote them as well (over-quoting seems to be
+    // harmless according to the "Parsing C Command-Line Arguments" MSDN
+    // article).
+    //
+    bool q (*a == '\0' || strpbrk (a, bat ? " =,;" : " ()[]") != nullptr);
 
     if (!q && strchr (a, '"') == nullptr)
       return a;
@@ -1291,8 +1347,8 @@ namespace butl
       s += '"';
 
     // Note that backslashes don't need escaping, unless they immediately
-    // precede the double quote (see `Parsing C Command-Line Arguments` MSDN
-    // article for more details). For example:
+    // precede the double quote (see "Parsing C Command-Line Arguments" MSDN
+    // article for details). For example:
     //
     // -DPATH="C:\\foo\\"  ->  -DPATH=\"C:\\foo\\\\\"
     // -DPATH=C:\foo bar\  ->  "-DPATH=C:\foo bar\\"
@@ -1331,10 +1387,10 @@ namespace butl
   static map<string, bool> detect_msys_cache_;
 
   process::
-  process (const process_path& pp, const char* args[],
+  process (const process_path& pp, const char* const* args,
            pipe pin, pipe pout, pipe perr,
            const char* cwd,
-           const char* const* envvars)
+           const char* const* evars)
   {
     int in  (pin.in);
     int out (pout.out);
@@ -1345,6 +1401,15 @@ namespace butl
       throw process_error (m == nullptr ? last_error_msg () : m);
     };
 
+    // If there is no user-supplied CWD and we have thread-specific override,
+    // use that instead of defaulting to the process-wide value.
+    //
+    if (cwd == nullptr || *cwd == '\0')
+    {
+      if (const string* twd = path::traits_type::thread_current_directory ())
+        cwd = twd->c_str ();
+    }
+
     // (Un)set the environment variables for the child process.
     //
     // Note that we can not do it incrementally, as for POSIX implementation.
@@ -1356,7 +1421,9 @@ namespace butl
     //
     vector<char> new_env;
 
-    if (envvars != nullptr)
+    const char* const* tevars (thread_env ());
+
+    if (tevars != nullptr || evars != nullptr)
     {
       // The environment block contains the variables in the following format:
       //
@@ -1365,7 +1432,7 @@ namespace butl
       // Note the trailing NULL character that follows the last variable
       // (null-terminated) string.
       //
-      unique_ptr<char, void (*)(char*)> cvars (
+      unique_ptr<char, void (*)(char*)> pevars (
         GetEnvironmentStringsA (),
         [] (char* p)
         {
@@ -1376,50 +1443,45 @@ namespace butl
             assert (false);
         });
 
-      if (cvars.get () == nullptr)
+      if (pevars.get () == nullptr)
         fail ();
 
-      const char* cv (cvars.get ());
-
-      // Copy the current environment variables.
+      // Copy the non-overridden process environment variables into the
+      // child's environment.
       //
-      while (*cv != '\0')
+      for (const char* v (pevars.get ()); *v != '\0'; )
       {
-        // Lookup the existing variable among those that are requested to be
-        // (un)set. If not present, than copy it to the new block.
-        //
-        // Note that on Windows variable names are case-insensitive.
-        //
-        // Alse note that we don't expect the number of variables to (un)set
-        // to be large, so the linear search is OK.
-        //
-        size_t n (strlen (cv) + 1); // Includes NULL character.
+        size_t n (strlen (v) + 1); // Includes NULL character.
 
-        const char* eq (strchr (cv, '='));
-        size_t nn (eq != nullptr ? eq - cv : n - 1);
-        const char* const* ev (envvars);
+        const char* e (strchr (v, '='));
+        size_t nn (e != nullptr ? e - v : n - 1);
 
-        for (; *ev != nullptr; ++ev)
-        {
-          const char* v (*ev);
-          if (icasecmp (cv, v, nn) == 0 && (v[nn] == '=' || v[nn] == '\0'))
-            break;
-        }
-
-        if (*ev == nullptr)
-          new_env.insert (new_env.end (), cv, cv + n);
+        if (!contains_envvar (tevars, v, nn) &&
+            !contains_envvar (evars, v, nn))
+          new_env.insert (new_env.end (), v, v + n);
 
-        cv += n;
+        v += n;
       }
 
-      // Copy the environment variables that are requested to be set.
+      // Copy non-overridden variable assignments into the child's
+      // environment.
       //
-      for (const char* const* ev (envvars); *ev != nullptr; ++ev)
+      auto set_vars = [&new_env] (const char* const* vs,
+                                  const char* const* ovs = nullptr)
       {
-        const char* v (*ev);
-        if (strchr (v, '=') != nullptr)
-          new_env.insert (new_env.end (), v, v + strlen (v) + 1);
-      }
+        if (vs != nullptr)
+        {
+          while (const char* v = *vs++)
+          {
+            const char* e (strchr (v, '='));
+            if (e != nullptr && !contains_envvar (ovs, v, e - v))
+              new_env.insert (new_env.end (), v, v + strlen (v) + 1);
+          }
+        }
+      };
+
+      set_vars (tevars, evars);
+      set_vars (evars);
 
       new_env.push_back ('\0'); // Terminate the new environment block.
     }
@@ -1516,12 +1578,12 @@ namespace butl
     //
     string cmd_line;
     {
-      auto append = [&cmd_line, buf = string ()] (const char* a) mutable
+      auto append = [&batch, &cmd_line, buf = string ()] (const char* a) mutable
       {
         if (!cmd_line.empty ())
           cmd_line += ' ';
 
-        cmd_line += quote_argument (a, buf);
+        cmd_line += quote_argument (a, buf, batch.has_value ());
       };
 
       if (batch)
@@ -1763,7 +1825,6 @@ namespace butl
 
       using namespace chrono;
 
-
       // Retry for about 1 hour.
       //
       system_clock::duration timeout (1h);
@@ -1776,7 +1837,7 @@ namespace butl
               0,    // Primary thread security attributes.
               true, // Inherit handles.
               0,    // Creation flags.
-              envvars != nullptr ? new_env.data () : nullptr,
+              new_env.empty () ? nullptr : new_env.data (),
               cwd != nullptr && *cwd != '\0' ? cwd : nullptr,
               &si,
               &pi))
@@ -1849,7 +1910,7 @@ namespace butl
             return PeekNamedPipe (h, &c, 1, &n, nullptr, nullptr) && n == 1;
           };
 
-          // Hidden by butl::duration that is introduced via fdstream.mxx.
+          // Hidden by butl::duration that is introduced via fdstream.hxx.
           //
           using milli_duration = chrono::duration<DWORD, milli>;
 
@@ -1930,6 +1991,10 @@ namespace butl
   {
     if (handle != 0)
     {
+      out_fd.reset ();
+      in_ofd.reset ();
+      in_efd.reset ();
+
       DWORD es;
       DWORD e (NO_ERROR);
       if (WaitForSingleObject (handle, INFINITE) != WAIT_OBJECT_0 ||
@@ -2037,6 +2102,15 @@ namespace butl
     return GetCurrentProcessId ();
   }
 
+  process::handle_type process::
+  current_handle ()
+  {
+    // Note that the returned handle is a pseudo handle (-1) that does not
+    // need to be closed.
+    //
+    return GetCurrentProcess ();
+  }
+
   // process_exit
   //
   process_exit::
diff --git a/libbutl/process.mxx b/libbutl/process.hxx
index 9106549..bbb7c89 100644
--- a/libbutl/process.mxx
+++ b/libbutl/process.hxx
@@ -1,17 +1,12 @@
-// file      : libbutl/process.mxx -*- C++ -*-
+// file      : libbutl/process.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
 #ifndef _WIN32
 #  include <sys/types.h> // pid_t
 #endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <chrono>
@@ -20,33 +15,15 @@
 #include <cstdint>      // uint32_t
 #include <system_error>
 
-#include <utility> // move(), forward(), index_sequence
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.process;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.optional;
-import butl.fdstream;     // auto_fd, fdpipe
-import butl.vector_view;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/vector-view.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/optional.hxx>
+#include <libbutl/fdstream.hxx>     // auto_fd, fdpipe
+#include <libbutl/vector-view.hxx>
+#include <libbutl/small-vector.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   struct process_error: std::system_error
   {
@@ -140,8 +117,8 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Moveable-only type.
     //
-    process_path (process_path&&);
-    process_path& operator= (process_path&&);
+    process_path (process_path&&) noexcept;
+    process_path& operator= (process_path&&) noexcept;
 
     process_path (const process_path&) = delete;
     process_path& operator= (const process_path&) = delete;
@@ -191,6 +168,26 @@ LIBBUTL_MODEXPORT namespace butl
     bool
     normal () const;
 
+    // C/C++ don't apply constraints on program exit code other than it being
+    // of type int.
+    //
+    // POSIX specifies that only the least significant 8 bits shall be
+    // available from wait() and waitpid(); the full value shall be available
+    // from waitid() (read more at _Exit, _exit Open Group spec).
+    //
+    // While the Linux man page for waitid() doesn't mention any deviations
+    // from the standard, the FreeBSD implementation (as of version 11.0) only
+    // returns 8 bits like the other wait*() calls.
+    //
+    // Windows supports 32-bit exit codes.
+    //
+    // Note that in shells some exit values can have special meaning so using
+    // them can be a source of confusion. For bash, values in the [126, 255]
+    // range are such special ones (see Appendix E, "Exit Codes With Special
+    // Meanings" in the Advanced Bash-Scripting Guide).
+    //
+    // So [0, 125] appears to be the usable exit code range.
+    //
     code_type
     code () const;
 
@@ -272,7 +269,30 @@ LIBBUTL_MODEXPORT namespace butl
     // the parent. So you should do this yourself, if required.  For example,
     // to redirect the child process stdout to stderr, you can do:
     //
-    // process p (..., 0, 2);
+    // process pr (..., 0, 2);
+    //
+    // Note also that the somewhat roundabout setup with -1 as a redirect
+    // "instruction" and out_fd/in_ofd/in_efd data members for the result
+    // helps to make sure the stream instances are destroyed before the
+    // process instance. For example:
+    //
+    // process pr (..., 0, -1, 2);
+    // ifdstream is (move (pr.in_ofd));
+    //
+    // This is important in case an exception is thrown where we want to make
+    // sure all our pipe ends are closed before we wait for the process exit
+    // (which happens in the process destructor).
+    //
+    // And speaking of the destruction order, another thing to keep in mind is
+    // that only one stream can use the skip mode (fdstream_mode::skip;
+    // because skipping is performed in the blocking mode) and the stream that
+    // skips should come first so that all other streams are destroyed/closed
+    // before it (failing that, we may end up in a deadlock). For example:
+    //
+    // process pr (..., -1, -1, -1);
+    // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); // Must be first.
+    // ifdstream es (move (pr.in_efd));
+    // ofdstream os (move (pr.out_fd));
     //
     // The cwd argument allows to change the current working directory of the
     // child process. NULL and empty arguments are ignored.
@@ -290,39 +310,104 @@ LIBBUTL_MODEXPORT namespace butl
     // Note that the versions without the the process_path argument may
     // temporarily change args[0] (see path_search() for details).
     //
-    process (const char* [],
+    process (const char**,
              int in = 0, int out = 1, int err = 2,
              const char* cwd = nullptr,
              const char* const* envvars = nullptr);
 
-    process (const process_path&, const char* [],
+    process (const process_path&, const char* const*,
+             int in = 0, int out = 1, int err = 2,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (std::vector<const char*>&,
+             int in = 0, int out = 1, int err = 2,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const std::vector<const char*>&,
              int in = 0, int out = 1, int err = 2,
              const char* cwd = nullptr,
              const char* const* envvars = nullptr);
 
     // If the descriptors are pipes that you have created, then you should use
-    // this constructor instead to communicate this information.
+    // this constructor instead to communicate this information (the parent
+    // end may need to be "probed" on Windows).
     //
     // For generality, if the "other" end of the pipe is -1, then assume this
     // is not a pipe.
     //
     struct pipe
     {
-      int in  = -1;
-      int out = -1;
-
       pipe () = default;
       pipe (int i, int o): in (i), out (o) {}
 
       explicit
       pipe (const fdpipe& p): in (p.in.get ()), out (p.out.get ()) {}
+
+      // Transfer ownership to one end of the pipe.
+      //
+      pipe (auto_fd i, int o): in (i.release ()), out (o), own_in (true) {}
+      pipe (int i, auto_fd o): in (i), out (o.release ()), own_out (true) {}
+
+      // Moveable-only type.
+      //
+      pipe (pipe&&) noexcept;
+      pipe& operator= (pipe&&) noexcept;
+
+      pipe (const pipe&) = delete;
+      pipe& operator= (const pipe&) = delete;
+
+      ~pipe ();
+
+    public:
+      int in  = -1;
+      int out = -1;
+
+      bool own_in = false;
+      bool own_out = false;
     };
 
-    process (const process_path&, const char* [],
+    process (const char**,
              pipe in, pipe out, pipe err,
              const char* cwd = nullptr,
              const char* const* envvars = nullptr);
 
+    process (const char**,
+             int in, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const char* const*,
+             pipe in, pipe out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const char* const*,
+             int in, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (std::vector<const char*>&,
+             pipe in, pipe out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (std::vector<const char*>&,
+             int in, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const std::vector<const char*>&,
+             pipe in, pipe out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const std::vector<const char*>&,
+             int in, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
     // The "piping" constructor, for example:
     //
     // process lhs (..., 0, -1); // Redirect stdout to a pipe.
@@ -331,16 +416,36 @@ LIBBUTL_MODEXPORT namespace butl
     // rhs.wait (); // Wait for last first.
     // lhs.wait ();
     //
-    process (const char* [],
+    process (const char**,
              process&, int out = 1, int err = 2,
              const char* cwd = nullptr,
              const char* const* envvars = nullptr);
 
-    process (const process_path&, const char* [],
+    process (const process_path&, const char* const*,
              process&, int out = 1, int err = 2,
              const char* cwd = nullptr,
              const char* const* envvars = nullptr);
 
+    process (const char**,
+             process&, pipe out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const char**,
+             process&, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const char* const*,
+             process&, pipe out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
+    process (const process_path&, const char* const*,
+             process&, int out, pipe err,
+             const char* cwd = nullptr,
+             const char* const* envvars = nullptr);
+
     // Wait for the process to terminate. Return true if the process
     // terminated normally and with the zero exit code. Unless ignore_error
     // is true, throw process_error if anything goes wrong. This function can
@@ -367,7 +472,7 @@ LIBBUTL_MODEXPORT namespace butl
     // Note that the destructor will wait for the process but will ignore
     // any errors and the exit status.
     //
-    ~process () {if (handle != 0) wait (true);}
+    ~process () { if (handle != 0) wait (true); }
 
     // Process termination.
     //
@@ -394,8 +499,8 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Moveable-only type.
     //
-    process (process&&);
-    process& operator= (process&&);
+    process (process&&) noexcept;
+    process& operator= (process&&) noexcept (false); // Note: calls wait().
 
     process (const process&) = delete;
     process& operator= (const process&) = delete;
@@ -417,7 +522,7 @@ LIBBUTL_MODEXPORT namespace butl
     //
     // ... // E.g., print args[0].
     //
-    // process p (pp, args);
+    // process pr (pp, args);
     //
     // You can also specify the fallback directory which will be tried last.
     // This, for example, can be used to implement the Windows "search in the
@@ -501,15 +606,17 @@ LIBBUTL_MODEXPORT namespace butl
     // nameN arg arg ... nullptr nullptr
     //
     static void
-    print (std::ostream&, const char* const args[], size_t n = 0);
+    print (std::ostream&, const char* const* args, size_t n = 0);
 
-    // Quote and escape the specified command line argument. Return the
-    // original string if neither is necessary and a pointer to the provided
-    // buffer string containing the escaped version otherwise.
+    // Quote and escape the specified command line argument. If batch is true
+    // then also quote the equal (`=`), comma (`,`) and semicolon (`;`)
+    // characters which are treated as argument separators in batch files.
+    // Return the original string if neither is necessary and a pointer to the
+    // provided buffer string containing the escaped version otherwise.
     //
 #ifdef _WIN32
     static const char*
-    quote_argument (const char*, std::string& buffer);
+    quote_argument (const char*, std::string& buffer, bool batch);
 #endif
 
   public:
@@ -522,13 +629,16 @@ LIBBUTL_MODEXPORT namespace butl
   public:
     handle_type handle;
 
+    static handle_type
+    current_handle ();
+
     // Absence means that the exit information is not (yet) known. This can be
     // because you haven't called wait() yet or because wait() failed.
     //
     optional<process_exit> exit;
 
-    // Use the following file descriptors to communicate with the new process's
-    // standard streams.
+    // Use the following file descriptors to communicate with the new
+    // process's standard streams (if redirected to pipes; see above).
     //
     auto_fd out_fd; // Write to it to send to stdin.
     auto_fd in_ofd; // Read from it to receive from stdout.
@@ -642,8 +752,8 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Moveable-only type.
     //
-    process_env (process_env&&);
-    process_env& operator= (process_env&&);
+    process_env (process_env&&) noexcept;
+    process_env& operator= (process_env&&) noexcept;
 
     process_env (const process_env&) = delete;
     process_env& operator= (const process_env&) = delete;
@@ -679,7 +789,7 @@ LIBBUTL_MODEXPORT namespace butl
   // command line or similar. It should be callable with the following
   // signature:
   //
-  // void (const char*[], std::size_t)
+  // void (const char* const*, std::size_t)
   //
   template <typename C,
             typename I,
@@ -720,6 +830,15 @@ LIBBUTL_MODEXPORT namespace butl
                           const process_env&,
                           A&&... args);
 
+  // Call the callback without actually running/starting anything.
+  //
+  template <typename C,
+            typename... A>
+  void
+  process_print_callback (const C&,
+                          const process_env&,
+                          A&&... args);
+
   // Conversion of types to their C string representations. Can be overloaded
   // (including via ADL) for custom types. The default implementation calls
   // to_string() which covers all the numeric values via std::to_string () and
diff --git a/libbutl/process.ixx b/libbutl/process.ixx
index 7676ce3..e4db474 100644
--- a/libbutl/process.ixx
+++ b/libbutl/process.ixx
@@ -1,6 +1,9 @@
 // file      : libbutl/process.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
+#include <cassert>
+#include <utility> // move()
+
 namespace butl
 {
   // process_path
@@ -32,7 +35,7 @@ namespace butl
         args0_ (nullptr) {}
 
   inline process_path::
-  process_path (process_path&& p)
+  process_path (process_path&& p) noexcept
       : effect (std::move (p.effect)),
         args0_ (p.args0_)
   {
@@ -45,7 +48,7 @@ namespace butl
   }
 
   inline process_path& process_path::
-  operator= (process_path&& p)
+  operator= (process_path&& p) noexcept
   {
     if (this != &p)
     {
@@ -121,6 +124,42 @@ namespace butl
   }
 #endif
 
+  // process::pipe
+  //
+  inline process::pipe::
+  pipe (pipe&& p) noexcept
+      : in (p.in), out (p.out), own_in (p.own_in), own_out (p.own_out)
+  {
+    p.in = p.out = -1;
+  }
+
+  inline process::pipe& process::pipe::
+  operator= (pipe&& p) noexcept
+  {
+    if (this != &p)
+    {
+      int d (own_in ? in : own_out ? out : -1);
+      if (d != -1)
+        fdclose (d);
+
+      in = p.in;
+      out = p.out;
+      own_in = p.own_in;
+      own_out = p.own_out;
+
+      p.in = p.out = -1;
+    }
+    return *this;
+  }
+
+  inline process::pipe::
+  ~pipe ()
+  {
+    int d (own_in ? in : own_out ? out : -1);
+    if (d != -1)
+      fdclose (d);
+  }
+
   // process
   //
 #ifndef _WIN32
@@ -175,21 +214,37 @@ namespace butl
 
   inline process::
   process (optional<process_exit> e)
-      : handle (0),
-        exit (std::move (e)),
-        out_fd (-1),
-        in_ofd (-1),
-        in_efd (-1)
+      : handle (0), exit (std::move (e))
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const char* const* args,
+           int in, int out, int err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args,
+                 pipe (in, -1), pipe (-1, out), pipe (-1, err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (const char** args,
+           int in, int out, int err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args, in, out, err, cwd, envvars)
   {
   }
 
   inline process::
-  process (const process_path& pp, const char* args[],
+  process (const process_path& pp, const std::vector<const char*>& args,
            int in, int out, int err,
            const char* cwd,
            const char* const* envvars)
-      : process (pp,
-                 args,
+      : process (pp, args.data (),
                  pipe (in, -1), pipe (-1, out), pipe (-1, err),
                  cwd,
                  envvars)
@@ -197,32 +252,166 @@ namespace butl
   }
 
   inline process::
-  process (const char* args[],
+  process (std::vector<const char*>& args,
            int in, int out, int err,
            const char* cwd,
            const char* const* envvars)
-      : process (path_search (args[0]), args, in, out, err, cwd, envvars) {}
+      : process (path_search (args[0]), args.data (),
+                 in, out, err,
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (const char** args,
+           pipe in, pipe out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args,
+                 std::move (in), std::move (out), std::move (err),
+                 cwd, envvars)
+  {
+  }
+
+  inline process::
+  process (const char** args,
+           int in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args,
+                 pipe (in, -1), pipe (-1, out), std::move (err),
+                 cwd, envvars)
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const char* const* args,
+           int in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args,
+                 pipe (in, -1), pipe (-1, out), std::move (err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (std::vector<const char*>& args,
+           pipe in, pipe out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args.data (),
+                 std::move (in), std::move (out), std::move (err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (std::vector<const char*>& args,
+           int in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args.data (),
+                 pipe (in, -1), pipe (-1, out), std::move (err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const std::vector<const char*>& args,
+           pipe in, pipe out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args.data (),
+                 std::move (in), std::move (out), std::move (err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const std::vector<const char*>& args,
+           int in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args.data (),
+                 pipe (in, -1), pipe (-1, out), std::move (err),
+                 cwd,
+                 envvars)
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const char* const* args,
+           process& in, pipe out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args,
+                 [&in] ()
+                 {
+                   assert (in.in_ofd != nullfd); // Should be a pipe.
+                   return process::pipe (std::move (in.in_ofd), -1);
+                 } (),
+                 std::move (out), std::move (err),
+                 cwd, envvars)
+  {
+  }
 
   inline process::
-  process (const process_path& pp, const char* args[],
+  process (const process_path& pp, const char* const* args,
            process& in, int out, int err,
            const char* cwd,
            const char* const* envvars)
-      : process (pp, args, in.in_ofd.get (), out, err, cwd, envvars)
+      : process (pp, args, in, pipe (-1, out), pipe (-1, err), cwd, envvars)
   {
-    assert (in.in_ofd.get () != -1); // Should be a pipe.
-    in.in_ofd.reset (); // Close it on our side.
   }
 
   inline process::
-  process (const char* args[],
+  process (const char** args,
            process& in, int out, int err,
            const char* cwd,
            const char* const* envvars)
-      : process (path_search (args[0]), args, in, out, err, cwd, envvars) {}
+      : process (path_search (args[0]), args, in, out, err, cwd, envvars)
+  {
+  }
+
+  inline process::
+  process (const char** args,
+           process& in, pipe out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args,
+                 in, std::move (out), std::move (err),
+                 cwd, envvars)
+  {
+  }
+
+  inline process::
+  process (const char** args,
+           process& in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (path_search (args[0]), args,
+                 in, pipe (-1, out), std::move (err),
+                 cwd, envvars)
+  {
+  }
+
+  inline process::
+  process (const process_path& pp, const char* const* args,
+           process& in, int out, pipe err,
+           const char* cwd,
+           const char* const* envvars)
+      : process (pp, args, in, pipe (-1, out), std::move (err), cwd, envvars)
+  {
+  }
 
   inline process::
-  process (process&& p)
+  process (process&& p) noexcept
       : handle (p.handle),
         exit   (std::move (p.exit)),
         out_fd (std::move (p.out_fd)),
@@ -233,7 +422,7 @@ namespace butl
   }
 
   inline process& process::
-  operator= (process&& p)
+  operator= (process&& p) noexcept (false)
   {
     if (this != &p)
     {
@@ -270,13 +459,13 @@ namespace butl
   // process_env
   //
   inline process_env::
-  process_env (process_env&& e)
+  process_env (process_env&& e) noexcept
   {
     *this = std::move (e);
   }
 
   inline process_env& process_env::
-  operator= (process_env&& e)
+  operator= (process_env&& e) noexcept
   {
     if (this != &e)
     {
diff --git a/libbutl/project-name.cxx b/libbutl/project-name.cxx
index 7a14b49..a7ed8a8 100644
--- a/libbutl/project-name.cxx
+++ b/libbutl/project-name.cxx
@@ -1,38 +1,16 @@
 // file      : libbutl/project-name.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/project-name.mxx>
-#endif
+#include <libbutl/project-name.hxx>
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <utility>   // move()
 #include <algorithm> // find()
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.project_name;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility;
-#endif
-
-import butl.path;    // path::traits
-import butl.utility; // alpha(), alnum()
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#endif
+
+#include <libbutl/path.hxx>    // path::traits
+#include <libbutl/utility.hxx> // alpha(), alnum()
 
 using namespace std;
 
diff --git a/libbutl/project-name.mxx b/libbutl/project-name.hxx
index 1117e28..6e1f925 100644
--- a/libbutl/project-name.mxx
+++ b/libbutl/project-name.hxx
@@ -1,34 +1,17 @@
-// file      : libbutl/project-name.mxx -*- C++ -*-
+// file      : libbutl/project-name.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <utility> // move()
 #include <ostream>
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.project_name;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utility; // icasecmp(), sanitize_identifier()
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // icasecmp(), sanitize_identifier()
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Build system project name.
   //
diff --git a/libbutl/prompt.cxx b/libbutl/prompt.cxx
index 1c0820a..154522c 100644
--- a/libbutl/prompt.cxx
+++ b/libbutl/prompt.cxx
@@ -1,33 +1,11 @@
 // file      : libbutl/prompt.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/prompt.mxx>
-#endif
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
+#include <libbutl/prompt.hxx>
 
 #include <iostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.prompt;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
-import butl.diagnostics;
-#else
-#include <libbutl/diagnostics.mxx> // diag_stream
-#endif
+#include <libbutl/diagnostics.hxx> // diag_stream
 
 using namespace std;
 
@@ -66,8 +44,8 @@ namespace butl
         if (!e)
           a = def;
       }
-    } while (a != "y" && a != "n");
+    } while (a != "y" && a != "Y" && a != "n" && a != "N");
 
-    return a == "y";
+    return a == "y" || a == "Y";
   }
 }
diff --git a/libbutl/prompt.mxx b/libbutl/prompt.hxx
index 2489b2f..2a07708 100644
--- a/libbutl/prompt.mxx
+++ b/libbutl/prompt.hxx
@@ -1,28 +1,13 @@
-// file      : libbutl/prompt.mxx -*- C++ -*-
+// file      : libbutl/prompt.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.prompt;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // The Y/N prompt. The def argument, if specified, should be either 'y' or
   // 'n'. It is used as the default answer, in case the user just hits enter.
@@ -30,6 +15,10 @@ LIBBUTL_MODEXPORT namespace butl
   // Write the prompt to diag_stream. Throw ios_base::failure if no answer
   // could be extracted from stdin (for example, because it was closed).
   //
+  // Note that the implementation accepts both lower and upper case y/n as
+  // valid answers (apparently the capitalized default answer confuses some
+  // users into answering with capital letters).
+  //
   LIBBUTL_SYMEXPORT bool
   yn_prompt (const std::string&, char def = '\0');
 }
diff --git a/libbutl/regex.cxx b/libbutl/regex.cxx
index 83e296c..34536f2 100644
--- a/libbutl/regex.cxx
+++ b/libbutl/regex.cxx
@@ -1,42 +1,17 @@
 // file      : libbutl/regex.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/regex.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <regex>
-#include <string>
+#include <libbutl/regex.hxx>
 
 #include <ostream>
 #include <sstream>
 #include <stdexcept> // runtime_error
+
 #if defined(_MSC_VER) && _MSC_VER < 2000
 #  include <cstring> // strstr()
 #endif
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-module butl.regex;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.regex;
-#endif
-#endif
-
-import butl.utility; // operator<<(ostream, exception)
-#else
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx> // operator<<(ostream, exception)
 
 namespace std
 {
diff --git a/libbutl/regex.mxx b/libbutl/regex.hxx
index 84b024f..9b31075 100644
--- a/libbutl/regex.mxx
+++ b/libbutl/regex.hxx
@@ -1,22 +1,13 @@
-// file      : libbutl/regex.mxx -*- C++ -*-
+// file      : libbutl/regex.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
-#ifndef __cpp_lib_modules_ts
 #include <regex>
 #include <iosfwd>
 #include <string>
 #include <utility> // pair
-
-#include <locale>
 #include <cstddef> // size_t
-#include <utility> // move(), make_pair()
-#endif
 
 #if defined(__clang__)
 #  if __has_include(<__config>)
@@ -24,20 +15,9 @@
 #  endif
 #endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.regex;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.regex; // @@ MOD TODO should probably be re-exported.
-#endif
-#endif
-
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // The regex semantics for the following functions is like that of
   // std::regex_replace() extended the standard ECMA-262 substitution escape
@@ -93,9 +73,54 @@ LIBBUTL_MODEXPORT namespace butl
   regex_replace_match (const std::basic_string<C>&,
                        const std::basic_regex<C>&,
                        const std::basic_string<C>& fmt);
+
+  // As above but using match_results.
+  //
+  template <typename C>
+  std::basic_string<C>
+  regex_replace_match_results (
+    const std::match_results<typename std::basic_string<C>::const_iterator>&,
+    const std::basic_string<C>& fmt);
+
+  template <typename C>
+  std::basic_string<C>
+  regex_replace_match_results (
+    const std::match_results<typename std::basic_string<C>::const_iterator>&,
+    const C* fmt, std::size_t fmt_n);
+
+  // Parse the '/<regex>/<format>/' replacement string into the regex/format
+  // pair. Another character can be used as a delimiter instead of '/'. Throw
+  // std::invalid_argument or std::regex_error on parsing error.
+  //
+  // Note: escaping of the delimiter character is not (yet) supported.
+  //
+  template <typename C>
+  std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const std::basic_string<C>&,
+                       std::regex_constants::syntax_option_type =
+                         std::regex_constants::ECMAScript);
+
+  template <typename C>
+  std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const C*,
+                       std::regex_constants::syntax_option_type =
+                         std::regex_constants::ECMAScript);
+
+  template <typename C>
+  std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const C*, size_t,
+                       std::regex_constants::syntax_option_type =
+                         std::regex_constants::ECMAScript);
+
+  // As above but return string instead of regex and, rather than failing if
+  // there is text after the last delimiter, return its position in end.
+  //
+  template <typename C>
+  std::pair<std::basic_string<C>, std::basic_string<C>>
+  regex_replace_parse (const C*, size_t, size_t& end);
 }
 
-LIBBUTL_MODEXPORT namespace std
+namespace std
 {
   // Print regex error description but only if it is meaningful (this is also
   // why we have to print leading colon).
diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx
index dec15d1..08962cf 100644
--- a/libbutl/regex.ixx
+++ b/libbutl/regex.ixx
@@ -1,7 +1,9 @@
 // file      : libbutl/regex.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <utility> // move(), make_pair()
+
+namespace butl
 {
   template <typename C>
   inline std::pair<std::basic_string<C>, bool>
@@ -21,4 +23,30 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
     return make_pair (move (r), match);
   }
+
+  template <typename C>
+  inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const std::basic_string<C>& s,
+                       std::regex_constants::syntax_option_type f)
+  {
+    return regex_replace_parse (s.c_str (), s.size (), f);
+  }
+
+  template <typename C>
+  inline std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const C* s,
+                       std::regex_constants::syntax_option_type f)
+  {
+    return regex_replace_parse (
+      s, std::basic_string<C>::traits_type::length (s), f);
+  }
+
+  template <typename C>
+  inline std::basic_string<C>
+  regex_replace_match_results (
+    const std::match_results<typename std::basic_string<C>::const_iterator>& m,
+    const std::basic_string<C>& fmt)
+  {
+    return regex_replace_match_results (m, fmt.c_str (), fmt.size ());
+  }
 }
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index b785708..214d949 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -1,15 +1,16 @@
 // file      : libbutl/regex.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <locale>
+#include <stdexcept> // invalid_argument
+
+namespace butl
 {
-  // Replace the regex match results using the format string.
-  //
   template <typename C>
   std::basic_string<C>
   regex_replace_match_results (
     const std::match_results<typename std::basic_string<C>::const_iterator>& m,
-    const std::basic_string<C>& fmt)
+    const C* fmt, std::size_t n)
   {
     using namespace std;
 
@@ -60,7 +61,6 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
       }
     };
 
-    size_t n (fmt.size ());
     for (size_t i (0); i < n; ++i)
     {
       C c (fmt[i]);
@@ -278,4 +278,71 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
 
     return match;
   }
+
+  template <typename C>
+  std::pair<std::basic_regex<C>, std::basic_string<C>>
+  regex_replace_parse (const C* s, size_t n,
+                       std::regex_constants::syntax_option_type f)
+  {
+    using namespace std;
+
+    using string_type = basic_string<C>;
+
+    size_t e;
+    pair<string_type, string_type> r (regex_replace_parse (s, n, e));
+
+    if (e != n)
+      throw invalid_argument ("junk after trailing delimiter");
+
+    return make_pair (basic_regex<C> (r.first, f), move (r.second));
+  }
+
+  template <typename C>
+  std::pair<std::basic_string<C>, std::basic_string<C>>
+  regex_replace_parse (const C* s, size_t n, size_t& e)
+  {
+    using namespace std;
+
+    using string_type = basic_string<C>;
+
+    if (n == 0)
+      throw invalid_argument ("no leading delimiter");
+
+    const C* b (s); // Save the beginning of the string.
+
+    C delim (s[0]); // Keep the delimiter in the string's character type.
+
+    // Position to the regex first character and find the regex-terminating
+    // delimiter.
+    //
+    --n;
+    ++s;
+
+    const C* p (string_type::traits_type::find (s, n, delim));
+
+    if (p == nullptr)
+      throw invalid_argument ("no delimiter after regex");
+
+    // Empty regex matches nothing, so not of much use.
+    //
+    if (p == s)
+      throw invalid_argument ("empty regex");
+
+    // Save the regex.
+    //
+    string_type re (s, p - s);
+
+    // Position to the format first character and find the trailing delimiter.
+    //
+    n -= p - s + 1;
+    s  = p + 1;
+
+    p = string_type::traits_type::find (s, n, delim);
+
+    if (p == nullptr)
+      throw invalid_argument ("no delimiter after replacement");
+
+    e = p - b + 1;
+    return make_pair (move (re), string_type (s, p - s));
+  }
 }
diff --git a/libbutl/semantic-version.cxx b/libbutl/semantic-version.cxx
index eaf709d..9e0a1ef 100644
--- a/libbutl/semantic-version.cxx
+++ b/libbutl/semantic-version.cxx
@@ -1,39 +1,12 @@
 // file      : libbutl/semantic-version.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/semantic-version.mxx>
-#endif
+#include <libbutl/semantic-version.hxx>
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#include <ostream>
-
 #include <cstring>   // strchr()
-#include <cstdlib>   // strtoull()
 #include <utility>   // move()
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.semantic_version;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#endif
-#else
-#endif
 
 using namespace std;
 
@@ -80,9 +53,9 @@ namespace butl
   }
 
   semantic_version::
-  semantic_version (const std::string& s, size_t p, const char* bs)
+  semantic_version (const std::string& s, size_t p, flags fs, const char* bs)
   {
-    semantic_version_result r (parse_semantic_version_impl (s, p, bs));
+    semantic_version_result r (parse_semantic_version_impl (s, p, fs, bs));
 
     if (r.version)
       *this = move (*r.version);
@@ -98,8 +71,27 @@ namespace butl
                 uint64_t min = 0, uint64_t max = uint64_t (~0));
 
   semantic_version_result
-  parse_semantic_version_impl (const string& s, size_t p, const char* bs)
+  parse_semantic_version_impl (const string& s, size_t p,
+                               semantic_version::flags fs,
+                               const char* bs)
   {
+    bool allow_build ((fs & semantic_version::allow_build) != 0);
+
+    // If build separators are specified, then the allow_build flag must be
+    // specified explicitly.
+    //
+    assert (bs == nullptr || allow_build);
+
+    if (allow_build && bs == nullptr)
+      bs = "-+";
+
+    bool require_minor ((fs & semantic_version::allow_omit_minor) == 0);
+
+    if (!require_minor)
+      fs |= semantic_version::allow_omit_patch;
+
+    bool require_patch ((fs & semantic_version::allow_omit_patch) == 0);
+
     auto bail = [] (string m)
     {
       return semantic_version_result {nullopt, move (m)};
@@ -110,31 +102,47 @@ namespace butl
     if (!parse_uint64 (s, p, r.major))
       return bail ("invalid major version");
 
-    if (s[p] != '.')
-      return bail ("'.' expected after major version");
-
-    if (!parse_uint64 (s, ++p, r.minor))
-      return bail ("invalid minor version");
-
-    if (s[p] == '.')
+    if (s[p] == '.') // Is there a minor version?
     {
-      // Treat it as build if failed to parse as patch (e.g., 1.2.alpha).
+      // Try to parse the minor version and treat it as build on failure
+      // (e.g., 1.alpha).
       //
-      if (!parse_uint64 (s, ++p, r.patch))
+      if (parse_uint64 (s, ++p, r.minor))
+      {
+        if (s[p] == '.') // Is there a patch version?
+        {
+          // Try to parse the patch version and treat it as build on failure
+          // (e.g., 1.2.alpha).
+          //
+          if (parse_uint64 (s, ++p, r.patch))
+            ;
+          else
+          {
+            if (require_patch)
+              return bail ("invalid patch version");
+
+            --p;
+            // Fall through.
+          }
+        }
+        else if (require_patch)
+          return bail ("'.' expected after minor version");
+      }
+      else
       {
-        //if (require_patch)
-        //  return bail ("invalid patch version");
+        if (require_minor)
+          return bail ("invalid minor version");
 
         --p;
         // Fall through.
       }
     }
-    //else if (require_patch)
-    //  return bail ("'.' expected after minor version");
+    else if (require_minor)
+      return bail ("'.' expected after major version");
 
     if (char c = s[p])
     {
-      if (bs == nullptr || (*bs != '\0' && strchr (bs, c) == nullptr))
+      if (!allow_build || (*bs != '\0' && strchr (bs, c) == nullptr))
         return bail ("junk after version");
 
       r.build.assign (s, p, string::npos);
diff --git a/libbutl/semantic-version.mxx b/libbutl/semantic-version.hxx
index 566d192..4eba38a 100644
--- a/libbutl/semantic-version.mxx
+++ b/libbutl/semantic-version.hxx
@@ -1,32 +1,15 @@
-// file      : libbutl/semantic-version.mxx -*- C++ -*-
+// file      : libbutl/semantic-version.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <cstddef> // size_t
 #include <cstdint> // uint*_t
 #include <utility> // move()
 #include <ostream>
-#endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.semantic_version;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#else
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
@@ -40,19 +23,13 @@ import butl.optional;
 #  undef minor
 #endif
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Semantic or semantic-like version.
   //
-  // <major>.<minor>[.<patch>][<build>]
+  // <major>[.<minor>[.<patch>]][<build>]
   //
-  // If the patch component is absent, then it defaults to 0.
-  //
-  // @@ Currently there is no way to enforce the three-component version.
-  //    Supporting this will require changing allow_build to a bit-wise
-  //    flag. See parse_semantic_version_impl() for some sketched code.
-  //    We may also want to pass these flags to string() to not print
-  //    0 patch.
+  // If the minor and patch components are absent, then they default to 0.
   //
   // By default, a version containing the <build> component is considered
   // valid only if separated from <patch> with '-' (semver pre-release) or '+'
@@ -80,23 +57,36 @@ LIBBUTL_MODEXPORT namespace butl
                       std::uint64_t patch,
                       std::string   build = "");
 
-    // The build_separators argument can be NULL (no build component allowed),
-    // empty (any build component allowed), or a string of characters to allow
-    // as separators. When allow_build is true build_separators defaults to
-    // "-+".
+    // If the allow_build flag is specified, then build_separators argument
+    // can be a string of characters to allow as separators, empty (any build
+    // component allowed), or NULL (defaults to "-+").
     //
-    explicit
-    semantic_version (const std::string&, bool allow_build = true);
+    // Note: allow_omit_minor implies allow_omit_patch.
+    //
+    enum flags
+    {
+      none             = 0,    // Exact <major>.<minor>.<patch> form.
+      allow_omit_minor = 0x01, // Allow <major> form.
+      allow_omit_patch = 0x02, // Allow <major>.<minor> form.
+      allow_build      = 0x04, // Allow <major>.<minor>.<patch>-<build> form.
+    };
 
-    semantic_version (const std::string&, const char* build_separators);
+    explicit
+    semantic_version (const std::string&,
+                      flags = none,
+                      const char* build_separators = nullptr);
 
     // As above but parse from the specified position until the end of the
     // string.
     //
-    semantic_version (const std::string&, std::size_t pos, bool = true);
-
-    semantic_version (const std::string&, std::size_t pos, const char*);
+    semantic_version (const std::string&,
+                      std::size_t pos,
+                      flags = none,
+                      const char* = nullptr);
 
+    // @@ We may also want to pass allow_* flags not to print 0 minor/patch or
+    //    maybe invent ignore_* flags.
+    //
     std::string
     string (bool ignore_build = false) const;
 
@@ -133,16 +123,15 @@ LIBBUTL_MODEXPORT namespace butl
   // Try to parse a string as a semantic version returning nullopt if invalid.
   //
   optional<semantic_version>
-  parse_semantic_version (const std::string&, bool allow_build = true);
-
-  optional<semantic_version>
-  parse_semantic_version (const std::string&, const char* build_separators);
-
-  optional<semantic_version>
-  parse_semantic_version (const std::string&, std::size_t pos, bool = true);
+  parse_semantic_version (const std::string&,
+                          semantic_version::flags = semantic_version::none,
+                          const char* build_separators = nullptr);
 
   optional<semantic_version>
-  parse_semantic_version (const std::string&, std::size_t pos, const char*);
+  parse_semantic_version (const std::string&,
+                          std::size_t pos,
+                          semantic_version::flags = semantic_version::none,
+                          const char* = nullptr);
 
   // NOTE: comparison operators take the build component into account.
   //
@@ -187,6 +176,18 @@ LIBBUTL_MODEXPORT namespace butl
   {
     return o << x.string ();
   }
+
+  semantic_version::flags
+  operator& (semantic_version::flags, semantic_version::flags);
+
+  semantic_version::flags
+  operator| (semantic_version::flags, semantic_version::flags);
+
+  semantic_version::flags
+  operator&= (semantic_version::flags&, semantic_version::flags);
+
+  semantic_version::flags
+  operator|= (semantic_version::flags&, semantic_version::flags);
 }
 
 #include <libbutl/semantic-version.ixx>
diff --git a/libbutl/semantic-version.ixx b/libbutl/semantic-version.ixx
index 6bf7584..8de1554 100644
--- a/libbutl/semantic-version.ixx
+++ b/libbutl/semantic-version.ixx
@@ -15,23 +15,9 @@ namespace butl
   {
   }
 
-  // Note: the order is important to MinGW GCC (DLL linkage).
-  //
   inline semantic_version::
-  semantic_version (const std::string& s, std::size_t p, bool ab)
-      : semantic_version (s, p, ab ? "-+" : nullptr)
-  {
-  }
-
-  inline semantic_version::
-  semantic_version (const std::string& s, const char* bs)
-      : semantic_version (s, 0, bs)
-  {
-  }
-
-  inline semantic_version::
-  semantic_version (const std::string& s, bool ab)
-      : semantic_version (s, ab ? "-+" : nullptr)
+  semantic_version (const std::string& s, flags fs, const char* bs)
+      : semantic_version (s, 0, fs, bs)
   {
   }
 
@@ -42,29 +28,53 @@ namespace butl
   };
 
   LIBBUTL_SYMEXPORT semantic_version_result
-  parse_semantic_version_impl (const std::string&, std::size_t, const char*);
+  parse_semantic_version_impl (const std::string&,
+                               std::size_t,
+                               semantic_version::flags,
+                               const char*);
 
   inline optional<semantic_version>
-  parse_semantic_version (const std::string& s, bool ab)
+  parse_semantic_version (const std::string& s,
+                          semantic_version::flags fs,
+                          const char* bs)
   {
-    return parse_semantic_version (s, ab ? "-+" : nullptr);
+    return parse_semantic_version_impl (s, 0, fs, bs).version;
   }
 
   inline optional<semantic_version>
-  parse_semantic_version (const std::string& s, const char* bs)
+  parse_semantic_version (const std::string& s,
+                          std::size_t p,
+                          semantic_version::flags fs,
+                          const char* bs)
   {
-    return parse_semantic_version_impl (s, 0, bs).version;
+    return parse_semantic_version_impl (s, p, fs, bs).version;
   }
 
-  inline optional<semantic_version>
-  parse_semantic_version (const std::string& s, std::size_t p, bool ab)
+  inline semantic_version::flags
+  operator&= (semantic_version::flags& x, semantic_version::flags y)
   {
-    return parse_semantic_version (s, p, ab ? "-+" : nullptr);
+    return x = static_cast<semantic_version::flags> (
+      static_cast<std::uint16_t> (x) &
+      static_cast<std::uint16_t> (y));
   }
 
-  inline optional<semantic_version>
-  parse_semantic_version (const std::string& s, std::size_t p, const char* bs)
+  inline semantic_version::flags
+  operator|= (semantic_version::flags& x, semantic_version::flags y)
+  {
+    return x = static_cast<semantic_version::flags> (
+      static_cast<std::uint16_t> (x) |
+      static_cast<std::uint16_t> (y));
+  }
+
+  inline semantic_version::flags
+  operator& (semantic_version::flags x, semantic_version::flags y)
+  {
+    return x &= y;
+  }
+
+  inline semantic_version::flags
+  operator| (semantic_version::flags x, semantic_version::flags y)
   {
-    return parse_semantic_version_impl (s, p, bs).version;
+    return x |= y;
   }
 }
diff --git a/libbutl/sendmail.cxx b/libbutl/sendmail.cxx
index 1038cf4..5fec1a6 100644
--- a/libbutl/sendmail.cxx
+++ b/libbutl/sendmail.cxx
@@ -1,32 +1,7 @@
 // file      : libbutl/sendmail.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/sendmail.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sendmail;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#endif
-
-#endif
+#include <libbutl/sendmail.hxx>
 
 using namespace std;
 
diff --git a/libbutl/sendmail.mxx b/libbutl/sendmail.hxx
index 0d5b239..97a4d82 100644
--- a/libbutl/sendmail.mxx
+++ b/libbutl/sendmail.hxx
@@ -1,38 +1,17 @@
-// file      : libbutl/sendmail.mxx -*- C++ -*-
+// file      : libbutl/sendmail.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 
-#include <cstddef> // size_t
-#include <utility> // move(), forward()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sendmail;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.process;
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/process.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/process.hxx>
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Send email using the sendmail(1) program.
   //
diff --git a/libbutl/sendmail.ixx b/libbutl/sendmail.ixx
index 105c1af..35b5c47 100644
--- a/libbutl/sendmail.ixx
+++ b/libbutl/sendmail.ixx
@@ -1,7 +1,10 @@
 // file      : libbutl/sendmail.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <cstddef> // size_t
+#include <utility> // move(), forward()
+
+namespace butl
 {
   template <typename E, typename... O>
   inline sendmail::
diff --git a/libbutl/sha1.c b/libbutl/sha1.c
index 37e862e..98fce5e 100644
--- a/libbutl/sha1.c
+++ b/libbutl/sha1.c
@@ -121,11 +121,17 @@ main ()
 
 #include <string.h>
 
+/* Assume if bzero/bcopy are defined as macros, then they do what we need. */
+
 /* void bzero(void *s, size_t n); */
-#define bzero(s, n) memset((s), 0, (n))
+#ifndef bzero
+#  define bzero(s, n) memset((s), 0, (n))
+#endif
 
 /* void bcopy(const void *s1, void *s2, size_t n); */
-#define bcopy(s1, s2, n) memmove((s2), (s1), (n))
+#ifndef bcopy
+#  define bcopy(s1, s2, n) memmove((s2), (s1), (n))
+#endif
 
 /* The rest is the unmodified (except for adjusting function declarations and
    adding a few explicit casts to make compilable in C++ without warnings)
diff --git a/libbutl/sha1.cxx b/libbutl/sha1.cxx
index 6a5e9db..e546922 100644
--- a/libbutl/sha1.cxx
+++ b/libbutl/sha1.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/sha1.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/sha1.mxx>
-#endif
+#include <libbutl/sha1.hxx>
 
 // C interface for sha1c.
 //
@@ -42,29 +40,9 @@ extern "C"
 #define SHA1_Final(x, y)     sha1_result((y), (char(&)[20])(x))
 
 #include <cassert>
+#include <istream>
 
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sha1;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-import butl.fdstream;
-#else
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/bufstreambuf.hxx>
 
 using namespace std;
 
@@ -91,12 +69,12 @@ namespace butl
   }
 
   void sha1::
-  append (ifdstream& is)
+  append (istream& is)
   {
-    fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+    bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
     assert (buf != nullptr);
 
-    while (is.peek () != ifdstream::traits_type::eof () && is.good ())
+    while (is.peek () != istream::traits_type::eof () && is.good ())
     {
       size_t n (buf->egptr () - buf->gptr ());
       append (buf->gptr (), n);
diff --git a/libbutl/sha1.mxx b/libbutl/sha1.hxx
index 07c469c..62710f4 100644
--- a/libbutl/sha1.mxx
+++ b/libbutl/sha1.hxx
@@ -1,34 +1,18 @@
-// file      : libbutl/sha1.mxx -*- C++ -*-
+// file      : libbutl/sha1.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
+#include <iosfwd>   // istream
 #include <string>
-#include <cstddef>     // size_t
+#include <cstddef>  // size_t
 #include <cstdint>
-#include <cstring>     // strlen()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sha1;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
+#include <cstring>  // strlen()
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
-  class ifdstream;
-
   // SHA1 checksum calculator.
   //
   // For a single chunk of data a sum can be obtained in one line, for
@@ -67,11 +51,14 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Append stream.
     //
+    // Note that currently the stream is expected to be bufstreambuf-based
+    // (e.g., ifdstream).
+    //
     void
-    append (ifdstream&);
+    append (std::istream&);
 
     explicit
-    sha1 (ifdstream& i): sha1 () {append (i);}
+    sha1 (std::istream& i): sha1 () {append (i);}
 
     // Check if any data has been hashed.
     //
diff --git a/libbutl/sha256.cxx b/libbutl/sha256.cxx
index 2528693..95987ec 100644
--- a/libbutl/sha256.cxx
+++ b/libbutl/sha256.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/sha256.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/sha256.mxx>
-#endif
+#include <libbutl/sha256.hxx>
 
 // C interface for sha256c.
 //
@@ -26,39 +24,13 @@ extern "C"
 #include "sha256c.c"
 }
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-
 #include <cctype>    // isxdigit()
+#include <cassert>
+#include <istream>
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.sha256;
-
-// Only imports additional to interface.
-#ifdef __cpp_lib_modules_ts
-import std.io;
-#endif
-
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
-import butl.utility; // *case()
-import butl.fdstream;
-#else
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#endif
+#include <libbutl/utility.hxx>      // *case()
+#include <libbutl/bufstreambuf.hxx>
 
 using namespace std;
 
@@ -85,12 +57,12 @@ namespace butl
   }
 
   void sha256::
-  append (ifdstream& is)
+  append (istream& is)
   {
-    fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+    bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
     assert (buf != nullptr);
 
-    while (is.peek () != ifdstream::traits_type::eof () && is.good ())
+    while (is.peek () != istream::traits_type::eof () && is.good ())
     {
       size_t n (buf->egptr () - buf->gptr ());
       append (buf->gptr (), n);
diff --git a/libbutl/sha256.mxx b/libbutl/sha256.hxx
index 9bc0971..566068f 100644
--- a/libbutl/sha256.mxx
+++ b/libbutl/sha256.hxx
@@ -1,35 +1,19 @@
-// file      : libbutl/sha256.mxx -*- C++ -*-
+// file      : libbutl/sha256.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
+#include <iosfwd>      // istream
 #include <cstddef>     // size_t
 #include <cstdint>
 #include <cstring>     // strlen(), memcpy()
 #include <type_traits> // enable_if, is_integral
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.sha256;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
-  class ifdstream;
-
   // SHA256 checksum calculator.
   //
   // For a single chunk of data a sum can be obtained in one line, for
@@ -101,11 +85,14 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Append stream.
     //
+    // Note that currently the stream is expected to be bufstreambuf-based
+    // (e.g., ifdstream).
+    //
     void
-    append (ifdstream&);
+    append (std::istream&);
 
     explicit
-    sha256 (ifdstream& i): sha256 () {append (i);}
+    sha256 (std::istream& i): sha256 () {append (i);}
 
     // Check if any data has been hashed.
     //
diff --git a/libbutl/small-allocator.mxx b/libbutl/small-allocator.hxx
index 5ef74be..429ba41 100644
--- a/libbutl/small-allocator.mxx
+++ b/libbutl/small-allocator.hxx
@@ -1,30 +1,16 @@
-// file      : libbutl/small-allocator.mxx -*- C++ -*-
+// file      : libbutl/small-allocator.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <cstddef>     // size_t
 #include <utility>     // move()
 #include <type_traits> // true_type, is_same
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_allocator;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Implementation of the allocator (and its buffer) for small containers.
   //
diff --git a/libbutl/small-forward-list.mxx b/libbutl/small-forward-list.hxx
index 6aa4986..8d1cf68 100644
--- a/libbutl/small-forward-list.mxx
+++ b/libbutl/small-forward-list.hxx
@@ -1,31 +1,18 @@
-// file      : libbutl/small-forward-list.mxx -*- C++ -*-
+// file      : libbutl/small-forward-list.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#ifndef __cpp_lib_modules_ts
 #include <cstddef>      // size_t
 #include <utility>      // move()
+#include <type_traits>  // is_nothrow_move_constructible
 #include <forward_list>
-#endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_forward_list;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Issues and limitations.
   //
@@ -115,14 +102,20 @@ LIBBUTL_MODEXPORT namespace butl
       return *this;
     }
 
+    // See small_vector for the move-constructor/assignment noexcept
+    // expressions reasoning.
+    //
     small_forward_list (small_forward_list&& v)
+#if !defined(_MSC_VER) || _MSC_VER > 1900
+      noexcept (std::is_nothrow_move_constructible<T>::value)
+#endif
       : base_type (allocator_type (this))
     {
       *this = std::move (v); // Delegate to operator=(&&).
     }
 
     small_forward_list&
-    operator= (small_forward_list&& v)
+    operator= (small_forward_list&& v) noexcept (false)
     {
       // VC14's implementation of operator=(&&) swaps pointers without regard
       // for allocator (fixed in 15).
diff --git a/libbutl/small-list.mxx b/libbutl/small-list.hxx
index ff62192..7cb51fd 100644
--- a/libbutl/small-list.mxx
+++ b/libbutl/small-list.hxx
@@ -1,31 +1,18 @@
-// file      : libbutl/small-list.mxx -*- C++ -*-
+// file      : libbutl/small-list.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#ifndef __cpp_lib_modules_ts
 #include <list>
 #include <cstddef>     // size_t
 #include <utility>     // move()
-#endif
+#include <type_traits> // is_nothrow_move_constructible
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.small_list;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Issues and limitations.
   //
@@ -117,14 +104,20 @@ LIBBUTL_MODEXPORT namespace butl
       return *this;
     }
 
+    // See small_vector for the move-constructor/assignment noexcept
+    // expressions reasoning.
+    //
     small_list (small_list&& v)
+#if !defined(__GLIBCXX__) && (!defined(_MSC_VER) || _MSC_VER > 1900)
+      noexcept (std::is_nothrow_move_constructible<T>::value)
+#endif
       : base_type (allocator_type (this))
     {
       *this = std::move (v); // Delegate to operator=(&&).
     }
 
     small_list&
-    operator= (small_list&& v)
+    operator= (small_list&& v) noexcept (false)
     {
       // libstdc++'s implementation prior to GCC 6 is broken (calls swap()).
       // Since there is no easy way to determine this library's version, for
@@ -136,7 +129,7 @@ LIBBUTL_MODEXPORT namespace butl
 #if defined(__GLIBCXX__) || (defined(_MSC_VER) && _MSC_VER <= 1900)
       this->clear ();
       for (T& x: v)
-        this->push_back (std::move (x));
+        this->push_back (std::move (x)); // Note: can throw bad_alloc.
       v.clear ();
 #else
       // Note: propagate_on_container_move_assignment = false
diff --git a/libbutl/small-vector-odb.hxx b/libbutl/small-vector-odb.hxx
index af9d96c..289ca38 100644
--- a/libbutl/small-vector-odb.hxx
+++ b/libbutl/small-vector-odb.hxx
@@ -5,7 +5,7 @@
 
 #include <odb/pre.hxx>
 
-#include <libbutl/small-vector.mxx>
+#include <libbutl/small-vector.hxx>
 
 #include <odb/container-traits.hxx>
 
diff --git a/libbutl/small-vector.mxx b/libbutl/small-vector.hxx
index 2a92182..44a3ef5 100644
--- a/libbutl/small-vector.mxx
+++ b/libbutl/small-vector.hxx
@@ -1,31 +1,18 @@
-// file      : libbutl/small-vector.mxx -*- C++ -*-
+// file      : libbutl/small-vector.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#ifndef __cpp_lib_modules_ts
 #include <vector>
 #include <cstddef>     // size_t
 #include <utility>     // move()
-#endif
-
-// Other includes.
+#include <type_traits> // is_nothrow_move_constructible
 
-#ifdef __cpp_modules_ts
-export module butl.small_vector;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.small_allocator;
-#else
-#include <libbutl/small-allocator.mxx>
-#endif
+#include <libbutl/small-allocator.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Issues and limitations.
   //
@@ -38,6 +25,9 @@ LIBBUTL_MODEXPORT namespace butl
   //
   // - swap() is deleted (see notes below).
   //
+  // - In contrast to std::vector, the references, pointers, and iterators
+  //   referring to elements are invalidated after moving from it.
+  //
   template <typename T, std::size_t N>
   class small_vector: private small_allocator_buffer<T, N>,
                       public std::vector<T, small_allocator<T, N>>
@@ -118,17 +108,46 @@ LIBBUTL_MODEXPORT namespace butl
       return *this;
     }
 
+    // Note that while the move constructor is implemented via the move
+    // assignment it may not throw if the value type is no-throw move
+    // constructible.
+    //
+    // Specifically, if v.size() > N then allocators evaluate as equal and the
+    // buffer ownership is transferred. Otherwise, the allocators do not
+    // evaluate as equal and the individual elements are move-constructed in
+    // the preallocated buffer.
+    //
+    // Also note that this constructor ends up calling
+    // base_type::operator=(base_type&&) whose noexcept expression evaluates
+    // to false (propagate_on_container_move_assignment and is_always_equal
+    // are false for small_allocator; see std::vector documentation for
+    // details). We, however, assume that the noexcept expression we use here
+    // is strict enough for all "sane" std::vector implementations since
+    // small_allocator never throws directly.
+    //
     small_vector (small_vector&& v)
+      noexcept (std::is_nothrow_move_constructible<T>::value)
       : base_type (allocator_type (this))
     {
       if (v.size () <= N)
         reserve ();
 
       *this = std::move (v); // Delegate to operator=(&&).
+
+      // Note that in contrast to the move assignment operator, the
+      // constructor must clear the other vector.
+      //
+      v.clear ();
     }
 
+    // Note that when size() <= N and v.size() > N, then allocators of this
+    // and other containers do not evaluate as equal. Thus, the memory for the
+    // new elements is allocated on the heap and so std::bad_alloc can be
+    // thrown. @@ TODO: maybe we could re-implement this case in terms of
+    // swap()?
+    //
     small_vector&
-    operator= (small_vector&& v)
+    operator= (small_vector&& v) noexcept (false)
     {
       // VC's implementation of operator=(&&) (both 14 and 15) frees the
       // memory and then reallocated with capacity equal to v.size(). This is
diff --git a/libbutl/standard-version.cxx b/libbutl/standard-version.cxx
index a9f5eb8..36f4830 100644
--- a/libbutl/standard-version.cxx
+++ b/libbutl/standard-version.cxx
@@ -1,41 +1,14 @@
 // file      : libbutl/standard-version.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/standard-version.mxx>
-#endif
+#include <libbutl/standard-version.hxx>
 
 #include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint>
-#include <cstddef>
-#include <ostream>
-
 #include <cstdlib>   // strtoull()
 #include <utility>   // move()
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.standard_version;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#endif
 
-import butl.utility;
-#else
-#include <libbutl/utility.mxx> // alnum()
-#endif
+#include <libbutl/utility.hxx> // alnum()
 
 using namespace std;
 
@@ -60,6 +33,7 @@ namespace butl
 
     const char* b (s.c_str () + p);
     char* e (nullptr);
+    errno = 0; // We must clear it according to POSIX.
     uint64_t v (strtoull (b, &e, 10)); // Can't throw.
 
     if (errno == ERANGE || b == e || v < min || v > max)
diff --git a/libbutl/standard-version.mxx b/libbutl/standard-version.hxx
index b86e3a9..e973352 100644
--- a/libbutl/standard-version.mxx
+++ b/libbutl/standard-version.hxx
@@ -1,31 +1,14 @@
-// file      : libbutl/standard-version.mxx -*- C++ -*-
+// file      : libbutl/standard-version.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
-
-// C includes.
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <cstdint> // uint*_t
 #include <cstddef> // size_t
 #include <ostream>
-#endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.standard_version;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-#else
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
@@ -39,7 +22,7 @@ import butl.optional;
 #  undef minor
 #endif
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // The build2 "standard version" (normal, earliest, and stub):
   //
@@ -221,7 +204,7 @@ LIBBUTL_MODEXPORT namespace butl
 
     // Create empty version.
     //
-    standard_version () {} // = default; @@ MOD VC
+    standard_version () = default;
   };
 
   // Try to parse a string as a standard version returning nullopt if invalid.
diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx
index 5d5ec47..af5c1b3 100644
--- a/libbutl/string-parser.cxx
+++ b/libbutl/string-parser.cxx
@@ -1,33 +1,7 @@
 // file      : libbutl/string-parser.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/string-parser.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstddef>
-#include <utility>   // move()
-#include <stdexcept>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.string_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
-
-#endif
+#include <libbutl/string-parser.hxx>
 
 using namespace std;
 
@@ -40,7 +14,7 @@ namespace butl
     inline static bool
     space (char c) noexcept
     {
-      return c == ' ' || c == '\t';
+      return c == ' ' || c == '\t' || c == '\n' || c == '\r';
     }
 
     vector<pair<string, size_t>>
diff --git a/libbutl/string-parser.mxx b/libbutl/string-parser.hxx
index 4ff1590..9fc20c0 100644
--- a/libbutl/string-parser.mxx
+++ b/libbutl/string-parser.hxx
@@ -1,32 +1,17 @@
-// file      : libbutl/string-parser.mxx -*- C++ -*-
+// file      : libbutl/string-parser.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
 #include <cstddef>   // size_t
 #include <utility>   // pair
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.string_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   namespace string_parser
   {
diff --git a/libbutl/string-table.mxx b/libbutl/string-table.hxx
index 78c6cd6..010fb01 100644
--- a/libbutl/string-table.mxx
+++ b/libbutl/string-table.hxx
@@ -1,36 +1,18 @@
-// file      : libbutl/string-table.mxx -*- C++ -*-
+// file      : libbutl/string-table.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <vector>
+#include <cassert>
 #include <unordered_map>
 
-#include <limits>  // numeric_limits
-#include <cstddef> // size_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.string_table;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.multi_index;
-#else
-#include <libbutl/multi-index.mxx>
-#endif
+#include <libbutl/multi-index.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // A pool of strings and, optionally, other accompanying data in which each
   // entry is assigned an individual index (or id) of type I (e.g., uint8_t,
diff --git a/libbutl/string-table.txx b/libbutl/string-table.txx
index 4db0a6b..8416b48 100644
--- a/libbutl/string-table.txx
+++ b/libbutl/string-table.txx
@@ -1,6 +1,9 @@
 // file      : libbutl/string-table.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
+#include <limits>  // numeric_limits
+#include <cstddef> // size_t
+
 namespace butl
 {
   template <typename I, typename D>
diff --git a/libbutl/tab-parser.cxx b/libbutl/tab-parser.cxx
index cca2792..d7e5a14 100644
--- a/libbutl/tab-parser.cxx
+++ b/libbutl/tab-parser.cxx
@@ -1,39 +1,12 @@
 // file      : libbutl/tab-parser.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/tab-parser.mxx>
-#endif
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <vector>
-#include <cstdint>
-#include <stdexcept>
+#include <libbutl/tab-parser.hxx>
 
 #include <istream>
 #include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.tab_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
-import butl.string_parser;
-#else
-#include <libbutl/string-parser.mxx>
-#endif
+#include <libbutl/string-parser.hxx>
 
 using namespace std;
 
diff --git a/libbutl/tab-parser.mxx b/libbutl/tab-parser.hxx
index a7f7e01..2dc612b 100644
--- a/libbutl/tab-parser.mxx
+++ b/libbutl/tab-parser.hxx
@@ -1,33 +1,17 @@
-// file      : libbutl/tab-parser.mxx -*- C++ -*-
+// file      : libbutl/tab-parser.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <iosfwd>
 #include <string>
 #include <vector>
 #include <cstdint>   // uint64_t
 #include <stdexcept> // runtime_error
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.tab_parser;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   class LIBBUTL_SYMEXPORT tab_parsing: public std::runtime_error
   {
diff --git a/libbutl/target-triplet.cxx b/libbutl/target-triplet.cxx
index db71e3c..e28f119 100644
--- a/libbutl/target-triplet.cxx
+++ b/libbutl/target-triplet.cxx
@@ -1,33 +1,9 @@
 // file      : libbutl/target-triplet.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/target-triplet.mxx>
-#endif
-
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <ostream>
+#include <libbutl/target-triplet.hxx>
 
 #include <stdexcept> // invalid_argument
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.target_triplet;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
 
 using namespace std;
 
@@ -112,6 +88,13 @@ namespace butl
     if (system.front () == '-' || system.back () == '-')
       bad ("invalid os/kernel/abi");
 
+    // Canonicalize SYSTEM.
+    //
+    if (system == "linux")
+      system = "linux-gnu"; // Per config.sub.
+    else if (system == "windows-gnu" && vendor == "w64") // Clang's innovation.
+      system = "mingw32";
+
     // Extract VERSION for some recognized systems.
     //
     string::size_type v (0);
@@ -129,6 +112,14 @@ namespace butl
       version.assign (system, v, string::npos);
       system.resize (system.size () - version.size ());
     }
+    else if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+    {
+      // Handle iosNN[-...].
+      //
+      string::size_type p (system.find ('-'));
+      version.assign (system, 3, p == string::npos ? p : p - 3);
+      system.erase (3, version.size ());
+    }
 
     // Determine class for some recognized systems.
     //
@@ -136,6 +127,8 @@ namespace butl
       class_ = "linux";
     else if (vendor == "apple" && system == "darwin")
       class_ = "macos";
+    else if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+      class_ = "ios";
     else if (system == "freebsd" ||
              system == "openbsd" ||
              system == "netbsd")
@@ -167,7 +160,10 @@ namespace butl
 
     if (!version.empty ())
     {
-      r += version;
+      if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+        r.insert (r.size () - system.size () + 3, version);
+      else
+        r += version;
     }
 
     return r;
@@ -191,7 +187,10 @@ namespace butl
 
     if (!version.empty ())
     {
-      r += version;
+      if (vendor == "apple" && system.compare (0, 3, "ios") == 0)
+        r.insert (r.size () - system.size () + 3, version);
+      else
+        r += version;
     }
 
     return r;
diff --git a/libbutl/target-triplet.mxx b/libbutl/target-triplet.hxx
index 1ecc7e5..bfb2c00 100644
--- a/libbutl/target-triplet.mxx
+++ b/libbutl/target-triplet.hxx
@@ -1,30 +1,14 @@
-// file      : libbutl/target-triplet.mxx -*- C++ -*-
+// file      : libbutl/target-triplet.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <ostream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.target_triplet;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS
   // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus
@@ -91,14 +75,19 @@ LIBBUTL_MODEXPORT namespace butl
   // arm-softfloat-linux-gnu           arm     softfloat  linux-gnu
   // i686-pc-mingw32                   i686               mingw32
   // i686-w64-mingw32                  i686    w64        mingw32
+  // i686-w64-windows-gnu              i686    w64        mingw32
   // i686-lfs-linux-gnu                i686    lfs        linux-gnu
   // x86_64-unknown-linux-gnu          x86_64             linux-gnu
+  // x86_64-redhat-linux               x86_64  redhat     linux-gnu
   // x86_64-linux-gnux32               x86_64             linux-gnux32
   // x86_64-microsoft-win32-msvc14.0   x86_64  microsoft  win32-msvc     14.0
   // x86_64-pc-windows-msvc            x86_64             windows-msvc
   // x86_64-pc-windows-msvc19.11.25547 x86_64             windows-msvc   19.11.25547
   // wasm32-unknown-emscripten         wasm32             emscripten
   // arm64-apple-darwin20.1.0          aarch64 apple      darwin         20.1.0
+  // arm64-apple-ios14.4               aarch64 apple      ios            14.4
+  // arm64-apple-ios14.4-simulator     aarch64 apple      ios-simulator  14.4
+  // x86_64-apple-ios14.4-macabi       x86_64  apple      ios-macabi     14.4
   //
   // Similar to version splitting, for certain commonly-used targets we also
   // derive the "target class" which can be used as a shorthand, more
@@ -110,6 +99,9 @@ LIBBUTL_MODEXPORT namespace butl
   // macos     *-apple-darwin*
   // bsd       *-*-(freebsd|openbsd|netbsd)*
   // windows   *-*-win32-* | *-*-windows-* | *-*-mingw32
+  // ios       *-apple-ios*
+  //
+  // NOTE: see also os_release if adding anything new here.
   //
   // References:
   //
@@ -160,7 +152,7 @@ LIBBUTL_MODEXPORT namespace butl
     explicit
     target_triplet (const std::string&);
 
-    target_triplet () {} // = default; @@ MOD VC
+    target_triplet () = default;
   };
 
   inline bool
diff --git a/libbutl/timestamp.cxx b/libbutl/timestamp.cxx
index 9be2a82..260fbef 100644
--- a/libbutl/timestamp.cxx
+++ b/libbutl/timestamp.cxx
@@ -1,9 +1,7 @@
 // file      : libbutl/timestamp.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/timestamp.mxx>
-#endif
+#include <libbutl/timestamp.hxx>
 
 #include <time.h>  // localtime_{r,s}(), gmtime_{r,s}(), strptime(), timegm()
 #include <errno.h> // EINVAL
@@ -25,22 +23,18 @@
 #ifdef __GLIBCXX__
 extern "C"
 {
-#include "strptime.c"
+#  include "strptime.c"
 }
 #else
-#include <locale.h> // LC_ALL
+#  include <locale.h> // LC_ALL
 #endif
 #endif
 
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <chrono>
-
-#include <ctime>        // tm, time_t, mktime(), strftime()[__GLIBCXX__]
+#include <ctime>        // tm, time_t, mktime(), strftime()[libstdc++]
 #include <cstdlib>      // strtoull()
-#include <sstream>
+#include <sstream>      // ostringstream, stringstream[VC]
 #include <iomanip>      // put_time(), setw(), dec, right
-#include <cstring>      // strlen(), memcpy()
+#include <cstring>      // strlen(), memcpy(), strchr()[VC]
 #include <ostream>
 #include <utility>      // pair, make_pair()
 #include <stdexcept>    // runtime_error
@@ -49,30 +43,14 @@ extern "C"
 //
 #ifdef _WIN32
 #ifndef __GLIBCXX__
-#include <locale>
-#include <clocale>
-#include <iomanip>
-#endif
+#  include <ios>
+#  include <locale>
+#  include <clocale>
+#  include <iomanip>
 #endif
 #endif
 
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.timestamp;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import butl.utility;
-#else
-#include <libbutl/utility.mxx> // throw_generic_error()
-#endif
+#include <libbutl/utility.hxx> // throw_generic_error()
 
 using namespace std;
 
@@ -180,24 +158,85 @@ strptime (const char* input, const char* format, tm* time)
 {
   // VC std::get_time()-based implementation.
   //
-  istringstream is (input);
+  // Note that the major difference in semantics of strptime() and
+  // std::get_time() is that the former always fails if the format string is
+  // not fully processed, while the latter can succeed in such a case,
+  // specifically if the end of the stream is reached after a conversion
+  // specifier was successfully applied. See this post for some background:
+  //
+  // https://stackoverflow.com/questions/67060956/what-is-the-correct-behavior-of-stdget-time-for-short-input
+  //
+  // The consequence of this fact is that there is no easy way to detect if
+  // the format was fully processed when the end of input is reached. It seems
+  // that the only way to resolve this ambiguity is to append some end marker
+  // to both the input and format and re-parse. We can dedicate some character
+  // that is unlikely to be used in the time format/input (for example '\x1')
+  // to serve as an end marker.
+  //
+  // Alternatively, we can abandon the use of std::get_time() altogether and,
+  // for example, use a FreeBSD-based strptime() implementation. This feels a
+  // bit too radical at the moment, though.
+  //
+  const char em ('\x1');
+
+  if (strchr (input, em) != nullptr || strchr (format, em) != nullptr)
+    return nullptr;
+
+  stringstream ss (input); // Input/output stream.
 
   // The original strptime() function behaves according to the process' C
   // locale (set with std::setlocale()), which can differ from the process C++
   // locale (set with std::locale::global()).
   //
-  is.imbue (locale (setlocale (LC_ALL, nullptr)));
+  ss.imbue (locale (setlocale (LC_ALL, nullptr)));
 
-  if (!(is >> get_time (time, format)))
+  // Bail out on the parsing error.
+  //
+  if (!(ss >> get_time (time, format)))
     return nullptr;
-  else
-    // tellg() behaves as UnformattedInputFunction, so returns failure status
-    // if eofbit is set.
-    //
-    return const_cast<char*> (
-      input + (is.eof ()
-               ? strlen (input)
-               : static_cast<size_t> (is.tellg ())));
+
+  // If the end of input is not reached then the format string is fully
+  // processed.
+  //
+  if (!ss.eof ())
+    return const_cast<char*> (input + static_cast<size_t> (ss.tellg ()));
+
+  // Since eof is reached, we cannot say if the format string was fully
+  // processed or not. For example:
+  //
+  // %b %Y     - format
+  // Feb 2016  - eofbit is set with a format fully processed
+  // Feb       - eofbit is set with a format partially processed
+  //
+  // So append the end marker character to both input and format and re-parse.
+  //
+  ss.clear ();                 // Clear eof.
+  ss.seekp (0, ios_base::end); // Position to the end for writing.
+  ss.put (em);                 // Append the end marker.
+  ss.seekg (0);                // Rewind for reading.
+
+  string fm (format);
+  fm += em;                    // Append the end marker.
+
+  // Fail if the input is "shorter" than the format. For example:
+  //
+  // %b %Y\x1  - format
+  // Feb\x1    - stream
+  //
+  // Note that we can reuse the time object for re-parsing, since on success
+  // its fields will be overwritten with the same values.
+  //
+  if (!(ss >> get_time (time, fm.c_str ())))
+    return nullptr;
+
+  // We would fail earlier otherwise.
+  //
+  assert (ss.eof () || ss.get () == stringstream::traits_type::eof ());
+
+  // The whole input is consumed. Note that we cannot use tellg() here since
+  // it behaves as UnformattedInputFunction and so fails once eofbit is set.
+  //
+  return const_cast<char*> (input + strlen (input));
 }
 #endif
 
diff --git a/libbutl/timestamp.mxx b/libbutl/timestamp.hxx
index 141e13d..2714a0d 100644
--- a/libbutl/timestamp.mxx
+++ b/libbutl/timestamp.hxx
@@ -1,34 +1,15 @@
-// file      : libbutl/timestamp.mxx -*- C++ -*-
+// file      : libbutl/timestamp.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <iosfwd>
 #include <string>
 #include <chrono>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.timestamp;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-//@@ MOD TODO: should't we re-export chrono (for somparison operator, etc)?
-//   or ADL should kick in?
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // On all three main platforms that we target (GNU/Linux, Windows (both
   // VC++ and GCC/MinGW64), and MacOS X) with recent C++ runtimes,
@@ -61,21 +42,12 @@ LIBBUTL_MODEXPORT namespace butl
   // unreal and all of them are less than any non-special value (strictly
   // speaking unreal is no greater (older) than any real value).
   //
-#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym.
-  inline const timestamp::rep timestamp_unknown_rep     = -1;
-  inline const timestamp      timestamp_unknown         = timestamp (duration (-1));
-  inline const timestamp::rep timestamp_nonexistent_rep = 0;
-  inline const timestamp      timestamp_nonexistent     = timestamp (duration (0));
-  inline const timestamp::rep timestamp_unreal_rep      = 1;
-  inline const timestamp      timestamp_unreal          = timestamp (duration (1));
-#else
   const timestamp::rep timestamp_unknown_rep     = -1;
   const timestamp      timestamp_unknown         = timestamp (duration (-1));
   const timestamp::rep timestamp_nonexistent_rep = 0;
   const timestamp      timestamp_nonexistent     = timestamp (duration (0));
   const timestamp::rep timestamp_unreal_rep      = 1;
   const timestamp      timestamp_unreal          = timestamp (duration (1));
-#endif
 
   // Print human-readable representation of the timestamp.
   //
diff --git a/libbutl/unicode.cxx b/libbutl/unicode.cxx
index 4219846..294bb3f 100644
--- a/libbutl/unicode.cxx
+++ b/libbutl/unicode.cxx
@@ -1,32 +1,11 @@
 // file      : libbutl/unicode.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/unicode.mxx>
-#endif
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <ostream>
-#include <cstdint>
+#include <libbutl/unicode.hxx>
 
 #include <cstddef>   // size_t
 #include <utility>   // pair
 #include <algorithm> // lower_bound()
-#endif
-
-#ifdef __cpp_modules_ts
-module butl.unicode;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-#endif
 
 using namespace std;
 
diff --git a/libbutl/unicode.mxx b/libbutl/unicode.hxx
index b846476..8d99d0e 100644
--- a/libbutl/unicode.mxx
+++ b/libbutl/unicode.hxx
@@ -1,31 +1,15 @@
-// file      : libbutl/unicode.mxx -*- C++ -*-
+// file      : libbutl/unicode.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <ostream>
 #include <cstdint> // uint16_t
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.unicode;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Note that the Unicode Standard requires the surrogates ([D800 DFFF]) to
   // only be used in the context of the UTF-16 character encoding form. Thus,
diff --git a/libbutl/url.mxx b/libbutl/url.hxx
index 713bc3e..5721cfd 100644
--- a/libbutl/url.mxx
+++ b/libbutl/url.hxx
@@ -1,50 +1,23 @@
-// file      : libbutl/url.mxx -*- C++ -*-
+// file      : libbutl/url.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
+#include <cassert>
+#include <cstddef>  // size_t
 #include <cstdint>  // uint*_t
 #include <utility>  // move()
 #include <ostream>
 #include <iterator> // back_inserter
 
-#include <cstddef>   // size_t
-#include <stdexcept> // invalid_argument
-#include <algorithm> // find(), find_if()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.url;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility;
-import butl.optional;
-
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx>
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // RFC3986 Uniform Resource Locator (URL).
   //
diff --git a/libbutl/url.ixx b/libbutl/url.ixx
index b823ee7..19d54c7 100644
--- a/libbutl/url.ixx
+++ b/libbutl/url.ixx
@@ -1,7 +1,7 @@
 // file      : libbutl/url.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl
 {
   // url_traits
   //
diff --git a/libbutl/url.txx b/libbutl/url.txx
index 0951e80..b2caa37 100644
--- a/libbutl/url.txx
+++ b/libbutl/url.txx
@@ -1,7 +1,12 @@
 // file      : libbutl/url.txx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+#include <stdexcept> // invalid_argument
+#include <algorithm> // find(), find_if()
+
+#include <libbutl/small-vector.hxx>
+
+namespace butl
 {
   // Convenience functions.
   //
diff --git a/libbutl/utf8.mxx b/libbutl/utf8.hxx
index 15e8ded..697f77a 100644
--- a/libbutl/utf8.mxx
+++ b/libbutl/utf8.hxx
@@ -1,33 +1,17 @@
-// file      : libbutl/utf8.mxx -*- C++ -*-
+// file      : libbutl/utf8.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <cstdint> // uint8_t
 #include <utility> // pair
-#endif
-
-// Other includes.
 
-#ifdef __cpp_modules_ts
-export module butl.utf8;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.unicode;
-#else
-#include <libbutl/unicode.mxx>
-#endif
+#include <libbutl/unicode.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Here and below we will refer to bytes that encode a singe Unicode
   // codepoint as "UTF-8 byte sequence" ("UTF-8 sequence" or "byte sequence"
diff --git a/libbutl/utf8.ixx b/libbutl/utf8.ixx
index 3d2e092..10624f8 100644
--- a/libbutl/utf8.ixx
+++ b/libbutl/utf8.ixx
@@ -116,7 +116,7 @@ namespace butl
         {
           if (b < 0xFE)
           {
-            *what  = b < 0xFC ? "5" : "6";
+            *what  = b < 0xFC ? '5' : '6';
             *what += "-byte length UTF-8 sequence";
           }
           else
diff --git a/libbutl/utility.cxx b/libbutl/utility.cxx
index bbeafd2..b03a8f8 100644
--- a/libbutl/utility.cxx
+++ b/libbutl/utility.cxx
@@ -1,44 +1,23 @@
 // file      : libbutl/utility.cxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
-#include <libbutl/utility.mxx>
-#endif
+#include <libbutl/utility.hxx>
 
 #ifdef _WIN32
 #include <libbutl/win32-utility.hxx>
 #endif
 
-#include <stdlib.h> // setenv(), unsetenv(), _putenv()
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <utility>
+#include <stdlib.h> // getenv(), setenv(), unsetenv(), _putenv()
 
+#include <cstring>      // strncmp(), strlen()
 #include <ostream>
 #include <type_traits>  // enable_if, is_base_of
 #include <system_error>
-#endif
 
 #include <libbutl/ft/lang.hxx>
 #include <libbutl/ft/exception.hxx>
 
-#ifdef __cpp_modules_ts
-module butl.utility;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-#endif
-
-import butl.utf8;
-#else
-#include <libbutl/utf8.mxx>
-#endif
+#include <libbutl/utf8.hxx>
 
 namespace butl
 {
@@ -192,13 +171,42 @@ namespace butl
     for (; i != n && ws (l[i]);     ++i) ;
     for (; n != i && ws (l[n - 1]); --n) ;
 
-    if (i != 0)
+    if (n != l.size ()) l.resize (n);
+    if (i != 0)         l.erase (0, i);
+
+    return l;
+  }
+
+  string&
+  trim_left (string& l)
+  {
+    auto ws = [] (char c )
     {
-      string s (l, i, n - i);
-      l.swap (s);
-    }
-    else if (n != l.size ())
-      l.resize (n);
+      return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+    };
+
+    size_t i (0), n (l.size ());
+
+    for (; i != n && ws (l[i]); ++i) ;
+
+    if (i != 0) l.erase (0, i);
+
+    return l;
+  }
+
+  string&
+  trim_right (string& l)
+  {
+    auto ws = [] (char c )
+    {
+      return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+    };
+
+    size_t i (0), n (l.size ());
+
+    for (; n != i && ws (l[n - 1]); --n) ;
+
+    if (n != l.size ()) l.resize (n);
 
     return l;
   }
@@ -332,6 +340,55 @@ namespace butl
       s.resize (d - s.begin ());
   }
 
+#ifdef __cpp_thread_local
+  thread_local
+#else
+  __thread
+#endif
+  const char* const* thread_env_ = nullptr; // No overrides by default.
+
+#ifdef _WIN32 // Out-of-line accessors (thread_env_ itself is not exported).
+  const char* const*
+  thread_env () {return thread_env_;}
+
+  void
+  thread_env (const char* const* v) {thread_env_ = v;}
+#endif
+
+  optional<std::string>
+  getenv (const char* name)
+  {
+    if (const char* const* vs = thread_env_) // Thread has overrides.
+    {
+      size_t n (strlen (name));
+
+      for (; *vs != nullptr; ++vs) // Stop at the terminating NULL entry.
+      {
+        const char* v (*vs);
+
+        // Note that on Windows variable names are case-insensitive.
+        //
+#ifdef _WIN32
+        if (icasecmp (name, v, n) == 0)
+#else
+        if (strncmp (name, v, n) == 0)
+#endif
+        {
+          switch (v[n]) // Anything else means v is a longer name.
+          {
+          case '=':  return string (v + n + 1); // Set (<name>=<value>).
+          case '\0': return nullopt;            // Unset (<name>).
+          }
+        }
+      }
+    }
+
+    if (const char* r = ::getenv (name)) // Not overridden, ask the process.
+      return std::string (r);
+
+    return nullopt;
+  }
+
   void
   setenv (const string& name, const string& value)
   {
diff --git a/libbutl/utility.mxx b/libbutl/utility.hxx
index 8a0059a..9eb052d 100644
--- a/libbutl/utility.mxx
+++ b/libbutl/utility.hxx
@@ -1,9 +1,7 @@
-// file      : libbutl/utility.mxx -*- C++ -*-
+// file      : libbutl/utility.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
 #ifndef _WIN32
 #  include <strings.h> // strcasecmp(), strncasecmp()
@@ -11,7 +9,6 @@
 #  include <string.h> // _stricmp(), _strnicmp()
 #endif
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
 #include <iosfwd>       // ostream
 #include <istream>
@@ -20,29 +17,17 @@
 #include <cstring>      // strcmp(), strlen()
 #include <exception>    // exception, uncaught_exception[s]()
 //#include <functional> // hash
-#endif
 
 #include <libbutl/ft/lang.hxx>      // thread_local
 #include <libbutl/ft/exception.hxx> // uncaught_exceptions
 
-#ifdef __cpp_modules_ts
-export module butl.utility;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.utf8;
-import butl.unicode;
-import butl.optional;
-#else
-#include <libbutl/utf8.mxx>
-#include <libbutl/unicode.mxx>
-#include <libbutl/optional.mxx>
-#endif
+#include <libbutl/utf8.hxx>
+#include <libbutl/unicode.hxx>
+#include <libbutl/optional.hxx>
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // Throw std::system_error with generic_category or system_category,
   // respectively.
@@ -147,11 +132,13 @@ LIBBUTL_MODEXPORT namespace butl
   bool digit  (char);
   bool alnum  (char);
   bool xdigit (char);
+  bool wspace (char);
 
   bool alpha  (wchar_t);
   bool digit  (wchar_t);
   bool alnum  (wchar_t);
   bool xdigit (wchar_t);
+  bool wspace (wchar_t);
 
   // Basic string utilities.
   //
@@ -161,13 +148,31 @@ LIBBUTL_MODEXPORT namespace butl
   LIBBUTL_SYMEXPORT std::string&
   trim (std::string&);
 
+  LIBBUTL_SYMEXPORT std::string&
+  trim_left (std::string&);
+
+  LIBBUTL_SYMEXPORT std::string&
+  trim_right (std::string&);
+
   inline std::string
   trim (std::string&& s)
   {
     return move (trim (s));
   }
 
-  // Find the beginning and end poistions of the next word. Return the size
+  inline std::string
+  trim_left (std::string&& s)
+  {
+    return move (trim_left (s));
+  }
+
+  inline std::string
+  trim_right (std::string&& s)
+  {
+    return move (trim_right (s));
+  }
+
+  // Find the beginning and end positions of the next word. Return the size
   // of the word or 0 and set b = e = n if there are no more words. For
   // example:
   //
@@ -185,6 +190,24 @@ LIBBUTL_MODEXPORT namespace butl
   //
   // The second version examines up to the n'th character in the string.
   //
+  // The third version, instead of skipping consecutive delimiters, treats
+  // them as separating empty words. The additional m variable contains an
+  // unspecified internal state and should be initialized to 0. Note that in
+  // this case you should use the (b == n) condition to detect the end. Note
+  // also that a leading delimiter is considered as separating an empty word
+  // from the rest and the trailing delimiter is considered as separating the
+  // rest from an empty word. For example, this is how to parse lines while
+  // observing blanks:
+  //
+  // for (size_t b (0), e (0), m (0), n (s.size ());
+  //      next_word (s, n, b, e, m, '\n', '\r'), b != n; )
+  // {
+  //   string l (s, b, e - b);
+  // }
+  //
+  // For string "\na\n" this code will observe the {"", "a", ""} words. And
+  // for just "\n" it will observe the {"", ""} words.
+  //
   std::size_t
   next_word (const std::string&, std::size_t& b, std::size_t& e,
              char d1 = ' ', char d2 = '\0');
@@ -193,6 +216,11 @@ LIBBUTL_MODEXPORT namespace butl
   next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e,
              char d1 = ' ', char d2 = '\0');
 
+  std::size_t
+  next_word (const std::string&, std::size_t n,
+             std::size_t& b, std::size_t& e, std::size_t& m,
+             char d1 = ' ', char d2 = '\0');
+
   // Sanitize a string to only contain characters valid in an identifier
   // (ASCII alphanumeric plus `_`) replacing all others with `_`.
   //
@@ -266,17 +294,82 @@ LIBBUTL_MODEXPORT namespace butl
 
   // Environment variables.
   //
-  optional<std::string>
-  getenv (const std::string&);
+  // Our getenv() wrapper (as well as the relevant process startup functions)
+  // have a notion of a "thread environment", that is, thread-specific
+  // environment variables. However, unlike the process environment (in the
+  // form of the environ array), the thread environment is specified as a set
+  // of overrides over the process environment (sets and unsets), the same as
+  // for the process startup.
+  //
+  // See also path_traits::thread_current_directory().
+  //
+  extern
+#ifdef __cpp_thread_local
+  thread_local
+#else
+  __thread
+#endif
+  const char* const* thread_env_;
+
+  // On Windows one cannot export a thread-local variable so we have to
+  // use wrapper functions.
+  //
+#ifdef _WIN32
+  LIBBUTL_SYMEXPORT const char* const*
+  thread_env ();
+
+  LIBBUTL_SYMEXPORT void
+  thread_env (const char* const*);
+#else
+  const char* const*
+  thread_env ();
+
+  void
+  thread_env (const char* const*);
+#endif
+
+  struct auto_thread_env // Scoped change of the thread environment.
+  {
+    optional<const char* const*> prev_env; // Environment to restore, if any.
+
+    auto_thread_env () = default; // Change (and restore) nothing.
+
+    explicit
+    auto_thread_env (const char* const*); // Switch to these overrides.
+
+    // Move-to-empty-only type.
+    //
+    auto_thread_env (auto_thread_env&&) noexcept;
+    auto_thread_env& operator= (auto_thread_env&&) noexcept;
+
+    auto_thread_env (const auto_thread_env&) = delete;
+    auto_thread_env& operator= (const auto_thread_env&) = delete;
+
+    ~auto_thread_env (); // Restore prev_env, if present.
+  };
+
+  // Get the environment variable value taking into account the current
+  // thread's overrides (thread_env). Return nullopt if it is not set.
+  //
+  LIBBUTL_SYMEXPORT optional<std::string>
+  getenv (const char*);
+
+  inline optional<std::string>
+  getenv (const std::string& n)
+  {
+    return getenv (n.c_str ());
+  }
 
-  // Throw system_error on failure.
+  // Set the process environment variable. Best done before starting any
+  // threads (see thread_env). Throw system_error on failure.
   //
   // Note that on Windows setting an empty value unsets the variable.
   //
   LIBBUTL_SYMEXPORT void
   setenv (const std::string& name, const std::string& value);
 
-  // Throw system_error on failure.
+  // Unset the process environment variable. Best done before starting any
+  // threads (see thread_env). Throw system_error on failure.
   //
   LIBBUTL_SYMEXPORT void
   unsetenv (const std::string&);
@@ -477,7 +570,7 @@ LIBBUTL_MODEXPORT namespace butl
 #endif
 }
 
-LIBBUTL_MODEXPORT namespace std
+namespace std
 {
   // Sanitize the exception description before printing. This includes:
   //
diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx
index fa37a14..fda1ce5 100644
--- a/libbutl/utility.ixx
+++ b/libbutl/utility.ixx
@@ -1,13 +1,10 @@
 // file      : libbutl/utility.ixx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_lib_modules_ts
 #include <cctype>    // toupper(), tolower(), is*()
 #include <cwctype>   // isw*()
-#include <cstdlib>   // getenv()
 #include <algorithm> // for_each()
 #include <stdexcept> // invalid_argument
-#endif
 
 namespace butl
 {
@@ -146,6 +143,12 @@ namespace butl
   }
 
   inline bool
+  wspace (char c)
+  {
+    return std::isspace (static_cast<unsigned char> (c)); // Avoid UB if c < 0.
+  }
+
+  inline bool
   alpha (wchar_t c)
   {
     return std::iswalpha (c);
@@ -169,6 +172,12 @@ namespace butl
     return std::iswxdigit (c);
   }
 
+  inline bool
+  wspace (wchar_t c)
+  {
+    return std::iswspace (c) != 0; // Non-zero means whitespace.
+  }
+
   inline std::size_t
   next_word (const std::string& s, std::size_t& b, std::size_t& e,
              char d1, char d2)
@@ -176,7 +185,7 @@ namespace butl
     return next_word (s, s.size (), b, e, d1, d2);
   }
 
-  inline size_t
+  inline std::size_t
   next_word (const std::string& s,
              std::size_t n, std::size_t& b, std::size_t& e,
              char d1, char d2)
@@ -201,6 +210,66 @@ namespace butl
     return e - b;
   }
 
+  inline std::size_t
+  next_word (const std::string& s,
+             std::size_t n, std::size_t& b, std::size_t& e, std::size_t& m,
+             char d1, char d2)
+  {
+    // An empty word will necessarily be represented as b and e being the
+    // position of a delimiter. Consider these corner cases (in all three we
+    // should produce two words):
+    //
+    // \n
+    // a\n
+    // \na
+    //
+    // It feels sensible to represent an empty word as the position of the
+    // trailing delimiter except if it is the last character (the first two
+    // cases). Thus the additional m state: 0 or 1 is the number of delimiters
+    // to skip before parsing the next word while 2 means the word just
+    // returned ended with the string's last character being a delimiter and
+    // we still need to fake a trailing empty word at that delimiter.
+
+    if (b != e)
+      b = e; // Continue from the end of the previous word.
+
+    if (m > 1)
+    {
+      --m;      // Back to 1 so that the next call skips this delimiter.
+      return 0; // The faked empty word (b == e).
+    }
+
+    // Skip the leading delimiter, if any.
+    //
+    b += m;
+
+    if (b == n)
+    {
+      e = n;    // No more words (b == e == n).
+      return 0;
+    }
+
+    // Find first trailing delimiter.
+    //
+    m = 0; // Stays 0 if the word runs up to n without a delimiter.
+    for (e = b; e != n; ++e)
+    {
+      if (s[e] == d1 || s[e] == d2)
+      {
+        m = 1;
+
+        // Handle the special delimiter as the last character case.
+        //
+        if (e + 1 == n)
+          ++m;
+
+        break;
+      }
+    }
+
+    return e - b;
+  }
+
   inline std::string&
   sanitize_identifier (std::string& s)
   {
@@ -228,7 +297,7 @@ namespace butl
   inline void
   sanitize_strlit (const std::string& s, std::string& o)
   {
-    for (size_t i (0), j;; i = j + 1)
+    for (std::size_t i (0), j;; i = j + 1)
     {
       j = s.find_first_of ("\\\"\n", i);
       o.append (s.c_str () + i, (j == std::string::npos ? s.size () : j) - i);
@@ -333,13 +402,58 @@ namespace butl
     return utf8_length_impl (s, nullptr, ts, wl).has_value ();
   }
 
-  inline optional<std::string>
-  getenv (const std::string& name)
+#ifndef _WIN32 // On Windows these are defined out-of-line (utility.cxx).
+  inline const char* const*
+  thread_env ()
+  {
+    return thread_env_; // Current thread's overrides (may be NULL).
+  }
+
+  inline void
+  thread_env (const char* const* v)
+  {
+    thread_env_ = v; // Only stores the pointer, no copy is made.
+  }
+#endif
+
+  // auto_thread_env
+  //
+  inline auto_thread_env::
+  auto_thread_env (const char* const* new_env)
   {
-    if (const char* r = std::getenv (name.c_str ()))
-      return std::string (r);
+    const char* const* cur_env (thread_env ());
 
-    return nullopt;
+    if (cur_env != new_env)
+    {
+      prev_env = cur_env;
+      thread_env (new_env);
+    }
+  }
+
+  inline auto_thread_env::
+  auto_thread_env (auto_thread_env&& x) noexcept
+      : prev_env (std::move (x.prev_env))
+  {
+    x.prev_env = nullopt; // Make sure x's destructor restores nothing.
+  }
+
+  inline auto_thread_env& auto_thread_env::
+  operator= (auto_thread_env&& x) noexcept
+  {
+    if (this != &x)
+    {
+      prev_env = std::move (x.prev_env); // Our old value is not restored.
+      x.prev_env = nullopt;              // Nothing left for x to restore.
+    }
+
+    return *this;
+  }
+
+  inline auto_thread_env::
+  ~auto_thread_env ()
+  {
+    if (prev_env)
+      thread_env (*prev_env);
   }
 
   template <typename F, typename P>
diff --git a/libbutl/uuid-linux.cxx b/libbutl/uuid-linux.cxx
index 7689088..82af2e9 100644
--- a/libbutl/uuid-linux.cxx
+++ b/libbutl/uuid-linux.cxx
@@ -13,7 +13,7 @@
 #include <utility>      // move()
 #include <system_error>
 
-#include <libbutl/utility.mxx> // function_cast()
+#include <libbutl/utility.hxx> // function_cast()
 
 using namespace std;
 
diff --git a/libbutl/uuid-openbsd.cxx b/libbutl/uuid-openbsd.cxx
new file mode 100644
index 0000000..b64436b
--- /dev/null
+++ b/libbutl/uuid-openbsd.cxx
@@ -0,0 +1,80 @@
+// file      : libbutl/uuid-openbsd.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_BOOTSTRAP
+
+#include <libbutl/uuid.hxx>
+
+#include <uuid.h>
+
+#include <errno.h>
+
+#include <cassert>
+#include <cstring>      // memcpy()
+#include <system_error>
+
+using namespace std;
+
+namespace butl
+{
+  void
+  uuid_throw_weak (); // uuid.cxx
+
+  uuid uuid_system_generator::
+  generate (bool strong)
+  {
+    // According to uuid_compare(3) (<uuid.h>), the OpenBSD uuid_*() API has
+    // been producing version 4 (that is, random) UUIDs since at least release
+    // 6.4. For the time being only such UUIDs are treated as strong.
+    //
+    // The uuid_t layout is assumed to match the FreeBSD/NetBSD one (where it
+    // comes from <sys/uuid.h>).
+    //
+    uuid_t src;
+    uint32_t status;
+    uuid_create (&src, &status);
+
+    // There does not appear to be a uuid_s_* code that this call could
+    // plausibly fail with, so report the generic "not supported" error.
+    //
+    if (status != uuid_s_ok)
+      throw system_error (ENOSYS, system_category ());
+
+    uuid r;
+
+    // This amounts to a memcpy() of the entire structure, however, let's go
+    // member by member (referring to each by name) in case the layout ever
+    // changes on either side.
+    //
+    r.time_low = src.time_low;
+    r.time_mid = src.time_mid;
+    r.time_hiv = src.time_hi_and_version;
+    r.clock_seq_hir = src.clock_seq_hi_and_reserved;
+    r.clock_seq_low = src.clock_seq_low;
+    memcpy (r.node, src.node, sizeof (r.node));
+
+    assert (r.variant () == uuid_variant::dce); // Sanity check.
+
+    // A strong UUID was requested: only the random version qualifies.
+    //
+    if (strong)
+    {
+      if (r.version () != uuid_version::random)
+        uuid_throw_weak ();
+    }
+
+    return r;
+  }
+
+  void uuid_system_generator::
+  initialize () // Nothing to set up for this implementation.
+  {
+  }
+
+  void uuid_system_generator::
+  terminate () // Nothing to release for this implementation.
+  {
+  }
+}
+
+#endif // BUILD2_BOOTSTRAP
diff --git a/libbutl/uuid.cxx b/libbutl/uuid.cxx
index 377afb7..2132808 100644
--- a/libbutl/uuid.cxx
+++ b/libbutl/uuid.cxx
@@ -5,7 +5,7 @@
 
 #include <errno.h> // ENOTSUP
 
-#include <cstdio>       // sprintf() scanf()
+#include <cstdio>       // snprintf() sscanf()
 #include <cstring>      // strlen()
 #include <stdexcept>
 #include <system_error>
@@ -19,16 +19,17 @@ namespace butl
   {
     array<char, 37> r;
 
-    sprintf (r.data (),
-             (upper
-              ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X"
-              : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"),
-             time_low,
-             time_mid,
-             time_hiv,
-             clock_seq_hir,
-             clock_seq_low,
-             node[0], node[1], node[2], node[3], node[4], node[5]);
+    snprintf (r.data (),
+              37,
+              (upper
+               ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X"
+               : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"),
+              time_low,
+              time_mid,
+              time_hiv,
+              clock_seq_hir,
+              clock_seq_low,
+              node[0], node[1], node[2], node[3], node[4], node[5]);
 
     return r;
   }
diff --git a/libbutl/uuid.hxx b/libbutl/uuid.hxx
index 2361640..862f02d 100644
--- a/libbutl/uuid.hxx
+++ b/libbutl/uuid.hxx
@@ -48,12 +48,12 @@ namespace butl
   {
     // Normally not accessed directly (see RFC4122 Section 4.1.2).
     //
-    std::uint32_t  time_low = 0;
-    std::uint16_t  time_mid = 0;
-    std::uint16_t  time_hiv = 0;       // hi_and_version
-    std::uint8_t   clock_seq_hir = 0;  // hi_and_reserved
-    std::uint8_t   clock_seq_low = 0;
-    std::uint8_t   node[6] = {0, 0, 0, 0, 0, 0};
+    std::uint32_t time_low = 0;
+    std::uint16_t time_mid = 0;
+    std::uint16_t time_hiv = 0;      // hi_and_version
+    std::uint8_t  clock_seq_hir = 0; // hi_and_reserved
+    std::uint8_t  clock_seq_low = 0;
+    std::uint8_t  node[6] = {0, 0, 0, 0, 0, 0};
 
     // System UUID generator. See the uuid_generator interface for details.
     //
@@ -158,10 +158,10 @@ namespace butl
     void
     swap (uuid&);
 
-    uuid (uuid&&);
+    uuid (uuid&&) noexcept;
     uuid (const uuid&) = default;
 
-    uuid& operator= (uuid&&);
+    uuid& operator= (uuid&&) noexcept;
     uuid& operator= (const uuid&) = default;
   };
 
@@ -183,7 +183,7 @@ namespace butl
     ~uuid_generator () = default;
 
     // Generate a UUID. If strong is true (default), generate a strongly-
-    // unique UUID. Throw std::runtime_error to report errors, including if
+    // unique UUID. Throw std::system_error to report errors, including if
     // strong uniqueness cannot be guaranteed.
     //
     // A weak UUID is not guaranteed to be unique, neither universialy nor
@@ -207,7 +207,7 @@ namespace butl
     // Optional explicit initialization and termination. Note that it is not
     // thread-safe and must only be performed once (normally from main())
     // before/after any calls to generate(), respectively. Both functions may
-    // throw std::runtime_error to report errors.
+    // throw std::system_error to report errors.
     //
     static void
     initialize ();
diff --git a/libbutl/uuid.ixx b/libbutl/uuid.ixx
index 6744af7..6115be1 100644
--- a/libbutl/uuid.ixx
+++ b/libbutl/uuid.ixx
@@ -39,14 +39,14 @@ namespace butl
   }
 
   inline uuid::
-  uuid (uuid&& u)
+  uuid (uuid&& u) noexcept
       : uuid () // nil
   {
     swap (u);
   }
 
   inline uuid& uuid::
-  operator= (uuid&& u)
+  operator= (uuid&& u) noexcept
   {
     if (this != &u)
     {
diff --git a/libbutl/vector-view.mxx b/libbutl/vector-view.hxx
index 7924371..16ab08e 100644
--- a/libbutl/vector-view.mxx
+++ b/libbutl/vector-view.hxx
@@ -1,32 +1,17 @@
-// file      : libbutl/vector-view.mxx -*- C++ -*-
+// file      : libbutl/vector-view.hxx -*- C++ -*-
 // license   : MIT; see accompanying LICENSE file
 
-#ifndef __cpp_modules_ts
 #pragma once
-#endif
 
-// C includes.
-
-#ifndef __cpp_lib_modules_ts
 #include <vector>
 #include <cstddef>   // size_t, ptrdiff_t
 #include <utility>   // swap()
 #include <iterator>  // reverse_iterator
 #include <stdexcept> // out_of_range
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.vector_view;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-#endif
 
 #include <libbutl/export.hxx>
 
-LIBBUTL_MODEXPORT namespace butl
+namespace butl
 {
   // In our version a const view allows the modification of the elements
   // unless T is made const (the same semantics as in smart pointers).
diff --git a/libbutl/win32-utility.cxx b/libbutl/win32-utility.cxx
index 3b44d60..c69842b 100644
--- a/libbutl/win32-utility.cxx
+++ b/libbutl/win32-utility.cxx
@@ -8,16 +8,9 @@
 //
 #ifdef _WIN32
 
-#ifndef __cpp_lib_modules_ts
-#include <string>
 #include <memory> // unique_ptr
 
-#include <libbutl/utility.mxx> // throw_system_error()
-#else
-import std.core;
-
-import butl.utility;
-#endif
+#include <libbutl/utility.hxx> // throw_system_error()
 
 using namespace std;
 
diff --git a/libbutl/win32-utility.hxx b/libbutl/win32-utility.hxx
index b71eb1a..9bed647 100644
--- a/libbutl/win32-utility.hxx
+++ b/libbutl/win32-utility.hxx
@@ -31,11 +31,7 @@
 #  endif
 #endif
 
-#ifndef __cpp_lib_modules_ts
 #include <string>
-#else
-import std.core;
-#endif
 
 #include <libbutl/export.hxx>
 
diff --git a/libbutl/xxhash.c b/libbutl/xxhash.c
new file mode 100644
index 0000000..ff28749
--- /dev/null
+++ b/libbutl/xxhash.c
@@ -0,0 +1,1030 @@
+/*
+*  xxHash - Fast Hash algorithm
+*  Copyright (C) 2012-2016, Yann Collet
+*
+*  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions are
+*  met:
+*
+*  * Redistributions of source code must retain the above copyright
+*  notice, this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above
+*  copyright notice, this list of conditions and the following disclaimer
+*  in the documentation and/or other materials provided with the
+*  distribution.
+*
+*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*  You can contact the author at :
+*  - xxHash homepage: http://www.xxhash.com
+*  - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
+ *            It can generate buggy code on targets which do not support unaligned memory accesses.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+                        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+                        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+                    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+                    || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * If it is, the result for a null input pointer is the same as for a null-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
+#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
+#  define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define XXH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+*   for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#include <assert.h>   /* assert */
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+*  Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint8_t  BYTE;
+    typedef uint16_t U16;
+    typedef uint32_t U32;
+# else
+    typedef unsigned char      BYTE;
+    typedef unsigned short     U16;
+    typedef unsigned int       U32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Method 2: force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* Method 1: __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* Method 0 (default): portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else   /* arguments are parenthesized so that compound expressions expand correctly */
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#else
+/* Portable byte reversal, used when no compiler byte-swap intrinsic was selected above. */
+static U32 XXH_swap32 (U32 x)
+{
+    U32 const upper = ((x << 24) & 0xff000000 ) | ((x <<  8) & 0x00ff0000 );
+    U32 const lower = ((x >>  8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff );
+    return upper | lower;
+}
+#endif
+
+
+/* *************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+/* The numeric values matter: XXH_isLittleEndian() returns 1 on a little-endian host, i.e. XXH_littleEndian. */
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+static int XXH_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];   /* lowest-addressed byte of the U32 value 1: 1 when little-endian, 0 when big-endian */
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+#endif
+
+
+/* ***************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+/* Load a little-endian 32-bit word from ptr; `endian` is the host byte order, `align` says whether ptr may be dereferenced as U32. */
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    /* aligned pointers are dereferenced directly, anything else goes through XXH_read32() */
+    U32 const raw = (align==XXH_unaligned) ? XXH_read32(ptr) : *(const U32*)ptr;
+    /* on a big-endian host the bytes must be swapped to get the little-endian interpretation */
+    return (endian==XXH_littleEndian) ? raw : XXH_swap32(raw);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{   /* convenience wrapper: little-endian 32-bit load that makes no alignment assumption about ptr */
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{   U32 const raw = XXH_read32(ptr);   /* native-order load of the 4 bytes at ptr */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(raw) : raw;   /* the same bytes read as a big-endian value */
+}
+
+
+/* *************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }  /* compile-time check: a false c yields a division by zero in a constant expression; expands to a block, so use after variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }   /* reports the XXH_VERSION_NUMBER this file was compiled with */
+
+
+/* *******************************************************************
+*  32-bit hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;   /* 0x9E3779B1 */
+static const U32 PRIME32_2 = 2246822519U;   /* 0x85EBCA77 */
+static const U32 PRIME32_3 = 3266489917U;   /* 0xC2B2AE3D */
+static const U32 PRIME32_4 =  668265263U;   /* 0x27D4EB2F */
+static const U32 PRIME32_5 =  374761393U;   /* 0x165667B1 */
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+    /* absorb one 32-bit lane into the accumulator: scale the input, add, rotate, scale again */
+    U32 const summed  = seed + input * PRIME32_2;
+    U32 const rotated = XXH_rotl32(summed, 13);
+    return rotated * PRIME32_1;
+}
+
+/* mix all bits of the accumulator: xorshift, multiply, xorshift, multiply, xorshift */
+static U32 XXH32_avalanche(U32 h32)
+{
+    U32 acc = h32;
+
+    acc  = (acc ^ (acc >> 15)) * PRIME32_2;
+    acc  = (acc ^ (acc >> 13)) * PRIME32_3;
+    acc ^= acc >> 16;
+    return acc;
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+/* XXH_get32bits() picks up `endian` and `align` from the scope in which it is expanded. */
+static U32
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+                XXH_endianess endian, XXH_alignment align)
+/* Mixes the trailing (len & 15) bytes found at ptr into h32 and returns the avalanched result. */
+{
+    const BYTE* p = (const BYTE*)ptr;
+    /* PROCESS1: mix in one byte and advance p by 1 */
+#define PROCESS1               \
+    h32 += (*p++) * PRIME32_5; \
+    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+    /* PROCESS4: mix in one little-endian 32-bit word and advance p by 4 */
+#define PROCESS4                         \
+    h32 += XXH_get32bits(p) * PRIME32_3; \
+    p+=4;                                \
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+    /* Each group below handles one value of (len % 4): whole words first, then the leftover bytes. */
+    switch(len&15)  /* or switch(bEnd - p) */
+    {
+      case 12:      PROCESS4;
+                    /* fallthrough */
+      case 8:       PROCESS4;
+                    /* fallthrough */
+      case 4:       PROCESS4;
+                    return XXH32_avalanche(h32);   /* multiples of 4: words only */
+
+      case 13:      PROCESS4;
+                    /* fallthrough */
+      case 9:       PROCESS4;
+                    /* fallthrough */
+      case 5:       PROCESS4;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);   /* words, then 1 byte */
+
+      case 14:      PROCESS4;
+                    /* fallthrough */
+      case 10:      PROCESS4;
+                    /* fallthrough */
+      case 6:       PROCESS4;
+                    PROCESS1;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);   /* words, then 2 bytes */
+
+      case 15:      PROCESS4;
+                    /* fallthrough */
+      case 11:      PROCESS4;
+                    /* fallthrough */
+      case 7:       PROCESS4;
+                    /* fallthrough */
+      case 3:       PROCESS1;
+                    /* fallthrough */
+      case 2:       PROCESS1;
+                    /* fallthrough */
+      case 1:       PROCESS1;
+                    /* fallthrough */
+      case 0:       return XXH32_avalanche(h32);   /* also the exit for the 1..3 and 7/11/15 byte tails */
+    }
+    assert(0);    /* len&15 is always in 0..15 and every such case returns above */
+    return h32;   /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+                    XXH_endianess endian, XXH_alignment align)
+{   /* One-shot 32-bit hash of len bytes at input; endian/align select how XXH_get32bits() loads words. */
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (p==NULL) {   /* a NULL input is hashed as an empty input */
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;   /* arbitrary non-NULL address; with len==0 no byte is read through it */
+    }
+#endif
+
+    if (len>=16) {
+        const BYTE* const limit = bEnd - 15;   /* p < limit means at least 16 bytes remain */
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do {   /* consume 16 bytes per iteration, one 32-bit word per accumulator */
+            v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+            v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+            v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+            v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+        } while (p < limit);
+
+        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
+            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);   /* merge the four accumulators */
+    } else {
+        h32  = seed + PRIME32_5;   /* short input: the accumulators are never used */
+    }
+
+    h32 += (U32)len;   /* total length is part of the hash; truncated to 32 bits */
+
+    return XXH32_finalize(h32, p, len&15, endian, align);   /* p now points at the remaining 0..15 bytes */
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{   /* Public one-shot entry point: returns the 32-bit hash of len bytes at input for the given seed. */
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Every call below passes constant endian/align arguments to the FORCE_INLINE helper. */
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }
+    /* Unaligned input, or alignment check disabled: use the alignment-agnostic loads. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   /* native format skips byte swapping even on big-endian hosts */
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+
+/*======   Hash streaming   ======*/
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{   /* returns whatever XXH_malloc() returns, unchecked; the state contents are not initialized here */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);   /* hands the pointer to XXH_free(), the counterpart of the XXH_malloc() used by XXH32_createState() */
+    return XXH_OK;        /* always reports success */
+}
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{   /* plain byte copy of the whole state structure; memcpy() requires that the two objects do not overlap */
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{   /* (Re)starts a streaming hash with the given seed; always returns XXH_OK. */
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));   /* zeroes the lengths, the buffer and memsize */
+    state.v1 = seed + PRIME32_1 + PRIME32_2;   /* same initial accumulators as the one-shot XXH32_endian_align() */
+    state.v2 = seed + PRIME32_2;
+    state.v3 = seed + 0;   /* keeps the seed itself, which XXH32_digest_endian() reads back for short inputs */
+    state.v4 = seed - PRIME32_1;
+    /* do not write into reserved, planned to be removed in a future version */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));   /* NOTE(review): relies on `reserved` being the last member - confirm in xxhash.h */
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode
+XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{   /* Feeds len bytes at input into the streaming state; `endian` is the host byte order used for word loads. */
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+    /* Returns XXH_OK on success; a NULL input is an error unless XXH_ACCEPT_NULL_INPUT_POINTER>=1. */
+    {   const BYTE* p = (const BYTE*)input;
+        const BYTE* const bEnd = p + len;
+
+        state->total_len_32 += (unsigned)len;   /* length modulo 2^32, which is all the final hash needs */
+        state->large_len |= (len>=16) | (state->total_len_32>=16);   /* sticky flag: at least 16 bytes seen in total */
+
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (unsigned)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update */
+            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);   /* top the buffer up to 16 bytes */
+            {   const U32* p32 = state->mem32;
+                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+            }
+            p += 16-state->memsize;   /* skip the input bytes that completed the buffer */
+            state->memsize = 0;
+        }
+        /* Compare remaining length instead of forming bEnd-16, which would point before the input when it is shorter than 16 bytes. */
+        if ((size_t)(bEnd-p) >= 16) {
+            const BYTE* const limit = bEnd - 16;   /* last position where a full 16-byte stripe still fits */
+            U32 v1 = state->v1;
+            U32 v2 = state->v2;
+            U32 v3 = state->v3;
+            U32 v4 = state->v4;
+
+            do {   /* consume 16 bytes per iteration straight from the caller's buffer */
+                v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+                v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+                v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+                v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {   /* keep the remaining 1..15 bytes for the next update or the digest */
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Public streaming entry point: each branch passes a constant endianness to the FORCE_INLINE worker. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+FORCE_INLINE U32
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{   /* Computes the hash of everything fed so far; the state is const and is not modified. */
+    U32 h32;
+
+    if (state->large_len) {   /* at least 16 bytes were seen, so the accumulators hold data */
+        h32 = XXH_rotl32(state->v1, 1)
+            + XXH_rotl32(state->v2, 7)
+            + XXH_rotl32(state->v3, 12)
+            + XXH_rotl32(state->v4, 18);
+    } else {
+        h32 = state->v3 /* == seed */ + PRIME32_5;
+    }
+
+    h32 += state->total_len_32;
+    /* mem32 is a U32 array inside the state, hence the XXH_aligned loads for the buffered tail */
+    return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Public digest entry point: each branch passes a constant endianness to the FORCE_INLINE worker. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*======   Canonical representation   ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+*   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+*   These functions allow transformation of hash result into and from its canonical format.
+*   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{   /* Stores hash into *dst in big-endian byte order. */
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);   /* little-endian host: reverse so the bytes land big-endian */
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{   /* inverse of XXH32_canonicalFromHash(): reads the 4 bytes at src as a big-endian value */
+    return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+*  64-bit hash functions
+*********************************************************************/
+
+/*======   Memory access   ======*/
+
+#ifndef MEM_MODULE   /* U64 is skipped when MEM_MODULE is already defined */
+# define MEM_MODULE
+# if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+    typedef uint64_t U64;   /* exact-width type: used for C++ and for C99 or later, except on VMS */
+# else
+    /* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+    typedef unsigned long long U64;
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
+
+#else
+
+/* Portable and safe solution, selected when XXH_FORCE_MEMORY_ACCESS is undefined or is neither 1 nor 2.
+ * Generally efficient. see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));   /* byte-wise copy: places no alignment requirement on memPtr */
+    return val;                          /* the 8 bytes interpreted in native byte order */
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+/* Portable 64-bit byte reversal, used when no compiler byte-swap intrinsic was selected above. */
+static U64 XXH_swap64 (U64 x)
+{
+    U64 swapped;
+
+    swapped  = ((x << 56) & 0xff00000000000000ULL) | ((x >> 56) & 0x00000000000000ffULL);   /* bytes 0 <-> 7 */
+    swapped |= ((x << 40) & 0x00ff000000000000ULL) | ((x >> 40) & 0x000000000000ff00ULL);   /* bytes 1 <-> 6 */
+    swapped |= ((x << 24) & 0x0000ff0000000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL);   /* bytes 2 <-> 5 */
+    swapped |= ((x << 8)  & 0x000000ff00000000ULL) | ((x >> 8)  & 0x00000000ff000000ULL);   /* bytes 3 <-> 4 */
+    return swapped;
+}
+#endif
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    /* aligned pointers are dereferenced directly, anything else goes through XXH_read64() */
+    U64 const raw = (align==XXH_unaligned) ? XXH_read64(ptr) : *(const U64*)ptr;
+    /* on a big-endian host the bytes must be swapped to get the little-endian interpretation */
+    return (endian==XXH_littleEndian) ? raw : XXH_swap64(raw);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{   /* convenience wrapper: little-endian 64-bit load that makes no alignment assumption about ptr */
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{   U64 const raw = XXH_read64(ptr);   /* native-order load of the 8 bytes at ptr */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(raw) : raw;   /* the same bytes read as a big-endian value */
+}
+
+
+/*======   xxh64   ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;   /* multiplier in XXH64_round(), XXH64_mergeRound() and the tail steps */
+static const U64 PRIME64_2 = 14029467366897019727ULL;   /* input multiplier in XXH64_round(); also used by XXH64_avalanche() */
+static const U64 PRIME64_3 =  1609587929392839161ULL;   /* used by XXH64_avalanche() and the 4-byte tail step */
+static const U64 PRIME64_4 =  9650029242287828579ULL;   /* additive constant in XXH64_mergeRound() and the 8-byte tail step */
+static const U64 PRIME64_5 =  2870177450012600261ULL;   /* short-input start value and 1-byte tail multiplier */
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+    /* absorb one 64-bit lane into the accumulator: scale the input, add, rotate, scale again */
+    U64 const summed  = acc + input * PRIME64_2;
+    U64 const rotated = XXH_rotl64(summed, 31);
+    return rotated * PRIME64_1;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+    /* fold one lane accumulator (val) into the running hash (acc) */
+    U64 const lane   = XXH64_round(0, val);
+    U64 const folded = acc ^ lane;
+    return folded * PRIME64_1 + PRIME64_4;
+}
+
+static U64 XXH64_avalanche(U64 h64)
+{
+    /* final mix of all bits: xorshift, multiply, xorshift, multiply, xorshift */
+    U64 acc = h64;
+    acc  = (acc ^ (acc >> 33)) * PRIME64_2;
+    acc  = (acc ^ (acc >> 29)) * PRIME64_3;
+    acc ^= acc >> 32;
+    return acc;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+/* XXH_get64bits() picks up `endian` and `align` from the scope in which it is expanded. */
+static U64
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+               XXH_endianess endian, XXH_alignment align)
+{   /* Mixes the trailing (len & 31) bytes found at ptr into h64 and returns the avalanched result. */
+    const BYTE* p = (const BYTE*)ptr;
+    /* PROCESS1_64: mix in one byte and advance p by 1 */
+#define PROCESS1_64            \
+    h64 ^= (*p++) * PRIME64_5; \
+    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+    /* PROCESS4_64: mix in one little-endian 32-bit word and advance p by 4 */
+#define PROCESS4_64          \
+    h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+    p+=4;                    \
+    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+    /* PROCESS8_64: mix in one little-endian 64-bit word and advance p by 8 */
+#define PROCESS8_64 {        \
+    U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+    p+=8;                    \
+    h64 ^= k1;               \
+    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+    /* Each group below handles one value of (len % 8): 8-byte words, then a 4-byte word if any, then single bytes. */
+    switch(len&31) {
+      case 24: PROCESS8_64;
+                    /* fallthrough */
+      case 16: PROCESS8_64;
+                    /* fallthrough */
+      case  8: PROCESS8_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 0 */
+
+      case 28: PROCESS8_64;
+                    /* fallthrough */
+      case 20: PROCESS8_64;
+                    /* fallthrough */
+      case 12: PROCESS8_64;
+                    /* fallthrough */
+      case  4: PROCESS4_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 4 */
+
+      case 25: PROCESS8_64;
+                    /* fallthrough */
+      case 17: PROCESS8_64;
+                    /* fallthrough */
+      case  9: PROCESS8_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 1, at least one 8-byte word */
+
+      case 29: PROCESS8_64;
+                    /* fallthrough */
+      case 21: PROCESS8_64;
+                    /* fallthrough */
+      case 13: PROCESS8_64;
+                    /* fallthrough */
+      case  5: PROCESS4_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 5 */
+
+      case 26: PROCESS8_64;
+                    /* fallthrough */
+      case 18: PROCESS8_64;
+                    /* fallthrough */
+      case 10: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 2, at least one 8-byte word */
+
+      case 30: PROCESS8_64;
+                    /* fallthrough */
+      case 22: PROCESS8_64;
+                    /* fallthrough */
+      case 14: PROCESS8_64;
+                    /* fallthrough */
+      case  6: PROCESS4_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 6 */
+
+      case 27: PROCESS8_64;
+                    /* fallthrough */
+      case 19: PROCESS8_64;
+                    /* fallthrough */
+      case 11: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);   /* len % 8 == 3, at least one 8-byte word */
+
+      case 31: PROCESS8_64;
+                    /* fallthrough */
+      case 23: PROCESS8_64;
+                    /* fallthrough */
+      case 15: PROCESS8_64;
+                    /* fallthrough */
+      case  7: PROCESS4_64;
+                    /* fallthrough */
+      case  3: PROCESS1_64;
+                    /* fallthrough */
+      case  2: PROCESS1_64;
+                    /* fallthrough */
+      case  1: PROCESS1_64;
+                    /* fallthrough */
+      case  0: return XXH64_avalanche(h64);   /* also the exit for the 1..3 and 7/15/23/31 byte tails */
+    }
+
+    /* impossible to reach */
+    assert(0);
+    return 0;  /* unreachable, but some compilers complain without it */
+}
+
+FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+                XXH_endianess endian, XXH_alignment align)
+{   /* One-shot 64-bit hash of len bytes at input; endian/align select how XXH_get64bits() loads words. */
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (p==NULL) {   /* a NULL input is hashed as an empty input */
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;   /* arbitrary non-NULL address; with len==0 no byte is read through it */
+    }
+#endif
+
+    if (len>=32) {
+        const BYTE* const limit = bEnd - 32;   /* last position where a full 32-byte stripe still fits */
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do {   /* consume 32 bytes per iteration, one 64-bit word per accumulator */
+            v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+        } while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);   /* unlike the 32-bit variant, each accumulator is folded in once more */
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64  = seed + PRIME64_5;   /* short input: the accumulators are never used */
+    }
+
+    h64 += (U64) len;   /* total length is part of the hash */
+
+    return XXH64_finalize(h64, p, len, endian, align);   /* XXH64_finalize() only looks at len & 31; p points at that tail */
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{   /* Public one-shot entry point: returns the 64-bit hash of len bytes at input for the given seed. */
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Every call below passes constant endian/align arguments to the FORCE_INLINE helper. */
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
+            if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+                return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+            else
+                return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }   }
+    /* Unaligned input, or alignment check disabled: use the alignment-agnostic loads. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   /* native format skips byte swapping even on big-endian hosts */
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*======   Hash Streaming   ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{   /* returns whatever XXH_malloc() returns, unchecked; the state contents are not initialized here */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);   /* hands the pointer to XXH_free(), the counterpart of the XXH_malloc() used by XXH64_createState() */
+    return XXH_OK;        /* always reports success */
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{   /* plain byte copy of the whole state structure; memcpy() requires that the two objects do not overlap */
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{   /* (Re)starts a streaming hash with the given seed; always returns XXH_OK. */
+    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));   /* zeroes the length, the buffer and memsize */
+    state.v1 = seed + PRIME64_1 + PRIME64_2;   /* same initial accumulators as the one-shot XXH64_endian_align() */
+    state.v2 = seed + PRIME64_2;
+    state.v3 = seed + 0;   /* keeps the seed itself, which XXH64_digest_endian() reads back for short inputs */
+    state.v4 = seed - PRIME64_1;
+     /* do not write into reserved, planned to be removed in a future version */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));   /* NOTE(review): relies on `reserved` being the last member - confirm in xxhash.h */
+    return XXH_OK;
+}
+
+FORCE_INLINE XXH_errorcode
+XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{   /* Feeds len bytes at input into the streaming state; `endian` is the host byte order used for word loads. */
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+    /* Returns XXH_OK on success; a NULL input is an error unless XXH_ACCEPT_NULL_INPUT_POINTER>=1. */
+    {   const BYTE* p = (const BYTE*)input;
+        const BYTE* const bEnd = p + len;
+
+        state->total_len += len;
+
+        if (state->memsize + len < 32) {  /* fill in tmp buffer */
+            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+            state->memsize += (U32)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* tmp buffer is full */
+            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);   /* top the buffer up to 32 bytes */
+            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+            p += 32-state->memsize;   /* skip the input bytes that completed the buffer */
+            state->memsize = 0;
+        }
+        /* Compare remaining length instead of forming p+32, which could point more than one past the end of the input. */
+        if ((size_t)(bEnd-p) >= 32) {
+            const BYTE* const limit = bEnd - 32;   /* last position where a full 32-byte stripe still fits */
+            U64 v1 = state->v1;
+            U64 v2 = state->v2;
+            U64 v3 = state->v3;
+            U64 v4 = state->v4;
+
+            do {   /* consume 32 bytes per iteration straight from the caller's buffer */
+                v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+                v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+                v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+                v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {   /* keep the remaining 1..31 bytes for the next update or the digest */
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Public streaming entry point: each branch passes a constant endianness to the FORCE_INLINE worker. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{   /* Computes the hash of everything fed so far; the state is const and is not modified. */
+    U64 h64;
+
+    if (state->total_len >= 32) {   /* at least one 32-byte stripe went through the accumulators */
+        U64 const v1 = state->v1;
+        U64 const v2 = state->v2;
+        U64 const v3 = state->v3;
+        U64 const v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64  = state->v3 /*seed*/ + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+    /* total_len is passed as the length; XXH64_finalize() only uses len & 31 to size the buffered tail in mem64 */
+    return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+    /* Public digest entry point: each branch passes a constant endianness to the FORCE_INLINE worker. */
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation   ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{   /* Stores hash into *dst in big-endian byte order. */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);   /* little-endian host: reverse so the bytes land big-endian */
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{   /* inverse of XXH64_canonicalFromHash(): reads the 8 bytes at src as a big-endian value */
+    return XXH_readBE64(src);
+}
+
+#endif  /* XXH_NO_LONG_LONG */
diff --git a/libbutl/xxhash.h b/libbutl/xxhash.h
new file mode 100644
index 0000000..d6bad94
--- /dev/null
+++ b/libbutl/xxhash.h
@@ -0,0 +1,328 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;   /* status type returned by the freeState/reset/update functions declared below; XXH_OK is 0 */
+
+
+/* ****************************
+ *  API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ *  This is useful to include xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining can offer dramatic performance improvement on small keys.
+ *  Methodology :
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  ifndef XXH_STATIC_LINKING_ONLY
+#    define XXH_STATIC_LINKING_ONLY   /* inline/private mode also turns on the XXH_STATIC_LINKING_ONLY section of this header */
+#  endif
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))   /* GNU C: static inline, tagged with the `unused` attribute */
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline   /* C++ or C99: standard `inline` keyword */
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline   /* reached for MSVC only when the C++/C99 test above failed */
+#  else
+     /* this version may generate warnings for unused static functions */
+#    define XXH_PUBLIC_API static
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B   /* raw token pasting; arguments are not macro-expanded at this level */
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)   /* extra indirection so XXH_NAMESPACE is expanded to its value before being pasted */
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    6
+#define XXH_VERSION_RELEASE  5
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)   /* 0*10000 + 6*100 + 5 = 605 */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);   /* NOTE(review): defined in xxhash.c (not shown here); presumably returns XXH_VERSION_NUMBER - confirm */
+
+
+/*-**********************************************************************
+*  32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+    Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+
+/*======   Streaming   ======*/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type unless XXH_STATIC_LINKING_ONLY is defined (struct body is declared in that section) */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);   /* allocates a state object */
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);   /* releases a dynamically allocated state */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);   /* NOTE(review): presumably copies *src_state into *dst_state - body not shown here */
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);   /* starts a new hash by initializing the state with `seed` */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);   /* feeds one input segment; 0 (XXH_OK) means success */
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);   /* may be called at any time; more input can still be added afterwards */
+
+/*
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bit hash as an unsigned int or unsigned long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
+
+/*======   Canonical representation   ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;   /* 4 raw bytes holding a 32-bit hash in canonical (big-endian) order */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);   /* hash value -> canonical bytes */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);   /* canonical bytes -> hash value */
+
+/* The default result types of XXH functions are primitive unsigned 32-bit and 64-bit integers.
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+    Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*======   Streaming   ======*/
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type unless XXH_STATIC_LINKING_ONLY is defined (struct body is declared in that section) */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);   /* allocates a state object */
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);   /* releases a dynamically allocated state */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);   /* NOTE(review): presumably copies *src_state into *dst_state - body not shown here */
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);   /* starts a new hash by initializing the state with `seed` */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);   /* feeds one input segment; 0 (XXH_OK) means success */
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);   /* may be called at any time; more input can still be added afterwards */
+
+/*======   Canonical representation   ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;   /* 8 raw bytes holding a 64-bit hash in canonical (big-endian) order */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);   /* hash value -> canonical bytes */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);   /* canonical bytes -> hash value */
+#endif  /* XXH_NO_LONG_LONG */
+
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+   This section contains declarations which are not guaranteed to remain stable.
+   They may change in future versions, becoming incompatible with a different version of the library.
+   These declarations should only be used with static linking.
+   Never use them in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+
+struct XXH32_state_s {   /* fixed-width variant, selected when <stdint.h> is included above (C++ or C99, not VMS) */
+   uint32_t total_len_32;   /* NOTE(review): XXH32 code is outside this view; field roles are assumed to mirror XXH64_state_s - confirm */
+   uint32_t large_len;
+   uint32_t v1;
+   uint32_t v2;
+   uint32_t v3;
+   uint32_t v4;
+   uint32_t mem32[4];
+   uint32_t memsize;
+   uint32_t reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {   /* fixed-width variant, selected when <stdint.h> is included above (C++ or C99, not VMS) */
+   uint64_t total_len;   /* compared against 32 and added to the hash by XXH64_digest_endian() */
+   uint64_t v1;          /* v1..v4: combined by XXH64_digest_endian() when total_len >= 32 */
+   uint64_t v2;
+   uint64_t v3;          /* also read as the seed by XXH64_digest_endian() when total_len < 32 */
+   uint64_t v4;
+   uint64_t mem64[4];    /* handed to XXH64_finalize() at digest time */
+   uint32_t memsize;     /* NOTE(review): presumably the number of bytes currently held in mem64 - confirm in xxhash.c */
+   uint32_t reserved[2];          /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s {   /* fallback variant for builds where <stdint.h> is not included above; same fields, declared as `unsigned` */
+   unsigned total_len_32;   /* NOTE(review): this variant assumes `unsigned` is 32 bits wide - confirm for the target */
+   unsigned large_len;
+   unsigned v1;
+   unsigned v2;
+   unsigned v3;
+   unsigned v4;
+   unsigned mem32[4];
+   unsigned memsize;
+   unsigned reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+#   ifndef XXH_NO_LONG_LONG  /* remove 64-bit support */
+struct XXH64_state_s {   /* fallback variant for builds where <stdint.h> is not included above; omitted when XXH_NO_LONG_LONG is defined */
+   unsigned long long total_len;   /* compared against 32 and added to the hash by XXH64_digest_endian() */
+   unsigned long long v1;          /* v1..v4: combined by XXH64_digest_endian() when total_len >= 32 */
+   unsigned long long v2;
+   unsigned long long v3;          /* also read as the seed by XXH64_digest_endian() when total_len < 32 */
+   unsigned long long v4;
+   unsigned long long mem64[4];    /* handed to XXH64_finalize() at digest time */
+   unsigned memsize;               /* NOTE(review): presumably the number of bytes currently held in mem64 - confirm in xxhash.c */
+   unsigned reserved[2];     /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+#    endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */