From df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 28 Sep 2021 19:24:31 +0300 Subject: Get rid of C++ modules related code and rename *.mxx files to *.hxx --- build/root.build | 2 +- libbutl/b.cxx | 58 +- libbutl/b.hxx | 130 +++ libbutl/b.mxx | 151 ---- libbutl/backtrace.cxx | 22 +- libbutl/backtrace.hxx | 27 + libbutl/backtrace.mxx | 42 - libbutl/base64.cxx | 26 +- libbutl/base64.hxx | 46 + libbutl/base64.mxx | 62 -- libbutl/buildfile | 19 +- libbutl/builtin.cxx | 55 +- libbutl/builtin.hxx | 220 +++++ libbutl/builtin.mxx | 239 ------ libbutl/char-scanner.hxx | 246 ++++++ libbutl/char-scanner.mxx | 261 ------ libbutl/char-scanner.txx | 2 - libbutl/command.cxx | 42 +- libbutl/command.hxx | 107 +++ libbutl/command.mxx | 122 --- libbutl/const-ptr.hxx | 78 ++ libbutl/const-ptr.mxx | 93 -- libbutl/curl.cxx | 34 +- libbutl/curl.hxx | 171 ++++ libbutl/curl.ixx | 6 +- libbutl/curl.mxx | 194 ----- libbutl/curl.txx | 2 +- libbutl/default-options.hxx | 162 ++++ libbutl/default-options.ixx | 2 +- libbutl/default-options.mxx | 189 ----- libbutl/default-options.txx | 10 +- libbutl/diagnostics.cxx | 37 +- libbutl/diagnostics.hxx | 266 ++++++ libbutl/diagnostics.mxx | 279 ------ libbutl/export.hxx | 8 - libbutl/fdstream.cxx | 43 +- libbutl/fdstream.hxx | 917 ++++++++++++++++++++ libbutl/fdstream.ixx | 2 + libbutl/fdstream.mxx | 941 -------------------- libbutl/filesystem.cxx | 45 +- libbutl/filesystem.hxx | 855 +++++++++++++++++++ libbutl/filesystem.ixx | 3 + libbutl/filesystem.mxx | 879 ------------------- libbutl/git.cxx | 40 +- libbutl/git.hxx | 27 + libbutl/git.mxx | 45 - libbutl/lz4-stream.cxx | 2 +- libbutl/lz4-stream.hxx | 2 +- libbutl/lz4.cxx | 2 +- libbutl/lz4.hxx | 4 +- libbutl/manifest-parser.cxx | 31 +- libbutl/manifest-parser.hxx | 160 ++++ libbutl/manifest-parser.mxx | 180 ---- libbutl/manifest-rewriter.cxx | 36 +- libbutl/manifest-rewriter.hxx | 60 ++ libbutl/manifest-rewriter.mxx | 78 -- 
libbutl/manifest-serializer.cxx | 37 +- libbutl/manifest-serializer.hxx | 136 +++ libbutl/manifest-serializer.mxx | 153 ---- libbutl/manifest-types.hxx | 32 + libbutl/manifest-types.mxx | 48 -- libbutl/multi-index.hxx | 57 ++ libbutl/multi-index.mxx | 72 -- libbutl/openssl.cxx | 27 +- libbutl/openssl.hxx | 161 ++++ libbutl/openssl.ixx | 5 +- libbutl/openssl.mxx | 183 ---- libbutl/openssl.txx | 4 +- libbutl/optional.hxx | 343 ++++++++ libbutl/optional.mxx | 358 -------- libbutl/pager.cxx | 40 +- libbutl/pager.hxx | 84 ++ libbutl/pager.mxx | 102 --- libbutl/path-io.hxx | 36 + libbutl/path-io.mxx | 54 -- libbutl/path-map.hxx | 128 +++ libbutl/path-map.mxx | 145 ---- libbutl/path-pattern.cxx | 33 +- libbutl/path-pattern.hxx | 224 +++++ libbutl/path-pattern.mxx | 241 ------ libbutl/path.cxx | 29 +- libbutl/path.hxx | 1536 +++++++++++++++++++++++++++++++++ libbutl/path.ixx | 2 +- libbutl/path.mxx | 1555 ---------------------------------- libbutl/path.txx | 2 +- libbutl/prefix-map.hxx | 173 ++++ libbutl/prefix-map.mxx | 188 ---- libbutl/prefix-map.txx | 2 +- libbutl/process-details.hxx | 5 - libbutl/process-io.cxx | 29 +- libbutl/process-io.hxx | 50 ++ libbutl/process-io.mxx | 67 -- libbutl/process-run.cxx | 27 +- libbutl/process-run.txx | 4 +- libbutl/process.cxx | 56 +- libbutl/process.hxx | 832 ++++++++++++++++++ libbutl/process.ixx | 3 + libbutl/process.mxx | 855 ------------------- libbutl/project-name.cxx | 30 +- libbutl/project-name.hxx | 216 +++++ libbutl/project-name.mxx | 233 ----- libbutl/prompt.cxx | 26 +- libbutl/prompt.hxx | 20 + libbutl/prompt.mxx | 35 - libbutl/regex.cxx | 31 +- libbutl/regex.hxx | 133 +++ libbutl/regex.ixx | 4 +- libbutl/regex.mxx | 154 ---- libbutl/regex.txx | 5 +- libbutl/semantic-version.cxx | 29 +- libbutl/semantic-version.hxx | 175 ++++ libbutl/semantic-version.mxx | 192 ----- libbutl/sendmail.cxx | 27 +- libbutl/sendmail.hxx | 116 +++ libbutl/sendmail.ixx | 5 +- libbutl/sendmail.mxx | 137 --- libbutl/sha1.cxx | 24 +- 
libbutl/sha1.hxx | 120 +++ libbutl/sha1.mxx | 135 --- libbutl/sha256.cxx | 34 +- libbutl/sha256.hxx | 159 ++++ libbutl/sha256.mxx | 174 ---- libbutl/small-allocator.hxx | 181 ++++ libbutl/small-allocator.mxx | 195 ----- libbutl/small-forward-list.hxx | 145 ++++ libbutl/small-forward-list.mxx | 159 ---- libbutl/small-list.hxx | 150 ++++ libbutl/small-list.mxx | 164 ---- libbutl/small-vector-odb.hxx | 2 +- libbutl/small-vector.hxx | 175 ++++ libbutl/small-vector.mxx | 189 ----- libbutl/standard-version.cxx | 33 +- libbutl/standard-version.hxx | 340 ++++++++ libbutl/standard-version.mxx | 357 -------- libbutl/string-parser.cxx | 28 +- libbutl/string-parser.hxx | 51 ++ libbutl/string-parser.mxx | 66 -- libbutl/string-table.hxx | 95 +++ libbutl/string-table.mxx | 113 --- libbutl/string-table.txx | 3 + libbutl/tab-parser.cxx | 31 +- libbutl/tab-parser.hxx | 68 ++ libbutl/tab-parser.mxx | 84 -- libbutl/target-triplet.cxx | 26 +- libbutl/target-triplet.hxx | 171 ++++ libbutl/target-triplet.mxx | 187 ---- libbutl/timestamp.cxx | 39 +- libbutl/timestamp.hxx | 179 ++++ libbutl/timestamp.mxx | 207 ----- libbutl/unicode.cxx | 23 +- libbutl/unicode.hxx | 66 ++ libbutl/unicode.mxx | 82 -- libbutl/url.hxx | 552 ++++++++++++ libbutl/url.ixx | 2 +- libbutl/url.mxx | 579 ------------- libbutl/url.txx | 7 +- libbutl/utf8.hxx | 114 +++ libbutl/utf8.mxx | 130 --- libbutl/utility.cxx | 26 +- libbutl/utility.hxx | 541 ++++++++++++ libbutl/utility.ixx | 2 - libbutl/utility.mxx | 556 ------------ libbutl/uuid-linux.cxx | 2 +- libbutl/vector-view.hxx | 118 +++ libbutl/vector-view.mxx | 133 --- libbutl/win32-utility.cxx | 9 +- libbutl/win32-utility.hxx | 4 - tests/b-info/driver.cxx | 22 +- tests/backtrace/driver.cxx | 21 +- tests/base64/driver.cxx | 14 +- tests/builtin/driver.cxx | 26 +- tests/command/driver.cxx | 29 +- tests/cpfile/driver.cxx | 20 +- tests/curl/driver.cxx | 31 +- tests/default-options/driver.cxx | 30 +- tests/dir-iterator/driver.cxx | 25 +- tests/entry-time/driver.cxx | 23 
+- tests/fdstream/driver.cxx | 29 +- tests/link/driver.cxx | 26 +- tests/lz4/driver.cxx | 4 +- tests/manifest-parser/driver.cxx | 19 +- tests/manifest-rewriter/driver.cxx | 28 +- tests/manifest-roundtrip/driver.cxx | 23 +- tests/manifest-serializer/driver.cxx | 16 +- tests/mventry/driver.cxx | 20 +- tests/openssl/driver.cxx | 27 +- tests/optional/driver.cxx | 15 +- tests/pager/driver.cxx | 16 +- tests/path-entry/driver.cxx | 31 +- tests/path/driver.cxx | 17 +- tests/prefix-map/driver.cxx | 16 +- tests/process-run/driver.cxx | 26 +- tests/process-term/driver.cxx | 20 +- tests/process/driver.cxx | 31 +- tests/progress/driver.cxx | 26 +- tests/project-name/driver.cxx | 20 +- tests/regex/driver.cxx | 18 +- tests/semantic-version/driver.cxx | 14 +- tests/sendmail/driver.cxx | 27 +- tests/sha1/driver.cxx | 24 +- tests/sha256/driver.cxx | 24 +- tests/small-forward-list/driver.cxx | 16 +- tests/small-list/driver.cxx | 14 +- tests/small-vector/driver.cxx | 16 +- tests/standard-version/driver.cxx | 22 +- tests/strcase/driver.cxx | 13 +- tests/string-parser/driver.cxx | 19 +- tests/tab-parser/driver.cxx | 17 +- tests/target-triplet/driver.cxx | 16 +- tests/timestamp/driver.cxx | 14 +- tests/url/driver.cxx | 19 +- tests/utf8/driver.cxx | 18 +- tests/wildcard/driver.cxx | 26 +- 213 files changed, 11502 insertions(+), 13944 deletions(-) create mode 100644 libbutl/b.hxx delete mode 100644 libbutl/b.mxx create mode 100644 libbutl/backtrace.hxx delete mode 100644 libbutl/backtrace.mxx create mode 100644 libbutl/base64.hxx delete mode 100644 libbutl/base64.mxx create mode 100644 libbutl/builtin.hxx delete mode 100644 libbutl/builtin.mxx create mode 100644 libbutl/char-scanner.hxx delete mode 100644 libbutl/char-scanner.mxx create mode 100644 libbutl/command.hxx delete mode 100644 libbutl/command.mxx create mode 100644 libbutl/const-ptr.hxx delete mode 100644 libbutl/const-ptr.mxx create mode 100644 libbutl/curl.hxx delete mode 100644 libbutl/curl.mxx create mode 100644 
libbutl/default-options.hxx delete mode 100644 libbutl/default-options.mxx create mode 100644 libbutl/diagnostics.hxx delete mode 100644 libbutl/diagnostics.mxx create mode 100644 libbutl/fdstream.hxx delete mode 100644 libbutl/fdstream.mxx create mode 100644 libbutl/filesystem.hxx delete mode 100644 libbutl/filesystem.mxx create mode 100644 libbutl/git.hxx delete mode 100644 libbutl/git.mxx create mode 100644 libbutl/manifest-parser.hxx delete mode 100644 libbutl/manifest-parser.mxx create mode 100644 libbutl/manifest-rewriter.hxx delete mode 100644 libbutl/manifest-rewriter.mxx create mode 100644 libbutl/manifest-serializer.hxx delete mode 100644 libbutl/manifest-serializer.mxx create mode 100644 libbutl/manifest-types.hxx delete mode 100644 libbutl/manifest-types.mxx create mode 100644 libbutl/multi-index.hxx delete mode 100644 libbutl/multi-index.mxx create mode 100644 libbutl/openssl.hxx delete mode 100644 libbutl/openssl.mxx create mode 100644 libbutl/optional.hxx delete mode 100644 libbutl/optional.mxx create mode 100644 libbutl/pager.hxx delete mode 100644 libbutl/pager.mxx create mode 100644 libbutl/path-io.hxx delete mode 100644 libbutl/path-io.mxx create mode 100644 libbutl/path-map.hxx delete mode 100644 libbutl/path-map.mxx create mode 100644 libbutl/path-pattern.hxx delete mode 100644 libbutl/path-pattern.mxx create mode 100644 libbutl/path.hxx delete mode 100644 libbutl/path.mxx create mode 100644 libbutl/prefix-map.hxx delete mode 100644 libbutl/prefix-map.mxx create mode 100644 libbutl/process-io.hxx delete mode 100644 libbutl/process-io.mxx create mode 100644 libbutl/process.hxx delete mode 100644 libbutl/process.mxx create mode 100644 libbutl/project-name.hxx delete mode 100644 libbutl/project-name.mxx create mode 100644 libbutl/prompt.hxx delete mode 100644 libbutl/prompt.mxx create mode 100644 libbutl/regex.hxx delete mode 100644 libbutl/regex.mxx create mode 100644 libbutl/semantic-version.hxx delete mode 100644 libbutl/semantic-version.mxx 
create mode 100644 libbutl/sendmail.hxx delete mode 100644 libbutl/sendmail.mxx create mode 100644 libbutl/sha1.hxx delete mode 100644 libbutl/sha1.mxx create mode 100644 libbutl/sha256.hxx delete mode 100644 libbutl/sha256.mxx create mode 100644 libbutl/small-allocator.hxx delete mode 100644 libbutl/small-allocator.mxx create mode 100644 libbutl/small-forward-list.hxx delete mode 100644 libbutl/small-forward-list.mxx create mode 100644 libbutl/small-list.hxx delete mode 100644 libbutl/small-list.mxx create mode 100644 libbutl/small-vector.hxx delete mode 100644 libbutl/small-vector.mxx create mode 100644 libbutl/standard-version.hxx delete mode 100644 libbutl/standard-version.mxx create mode 100644 libbutl/string-parser.hxx delete mode 100644 libbutl/string-parser.mxx create mode 100644 libbutl/string-table.hxx delete mode 100644 libbutl/string-table.mxx create mode 100644 libbutl/tab-parser.hxx delete mode 100644 libbutl/tab-parser.mxx create mode 100644 libbutl/target-triplet.hxx delete mode 100644 libbutl/target-triplet.mxx create mode 100644 libbutl/timestamp.hxx delete mode 100644 libbutl/timestamp.mxx create mode 100644 libbutl/unicode.hxx delete mode 100644 libbutl/unicode.mxx create mode 100644 libbutl/url.hxx delete mode 100644 libbutl/url.mxx create mode 100644 libbutl/utf8.hxx delete mode 100644 libbutl/utf8.mxx create mode 100644 libbutl/utility.hxx delete mode 100644 libbutl/utility.mxx create mode 100644 libbutl/vector-view.hxx delete mode 100644 libbutl/vector-view.mxx diff --git a/build/root.build b/build/root.build index e867421..1526ef0 100644 --- a/build/root.build +++ b/build/root.build @@ -5,7 +5,7 @@ cxx.std = latest using cxx -hxx{*}: extension = hxx # We also have .mxx; see libbutl/buildfile. 
+hxx{*}: extension = hxx ixx{*}: extension = ixx txx{*}: extension = txx cxx{*}: extension = cxx diff --git a/libbutl/b.cxx b/libbutl/b.cxx index 86a87ff..74a430c 100644 --- a/libbutl/b.cxx +++ b/libbutl/b.cxx @@ -1,59 +1,19 @@ // file : libbutl/b.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. +#include +#include // ios::failure #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include -#include - -#include // ios::failure -#include // move() +#include // move() #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.b; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.url; -import butl.path; -import butl.process; -import butl.optional; -import butl.project_name; -import butl.standard_version; -#endif - -import butl.utility; // next_word(), eof(), etc -import butl.path_io; -import butl.fdstream; -import butl.process_io; // operator<<(ostream, process_path) -import butl.small_vector; -#else -#include -#include -#include -#include -#include -#endif + +#include // next_word(), eof(), etc +#include +#include +#include // operator<<(ostream, process_path) +#include using namespace std; diff --git a/libbutl/b.hxx b/libbutl/b.hxx new file mode 100644 index 0000000..cc3a309 --- /dev/null +++ b/libbutl/b.hxx @@ -0,0 +1,130 @@ +// file : libbutl/b.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // move() +#include // size_t +#include // uint16_t +#include // runtime_error +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace butl +{ + class LIBBUTL_SYMEXPORT b_error: public std::runtime_error + { + public: + // Build system program exit information. May be absent if the error + // occurred before the process has been started. 
+ // + // Can be used by the caller to decide whether to print the error message to + // stderr. Normally, it is not required if the process exited normally + // with non-zero code, since presumably it has issued diagnostics. Note + // that the normal() function can be used to check for this. + // + optional exit; + + // Return true if the build2 process exited normally with non-zero code. + // + bool + normal () const {return exit && exit->normal () && !*exit;} + + explicit + b_error (const std::string& description, optional = nullopt); + }; + + // Run `b info: ...` command and parse and return (via argument + // to allow appending and for error position; see below) the build2 projects + // information it prints to stdout. Return the empty list if the specified + // project list is empty. Throw b_error on error. Note that the size of the + // result vector can be used to determine which project information caused + // the error. + // + // Unless you need information that may come from external modules + // (operations, meta-operations, etc), pass false as the ext_mods argument, + // which results in passing --no-external-modules to the build2 program and + // speeds up its execution. + // + // You can also specify the build2 verbosity level, command line callback + // (see process_run_callback() for details), build program search details, + // and additional options. + // + // Note that version_string is only parsed to standard_version if a project + // uses the version module. Otherwise, standard_version is empty. + // + struct b_project_info + { + using url_type = butl::url; + + struct subproject + { + project_name name; // Empty if anonymous. + dir_path path; // Relative to the project root. + }; + + project_name project; + std::string version_string; + standard_version version; + std::string summary; + url_type url; + + dir_path src_root; + dir_path out_root; + + dir_path amalgamation; // Relative to project root and + // empty if not amalgamated. 
+ std::vector subprojects; + + std::vector operations; + std::vector meta_operations; + + std::vector modules; + }; + + using b_callback = void (const char* const args[], std::size_t n); + + LIBBUTL_SYMEXPORT void + b_info (std::vector& result, + const std::vector& projects, + bool ext_mods, + std::uint16_t verb = 1, + const std::function& cmd_callback = {}, + const path& program = path ("b"), + const dir_path& search_fallback = {}, + const std::vector& options = {}); + + // As above but retrieve information for a single project. + // + inline b_project_info + b_info (const dir_path& project, + bool ext_mods, + std::uint16_t verb = 1, + const std::function& cmd_callback = {}, + const path& program = path ("b"), + const dir_path& search_fallback = {}, + const std::vector& options = {}) + { + std::vector r; + b_info (r, + std::vector ({project}), + ext_mods, + verb, + cmd_callback, + program, + search_fallback, + options); + + return std::move (r[0]); + } +} diff --git a/libbutl/b.mxx b/libbutl/b.mxx deleted file mode 100644 index cca9696..0000000 --- a/libbutl/b.mxx +++ /dev/null @@ -1,151 +0,0 @@ -// file : libbutl/b.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // move() -#include // size_tu -#include // uint16_t -#include // runtime_error -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.b; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.url; -import butl.path; -import butl.process; -import butl.optional; -import butl.project_name; -import butl.standard_version; -#else -#include -#include -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - class LIBBUTL_SYMEXPORT b_error: public std::runtime_error - { - public: - // Build system program exit information. 
May be absent if the error - // occured before the process has been started. - // - // Can be used by the caller to decide if to print the error message to - // stderr. Normally, it is not required if the process exited normally - // with non-zero code, since presumably it has issued diagnostics. Note - // that the normal() function can be used to check for this. - // - optional exit; - - // Return true if the build2 process exited normally with non-zero code. - // - bool - normal () const {return exit && exit->normal () && !*exit;} - - explicit - b_error (const std::string& description, optional = nullopt); - }; - - // Run `b info: ...` command and parse and return (via argument - // to allow appending and for error position; see below) the build2 projects - // information it prints to stdout. Return the empty list if the specified - // project list is empty. Throw b_error on error. Note that the size of the - // result vector can be used to determine which project information caused - // the error. - // - // Unless you need information that may come from external modules - // (operations, meta-operations, etc), pass false as the ext_mods argument, - // which results in passing --no-external-modules to the build2 program and - // speeds up its execution. - // - // You can also specify the build2 verbosity level, command line callback - // (see process_run_callback() for details), build program search details, - // and additional options. - // - // Note that version_string is only parsed to standard_version if a project - // uses the version module. Otherwise, standard_version is empty. - // - struct b_project_info - { - using url_type = butl::url; - - struct subproject - { - project_name name; // Empty if anonymous. - dir_path path; // Relative to the project root. 
- }; - - project_name project; - std::string version_string; - standard_version version; - std::string summary; - url_type url; - - dir_path src_root; - dir_path out_root; - - dir_path amalgamation; // Relative to project root and - // empty if not amalgmated. - std::vector subprojects; - - std::vector operations; - std::vector meta_operations; - - std::vector modules; - }; - - using b_callback = void (const char* const args[], std::size_t n); - - LIBBUTL_SYMEXPORT void - b_info (std::vector& result, - const std::vector& projects, - bool ext_mods, - std::uint16_t verb = 1, - const std::function& cmd_callback = {}, - const path& program = path ("b"), - const dir_path& search_fallback = {}, - const std::vector& options = {}); - - // As above but retrieve information for a single project. - // - inline b_project_info - b_info (const dir_path& project, - bool ext_mods, - std::uint16_t verb = 1, - const std::function& cmd_callback = {}, - const path& program = path ("b"), - const dir_path& search_fallback = {}, - const std::vector& options = {}) - { - std::vector r; - b_info (r, - std::vector ({project}), - ext_mods, - verb, - cmd_callback, - program, - search_fallback, - options); - - return std::move (r[0]); - } -} diff --git a/libbutl/backtrace.cxx b/libbutl/backtrace.cxx index 8c9c6ae..c0cf472 100644 --- a/libbutl/backtrace.cxx +++ b/libbutl/backtrace.cxx @@ -1,9 +1,7 @@ // file : libbutl/backtrace.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include // We only enable backtrace during bootstrap if we can do it without any // complications of the build scripts/makefiles. @@ -35,30 +33,12 @@ #include -#ifndef __cpp_lib_modules_ts -#include - #ifdef LIBBUTL_BACKTRACE # include // unique_ptr # include // size_t #endif #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.backtrace; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#endif using namespace std; diff --git a/libbutl/backtrace.hxx b/libbutl/backtrace.hxx new file mode 100644 index 0000000..6afb6ea --- /dev/null +++ b/libbutl/backtrace.hxx @@ -0,0 +1,27 @@ +// file : libbutl/backtrace.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include + +#include + +namespace butl +{ + // Return the calling thread's backtrace or empty string if this + // functionality is not supported or an error has occurred. The exact + // backtrace format is implementation-defined; it normally contains a line + // with the binary name, address in that binary, and, if available, the + // function name for each stack frame. + // + // Currently this functionality is only available on Linux (with glibc), + // FreeBSD/NetBSD, and Mac OS. On the first two platforms the address + // can be mapped to the function name and, if built with debug info, to + // source location using the addr2line(1) utility: + // + // $ addr2line -f -C -e + // + LIBBUTL_SYMEXPORT std::string + backtrace () noexcept; +} diff --git a/libbutl/backtrace.mxx b/libbutl/backtrace.mxx deleted file mode 100644 index f5a63d5..0000000 --- a/libbutl/backtrace.mxx +++ /dev/null @@ -1,42 +0,0 @@ -// file : libbutl/backtrace.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.backtrace; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Return the calling thread's backtrace or empty string if this - // functionality is not supported or an error has occurred. 
The exact - // backtrace format is implementation-defined; it normally contains a line - // with the binary name, address in that binary, and, if available, the - // function name for each stack frame. - // - // Currently this functionality is only available on Linux (with glibc), - // FreeBSD/NetBSD, and Mac OS. On the first two platforms the address - // can be mapped to the function name and, if built with debug info, to - // source location using the addr2line(1) utility: - // - // $ addr2line -f -C -e - // - LIBBUTL_SYMEXPORT std::string - backtrace () noexcept; -} diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx index 527c6af..4466f24 100644 --- a/libbutl/base64.cxx +++ b/libbutl/base64.cxx @@ -1,37 +1,13 @@ // file : libbutl/base64.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include +#include #include // size_t #include #include #include // {istreambuf, ostreambuf, back_insert}_iterator #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.base64; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#endif using namespace std; diff --git a/libbutl/base64.hxx b/libbutl/base64.hxx new file mode 100644 index 0000000..f38e62f --- /dev/null +++ b/libbutl/base64.hxx @@ -0,0 +1,46 @@ +// file : libbutl/base64.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include + +#include + +namespace butl +{ + // Base64-encode a stream or a buffer. Split the output into 76 char-long + // lines (new line is the 77th). If reading from a stream, check if it has + // badbit, failbit, or eofbit set and throw invalid_argument if that's the + // case. Otherwise, set eofbit on completion. 
If writing to a stream, check + // if it has badbit, failbit, or eofbit set and throw invalid_argument if + // that's the case. Otherwise set badbit if the write operation fails. + // + LIBBUTL_SYMEXPORT void + base64_encode (std::ostream&, std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64_encode (std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64_encode (const std::vector&); + + // Base64-decode a stream or a string. Throw invalid_argument if the input + // is not a valid base64 representation. If reading from a stream, check if + // it has badbit, failbit, or eofbit set and throw invalid_argument if + // that's the case. Otherwise, set eofbit on completion. If writing to a + // stream, check if it has badbit, failbit, or eofbit set and throw + // invalid_argument if that's the case. Otherwise set badbit if the write + // operation fails. + // + LIBBUTL_SYMEXPORT void + base64_decode (std::ostream&, std::istream&); + + LIBBUTL_SYMEXPORT void + base64_decode (std::ostream&, const std::string&); + + LIBBUTL_SYMEXPORT std::vector + base64_decode (const std::string&); +} diff --git a/libbutl/base64.mxx b/libbutl/base64.mxx deleted file mode 100644 index 698b7e2..0000000 --- a/libbutl/base64.mxx +++ /dev/null @@ -1,62 +0,0 @@ -// file : libbutl/base64.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.base64; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Base64-encode a stream or a buffer. Split the output into 76 char-long - // lines (new line is the 77th). If reading from a stream, check if it has - // badbit, failbit, or eofbit set and throw invalid_argument if that's the - // case. Otherwise, set eofbit on completion. 
If writing to a stream, check - // if it has badbit, failbit, or eofbit set and throw invalid_argument if - // that's the case. Otherwise set badbit if the write operation fails. - // - LIBBUTL_SYMEXPORT void - base64_encode (std::ostream&, std::istream&); - - LIBBUTL_SYMEXPORT std::string - base64_encode (std::istream&); - - LIBBUTL_SYMEXPORT std::string - base64_encode (const std::vector&); - - // Base64-decode a stream or a string. Throw invalid_argument if the input - // is not a valid base64 representation. If reading from a stream, check if - // it has badbit, failbit, or eofbit set and throw invalid_argument if - // that's the case. Otherwise, set eofbit on completion. If writing to a - // stream, check if it has badbit, failbit, or eofbit set and throw - // invalid_argument if that's the case. Otherwise set badbit if the write - // operation fails. - // - LIBBUTL_SYMEXPORT void - base64_decode (std::ostream&, std::istream&); - - LIBBUTL_SYMEXPORT void - base64_decode (std::ostream&, const std::string&); - - LIBBUTL_SYMEXPORT std::vector - base64_decode (const std::string&); -} diff --git a/libbutl/buildfile b/libbutl/buildfile index 5398f71..ff7344e 100644 --- a/libbutl/buildfile +++ b/libbutl/buildfile @@ -1,22 +1,13 @@ # file : libbutl/buildfile # license : MIT; see accompanying LICENSE file -# This library was modularized using the Modules TS semantics (with support -# for dual, module/header consumption) which was subsequently partially -# dismantled. We, however, kept some of the changes in anticipation that they -# would be useful when attempting to modularize using the merged modules -# semantics. Specifically, there are currently headers with both .mxx and .hxx -# extensions and the code is littered with the `#if __cpp_[lib_]modules_ts` -# blocks. Note that it's important for the auto-generated header support -# that the default extension for hxx{} is .hxx. -# # @@ If/when going back to using mxx{}, make sure to cleanup explicit .mxx. 
# -lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io \ - -win32-utility \ - -version \ - -builtin-options} \ - hxx{**.mxx} {hxx}{version} {hxx ixx cxx}{builtin-options} +lib{butl}: {hxx ixx txx cxx}{** -uuid-* +uuid-io \ + -win32-utility \ + -version \ + -builtin-options} \ + {hxx}{version} {hxx ixx cxx}{builtin-options} tclass = $cxx.target.class tsys = $cxx.target.system diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index 61df568..4d6b60d 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -1,28 +1,16 @@ // file : libbutl/builtin.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #ifdef _WIN32 # include #endif -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include // move(), forward() -#include // uint*_t -#include - #include #include #include +#include #include #include #include // strtoull() @@ -30,41 +18,16 @@ #include #include -#endif +#include +#include +#include // operator<<(ostream,exception), + // throw_generic_error() +#include +#include +#include #include -#ifdef __cpp_modules_ts -module butl.builtin; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.threading; -#endif -import butl.path; -import butl.fdstream; -import butl.timestamp; -#endif - -import butl.regex; -import butl.path_io; -import butl.utility; // operator<<(ostream,exception), - // throw_generic_error() -import butl.optional; -import butl.filesystem; -import butl.small_vector; -#else -#include -#include -#include -#include -#include -#include -#endif - // Strictly speaking a builtin which reads/writes from/to standard streams // must be asynchronous so that the caller can communicate with it through // pipes without being blocked on I/O operations. 
However, as an optimization, diff --git a/libbutl/builtin.hxx b/libbutl/builtin.hxx new file mode 100644 index 0000000..2398c84 --- /dev/null +++ b/libbutl/builtin.hxx @@ -0,0 +1,220 @@ +// file : libbutl/builtin.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include +#include +#include +#include // unique_ptr +#include // size_t +#include // move() +#include // uint8_t +#include +#include + +#include +#include +#include + +#include + +namespace butl +{ + // A process/thread-like object representing a running builtin. + // + // For now, instead of allocating the result storage dynamically, we expect + // it to be provided by the caller (allocating it dynamically would be + // wasteful for synchronous builtins). + // + class LIBBUTL_SYMEXPORT builtin + { + public: + // Wait for the builtin to complete and return its exit code. This + // function can be called multiple times. + // + std::uint8_t + wait (); + + // Return the same result as wait() if the builtin has already completed + // and nullopt otherwise. + // + optional + try_wait (); + + // Wait for the builtin to complete for up to the specified time duration. + // Return the same result as wait() if the builtin has completed in this + // timeframe and nullopt otherwise. + // + template + optional + timed_wait (const std::chrono::duration&); + + ~builtin () {if (state_ != nullptr) state_->thread.join ();} + + public: + struct async_state + { + bool finished = false; + std::mutex mutex; + std::condition_variable condv; + std::thread thread; + + // Note that we can't use std::function as an argument type to get rid + // of the template since std::function can only be instantiated with a + // copy-constructible function and that's too restrictive for us (won't + // be able to capture auto_fd by value in a lambda, etc). 
+ // + template + explicit + async_state (F); + }; + + builtin (std::uint8_t& r, std::unique_ptr&& s = nullptr) + : result_ (r), state_ (move (s)) {} + + builtin (builtin&&) = default; + + private: + std::uint8_t& result_; + std::unique_ptr state_; + }; + + // Builtin execution callbacks that can be used for checking/handling the + // filesystem entries being acted upon (enforcing that they are sub-entries + // of some "working" directory, registering cleanups for new entries, etc) + // and for providing custom implementations for some functions used by + // builtins. + // + // Note that the filesystem paths passed to the callbacks are absolute and + // normalized with directories distinguished from non-directories based on + // the lexical representation (presence of the trailing directory separator; + // use path::to_directory() to check). + // + // Also note that builtins catch any exceptions that may be thrown by the + // callbacks and, if that's the case, issue diagnostics and exit with the + // non-zero status. + // + struct builtin_callbacks + { + // If specified, called before (pre is true) and after (pre is false) a + // new filesystem entry is created or an existing one is re-created or + // updated. + // + using create_hook = void (const path&, bool pre); + + std::function create; + + // If specified, called before (pre is true) and after (pre is false) a + // filesystem entry is moved. The force argument is true if the builtin is + // executed with the --force option. + // + using move_hook = void (const path& from, + const path& to, + bool force, + bool pre); + + std::function move; + + // If specified, called before (pre is true) and after (pre is false) a + // filesystem entry is removed. The force argument is true if the builtin + // is executed with the --force option. 
+ // + using remove_hook = void (const path&, bool force, bool pre); + + std::function remove; + + // If specified, called on encountering an unknown option passing the + // argument list and the position of the option in question. Return the + // number of parsed arguments. + // + using parse_option_function = + std::size_t (const std::vector&, std::size_t); + + std::function parse_option; + + // If specified, called by the sleep builtin instead of the default + // implementation. + // + using sleep_function = void (const duration&); + + std::function sleep; + + explicit + builtin_callbacks (std::function c = {}, + std::function m = {}, + std::function r = {}, + std::function p = {}, + std::function s = {}) + : create (std::move (c)), + move (std::move (m)), + remove (std::move (r)), + parse_option (std::move (p)), + sleep (std::move (s)) {} + + explicit + builtin_callbacks (std::function sl) + : sleep (std::move (sl)) {} + }; + + // Start a builtin command. Use the current process' standard streams for + // the unopened in, out, and err file descriptors. Use the process' current + // working directory unless an alternative is specified. Throw + // std::system_error on failure. + // + // Note that unlike argc/argv, args don't include the program name. + // + using builtin_function = builtin (std::uint8_t& result, + const std::vector& args, + auto_fd in, auto_fd out, auto_fd err, + const dir_path& cwd, + const builtin_callbacks&); + + // Builtin function and weight. + // + // The weight between 0 and 2 reflects the builtin's contribution to the + // containing script semantics with 0 being the lowest/ignore. Current + // mapping is as follows: + // + // 0 - non-contributing (true, false) + // 1 - non-creative (rm, rmdir, sleep, test) + // 2 - creative (any builtin that may produce output) + // + // If the function is NULL, then the builtin has an external implementation + // and should be executed by running the program with this name. 
+ // + struct builtin_info + { + builtin_function* function; + uint8_t weight; + }; + + class builtin_map: public std::map + { + public: + using base = std::map; + using base::base; + + // Return NULL if not a builtin. + // + const builtin_info* + find (const std::string&) const; + }; + + // Asynchronously run a function as if it was a builtin. The function must + // have the std::uint8_t() signature and not throw exceptions. + // + // Note that using std::function as an argument type would be too + // restrictive (see above). + // + template + builtin + pseudo_builtin (std::uint8_t&, F); + + LIBBUTL_SYMEXPORT extern const builtin_map builtins; +} + +#include diff --git a/libbutl/builtin.mxx b/libbutl/builtin.mxx deleted file mode 100644 index a99d6f4..0000000 --- a/libbutl/builtin.mxx +++ /dev/null @@ -1,239 +0,0 @@ -// file : libbutl/builtin.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include -#include -#include // unique_ptr -#include // size_t -#include // move() -#include // uint8_t -#include -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.builtin; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.threading; -#endif -import butl.path; -import butl.fdstream; -import butl.timestamp; -#else -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // A process/thread-like object representing a running builtin. - // - // For now, instead of allocating the result storage dynamically, we expect - // it to be provided by the caller (allocating it dynamically would be - // wasteful for synchronous builtins). - // - class LIBBUTL_SYMEXPORT builtin - { - public: - // Wait for the builtin to complete and return its exit code. This - // function can be called multiple times. 
- // - std::uint8_t - wait (); - - // Return the same result as wait() if the builtin has already completed - // and nullopt otherwise. - // - optional - try_wait (); - - // Wait for the builtin to complete for up to the specified time duration. - // Return the same result as wait() if the builtin has completed in this - // timeframe and nullopt otherwise. - // - template - optional - timed_wait (const std::chrono::duration&); - - ~builtin () {if (state_ != nullptr) state_->thread.join ();} - - public: - struct async_state - { - bool finished = false; - std::mutex mutex; - std::condition_variable condv; - std::thread thread; - - // Note that we can't use std::function as an argument type to get rid - // of the template since std::function can only be instantiated with a - // copy-constructible function and that's too restrictive for us (won't - // be able to capture auto_fd by value in a lambda, etc). - // - template - explicit - async_state (F); - }; - - builtin (std::uint8_t& r, std::unique_ptr&& s = nullptr) - : result_ (r), state_ (move (s)) {} - - builtin (builtin&&) = default; - - private: - std::uint8_t& result_; - std::unique_ptr state_; - }; - - // Builtin execution callbacks that can be used for checking/handling the - // filesystem entries being acted upon (enforcing that they are sub-entries - // of some "working" directory, registering cleanups for new entries, etc) - // and for providing custom implementations for some functions used by - // builtins. - // - // Note that the filesystem paths passed to the callbacks are absolute and - // normalized with directories distinguished from non-directories based on - // the lexical representation (presence of the trailing directory separator; - // use path::to_directory() to check). - // - // Also note that builtins catch any exceptions that may be thrown by the - // callbacks and, if that's the case, issue diagnostics and exit with the - // non-zero status. 
- // - struct builtin_callbacks - { - // If specified, called before (pre is true) and after (pre is false) a - // new filesystem entry is created or an existing one is re-created or - // updated. - // - using create_hook = void (const path&, bool pre); - - std::function create; - - // If specified, called before (pre is true) and after (pre is false) a - // filesystem entry is moved. The force argument is true if the builtin is - // executed with the --force option. - // - using move_hook = void (const path& from, - const path& to, - bool force, - bool pre); - - std::function move; - - // If specified, called before (pre is true) and after (pre is false) a - // filesystem entry is removed. The force argument is true if the builtin - // is executed with the --force option. - // - using remove_hook = void (const path&, bool force, bool pre); - - std::function remove; - - // If specified, called on encountering an unknown option passing the - // argument list and the position of the option in question. Return the - // number of parsed arguments. - // - using parse_option_function = - std::size_t (const std::vector&, std::size_t); - - std::function parse_option; - - // If specified, called by the sleep builtin instead of the default - // implementation. - // - using sleep_function = void (const duration&); - - std::function sleep; - - explicit - builtin_callbacks (std::function c = {}, - std::function m = {}, - std::function r = {}, - std::function p = {}, - std::function s = {}) - : create (std::move (c)), - move (std::move (m)), - remove (std::move (r)), - parse_option (std::move (p)), - sleep (std::move (s)) {} - - explicit - builtin_callbacks (std::function sl) - : sleep (std::move (sl)) {} - }; - - // Start a builtin command. Use the current process' standard streams for - // the unopened in, out, and err file descriptors. Use the process' current - // working directory unless an alternative is specified. Throw - // std::system_error on failure. 
- // - // Note that unlike argc/argv, args don't include the program name. - // - using builtin_function = builtin (std::uint8_t& result, - const std::vector& args, - auto_fd in, auto_fd out, auto_fd err, - const dir_path& cwd, - const builtin_callbacks&); - - // Builtin function and weight. - // - // The weight between 0 and 2 reflects the builtin's contribution to the - // containing script semantics with 0 being the lowest/ignore. Current - // mapping is as follows: - // - // 0 - non-contributing (true, false) - // 1 - non-creative (rm, rmdir, sleep, test) - // 2 - creative (any builtin that may produce output) - // - // If the function is NULL, then the builtin has an external implementation - // and should be executed by running the program with this name. - // - struct builtin_info - { - builtin_function* function; - uint8_t weight; - }; - - class builtin_map: public std::map - { - public: - using base = std::map; - using base::base; - - // Return NULL if not a builtin. - // - const builtin_info* - find (const std::string&) const; - }; - - // Asynchronously run a function as if it was a builtin. The function must - // have the std::uint8_t() signature and not throw exceptions. - // - // Note that using std::function as an argument type would be too - // restrictive (see above). - // - template - builtin - pseudo_builtin (std::uint8_t&, F); - - LIBBUTL_SYMEXPORT extern const builtin_map builtins; -} - -#include diff --git a/libbutl/char-scanner.hxx b/libbutl/char-scanner.hxx new file mode 100644 index 0000000..b7ea14b --- /dev/null +++ b/libbutl/char-scanner.hxx @@ -0,0 +1,246 @@ +// file : libbutl/char-scanner.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // char_traits +#include +#include // size_t +#include // uint64_t +#include // INT_* +#include // pair, make_pair() +#include + +#include + +#include + +namespace butl +{ + // Refer to utf8_validator for details. 
+ // + struct noop_validator + { + std::pair + validate (char) {return std::make_pair (true, true);} + + std::pair + validate (char c, std::string&) {return validate (c);} + }; + + // Low-level character stream scanner. Normally used as a base for + // higher-level lexers. + // + template + class char_scanner + { + public: + using validator_type = V; + static constexpr const std::size_t unget_depth = N; + + // If the crlf argument is true, then recognize Windows newlines (0x0D + // 0x0A) and convert them to just '\n' (0x0A). Note that a standalone + // 0x0D is treated "as if" it was followed by 0x0A and multiple 0x0D + // are treated as one. + // + // Note also that if the stream happens to be bufstreambuf-based, then it + // includes a number of optimizations that assume nobody else is messing + // with the stream. + // + // The line and position arguments can be used to override the start line + // and position in the stream (useful when re-scanning data saved with the + // save_* facility). + // + char_scanner (std::istream&, + bool crlf = true, + std::uint64_t line = 1, + std::uint64_t position = 0); + + char_scanner (std::istream&, + validator_type, + bool crlf = true, + std::uint64_t line = 1, + std::uint64_t position = 0); + + char_scanner (const char_scanner&) = delete; + char_scanner& operator= (const char_scanner&) = delete; + + // Scanner interface. + // + public: + + // Extended character. It includes line/column/position information and is + // capable of representing EOF and invalid characters. + // + // Note that implicit conversion of EOF/invalid to char_type results in + // NUL character (which means in most cases it is safe to compare xchar to + // char without checking for EOF). + // + class xchar + { + public: + using traits_type = std::char_traits; + using int_type = traits_type::int_type; + using char_type = traits_type::char_type; + + int_type value; + + // Note that the column is of the codepoint this byte belongs to. 
+ // + std::uint64_t line; + std::uint64_t column; + + // Logical character position (see bufstreambuf for details on the + // logical part) if the scanned stream is bufstreambuf-based and always + // zero otherwise. + // + std::uint64_t position; + + static int_type + invalid () {return traits_type::eof () != INT_MIN ? INT_MIN : INT_MAX;} + + operator char_type () const + { + return value != traits_type::eof () && value != invalid () + ? static_cast (value) + : char_type (0); + } + + xchar (int_type v = 0, + std::uint64_t l = 0, + std::uint64_t c = 0, + std::uint64_t p = 0) + : value (v), line (l), column (c), position (p) {} + }; + + // Note that if any of the get() or peek() functions return an invalid + // character, then the scanning has failed and none of them should be + // called again. + + xchar + get (); + + // As above but in case of an invalid character also return the + // description of why it is invalid. + // + xchar + get (std::string& what); + + void + get (const xchar& peeked); // Get previously peeked character (faster). + + void + unget (const xchar&); + + // Note that if there is an "ungot" character, peek() will return that. + // + xchar + peek (); + + // As above but in case of an invalid character also return the + // description of why it is invalid. + // + xchar + peek (std::string& what); + + // Tests. In the future we can add tests line alpha(), alnum(), etc. + // + static bool + eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} + + static bool + invalid (const xchar& c) {return c.value == xchar::invalid ();} + + // Line, column and position of the next character to be extracted from + // the stream by peek() or get(). + // + std::uint64_t line; + std::uint64_t column; + std::uint64_t position; + + // Ability to save raw data as it is being scanned. Note that the + // character is only saved when it is got, not peeked. 
+ // + public: + void + save_start (std::string& b) + { + assert (save_ == nullptr); + save_ = &b; + } + + void + save_stop () + { + assert (save_ != nullptr); + save_ = nullptr; + } + + struct save_guard + { + explicit + save_guard (char_scanner& s, std::string& b): s_ (&s) {s.save_start (b);} + + void + stop () {if (s_ != nullptr) {s_->save_stop (); s_ = nullptr;}} + + ~save_guard () {stop ();} + + private: + char_scanner* s_; + }; + + protected: + using int_type = typename xchar::int_type; + using char_type = typename xchar::char_type; + + int_type + peek_ (); + + void + get_ (); + + std::uint64_t + pos_ () const; + + xchar + get (std::string* what); + + xchar + peek (std::string* what); + + protected: + std::istream& is_; + + validator_type val_; + bool decoded_ = true; // The peeked character is last byte of sequence. + bool validated_ = false; // The peeked character has been validated. + + // Note that if you are reading from the buffer directly, then it is also + // your responsibility to call the validator and save the data (see + // save_*(). + // + // Besides that, make sure that the peek() call preceding the scan is + // followed by the get() call (see validated_, decoded_, and unpeek_ for + // the hairy details; realistically, you would probably only direct-scan + // ASCII fragments). + // + bufstreambuf* buf_; // NULL if not bufstreambuf-based. 
+ const char_type* gptr_; + const char_type* egptr_; + + std::string* save_ = nullptr; + + bool crlf_; + bool eos_ = false; + + std::size_t ungetn_ = 0; + xchar ungetb_[N]; + + bool unpeek_ = false; + xchar unpeekc_ = '\0'; + }; +} + +#include +#include diff --git a/libbutl/char-scanner.mxx b/libbutl/char-scanner.mxx deleted file mode 100644 index 27f692b..0000000 --- a/libbutl/char-scanner.mxx +++ /dev/null @@ -1,261 +0,0 @@ -// file : libbutl/char-scanner.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include // char_traits -#include // size_t -#include // uint64_t -#include // INT_* -#include // pair, make_pair() -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.char_scanner; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Refer to utf8_validator for details. - // - struct noop_validator - { - std::pair - validate (char) {return std::make_pair (true, true);} - - std::pair - validate (char c, std::string&) {return validate (c);} - }; - - // Low-level character stream scanner. Normally used as a base for - // higher-level lexers. - // - template - class char_scanner - { - public: - using validator_type = V; - static constexpr const std::size_t unget_depth = N; - - // If the crlf argument is true, then recognize Windows newlines (0x0D - // 0x0A) and convert them to just '\n' (0x0A). Note that a standalone - // 0x0D is treated "as if" it was followed by 0x0A and multiple 0x0D - // are treated as one. - // - // Note also that if the stream happens to be bufstreambuf-based, then it - // includes a number of optimizations that assume nobody else is messing - // with the stream. 
- // - // The line and position arguments can be used to override the start line - // and position in the stream (useful when re-scanning data saved with the - // save_* facility). - // - char_scanner (std::istream&, - bool crlf = true, - std::uint64_t line = 1, - std::uint64_t position = 0); - - char_scanner (std::istream&, - validator_type, - bool crlf = true, - std::uint64_t line = 1, - std::uint64_t position = 0); - - char_scanner (const char_scanner&) = delete; - char_scanner& operator= (const char_scanner&) = delete; - - // Scanner interface. - // - public: - - // Extended character. It includes line/column/position information and is - // capable of representing EOF and invalid characters. - // - // Note that implicit conversion of EOF/invalid to char_type results in - // NUL character (which means in most cases it is safe to compare xchar to - // char without checking for EOF). - // - class xchar - { - public: - using traits_type = std::char_traits; - using int_type = traits_type::int_type; - using char_type = traits_type::char_type; - - int_type value; - - // Note that the column is of the codepoint this byte belongs to. - // - std::uint64_t line; - std::uint64_t column; - - // Logical character position (see bufstreambuf for details on the - // logical part) if the scanned stream is bufstreambuf-based and always - // zero otherwise. - // - std::uint64_t position; - - static int_type - invalid () {return traits_type::eof () != INT_MIN ? INT_MIN : INT_MAX;} - - operator char_type () const - { - return value != traits_type::eof () && value != invalid () - ? static_cast (value) - : char_type (0); - } - - xchar (int_type v = 0, - std::uint64_t l = 0, - std::uint64_t c = 0, - std::uint64_t p = 0) - : value (v), line (l), column (c), position (p) {} - }; - - // Note that if any of the get() or peek() functions return an invalid - // character, then the scanning has failed and none of them should be - // called again. 
- - xchar - get (); - - // As above but in case of an invalid character also return the - // description of why it is invalid. - // - xchar - get (std::string& what); - - void - get (const xchar& peeked); // Get previously peeked character (faster). - - void - unget (const xchar&); - - // Note that if there is an "ungot" character, peek() will return that. - // - xchar - peek (); - - // As above but in case of an invalid character also return the - // description of why it is invalid. - // - xchar - peek (std::string& what); - - // Tests. In the future we can add tests line alpha(), alnum(), etc. - // - static bool - eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} - - static bool - invalid (const xchar& c) {return c.value == xchar::invalid ();} - - // Line, column and position of the next character to be extracted from - // the stream by peek() or get(). - // - std::uint64_t line; - std::uint64_t column; - std::uint64_t position; - - // Ability to save raw data as it is being scanned. Note that the - // character is only saved when it is got, not peeked. - // - public: - void - save_start (std::string& b) - { - assert (save_ == nullptr); - save_ = &b; - } - - void - save_stop () - { - assert (save_ != nullptr); - save_ = nullptr; - } - - struct save_guard - { - explicit - save_guard (char_scanner& s, std::string& b): s_ (&s) {s.save_start (b);} - - void - stop () {if (s_ != nullptr) {s_->save_stop (); s_ = nullptr;}} - - ~save_guard () {stop ();} - - private: - char_scanner* s_; - }; - - protected: - using int_type = typename xchar::int_type; - using char_type = typename xchar::char_type; - - int_type - peek_ (); - - void - get_ (); - - std::uint64_t - pos_ () const; - - xchar - get (std::string* what); - - xchar - peek (std::string* what); - - protected: - std::istream& is_; - - validator_type val_; - bool decoded_ = true; // The peeked character is last byte of sequence. - bool validated_ = false; // The peeked character has been validated. 
- - // Note that if you are reading from the buffer directly, then it is also - // your responsibility to call the validator and save the data (see - // save_*(). - // - // Besides that, make sure that the peek() call preceding the scan is - // followed by the get() call (see validated_, decoded_, and unpeek_ for - // the hairy details; realistically, you would probably only direct-scan - // ASCII fragments). - // - bufstreambuf* buf_; // NULL if not bufstreambuf-based. - const char_type* gptr_; - const char_type* egptr_; - - std::string* save_ = nullptr; - - bool crlf_; - bool eos_ = false; - - std::size_t ungetn_ = 0; - xchar ungetb_[N]; - - bool unpeek_ = false; - xchar unpeekc_ = '\0'; - }; -} - -#include -#include diff --git a/libbutl/char-scanner.txx b/libbutl/char-scanner.txx index 63389f0..6e0063a 100644 --- a/libbutl/char-scanner.txx +++ b/libbutl/char-scanner.txx @@ -1,9 +1,7 @@ // file : libbutl/char-scanner.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // move -#endif namespace butl { diff --git a/libbutl/command.cxx b/libbutl/command.cxx index c23dfd5..6f8c0f1 100644 --- a/libbutl/command.cxx +++ b/libbutl/command.cxx @@ -1,48 +1,18 @@ // file : libbutl/command.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include +#include #include // ios::failure #include +#include #include // move() #include // invalid_argument #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.command; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.process; -import butl.optional; -#endif - -import butl.builtin; -import butl.fdstream; -import butl.string_parser; -#else -#include -#include -#include -#endif + +#include +#include +#include using namespace std; diff --git a/libbutl/command.hxx b/libbutl/command.hxx new file mode 100644 index 0000000..fb7258f --- /dev/null +++ b/libbutl/command.hxx @@ -0,0 +1,107 @@ +// file : libbutl/command.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // size_t +#include + +#include +#include + +#include + +namespace butl +{ + // Run a process or a builtin, interpreting the command line as + // whitespace-separated, potentially quoted program path/builtin name, + // arguments, and redirects. Throw std::invalid_argument on the parsing + // error, ios::failure on the underlying OS error, process_error on the + // process running error and std::system_error on the builtin running error. + // + // To run a system utility rather than a builtin prefix its name with `^`, + // for example: + // + // ^cat --squeeze-blank file + // + // The process environment path is unused and must point to the empty + // process path. + // + // Currently only the following stdout redirects are supported: + // + // >file # Overwrite file. + // >>file # Append to file. + // + // In particular, the file descriptor cannot be specified. The file path can + // optionally be separated from '>' by whitespaces. Note that redirects are + // distinguished from arguments by the presence of leading '>' and prior to + // possible substitutions (so the redirect character cannot be the result of + // a substitution; see below). + // + // The relative redirect file paths are completed using the command + // current working directory. Note that if it is altered via the process + // environment, then the new value is used. 
+ // + // The command line elements (program, arguments, etc) may optionally + // contain substitutions - variable names enclosed with the substitution + // symbol ('@' by default) - which are replaced with the corresponding + // variable values to produce the actual command. Variable names must not + // contain whitespaces and an attempt to substitute an unknown or a + // malformed variable is an error. Double substitution character ('@@' by + // default) is an escape sequence. + // + // If the variable map is absent, then '@' has no special meaning and is + // treated as a regular character. + // + // The callback function, if specified, is called prior to running the + // command process with the substituted command elements and including + // redirects which will be in the "canonical" form (single argument without + // space after '>'). The callback can be used, for example, for tracing the + // resulting command line, etc. + // + using command_substitution_map = std::map; + using command_callback = void (const char* const args[], std::size_t n); + + LIBBUTL_SYMEXPORT process_exit + command_run (const std::string& command, + const optional& = nullopt, + const optional& = nullopt, + char subst = '@', + const std::function& = {}); + + // Reusable substitution utility functions. + // + // Unlike command_run(), these support different opening and closing + // substitution characters (e.g., ). Note that unmatched closing + // characters are treated literally and there is no support for their + // escaping (which would only be necessary if we needed to support variable + // names containing the closing character). + + // Perform substitutions in a string. The second argument should be the + // position of the openning substitution character in the passed string. + // Throw invalid_argument for a malformed substitution or an unknown + // variable name. 
+ // + LIBBUTL_SYMEXPORT std::string + command_substitute (const std::string&, std::size_t, + const command_substitution_map&, + char open, char close); + + // As above but using a callback instead of a map. + // + // Specifically, on success, the callback should substitute the specified + // variable in out by appending its value and returning true. On failure, + // the callback can either throw invalid_argument or return false, in which + // case the standard "unknown substitution variable ..." exception will be + // thrown. + // + using command_substitution_callback = + bool (const std::string& var, std::string& out); + + LIBBUTL_SYMEXPORT std::string + command_substitute (const std::string&, std::size_t, + const std::function&, + char open, char close); +} diff --git a/libbutl/command.mxx b/libbutl/command.mxx deleted file mode 100644 index 143d406..0000000 --- a/libbutl/command.mxx +++ /dev/null @@ -1,122 +0,0 @@ -// file : libbutl/command.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // size_t -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.command; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.process; -import butl.optional; -#else -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Run a process or a builtin, interpreting the command line as - // whitespace-separated, potentially quoted program path/builtin name, - // arguments, and redirects. Throw std::invalid_argument on the parsing - // error, ios::failure on the underlying OS error, process_error on the - // process running error and std::system_error on the builtin running error. 
- // - // To run a system utility rather than a builtin prefix its name with `^`, - // for example: - // - // ^cat --squeeze-blank file - // - // The process environment path is unused and must point to the empty - // process path. - // - // Currently only the following stdout redirects are supported: - // - // >file # Overwrite file. - // >>file # Append to file. - // - // In particular, the file descriptor cannot be specified. The file path can - // optionally be separated from '>' by whitespaces. Note that redirects are - // distinguished from arguments by the presence of leading '>' and prior to - // possible substitutions (so the redirect character cannot be the result of - // a substitution; see below). - // - // The relative redirect file paths are completed using the command - // current working directory. Note that if it is altered via the process - // environment, then the new value is used. - // - // The command line elements (program, arguments, etc) may optionally - // contain substitutions - variable names enclosed with the substitution - // symbol ('@' by default) - which are replaced with the corresponding - // variable values to produce the actual command. Variable names must not - // contain whitespaces and an attempt to substitute an unknown or a - // malformed variable is an error. Double substitution character ('@@' by - // default) is an escape sequence. - // - // If the variable map is absent, then '@' has no special meaning and is - // treated as a regular character. - // - // The callback function, if specified, is called prior to running the - // command process with the substituted command elements and including - // redirects which will be in the "canonical" form (single argument without - // space after '>'). The callback can be used, for example, for tracing the - // resulting command line, etc. 
- // - using command_substitution_map = std::map; - using command_callback = void (const char* const args[], std::size_t n); - - LIBBUTL_SYMEXPORT process_exit - command_run (const std::string& command, - const optional& = nullopt, - const optional& = nullopt, - char subst = '@', - const std::function& = {}); - - // Reusable substitution utility functions. - // - // Unlike command_run(), these support different opening and closing - // substitution characters (e.g., ). Note that unmatched closing - // characters are treated literally and there is no support for their - // escaping (which would only be necessary if we needed to support variable - // names containing the closing character). - - // Perform substitutions in a string. The second argument should be the - // position of the openning substitution character in the passed string. - // Throw invalid_argument for a malformed substitution or an unknown - // variable name. - // - LIBBUTL_SYMEXPORT std::string - command_substitute (const std::string&, std::size_t, - const command_substitution_map&, - char open, char close); - - // As above but using a callback instead of a map. - // - // Specifically, on success, the callback should substitute the specified - // variable in out by appending its value and returning true. On failure, - // the callback can either throw invalid_argument or return false, in which - // case the standard "unknown substitution variable ..." exception will be - // thrown. 
- // - using command_substitution_callback = - bool (const std::string& var, std::string& out); - - LIBBUTL_SYMEXPORT std::string - command_substitute (const std::string&, std::size_t, - const std::function&, - char open, char close); -} diff --git a/libbutl/const-ptr.hxx b/libbutl/const-ptr.hxx new file mode 100644 index 0000000..f0ff706 --- /dev/null +++ b/libbutl/const-ptr.hxx @@ -0,0 +1,78 @@ +// file : libbutl/const-ptr.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // nullptr_t + +#include + +namespace butl +{ + // Const-propagating pointer. + // + // It has the semantics of a raw pointer except that it passes on its own + // const-ness to the pointed-to object. In other words, if you have a const + // instance of this pointer, then you can only obtain a const raw pointer to + // the underlying object. It is normally used as a data member, for example: + // + // struct tree + // { + // const_ptr left; + // const_ptr right; + // + // void modify (); + // }; + // + // tree* x = ...; + // const tree* y = ...; + // + // x.left->modify (); // Ok. + // y.left->modify (); // Error. + // + // Note that due to this semantics, copy construction/assignment requires + // a non-const instance of const_ptr. + // + // Note that this type is standard layout (which means we can reinterpret + // it as a raw pointer). + // + // Known drawbacks/issues: + // + // 1. Cannot do static_cast (x.left). 
+ // + template + class const_ptr + { + public: + const_ptr () = default; + explicit const_ptr (T* p): p_ (p) {} + const_ptr (std::nullptr_t): p_ (nullptr) {} + + const_ptr& operator= (T* p) {p_ = p; return *this;} + const_ptr& operator= (std::nullptr_t) {p_ = nullptr; return *this;} + + template explicit const_ptr (T1* p): p_ (p) {} + template const_ptr (const_ptr& p): p_ (p.p_) {} + + template const_ptr& operator= (T1* p) {p_ = p; return *this;} + template const_ptr& operator= (const_ptr& p) { + p_ = p.p_; return *this;} + + T* operator-> () {return p_;} + const T* operator-> () const {return p_;} + + T& operator* () {return *p_;} + const T& operator* () const {return *p_;} + + operator T* () {return p_;} + operator const T* () const {return p_;} + + explicit operator bool () const {return p_ != nullptr;} + + T* get () {return p_;} + const T* get () const {return p_;} + + private: + T* p_; + }; +} diff --git a/libbutl/const-ptr.mxx b/libbutl/const-ptr.mxx deleted file mode 100644 index 343ecf6..0000000 --- a/libbutl/const-ptr.mxx +++ /dev/null @@ -1,93 +0,0 @@ -// file : libbutl/const-ptr.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include // nullptr_t -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.const_ptr; -#ifdef __cpp_lib_modules_ts -import std.core; // @@ MOD std.fundamental. -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Const-propagating pointer. - // - // It has the semantics of a raw pointer except that it passes on its own - // const-ness to the pointed-to object. In other words, if you have a const - // instance of this pointer, then you can only obtain a const raw pointer to - // the underlying object. 
It is normally used as a data member, for example: - // - // struct tree - // { - // const_ptr left; - // const_ptr right; - // - // void modify (); - // }; - // - // tree* x = ...; - // const tree* y = ...; - // - // x.left->modify (); // Ok. - // y.left->modify (); // Error. - // - // Note that due to this semantics, copy construction/assignment requires - // a non-const instance of const_ptr. - // - // Note that this type is standard layout (which means we can reinterpret - // it as a raw pointer). - // - // Known drawbacks/issues: - // - // 1. Cannot do static_cast (x.left). - // - template - class const_ptr - { - public: - const_ptr () = default; - explicit const_ptr (T* p): p_ (p) {} - const_ptr (std::nullptr_t): p_ (nullptr) {} - - const_ptr& operator= (T* p) {p_ = p; return *this;} - const_ptr& operator= (std::nullptr_t) {p_ = nullptr; return *this;} - - template explicit const_ptr (T1* p): p_ (p) {} - template const_ptr (const_ptr& p): p_ (p.p_) {} - - template const_ptr& operator= (T1* p) {p_ = p; return *this;} - template const_ptr& operator= (const_ptr& p) { - p_ = p.p_; return *this;} - - T* operator-> () {return p_;} - const T* operator-> () const {return p_;} - - T& operator* () {return *p_;} - const T& operator* () const {return *p_;} - - operator T* () {return p_;} - operator const T* () const {return p_;} - - explicit operator bool () const {return p_ != nullptr;} - - T* get () {return p_;} - const T* get () const {return p_;} - - private: - T* p_; - }; -} diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx index ac3d0cb..addba81 100644 --- a/libbutl/curl.cxx +++ b/libbutl/curl.cxx @@ -1,41 +1,13 @@ // file : libbutl/curl.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. +#include #include - -#ifndef __cpp_lib_modules_ts -#include - #include // move() #include // invalid_argument -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -module butl.curl; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.process; -import butl.fdstream; -import butl.small_vector; -#endif - -import butl.utility; // icasecmp() -#else -#include -#endif + +#include using namespace std; diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx new file mode 100644 index 0000000..cd4ebd0 --- /dev/null +++ b/libbutl/curl.hxx @@ -0,0 +1,171 @@ +// file : libbutl/curl.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include + +namespace butl +{ + // Perform a method (GET, POST, PUT) on a URL using the curl(1) program. + // Throw process_error and io_error (both derive from system_error) in case + // of errors. + // + // The I (in) and O (out) can be of the following types/values: + // + // nullfd Signal that no input/output is expected. + // + // path Read input/write output from/to a file. If the special "-" + // value is used, then instead input is connected to the curl::out + // ofdstream member and output -- to the curl::in ifdstream member. + // Note that the argument type should be path, not string (i.e., + // pass path("-")). + // + // other Forwarded as is to process_start(). Normally either int or + // auto_fd. + // + // For example: + // + // curl (nullfd, // No input expected for GET. + // path ("-"), // Write response to curl::in. + // 2, + // curl::get, + // "http://example.org"); + // + // curl (path ("-"), // Read request from curl::out. + // path::temp_path (), // Write result to a file. + // 2, + // curl::post, + // "http://example.org"); + // + // curl (nullfd, + // fdopen_null (), // Write result to /dev/null. + // 2, + // curl::get, + // "tftp://localhost/foo"); + // + // Typical usage: + // + // try + // { + // curl c (nullfd, // No input expected. + // path ("-"), // Output to curl::in. 
+ // 2, // Diagnostics to stderr. + // curl::get, // GET method. + // "https://example.org", + // "-A", "foobot/1.2.3"); // Additional curl(1) options. + // + // for (string s; getline (c.in, s); ) + // cout << s << endl; + // + // c.in.close (); + // + // if (!c.wait ()) + // ... // curl returned non-zero status. + // } + // catch (const std::system_error& e) + // { + // cerr << "curl error: " << e << endl; + // } + // + // Notes: + // + // 1. If opened, in/out streams are in the binary mode. + // + // 2. If opened, in/out must be explicitly closed before calling wait(). + // + // 3. Only binary data HTTP POST is currently supported (the --data-binary + // curl option). + // + class LIBBUTL_SYMEXPORT curl: public process + { + public: + enum method_type {get, put, post}; + + ifdstream in; + ofdstream out; + + template + curl (I&& in, + O&& out, + E&& err, + method_type, + const std::string& url, + A&&... options); + + // Version with the command line callback (see process_run_callback() for + // details). + // + template + curl (const C&, + I&& in, + O&& out, + E&& err, + method_type, + const std::string& url, + A&&... 
options); + + private: + enum method_proto {ftp_get, ftp_put, http_get, http_post}; + using method_proto_options = small_vector; + + method_proto + translate (method_type, const std::string& url, method_proto_options&); + + private: + template + struct is_other + { + using type = typename std::remove_reference< + typename std::remove_cv::type>::type; + + static const bool value = !(std::is_same::value || + std::is_same::value); + }; + + struct io_data + { + fdpipe pipe; + method_proto_options options; + std::string storage; + }; + + pipe + map_in (nullfd_t, method_proto, io_data&); + + pipe + map_in (const path&, method_proto, io_data&); + + template + typename std::enable_if::value, I>::type + map_in (I&&, method_proto, io_data&); + + pipe + map_out (nullfd_t, method_proto, io_data&); + + pipe + map_out (const path&, method_proto, io_data&); + + template + typename std::enable_if::value, O>::type + map_out (O&&, method_proto, io_data&); + }; +} + +#include +#include diff --git a/libbutl/curl.ixx b/libbutl/curl.ixx index 61a4ff5..b7f6496 100644 --- a/libbutl/curl.ixx +++ b/libbutl/curl.ixx @@ -1,7 +1,11 @@ // file : libbutl/curl.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // size_t +#include // forward() +#include // invalid_argument + +namespace butl { template -#include - -#include // size_t -#include // forward() -#include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.curl; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.process; //@@ MOD TODO: should we re-export? -import butl.fdstream; -import butl.small_vector; -#else -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Perform a method (GET, POST, PUT) on a URL using the curl(1) program. 
- // Throw process_error and io_error (both derive from system_error) in case - // of errors. - // - // The I (in) and O (out) can be of the following types/values: - // - // nullfd Signal that no input/output is expected. - // - // path Read input/write output from/to a file. If the special "-" - // value is used, then instead input is connected to the curl::out - // ofdstream member and output -- to the curl::in ifdstream member. - // Note that the argument type should be path, not string (i.e., - // pass path("-")). - // - // other Forwarded as is to process_start(). Normally either int or - // auto_fd. - // - // For example: - // - // curl (nullfd, // No input expected for GET. - // path ("-"), // Write response to curl::in. - // 2, - // curl::get, - // "http://example.org"); - // - // curl (path ("-"), // Read request from curl::out. - // path::temp_path (), // Write result to a file. - // 2, - // curl::post, - // "http://example.org"); - // - // curl (nullfd, - // fdopen_null (), // Write result to /dev/null. - // 2, - // curl::get, - // "tftp://localhost/foo"); - // - // Typical usage: - // - // try - // { - // curl c (nullfd, // No input expected. - // path ("-"), // Output to curl::in. - // 2, // Diagnostics to stderr. - // curl::get, // GET method. - // "https://example.org", - // "-A", "foobot/1.2.3"); // Additional curl(1) options. - // - // for (string s; getline (c.in, s); ) - // cout << s << endl; - // - // c.in.close (); - // - // if (!c.wait ()) - // ... // curl returned non-zero status. - // } - // catch (const std::system_error& e) - // { - // cerr << "curl error: " << e << endl; - // } - // - // Notes: - // - // 1. If opened, in/out streams are in the binary mode. - // - // 2. If opened, in/out must be explicitly closed before calling wait(). - // - // 3. Only binary data HTTP POST is currently supported (the --data-binary - // curl option). 
- // - class LIBBUTL_SYMEXPORT curl: public process - { - public: - enum method_type {get, put, post}; - - ifdstream in; - ofdstream out; - - template - curl (I&& in, - O&& out, - E&& err, - method_type, - const std::string& url, - A&&... options); - - // Version with the command line callback (see process_run_callback() for - // details). - // - template - curl (const C&, - I&& in, - O&& out, - E&& err, - method_type, - const std::string& url, - A&&... options); - - private: - enum method_proto {ftp_get, ftp_put, http_get, http_post}; - using method_proto_options = small_vector; - - method_proto - translate (method_type, const std::string& url, method_proto_options&); - - private: - template - struct is_other - { - using type = typename std::remove_reference< - typename std::remove_cv::type>::type; - - static const bool value = !(std::is_same::value || - std::is_same::value); - }; - - struct io_data - { - fdpipe pipe; - method_proto_options options; - std::string storage; - }; - - pipe - map_in (nullfd_t, method_proto, io_data&); - - pipe - map_in (const path&, method_proto, io_data&); - - template - typename std::enable_if::value, I>::type - map_in (I&&, method_proto, io_data&); - - pipe - map_out (nullfd_t, method_proto, io_data&); - - pipe - map_out (const path&, method_proto, io_data&); - - template - typename std::enable_if::value, O>::type - map_out (O&&, method_proto, io_data&); - }; -} - -#include -#include diff --git a/libbutl/curl.txx b/libbutl/curl.txx index 0c07d35..ee08145 100644 --- a/libbutl/curl.txx +++ b/libbutl/curl.txx @@ -1,7 +1,7 @@ // file : libbutl/curl.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
+namespace butl { template typename std::enable_if::value, I>::type curl:: diff --git a/libbutl/default-options.hxx b/libbutl/default-options.hxx new file mode 100644 index 0000000..1d363b6 --- /dev/null +++ b/libbutl/default-options.hxx @@ -0,0 +1,162 @@ +// file : libbutl/default-options.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace butl +{ + // Default options files helper implementation. + // + struct default_options_files + { + small_vector files; + optional start; + }; + + template + struct default_options_entry + { + path file; + O options; + small_vector arguments; + bool remote; + }; + + template + using default_options = small_vector, 4>; + + // Search for and load the specified list of options files in the specified + // directories returning a vector of option class instances (O). Read + // additional options from files referenced by the specified option + // (normally --options-file). If args is false, only options are allowed and + // are parsed using scanner S in the U::fail mode. If args is true, then + // both options and arguments are allowed in any order with options parsed + // in the U::stop mode. + // + // Pass each default options file path to the specified function prior to + // load (can be used for tracing, etc). The function signature is: + // + // void (const path&, bool remote, bool overwrite) + // + // Note that the function may be called for the same file twice if it was + // later discovered that it is in fact remote. In the second call the + // overwrite flag will be true. + // + // Throw `pair` on the underlying OS error with the + // first half referring the filesystem entry the error relates to and pass + // through exceptions thrown by the options scanner/parser. 
+ // + // Search order: + // + // - sys_dir + // - home_dir + // - extra_dir (can also be handled during the start/outer traversal) + // - start_dir and outer until home_dir or root (both excluding) + // + // Except for sys_dir and extra_dir, the options files are looked for in the + // .build2/ and .build2/local/ subdirectories of each directory. For + // sys_dir and extra_dir they are looked for in the directory itself (e.g., + // /etc/build2/). + // + // Note that the search is stopped at the directory containing a file with + // --no-default-options. + // + // Also note that all the directories should be absolute and normalized. + // + // The presence of the .git filesystem entry causes the options files in + // this directory and any of its subdirectories to be considered remote + // (note that in the current implementation this is the case even for files + // from the .build2/local/ subdirectory since the mere location is not a + // sufficient ground to definitively conclude that the file is not remote; + // to be sure we would need to query the VCS or some such). + // + // Note that the extra directory options files are never considered remote. + // + // For the convenience of implementation, the function parses the option + // files in the reverse order. Thus, to make sure that positions in the + // options list monotonically increase, it needs the maximum number of + // arguments, globally and per file, to be specified. This way the starting + // options position for each file will be less than for the previously + // parsed file by arg_max_file and equal to arg_max - arg_max_file for the + // first file. If the actual number of arguments exceeds the specified, then + // invalid_argument is thrown. 
+ // + template + default_options + load_default_options (const optional& sys_dir, + const optional& home_dir, + const optional& extra_dir, + const default_options_files&, + F&&, + const std::string& option, + std::size_t arg_max, + std::size_t arg_max_file, + bool args = false); + + // Merge the default options/arguments and the command line + // options/arguments. + // + // Note that these are the default implementations and in some cases you may + // want to provide an options class-specific version that verifies/sanitizes + // the default options/arguments (e.g., you may not want to allow certain + // options to be specified in the default options files) or warns/prompts + // about potentially dangerous options if they came from the remote options + // files. + // + template + O + merge_default_options (const default_options&, const O& cmd_ops); + + template + AS + merge_default_arguments (const default_options&, const AS& cmd_args); + + // As above but pass each default option/argument entry to the specified + // function prior to merging. The function signature is: + // + // void (const default_options_entry&, const O& cmd_ops) + // + // This version can be used to verify the default options/arguments. For + // example, you may want to disallow certain options/arguments from being + // specified in the default options files. + // + template + O + merge_default_options (const default_options&, const O&, F&&); + + template + AS + merge_default_arguments (const default_options&, const AS&, F&&); + + // Find a common start (parent) directory for directories specified as an + // iterator range, stopping at home or root (excluding). Optionally pass a + // function resolving an iterator into a directory in a way other than just + // dereferencing it. 
The function signature is: + // + // const dir_path& (I) + // + template + optional + default_options_start (const optional& home, I, I, F&&); + + template + inline optional + default_options_start (const optional& home, I b, I e) + { + return default_options_start (home, + b, e, + [] (I i) -> const dir_path& {return *i;}); + } +} + +#include +#include diff --git a/libbutl/default-options.ixx b/libbutl/default-options.ixx index 4a551ac..7248d7d 100644 --- a/libbutl/default-options.ixx +++ b/libbutl/default-options.ixx @@ -1,7 +1,7 @@ // file : libbutl/default-options.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +namespace butl { template inline O diff --git a/libbutl/default-options.mxx b/libbutl/default-options.mxx deleted file mode 100644 index 1694d48..0000000 --- a/libbutl/default-options.mxx +++ /dev/null @@ -1,189 +0,0 @@ -// file : libbutl/default-options.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include - -#include // move(), forward(), make_pair() -#include // reverse() -#include // invalid_argument -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.default_options; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.optional; -import butl.small_vector; - -import butl.git; -import butl.filesystem; -#else -#include -#include -#include - -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Default options files helper implementation. 
- // - struct default_options_files - { - small_vector files; - optional start; - }; - - template - struct default_options_entry - { - path file; - O options; - small_vector arguments; - bool remote; - }; - - template - using default_options = small_vector, 4>; - - // Search for and load the specified list of options files in the specified - // directories returning a vector of option class instances (O). Read - // additional options from files referenced by the specified option - // (normally --options-file). If args is false, only options are allowed and - // are parsed using scanner S in the U::fail mode. If args is true, then - // both options and arguments are allowed in any order with options parsed - // in the U::stop mode. - // - // Pass each default options file path to the specified function prior to - // load (can be used for tracing, etc). The function signature is: - // - // void (const path&, bool remote, bool overwrite) - // - // Note that the function may be called for the same file twice if it was - // later discovered that it is in fact remote. In the second call the - // overwrite flag will be true. - // - // Throw `pair` on the underlying OS error with the - // first half referring the filesystem entry the error relates to and pass - // through exceptions thrown by the options scanner/parser. - // - // Search order: - // - // - sys_dir - // - home_dir - // - extra_dir (can also be handled during the start/outer traversal) - // - start_dir and outer until home_dir or root (both excluding) - // - // Except for sys_dir and extra_dir, the options files are looked for in the - // .build2/ and .build2/local/ subdirectories of each directory. For - // sys_dir and extra_dir they are looked for in the directory itself (e.g., - // /etc/build2/). - // - // Note that the search is stopped at the directory containing a file with - // --no-default-options. - // - // Also note that all the directories should be absolute and normalized. 
- // - // The presence of the .git filesystem entry causes the options files in - // this directory and any of its subdirectories to be considered remote - // (note that in the current implementation this is the case even for files - // from the .build2/local/ subdirectory since the mere location is not a - // sufficient ground to definitively conclude that the file is not remote; - // to be sure we would need to query the VCS or some such). - // - // Note that the extra directory options files are never considered remote. - // - // For the convenience of implementation, the function parses the option - // files in the reverse order. Thus, to make sure that positions in the - // options list monotonically increase, it needs the maximum number of - // arguments, globally and per file, to be specified. This way the starting - // options position for each file will be less than for the previously - // parsed file by arg_max_file and equal to arg_max - arg_max_file for the - // first file. If the actual number of arguments exceeds the specified, then - // invalid_argument is thrown. - // - template - default_options - load_default_options (const optional& sys_dir, - const optional& home_dir, - const optional& extra_dir, - const default_options_files&, - F&&, - const std::string& option, - std::size_t arg_max, - std::size_t arg_max_file, - bool args = false); - - // Merge the default options/arguments and the command line - // options/arguments. - // - // Note that these are the default implementations and in some cases you may - // want to provide an options class-specific version that verifies/sanitizes - // the default options/arguments (e.g., you may not want to allow certain - // options to be specified in the default options files) or warns/prompts - // about potentially dangerous options if they came from the remote options - // files. 
- // - template - O - merge_default_options (const default_options&, const O& cmd_ops); - - template - AS - merge_default_arguments (const default_options&, const AS& cmd_args); - - // As above but pass each default option/argument entry to the specified - // function prior to merging. The function signature is: - // - // void (const default_options_entry&, const O& cmd_ops) - // - // This version can be used to verify the default options/arguments. For - // example, you may want to disallow certain options/arguments from being - // specified in the default options files. - // - template - O - merge_default_options (const default_options&, const O&, F&&); - - template - AS - merge_default_arguments (const default_options&, const AS&, F&&); - - // Find a common start (parent) directory for directories specified as an - // iterator range, stopping at home or root (excluding). Optionally pass a - // function resolving an iterator into a directory in a way other than just - // dereferencing it. The function signature is: - // - // const dir_path& (I) - // - template - optional - default_options_start (const optional& home, I, I, F&&); - - template - inline optional - default_options_start (const optional& home, I b, I e) - { - return default_options_start (home, - b, e, - [] (I i) -> const dir_path& {return *i;}); - } -} - -#include -#include diff --git a/libbutl/default-options.txx b/libbutl/default-options.txx index 0c2501c..aa254b2 100644 --- a/libbutl/default-options.txx +++ b/libbutl/default-options.txx @@ -1,7 +1,15 @@ // file : libbutl/default-options.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
+#include // move(), forward(), make_pair() +#include // reverse() +#include // invalid_argument +#include + +#include +#include + +namespace butl { inline bool options_dir_exists (const dir_path& d) diff --git a/libbutl/diagnostics.cxx b/libbutl/diagnostics.cxx index b038e5d..8525d60 100644 --- a/libbutl/diagnostics.cxx +++ b/libbutl/diagnostics.cxx @@ -1,9 +1,7 @@ // file : libbutl/diagnostics.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #ifndef _WIN32 # include // write() @@ -12,41 +10,16 @@ # include //_write() #endif -#include - -#ifndef __cpp_lib_modules_ts -#include -#include - #include // ios::failure #include #include +#include #include // size_t #include // cerr -#endif - -// Other includes. -#ifdef __cpp_modules_ts -module butl.diagnostics; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -import std.threading; -import butl.utility; -import butl.optional; -import butl.fdstream; // stderr_fd(), fdterm() -#else -#include -#include -#include -#endif +#include +#include +#include using namespace std; diff --git a/libbutl/diagnostics.hxx b/libbutl/diagnostics.hxx new file mode 100644 index 0000000..712de0c --- /dev/null +++ b/libbutl/diagnostics.hxx @@ -0,0 +1,266 @@ +// file : libbutl/diagnostics.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // move(), forward() +#include // uncaught_exception[s]() + +#include // uncaught_exceptions + +#include + +namespace butl +{ + // Diagnostic facility base infrastructure. + // + + // Diagnostics destination stream (std::cerr by default). Note that its + // modification is not MT-safe. Also note that concurrent writing to the + // stream from multiple threads can result in interleaved characters. 
To + // prevent this an object of diag_stream_lock type (see below) must be + // created prior to write operation. + // + LIBBUTL_SYMEXPORT extern std::ostream* diag_stream; + + // Acquire the diagnostics exclusive access mutex in ctor, release in dtor. + // An object of the type must be created prior to writing to diag_stream (see + // above). + // + struct LIBBUTL_SYMEXPORT diag_stream_lock + { + diag_stream_lock (); + ~diag_stream_lock (); + + // Support for one-liners, for example: + // + // diag_stream_lock () << "Hello, World!" << endl; + // + template + std::ostream& + operator<< (const T& x) const + { + return *diag_stream << x; + } + }; + + // Progress line facility. + // + // The idea is to keep a progress line at the bottom of the terminal with + // other output scrolling above it. For a non-terminal STDERR the progress + // line is printed as a regular one terminated with the newline character. + // The printing of the progress line is integrated into diag_stream_lock and + // diag_progress_lock. To print or update the progress acquire + // diag_progress_lock and update the diag_progress string. To remove the + // progress line, set this string to empty. For better readability start the + // progress line with a space (which is where the cursor will be parked). + // Should only be used if diag_stream points to std::cerr. + // + // Note that child processes writing to the same stream may not completely + // overwrite the progress line so in this case it makes sense to keep the + // line as short as possible. + // + // To restore the progress line being overwritten by an independent writer + // (such as a child process), create and destroy the diag_progress_lock. 
+ // + LIBBUTL_SYMEXPORT extern std::string diag_progress; + + struct LIBBUTL_SYMEXPORT diag_progress_lock + { + diag_progress_lock (); + ~diag_progress_lock (); + }; + + // + // + struct diag_record; + template struct diag_prologue; + template struct diag_mark; + + using diag_epilogue = void (const diag_record&); + + struct LIBBUTL_SYMEXPORT diag_record + { + template + const diag_record& + operator<< (const T& x) const + { + os << x; + return *this; + } + + diag_record () + : +#ifdef __cpp_lib_uncaught_exceptions + uncaught_ (std::uncaught_exceptions ()), +#endif + empty_ (true), + epilogue_ (nullptr) {} + + template + explicit + diag_record (const diag_prologue& p): diag_record () { *this << p;} + + template + explicit + diag_record (const diag_mark& m): diag_record () { *this << m;} + + ~diag_record () noexcept (false); + + bool + empty () const {return empty_;} + + bool + full () const {return !empty_;} + + void + flush () const; + + void + append (const char* indent, diag_epilogue* e) const + { + // Ignore subsequent epilogues (e.g., from nested marks, etc). + // + if (empty_) + { + epilogue_ = e; + empty_ = false; + } + else if (indent != nullptr) + os << indent; + } + + // Move constructible-only type. + // + // Older versions of libstdc++ don't have the ostringstream move support + // and accuratly detecting its version is non-trivial. So we always use + // the pessimized implementation with libstdc++. Luckily, GCC doesn't seem + // to be needing move due to copy/move elision. 
+ // +#ifdef __GLIBCXX__ + diag_record (diag_record&&); +#else + diag_record (diag_record&& r) + : +#ifdef __cpp_lib_uncaught_exceptions + uncaught_ (r.uncaught_), +#endif + empty_ (r.empty_), + epilogue_ (r.epilogue_), + os (std::move (r.os)) + { + if (!empty_) + { + r.empty_ = true; + r.epilogue_ = nullptr; + } + } +#endif + + diag_record& operator= (diag_record&&) = delete; + + diag_record (const diag_record&) = delete; + diag_record& operator= (const diag_record&) = delete; + + // Diagnostics writer. The default implementation writes the record text + // to diag_stream. If it is NULL, then the record text is ignored. + // + static void (*writer) (const diag_record&); + + protected: +#ifdef __cpp_lib_uncaught_exceptions + const int uncaught_; +#endif + mutable bool empty_; + mutable diag_epilogue* epilogue_; + + public: + mutable std::ostringstream os; + }; + + template + struct diag_prologue: B + { + const char* indent; + diag_epilogue* epilogue; + + diag_prologue (const char* i = "\n ", diag_epilogue* e = nullptr) + : B (), indent (i), epilogue (e) {} + + template + diag_prologue (A&&... a) + : B (std::forward (a)...), indent ("\n "), epilogue (nullptr) {} + + template + diag_prologue (diag_epilogue* e, A&&... a) + : B (std::forward (a)...), indent ("\n "), epilogue (e) {} + + template + diag_prologue (const char* i, diag_epilogue* e, A&&... a) + : B (std::forward (a)...), indent (i), epilogue (e) {} + + template + diag_record + operator<< (const T& x) const + { + diag_record r; + r.append (indent, epilogue); + B::operator() (r); + r << x; + return r; + } + + friend const diag_record& + operator<< (const diag_record& r, const diag_prologue& p) + { + r.append (p.indent, p.epilogue); + p (r); + return r; + } + }; + + template + struct diag_mark: B + { + diag_mark (): B () {} + + template + diag_mark (A&&... a): B (std::forward (a)...) 
{} + + template + diag_record + operator<< (const T& x) const + { + return B::operator() () << x; + } + + friend const diag_record& + operator<< (const diag_record& r, const diag_mark& m) + { + return r << m (); + } + }; + + template + struct diag_noreturn_end: B + { + diag_noreturn_end (): B () {} + + template + diag_noreturn_end (A&&... a): B (std::forward (a)...) {} + + [[noreturn]] friend void + operator<< (const diag_record& r, const diag_noreturn_end& e) + { + // We said that we never return which means this end mark cannot be used + // to "maybe not return". And not returning without any diagnostics is + // probably a mistake. + // + assert (r.full ()); + e.B::operator() (r); + } + }; +} diff --git a/libbutl/diagnostics.mxx b/libbutl/diagnostics.mxx deleted file mode 100644 index d41ba74..0000000 --- a/libbutl/diagnostics.mxx +++ /dev/null @@ -1,279 +0,0 @@ -// file : libbutl/diagnostics.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // move(), forward() -#include // uncaught_exception[s]() -#endif - -#include // uncaught_exceptions - -#ifdef __cpp_modules_ts -export module butl.diagnostics; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Diagnostic facility base infrastructure. - // - - // Diagnostics destination stream (std::cerr by default). Note that its - // modification is not MT-safe. Also note that concurrent writing to the - // stream from multiple threads can result in interleaved characters. To - // prevent this an object of diag_stream_lock type (see below) must be - // created prior to write operation. - // - LIBBUTL_SYMEXPORT extern std::ostream* diag_stream; - - // Acquire the diagnostics exclusive access mutex in ctor, release in dtor. 
- // An object of the type must be created prior to writing to diag_stream (see - // above). - // - struct LIBBUTL_SYMEXPORT diag_stream_lock - { - diag_stream_lock (); - ~diag_stream_lock (); - - // Support for one-liners, for example: - // - // diag_stream_lock () << "Hello, World!" << endl; - // - template - std::ostream& - operator<< (const T& x) const - { - return *diag_stream << x; - } - }; - - // Progress line facility. - // - // The idea is to keep a progress line at the bottom of the terminal with - // other output scrolling above it. For a non-terminal STDERR the progress - // line is printed as a regular one terminated with the newline character. - // The printing of the progress line is integrated into diag_stream_lock and - // diag_progress_lock. To print or update the progress acquire - // diag_progress_lock and update the diag_progress string. To remove the - // progress line, set this string to empty. For better readability start the - // progress line with a space (which is where the cursor will be parked). - // Should only be used if diag_stream points to std::cerr. - // - // Note that child processes writing to the same stream may not completely - // overwrite the progress line so in this case it makes sense to keep the - // line as short as possible. - // - // To restore the progress line being overwritten by an independent writer - // (such as a child process), create and destroy the diag_progress_lock. 
- // - LIBBUTL_SYMEXPORT extern std::string diag_progress; - - struct LIBBUTL_SYMEXPORT diag_progress_lock - { - diag_progress_lock (); - ~diag_progress_lock (); - }; - - // - // - struct diag_record; - template struct diag_prologue; - template struct diag_mark; - - using diag_epilogue = void (const diag_record&); - - struct LIBBUTL_SYMEXPORT diag_record - { - template - const diag_record& - operator<< (const T& x) const - { - os << x; - return *this; - } - - diag_record () - : -#ifdef __cpp_lib_uncaught_exceptions - uncaught_ (std::uncaught_exceptions ()), -#endif - empty_ (true), - epilogue_ (nullptr) {} - - template - explicit - diag_record (const diag_prologue& p): diag_record () { *this << p;} - - template - explicit - diag_record (const diag_mark& m): diag_record () { *this << m;} - - ~diag_record () noexcept (false); - - bool - empty () const {return empty_;} - - bool - full () const {return !empty_;} - - void - flush () const; - - void - append (const char* indent, diag_epilogue* e) const - { - // Ignore subsequent epilogues (e.g., from nested marks, etc). - // - if (empty_) - { - epilogue_ = e; - empty_ = false; - } - else if (indent != nullptr) - os << indent; - } - - // Move constructible-only type. - // - // Older versions of libstdc++ don't have the ostringstream move support - // and accuratly detecting its version is non-trivial. So we always use - // the pessimized implementation with libstdc++. Luckily, GCC doesn't seem - // to be needing move due to copy/move elision. 
- // -#ifdef __GLIBCXX__ - diag_record (diag_record&&); -#else - diag_record (diag_record&& r) - : -#ifdef __cpp_lib_uncaught_exceptions - uncaught_ (r.uncaught_), -#endif - empty_ (r.empty_), - epilogue_ (r.epilogue_), - os (std::move (r.os)) - { - if (!empty_) - { - r.empty_ = true; - r.epilogue_ = nullptr; - } - } -#endif - - diag_record& operator= (diag_record&&) = delete; - - diag_record (const diag_record&) = delete; - diag_record& operator= (const diag_record&) = delete; - - // Diagnostics writer. The default implementation writes the record text - // to diag_stream. If it is NULL, then the record text is ignored. - // - static void (*writer) (const diag_record&); - - protected: -#ifdef __cpp_lib_uncaught_exceptions - const int uncaught_; -#endif - mutable bool empty_; - mutable diag_epilogue* epilogue_; - - public: - mutable std::ostringstream os; - }; - - template - struct diag_prologue: B - { - const char* indent; - diag_epilogue* epilogue; - - diag_prologue (const char* i = "\n ", diag_epilogue* e = nullptr) - : B (), indent (i), epilogue (e) {} - - template - diag_prologue (A&&... a) - : B (std::forward (a)...), indent ("\n "), epilogue (nullptr) {} - - template - diag_prologue (diag_epilogue* e, A&&... a) - : B (std::forward (a)...), indent ("\n "), epilogue (e) {} - - template - diag_prologue (const char* i, diag_epilogue* e, A&&... a) - : B (std::forward (a)...), indent (i), epilogue (e) {} - - template - diag_record - operator<< (const T& x) const - { - diag_record r; - r.append (indent, epilogue); - B::operator() (r); - r << x; - return r; - } - - friend const diag_record& - operator<< (const diag_record& r, const diag_prologue& p) - { - r.append (p.indent, p.epilogue); - p (r); - return r; - } - }; - - template - struct diag_mark: B - { - diag_mark (): B () {} - - template - diag_mark (A&&... a): B (std::forward (a)...) 
{} - - template - diag_record - operator<< (const T& x) const - { - return B::operator() () << x; - } - - friend const diag_record& - operator<< (const diag_record& r, const diag_mark& m) - { - return r << m (); - } - }; - - template - struct diag_noreturn_end: B - { - diag_noreturn_end (): B () {} - - template - diag_noreturn_end (A&&... a): B (std::forward (a)...) {} - - [[noreturn]] friend void - operator<< (const diag_record& r, const diag_noreturn_end& e) - { - // We said that we never return which means this end mark cannot be used - // to "maybe not return". And not returning without any diagnostics is - // probably a mistake. - // - assert (r.full ()); - e.B::operator() (r); - } - }; -} diff --git a/libbutl/export.hxx b/libbutl/export.hxx index 3353ca8..dc04f85 100644 --- a/libbutl/export.hxx +++ b/libbutl/export.hxx @@ -3,14 +3,6 @@ #pragma once -// If modules are available, setup the module export. -// -#ifdef __cpp_modules_ts -# define LIBBUTL_MODEXPORT export -#else -# define LIBBUTL_MODEXPORT -#endif - // Normally we don't export class templates (but do complete specializations), // inline functions, and classes with only inline member functions. 
Exporting // classes that inherit from non-exported/imported bases (e.g., std::string) diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx index eb0ec7b..85459de 100644 --- a/libbutl/fdstream.cxx +++ b/libbutl/fdstream.cxx @@ -1,9 +1,7 @@ // file : libbutl/fdstream.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include // errno, E* @@ -39,52 +37,21 @@ # include // count() #endif -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include -#include -#include -#include - #include // ios_base::openmode, ios_base::failure #include // bad_alloc #include // numeric_limits +#include #include // memcpy(), memmove() #include // cin, cout #include // uncaught_exception[s]() #include // invalid_argument #include -#endif -#include // uncaught_exceptions +#include // uncaught_exceptions #include -#ifdef __cpp_modules_ts -module butl.fdstream; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.threading; // Clang wants it in purview (see process-details.hxx). 
-#endif -import butl.path; -import butl.filesystem; -import butl.small_vector; -#endif - -import butl.utility; // throw_*_ios_failure(), function_cast() -import butl.timestamp; -#else -#include -#include -#endif +#include // throw_*_ios_failure(), function_cast() +#include using namespace std; diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx new file mode 100644 index 0000000..4dc89a3 --- /dev/null +++ b/libbutl/fdstream.hxx @@ -0,0 +1,917 @@ +// file : libbutl/fdstream.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // streamsize +#include +#include +#include +#include +#include +#include // move(), pair +#include // uint16_t, uint64_t +#include // size_t + +#include +#include // permissions, entry_stat +#include +#include + +#include + +namespace butl +{ + // RAII type for file descriptors. Note that failure to close the descriptor + // is silently ignored by both the destructor and reset(). + // + // The descriptor can be negative. Such a descriptor is treated as unopened + // and is not closed. + // + struct nullfd_t + { + constexpr explicit nullfd_t (int) {} + constexpr operator int () const {return -1;} + }; + + constexpr nullfd_t nullfd (-1); + + class LIBBUTL_SYMEXPORT auto_fd + { + public: + auto_fd (nullfd_t = nullfd) noexcept: fd_ (-1) {} + + explicit + auto_fd (int fd) noexcept: fd_ (fd) {} + + auto_fd (auto_fd&& fd) noexcept: fd_ (fd.release ()) {} + auto_fd& operator= (auto_fd&&) noexcept; + + auto_fd (const auto_fd&) = delete; + auto_fd& operator= (const auto_fd&) = delete; + + ~auto_fd () noexcept; + + int + get () const noexcept {return fd_;} + + void + reset (int fd = -1) noexcept; + + int + release () noexcept + { + int r (fd_); + fd_ = -1; + return r; + } + + // Close an open file descriptor. Throw ios::failure on the underlying OS + // error. Reset the descriptor to -1 whether the exception is thrown or + // not. 
+ // + void + close (); + + private: + int fd_; + }; + + inline bool + operator== (const auto_fd& x, const auto_fd& y) + { + return x.get () == y.get (); + } + + inline bool + operator!= (const auto_fd& x, const auto_fd& y) + { + return !(x == y); + } + + inline bool + operator== (const auto_fd& x, nullfd_t) + { + return x.get () == -1; + } + + inline bool + operator!= (const auto_fd& x, nullfd_t y) + { + return !(x == y); + } + + // An [io]fstream that can be initialized with a file descriptor in addition + // to a file name and that also by default enables exceptions on badbit and + // failbit. So instead of a dance like this: + // + // ifstream ifs; + // ifs.exceptions (ifstream::badbit | ifstream::failbit); + // ifs.open (path.string ()); + // + // You can simply do: + // + // ifdstream ifs (path); + // + // Notes and limitations: + // + // - char only + // - input or output but not both (can use a union of two streams for that) + // - no support for put back + // - use of tell[gp]() and seek[gp]() is discouraged on Windows for + // fdstreams opened in the text mode (see fdstreambuf::seekoff() + // implementation for reasoning and consider using non-standard tellg() + // and seekg() in fdstreambuf, instead) + // - non-blocking file descriptor is supported only by showmanyc() function + // and only for pipes on Windows, in contrast to POSIX systems + // - throws ios::failure in case of open(), read(), write(), close(), + // seek[gp](), or tell[gp]() errors + // - exception mask has at least badbit + // - after catching an exception caused by badbit the stream is no longer + // usable + // - not movable, though can be easily supported (or not: there is no move + // constructor for istream/ostream in GCC 4.9) + // - passing to constructor auto_fd with a negative file descriptor is valid + // and results in the creation of an unopened object + // + class LIBBUTL_SYMEXPORT fdstreambuf: public bufstreambuf + { + public: + fdstreambuf () = default; + + // Unless 
specified, the current read/write position is assumed to + // be 0 (note: not queried). + // + fdstreambuf (auto_fd&&, std::uint64_t pos = 0); + + // Before we invented auto_fd into fdstreams we kept fdstreambuf opened + // on faulty close attempt. Now fdstreambuf is always closed by close() + // function. This semantics change seems to be the right one as there is + // no reason to expect fdclose() to succeed after it has already failed + // once. + // + void + close () {fd_.close ();} + + auto_fd + release (); + + void + open (auto_fd&&, std::uint64_t pos = 0); + + bool + is_open () const {return fd_.get () >= 0;} + + int + fd () const {return fd_.get ();} + + // Set the file descriptor blocking mode returning the previous mode on + // success and throwing ios::failure otherwise (see fdmode() for details). + // + // Note that besides calling fdmode(fd()), this function also updates its + // internal state according to the new mode. + // + bool + blocking (bool); + + public: + using base = bufstreambuf; + + // basic_streambuf input interface. + // + public: + virtual std::streamsize + showmanyc (); + + virtual int_type + underflow (); + + // Direct access to the get area. Use with caution. + // + using base::gptr; + using base::egptr; + using base::gbump; + + // Return the (logical) position of the next byte to be read. + // + using base::tellg; + + // Seek to the (logical) position as if by reading the specified number of + // bytes from the beginning of the stream. Throw ios::failure on the + // underlying OS errors. + // + void + seekg (std::uint64_t); + + private: + bool + load (); + + // basic_streambuf output interface. + // + public: + virtual int_type + overflow (int_type); + + virtual int + sync (); + + virtual std::streamsize + xsputn (const char_type*, std::streamsize); + + // Return the (logical) position of the next byte to be written. + // + using base::tellp; + + // basic_streambuf positioning interface (both input/output). 
+ // + public: + virtual pos_type + seekpos (pos_type, std::ios_base::openmode); + + virtual pos_type + seekoff (off_type, std::ios_base::seekdir, std::ios_base::openmode); + + private: + bool + save (); + + private: + auto_fd fd_; + char buf_[8192]; + bool non_blocking_ = false; + }; + + // File stream mode. + // + // The text/binary flags have the same semantics as those in std::fstream. + // Specifically, this is a noop for POSIX systems where the two modes are + // the same. On Windows, when reading in the text mode the sequence of 0xD, + // 0xA characters is translated into the single 0xA character and 0x1A is + // interpreted as EOF. When writing in the text mode the 0xA character is + // translated into the 0xD, 0xA sequence. + // + // The skip flag instructs the stream to skip to the end before closing the + // file descriptor. This is primarily useful when working with pipes where + // you may want not to "offend" the other end by closing your end before + // reading all the data. + // + // The blocking/non_blocking flags determine whether the IO operation should + // block or return control if currently there is no data to read or no room + // to write. Only the istream::readsome() function supports the semantics of + // non-blocking operations. In contrast to POSIX systems, we only support + // this for pipes on Windows, always assuming the blocking mode for other + // file descriptors. IO stream operations other than readsome() are illegal + // in the non-blocking mode and result in the badbit being set (note that + // it is not the more appropriate failbit for implementation reasons). 
+ // + enum class fdstream_mode: std::uint16_t + { + text = 0x01, + binary = 0x02, + skip = 0x04, + blocking = 0x08, + non_blocking = 0x10 + }; + + inline fdstream_mode operator& (fdstream_mode, fdstream_mode); + inline fdstream_mode operator| (fdstream_mode, fdstream_mode); + inline fdstream_mode operator&= (fdstream_mode&, fdstream_mode); + inline fdstream_mode operator|= (fdstream_mode&, fdstream_mode); + + // Extended (compared to ios::openmode) file open flags. + // + enum class fdopen_mode: std::uint16_t + { + in = 0x01, // Open for reading. + out = 0x02, // Open for writing. + append = 0x04, // Seek to the end of file before each write. + truncate = 0x08, // Discard the file contents on open. + create = 0x10, // Create a file if not exists. + exclusive = 0x20, // Fail if the file exists and the create flag is set. + binary = 0x40, // Set binary translation mode. + at_end = 0x80, // Seek to the end of stream immediately after open. + + none = 0 // Useful when building the mode incrementally. + }; + + inline fdopen_mode operator& (fdopen_mode, fdopen_mode); + inline fdopen_mode operator| (fdopen_mode, fdopen_mode); + inline fdopen_mode operator&= (fdopen_mode&, fdopen_mode); + inline fdopen_mode operator|= (fdopen_mode&, fdopen_mode); + + class LIBBUTL_SYMEXPORT fdstream_base + { + protected: + fdstream_base () = default; + fdstream_base (auto_fd&&, std::uint64_t pos); + fdstream_base (auto_fd&&, fdstream_mode, std::uint64_t pos); + + public: + int + fd () const {return buf_.fd ();} + + protected: + fdstreambuf buf_; + }; + + // iofdstream constructors and open() functions that take openmode as an + // argument mimic the corresponding iofstream functions in terms of the + // openmode mask interpretation. They throw std::invalid_argument for an + // invalid combination of flags (as per the standard). Note that the in and + // out flags are always added implicitly for ifdstream and ofdstream, + // respectively. 
+ // + // iofdstream constructors and open() functions that take fdopen_mode as an + // argument interpret the mask literally just ignoring some flags which are + // meaningless in the absence of others (read more on that in the comment + // for fdopen()). Note that the in and out flags are always added implicitly + // for ifdstream and ofdstream, respectively. + // + // iofdstream constructors and open() functions that take file path as a + // const std::string& or const char* may throw the invalid_path exception. + // + // Passing auto_fd with a negative file descriptor is valid and results in + // the creation of an unopened object. + // + // Also note that open() and close() functions can be successfully called + // for opened and unopened objects respectively. That is in contrast with + // iofstream that sets failbit in such cases. + // + + // Note that ifdstream destructor will close an open file descriptor but + // will ignore any errors. To detect such errors, call close() explicitly. + // + // This is a sample usage of iofdstreams with process. Note that here it is + // expected that the child process reads from STDIN first and writes to + // STDOUT afterwards. + // + // try + // { + // process pr (args, -1, -1); + // + // try + // { + // // In case of exception, skip and close input after output. + // // + // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); + // ofdstream os (move (pr.out_fd)); + // + // // Write. + // + // os.close (); // Don't block the other end. + // + // // Read. + // + // is.close (); // Skip till end and close. + // + // if (pr.wait ()) + // { + // return ...; // Good. + // } + // + // // Non-zero exit, diagnostics presumably issued, fall through. + // } + // catch (const failure&) + // { + // // IO failure, child exit status doesn't matter. Just wait for the + // // process completion and fall through. 
+ // // + // // Note that this is optional if the process_error handler simply + // // falls through since process destructor will wait (but will ignore + // // any errors). + // // + // pr.wait (); + // } + // + // error << .... ; + // + // // Fall through. + // } + // catch (const process_error& e) + // { + // error << ... << e; + // + // if (e.child ()) + // exit (1); + // + // // Fall through. + // } + // + // throw failed (); + // + class LIBBUTL_SYMEXPORT ifdstream: public fdstream_base, public std::istream + { + public: + // Create an unopened object. + // + explicit + ifdstream (iostate = badbit | failbit); + + explicit + ifdstream (auto_fd&&, + iostate = badbit | failbit, + std::uint64_t pos = 0); + + ifdstream (auto_fd&&, + fdstream_mode m, + iostate = badbit | failbit, + std::uint64_t pos = 0); + + explicit + ifdstream (const char*, + iostate = badbit | failbit); + + explicit + ifdstream (const std::string&, + iostate = badbit | failbit); + + explicit + ifdstream (const path&, + iostate = badbit | failbit); + + // @@ In some implementations (for example, MSVC), iostate and openmode + // (and/or their respective constants) are not distinct enough which + // causes overload resolution errors. 
+ // + ifdstream (const char*, + openmode, + iostate /*= badbit | failbit*/); + + ifdstream (const std::string&, + openmode, + iostate /*= badbit | failbit*/); + + ifdstream (const path&, + openmode, + iostate /*= badbit | failbit*/); + + ifdstream (const char*, + fdopen_mode, + iostate = badbit | failbit); + + ifdstream (const std::string&, + fdopen_mode, + iostate = badbit | failbit); + + ifdstream (const path&, + fdopen_mode, + iostate = badbit | failbit); + + ~ifdstream () override; + + void + open (const char*, openmode = in); + + void + open (const std::string&, openmode = in); + + void + open (const path&, openmode = in); + + void + open (const char*, fdopen_mode); + + void + open (const std::string&, fdopen_mode); + + void + open (const path&, fdopen_mode); + + void + open (auto_fd&& fd, std::uint64_t pos = 0) + { + buf_.open (std::move (fd), pos); + clear (); + } + + void + open (auto_fd&& fd, fdstream_mode m, std::uint64_t pos = 0); + + void close (); + auto_fd release (); // Note: no skipping. + bool is_open () const {return buf_.is_open ();} + + // Read the textual stream. The stream is supposed not to contain the null + // character. + // + std::string + read_text (); + + // Read the binary stream. + // + std::vector + read_binary (); + + private: + bool skip_ = false; + }; + + // Note that ofdstream requires that you explicitly call close() before + // destroying it. Or, more specifically, the ofdstream object should not be + // in the opened state by the time its destructor is called, unless it is in + // the "not good" state (good() == false) or the destructor is being called + // during the stack unwinding due to an exception being thrown + // (std::uncaught_exception() == true). This is enforced with assert() in + // the ofdstream destructor. + // + class LIBBUTL_SYMEXPORT ofdstream: public fdstream_base, public std::ostream + { + public: + // Create an unopened object. 
+ // + explicit + ofdstream (iostate = badbit | failbit); + + explicit + ofdstream (auto_fd&&, + iostate = badbit | failbit, + std::uint64_t pos = 0); + + ofdstream (auto_fd&&, + fdstream_mode m, + iostate = badbit | failbit, + std::uint64_t pos = 0); + + explicit + ofdstream (const char*, + iostate = badbit | failbit); + + explicit + ofdstream (const std::string&, + iostate = badbit | failbit); + + explicit + ofdstream (const path&, + iostate = badbit | failbit); + + // @@ In some implementations (for example, MSVC), iostate and openmode + // (and/or their respective constants) are not distinct enough which + // causes overload resolution errors. + // + ofdstream (const char*, + openmode, + iostate /*= badbit | failbit*/); + + ofdstream (const std::string&, + openmode, + iostate /*= badbit | failbit*/); + + ofdstream (const path&, + openmode, + iostate /*= badbit | failbit*/); + + ofdstream (const char*, + fdopen_mode, + iostate = badbit | failbit); + + ofdstream (const std::string&, + fdopen_mode, + iostate = badbit | failbit); + + ofdstream (const path&, + fdopen_mode, + iostate = badbit | failbit); + + ~ofdstream () override; + + void + open (const char*, openmode = out); + + void + open (const std::string&, openmode = out); + + void + open (const path&, openmode = out); + + void + open (const char*, fdopen_mode); + + void + open (const std::string&, fdopen_mode); + + void + open (const path&, fdopen_mode); + + void + open (auto_fd&& fd, std::uint64_t pos = 0) + { + buf_.open (std::move (fd), pos); + clear (); + } + + void close () {if (is_open ()) flush (); buf_.close ();} + auto_fd release (); + bool is_open () const {return buf_.is_open ();} + }; + + // Open a file or, if the file name is `-`, stdin/stdout. + // + // In case of the stdin/stdout, these functions simply adjust the exception + // mask on std::cin/cout to match the i/ofdstreams argument. 
+ // + // Return a reference to the opened i/ofdstream or cin/cout and, in the + // latter case, set the translated name in path_name to <stdin>/<stdout> + // (unless it is already present). + // + // Note that ofdstream::close() should be called explicitly unless stdout + // was opened (but harmless to call even if it was). + // + LIBBUTL_SYMEXPORT std::istream& + open_file_or_stdin (path_name&, ifdstream&); + + LIBBUTL_SYMEXPORT std::ostream& + open_file_or_stdout (path_name&, ofdstream&); + + // The std::getline() replacement that provides a workaround for libstdc++'s + // ios::failure ABI fiasco (#66145) by throwing ios::failure, as it is + // defined at libbutl build time (new ABI on recent distributions) rather + // than libstdc++ build time (still old ABI on most distributions). + // + // Notes: + // + // - This relies on ADL so if the stream is used via the std::istream + // interface, then std::getline() will still be used. To put it another + // way, this is "the best we can do" until GCC folks get their act + // together. + // + // - The fail and eof bits may be left cleared in the stream exception mask + // when the function throws because of badbit. + // + LIBBUTL_SYMEXPORT ifdstream& + getline (ifdstream&, std::string&, char delim = '\n'); + + // Open a file returning an auto_fd that holds its file descriptor on + // success and throwing ios::failure otherwise. + // + // The mode argument should have at least one of the in or out flags set. + // The append and truncate flags are meaningless in the absence of the out + // flag and are ignored without it. The exclusive flag is meaningless in the + // absence of the create flag and is ignored without it. Note also that if + // the exclusive flag is specified then a dangling symbolic link is treated + // as an existing file. + // + // The permissions argument is taken into account only if the file is + // created. Note also that permissions can be adjusted while being set in a + // way specific for the OS. 
On POSIX systems they are modified with the + // process' umask, so effective permissions are permissions & ~umask. On + // Windows permissions other than ru and wu are unlikely to have effect. + // + // Also note that on POSIX the FD_CLOEXEC flag is set for the file descriptor + // to prevent its leakage into child processes. On Windows, for the same + // purpose, the _O_NOINHERIT flag is set. Note that the process class, that + // passes such a descriptor to the child, makes it inheritable for a while. + // + LIBBUTL_SYMEXPORT auto_fd + fdopen (const char*, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + LIBBUTL_SYMEXPORT auto_fd + fdopen (const std::string&, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + LIBBUTL_SYMEXPORT auto_fd + fdopen (const path&, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + // Duplicate an open file descriptor. Throw ios::failure on the underlying + // OS error. + // + // Note that on POSIX the FD_CLOEXEC flag is set for the new descriptor if it + // is present for the source one. That's in contrast to POSIX dup() that + // doesn't copy file descriptor flags. Also note that duplicating descriptor + // and setting the flag is not an atomic operation generally, but it is in + // regards to child process spawning (to prevent file descriptor leakage into + // a child process). + // + // Note that on Windows the _O_NOINHERIT flag is set for the new descriptor + // if it is present for the source one. That's in contrast to Windows _dup() + // that doesn't copy the flag. 
Also note that duplicating descriptor and + // setting the flag is not an atomic operation generally, but it is in + // regards to child process spawning (to prevent file descriptor leakage into + // a child process). + // + LIBBUTL_SYMEXPORT auto_fd + fddup (int fd); + + // Set the translation and/or blocking modes for the file descriptor. Throw + // invalid_argument for an invalid combination of flags. Return the previous + // mode on success, throw ios::failure otherwise. + // + // The text and binary flags are mutually exclusive on Windows. On POSIX + // system the two modes are the same and so no check is performed. + // + // The blocking and non-blocking flags are mutually exclusive. In contrast + // to POSIX systems, on Windows the non-blocking mode is only supported for + // pipes, with the blocking mode assumed for other file descriptors + // regardless of the flags. + // + // Note that on Wine currently pipes always behave as blocking regardless of + // the mode set. + // + LIBBUTL_SYMEXPORT fdstream_mode + fdmode (int, fdstream_mode); + + // Portable functions for obtaining file descriptors of standard streams. + // Throw ios::failure on the underlying OS error. + // + // Note that you normally wouldn't want to close them using fddup() to + // convert them to auto_fd, for example: + // + // ifdstream is (fddup (stdin_fd ())); + // + LIBBUTL_SYMEXPORT int stdin_fd (); + LIBBUTL_SYMEXPORT int stdout_fd (); + LIBBUTL_SYMEXPORT int stderr_fd (); + + // Convenience functions for setting the translation mode for standard + // streams. + // + LIBBUTL_SYMEXPORT fdstream_mode stdin_fdmode (fdstream_mode); + LIBBUTL_SYMEXPORT fdstream_mode stdout_fdmode (fdstream_mode); + LIBBUTL_SYMEXPORT fdstream_mode stderr_fdmode (fdstream_mode); + + // Low-level, nothrow file descriptor API. + // + + // Close the file descriptor. Return true on success, set errno and return + // false otherwise. 
+ // + LIBBUTL_SYMEXPORT bool + fdclose (int) noexcept; + + // Open the null device (e.g., /dev/null) that discards all data written to + // it and provides no data for read operations (i.e., yields EOF on read). + // Return an auto_fd that holds its file descriptor on success and throwing + // ios::failure otherwise. + // + // On Windows the null device is NUL and writing anything substantial to it + // (like redirecting a process' output) is extremely slow, as in, an order + // of magnitude slower than writing to disk. If you are using the descriptor + // yourself this can be mitigated by setting the binary mode (already done + // by fdopen()) and using a buffer of around 64K. However, sometimes you + // have no control of how the descriptor will be used. For instance, it can + // be used to redirect a child's stdout and the way the child sets up its + // stdout is out of your control (on Windows). For such cases, there is an + // emulation via a temporary file. Mostly it functions as a proper null + // device with the file automatically removed once the descriptor is + // closed. One difference, however, would be if you were to both write to + // and read from the descriptor. + // + // Note that on POSIX the FD_CLOEXEC flag is set for the file descriptor to + // prevent its leakage into child processes. On Windows, for the same + // purpose, the _O_NOINHERIT flag is set. + // +#ifndef _WIN32 + LIBBUTL_SYMEXPORT auto_fd + fdopen_null (); +#else + LIBBUTL_SYMEXPORT auto_fd + fdopen_null (bool temp = false); +#endif + + struct fdpipe + { + auto_fd in; + auto_fd out; + + void + close () + { + in.close (); + out.close (); + } + }; + + // Create a pipe. Throw ios::failure on the underlying OS error. By default + // both ends of the pipe are opened in the text mode. Pass the binary flag + // to instead open them in the binary mode. Passing a mode other than none + // or binary is illegal. + // + // Note that on Windows both ends of the created pipe are not inheritable. 
+ // In particular, the process class that uses fdpipe underneath makes the + // appropriate end (the one being passed to the child) inheritable. + // + // Note that on POSIX the FD_CLOEXEC flag is set for both ends, so they get + // automatically closed by the child process to prevent undesired behaviors + // (such as child deadlock on read from a pipe due to the write-end leakage + // into the child process). Opening a pipe and setting the flag is not an + // atomic operation generally, but it is in regards to child process spawning + // (to prevent file descriptor leakage into child processes spawned from + // other threads). Also note that you don't need to reset the flag for a pipe + // end being passed to the process class ctor. + // + LIBBUTL_SYMEXPORT fdpipe + fdopen_pipe (fdopen_mode = fdopen_mode::none); + + // Seeking. + // + enum class fdseek_mode {set, cur, end}; + + LIBBUTL_SYMEXPORT std::uint64_t + fdseek (int, std::int64_t, fdseek_mode); + + // Truncate or expand the file to the specified size. Throw ios::failure on + // the underlying OS error. + // + LIBBUTL_SYMEXPORT void + fdtruncate (int, std::uint64_t); + + // Return filesystem entry stat from file descriptor. Throw ios::failure on + // the underlying OS error. + // + // See also path_entry() in filesystem. + // + LIBBUTL_SYMEXPORT entry_stat + fdstat (int); + + // Test whether a file descriptor refers to a terminal. Throw ios::failure + // on the underlying OS error. + // + LIBBUTL_SYMEXPORT bool + fdterm (int); + + // Wait until one or more file descriptors becomes ready for input (reading) + // or output (writing). Return the pair of numbers of descriptors that are + // ready. Throw std::invalid_argument if anything is wrong with arguments + // (both sets are empty, invalid fd, etc). Throw ios::failure on the + // underlying OS error. + // + // Note that the function clears all the previously-ready entries on each + // call. Entries with nullfd are ignored. 
+ // + // On Windows only pipes and only their input (read) ends are supported. + // + struct fdselect_state + { + int fd; + bool ready; + + // Note: intentionally non-explicit to allow implicit initialization when + // pushing to fdselect_set. + // + fdselect_state (int fd): fd (fd), ready (false) {} + }; + + using fdselect_set = small_vector; + + LIBBUTL_SYMEXPORT std::pair + fdselect (fdselect_set& ifds, fdselect_set& ofds); + + inline std::size_t + ifdselect (fdselect_set& ifds) + { + fdselect_set ofds; + return fdselect (ifds, ofds).first; + } + + inline std::size_t + ofdselect (fdselect_set& ofds) + { + fdselect_set ifds; + return fdselect (ifds, ofds).second; + } + + // As above but wait up to the specified timeout returning a pair of zeroes + // if none of the descriptors became ready. + // + template + std::pair + fdselect (fdselect_set&, fdselect_set&, const std::chrono::duration&); + + template + inline std::size_t + ifdselect (fdselect_set& ifds, const std::chrono::duration& timeout) + { + fdselect_set ofds; + return fdselect (ifds, ofds, timeout).first; + } + + template + inline std::size_t + ofdselect (fdselect_set& ofds, const std::chrono::duration& timeout) + { + fdselect_set ifds; + return fdselect (ifds, ofds, timeout).second; + } + + // POSIX read() function wrapper. In particular, it supports the semantics + // of non-blocking read for pipes on Windows. + // + // Note that on Wine currently pipes always behave as blocking regardless of + // the mode. 
+ // + LIBBUTL_SYMEXPORT std::streamsize + fdread (int, void*, std::size_t); +} + +#include diff --git a/libbutl/fdstream.ixx b/libbutl/fdstream.ixx index 9ec9e06..08e317c 100644 --- a/libbutl/fdstream.ixx +++ b/libbutl/fdstream.ixx @@ -1,6 +1,8 @@ // file : libbutl/fdstream.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include + namespace butl { // auto_fd diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.mxx deleted file mode 100644 index 0d3fd86..0000000 --- a/libbutl/fdstream.mxx +++ /dev/null @@ -1,941 +0,0 @@ -// file : libbutl/fdstream.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include // streamsize -#include -#include -#include -#include -#include -#include // move(), pair -#include // uint16_t, uint64_t -#include // size_t - -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.fdstream; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.filesystem; // permissions, entry_stat -import butl.small_vector; -#else -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // RAII type for file descriptors. Note that failure to close the descriptor - // is silently ignored by both the destructor and reset(). - // - // The descriptor can be negative. Such a descriptor is treated as unopened - // and is not closed. - // - struct nullfd_t - { - constexpr explicit nullfd_t (int) {} - constexpr operator int () const {return -1;} - }; - -#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym. 
- inline -#endif - constexpr nullfd_t nullfd (-1); - - class LIBBUTL_SYMEXPORT auto_fd - { - public: - auto_fd (nullfd_t = nullfd) noexcept: fd_ (-1) {} - - explicit - auto_fd (int fd) noexcept: fd_ (fd) {} - - auto_fd (auto_fd&& fd) noexcept: fd_ (fd.release ()) {} - auto_fd& operator= (auto_fd&&) noexcept; - - auto_fd (const auto_fd&) = delete; - auto_fd& operator= (const auto_fd&) = delete; - - ~auto_fd () noexcept; - - int - get () const noexcept {return fd_;} - - void - reset (int fd = -1) noexcept; - - int - release () noexcept - { - int r (fd_); - fd_ = -1; - return r; - } - - // Close an open file descriptor. Throw ios::failure on the underlying OS - // error. Reset the descriptor to -1 whether the exception is thrown or - // not. - // - void - close (); - - private: - int fd_; - }; - - inline bool - operator== (const auto_fd& x, const auto_fd& y) - { - return x.get () == y.get (); - } - - inline bool - operator!= (const auto_fd& x, const auto_fd& y) - { - return !(x == y); - } - - inline bool - operator== (const auto_fd& x, nullfd_t) - { - return x.get () == -1; - } - - inline bool - operator!= (const auto_fd& x, nullfd_t y) - { - return !(x == y); - } - - // An [io]fstream that can be initialized with a file descriptor in addition - // to a file name and that also by default enables exceptions on badbit and - // failbit. 
So instead of a dance like this: - // - // ifstream ifs; - // ifs.exceptions (ifstream::badbit | ifstream::failbit); - // ifs.open (path.string ()); - // - // You can simply do: - // - // ifdstream ifs (path); - // - // Notes and limitations: - // - // - char only - // - input or output but not both (can use a union of two streams for that) - // - no support for put back - // - use of tell[gp]() and seek[gp]() is discouraged on Windows for - // fdstreams opened in the text mode (see fdstreambuf::seekoff() - // implementation for reasoning and consider using non-standard tellg() - // and seekg() in fdstreambuf, instead) - // - non-blocking file descriptor is supported only by showmanyc() function - // and only for pipes on Windows, in contrast to POSIX systems - // - throws ios::failure in case of open(), read(), write(), close(), - // seek[gp](), or tell[gp]() errors - // - exception mask has at least badbit - // - after catching an exception caused by badbit the stream is no longer - // usable - // - not movable, though can be easily supported (or not: there is no move - // constructor for istream/ostream in GCC 4.9) - // - passing to constructor auto_fd with a negative file descriptor is valid - // and results in the creation of an unopened object - // - class LIBBUTL_SYMEXPORT fdstreambuf: public bufstreambuf - { - public: - fdstreambuf () = default; - - // Unless specified, the current read/write position is assumed to - // be 0 (note: not queried). - // - fdstreambuf (auto_fd&&, std::uint64_t pos = 0); - - // Before we invented auto_fd into fdstreams we keept fdstreambuf opened - // on faulty close attempt. Now fdstreambuf is always closed by close() - // function. This semantics change seems to be the right one as there is - // no reason to expect fdclose() to succeed after it has already failed - // once. 
- // - void - close () {fd_.close ();} - - auto_fd - release (); - - void - open (auto_fd&&, std::uint64_t pos = 0); - - bool - is_open () const {return fd_.get () >= 0;} - - int - fd () const {return fd_.get ();} - - // Set the file descriptor blocking mode returning the previous mode on - // success and throwing ios::failure otherwise (see fdmode() for details). - // - // Note that besides calling fdmode(fd()), this function also updating its - // internal state according to the new mode. - // - bool - blocking (bool); - - public: - using base = bufstreambuf; - - // basic_streambuf input interface. - // - public: - virtual std::streamsize - showmanyc (); - - virtual int_type - underflow (); - - // Direct access to the get area. Use with caution. - // - using base::gptr; - using base::egptr; - using base::gbump; - - // Return the (logical) position of the next byte to be read. - // - using base::tellg; - - // Seek to the (logical) position as if by reading the specified number of - // bytes from the beginning of the stream. Throw ios::failure on the - // underlying OS errors. - // - void - seekg (std::uint64_t); - - private: - bool - load (); - - // basic_streambuf output interface. - // - public: - virtual int_type - overflow (int_type); - - virtual int - sync (); - - virtual std::streamsize - xsputn (const char_type*, std::streamsize); - - // Return the (logical) position of the next byte to be written. - // - using base::tellp; - - // basic_streambuf positioning interface (both input/output). - // - public: - virtual pos_type - seekpos (pos_type, std::ios_base::openmode); - - virtual pos_type - seekoff (off_type, std::ios_base::seekdir, std::ios_base::openmode); - - private: - bool - save (); - - private: - auto_fd fd_; - char buf_[8192]; - bool non_blocking_ = false; - }; - - // File stream mode. - // - // The text/binary flags have the same semantics as those in std::fstream. 
- // Specifically, this is a noop for POSIX systems where the two modes are - // the same. On Windows, when reading in the text mode the sequence of 0xD, - // 0xA characters is translated into the single OxA character and 0x1A is - // interpreted as EOF. When writing in the text mode the OxA character is - // translated into the 0xD, 0xA sequence. - // - // The skip flag instructs the stream to skip to the end before closing the - // file descriptor. This is primarily useful when working with pipes where - // you may want not to "offend" the other end by closing your end before - // reading all the data. - // - // The blocking/non_blocking flags determine whether the IO operation should - // block or return control if currently there is no data to read or no room - // to write. Only the istream::readsome() function supports the semantics of - // non-blocking operations. In contrast to POSIX systems, we only support - // this for pipes on Windows, always assuming the blocking mode for other - // file descriptors. IO stream operations other than readsome() are illegal - // in the non-blocking mode and result in the badbit being set (note that - // it is not the more appropriate failbit for implementation reasons). - // - enum class fdstream_mode: std::uint16_t - { - text = 0x01, - binary = 0x02, - skip = 0x04, - blocking = 0x08, - non_blocking = 0x10 - }; - - inline fdstream_mode operator& (fdstream_mode, fdstream_mode); - inline fdstream_mode operator| (fdstream_mode, fdstream_mode); - inline fdstream_mode operator&= (fdstream_mode&, fdstream_mode); - inline fdstream_mode operator|= (fdstream_mode&, fdstream_mode); - - // Extended (compared to ios::openmode) file open flags. - // - enum class fdopen_mode: std::uint16_t - { - in = 0x01, // Open for reading. - out = 0x02, // Open for writing. - append = 0x04, // Seek to the end of file before each write. - truncate = 0x08, // Discard the file contents on open. - create = 0x10, // Create a file if not exists. 
- exclusive = 0x20, // Fail if the file exists and the create flag is set. - binary = 0x40, // Set binary translation mode. - at_end = 0x80, // Seek to the end of stream immediately after open. - - none = 0 // Usefull when building the mode incrementally. - }; - - inline fdopen_mode operator& (fdopen_mode, fdopen_mode); - inline fdopen_mode operator| (fdopen_mode, fdopen_mode); - inline fdopen_mode operator&= (fdopen_mode&, fdopen_mode); - inline fdopen_mode operator|= (fdopen_mode&, fdopen_mode); - - class LIBBUTL_SYMEXPORT fdstream_base - { - protected: - fdstream_base () = default; - fdstream_base (auto_fd&&, std::uint64_t pos); - fdstream_base (auto_fd&&, fdstream_mode, std::uint64_t pos); - - public: - int - fd () const {return buf_.fd ();} - - protected: - fdstreambuf buf_; - }; - - // iofdstream constructors and open() functions that take openmode as an - // argument mimic the corresponding iofstream functions in terms of the - // openmode mask interpretation. They throw std::invalid_argument for an - // invalid combination of flags (as per the standard). Note that the in and - // out flags are always added implicitly for ifdstream and ofdstream, - // respectively. - // - // iofdstream constructors and open() functions that take fdopen_mode as an - // argument interpret the mask literally just ignoring some flags which are - // meaningless in the absense of others (read more on that in the comment - // for fdopen()). Note that the in and out flags are always added implicitly - // for ifdstream and ofdstream, respectively. - // - // iofdstream constructors and open() functions that take file path as a - // const std::string& or const char* may throw the invalid_path exception. - // - // Passing auto_fd with a negative file descriptor is valid and results in - // the creation of an unopened object. - // - // Also note that open() and close() functions can be successfully called - // for an opened and unopened objects respectively. 
That is in contrast with - // iofstream that sets failbit in such cases. - // - - // Note that ifdstream destructor will close an open file descriptor but - // will ignore any errors. To detect such errors, call close() explicitly. - // - // This is a sample usage of iofdstreams with process. Note that here it is - // expected that the child process reads from STDIN first and writes to - // STDOUT afterwards. - // - // try - // { - // process pr (args, -1, -1); - // - // try - // { - // // In case of exception, skip and close input after output. - // // - // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); - // ofdstream os (move (pr.out_fd)); - // - // // Write. - // - // os.close (); // Don't block the other end. - // - // // Read. - // - // is.close (); // Skip till end and close. - // - // if (pr.wait ()) - // { - // return ...; // Good. - // } - // - // // Non-zero exit, diagnostics presumably issued, fall through. - // } - // catch (const failure&) - // { - // // IO failure, child exit status doesn't matter. Just wait for the - // // process completion and fall through. - // // - // // Note that this is optional if the process_error handler simply - // // falls through since process destructor will wait (but will ignore - // // any errors). - // // - // pr.wait (); - // } - // - // error << .... ; - // - // // Fall through. - // } - // catch (const process_error& e) - // { - // error << ... << e; - // - // if (e.child ()) - // exit (1); - // - // // Fall through. - // } - // - // throw failed (); - // - class LIBBUTL_SYMEXPORT ifdstream: public fdstream_base, public std::istream - { - public: - // Create an unopened object. 
- // - explicit - ifdstream (iostate = badbit | failbit); - - explicit - ifdstream (auto_fd&&, - iostate = badbit | failbit, - std::uint64_t pos = 0); - - ifdstream (auto_fd&&, - fdstream_mode m, - iostate = badbit | failbit, - std::uint64_t pos = 0); - - explicit - ifdstream (const char*, - iostate = badbit | failbit); - - explicit - ifdstream (const std::string&, - iostate = badbit | failbit); - - explicit - ifdstream (const path&, - iostate = badbit | failbit); - - // @@ In some implementations (for example, MSVC), iostate and openmode - // (and/or their respective constants) are not distinct enough which - // causes overload resolution errors. - // - ifdstream (const char*, - openmode, - iostate /*= badbit | failbit*/); - - ifdstream (const std::string&, - openmode, - iostate /*= badbit | failbit*/); - - ifdstream (const path&, - openmode, - iostate /*= badbit | failbit*/); - - ifdstream (const char*, - fdopen_mode, - iostate = badbit | failbit); - - ifdstream (const std::string&, - fdopen_mode, - iostate = badbit | failbit); - - ifdstream (const path&, - fdopen_mode, - iostate = badbit | failbit); - - ~ifdstream () override; - - void - open (const char*, openmode = in); - - void - open (const std::string&, openmode = in); - - void - open (const path&, openmode = in); - - void - open (const char*, fdopen_mode); - - void - open (const std::string&, fdopen_mode); - - void - open (const path&, fdopen_mode); - - void - open (auto_fd&& fd, std::uint64_t pos = 0) - { - buf_.open (std::move (fd), pos); - clear (); - } - - void - open (auto_fd&& fd, fdstream_mode m, std::uint64_t pos = 0); - - void close (); - auto_fd release (); // Note: no skipping. - bool is_open () const {return buf_.is_open ();} - - // Read the textual stream. The stream is supposed not to contain the null - // character. - // - std::string - read_text (); - - // Read the binary stream. 
- // - std::vector - read_binary (); - - private: - bool skip_ = false; - }; - - // Note that ofdstream requires that you explicitly call close() before - // destroying it. Or, more specifically, the ofdstream object should not be - // in the opened state by the time its destructor is called, unless it is in - // the "not good" state (good() == false) or the destructor is being called - // during the stack unwinding due to an exception being thrown - // (std::uncaught_exception() == true). This is enforced with assert() in - // the ofdstream destructor. - // - class LIBBUTL_SYMEXPORT ofdstream: public fdstream_base, public std::ostream - { - public: - // Create an unopened object. - // - explicit - ofdstream (iostate = badbit | failbit); - - explicit - ofdstream (auto_fd&&, - iostate = badbit | failbit, - std::uint64_t pos = 0); - - ofdstream (auto_fd&&, - fdstream_mode m, - iostate = badbit | failbit, - std::uint64_t pos = 0); - - explicit - ofdstream (const char*, - iostate = badbit | failbit); - - explicit - ofdstream (const std::string&, - iostate = badbit | failbit); - - explicit - ofdstream (const path&, - iostate = badbit | failbit); - - // @@ In some implementations (for example, MSVC), iostate and openmode - // (and/or their respective constants) are not distinct enough which - // causes overload resolution errors. 
- // - ofdstream (const char*, - openmode, - iostate /*= badbit | failbit*/); - - ofdstream (const std::string&, - openmode, - iostate /*= badbit | failbit*/); - - ofdstream (const path&, - openmode, - iostate /*= badbit | failbit*/); - - ofdstream (const char*, - fdopen_mode, - iostate = badbit | failbit); - - ofdstream (const std::string&, - fdopen_mode, - iostate = badbit | failbit); - - ofdstream (const path&, - fdopen_mode, - iostate = badbit | failbit); - - ~ofdstream () override; - - void - open (const char*, openmode = out); - - void - open (const std::string&, openmode = out); - - void - open (const path&, openmode = out); - - void - open (const char*, fdopen_mode); - - void - open (const std::string&, fdopen_mode); - - void - open (const path&, fdopen_mode); - - void - open (auto_fd&& fd, std::uint64_t pos = 0) - { - buf_.open (std::move (fd), pos); - clear (); - } - - void close () {if (is_open ()) flush (); buf_.close ();} - auto_fd release (); - bool is_open () const {return buf_.is_open ();} - }; - - // Open a file or, if the file name is `-`, stdin/stdout. - // - // In case of the stdin/stdout, these functions simply adjust the exception - // mask on std::cin/cout to match the i/ofdstreams argument. - // - // Return a reference to the opened i/ofdstream or cin/cout and, in the - // latter case, set the translated name in path_name to / - // (unless it is already present). - // - // Note that ofdstream::close() should be called explicitly unless stdout - // was opened (but harmless to call even if it was). 
- // - LIBBUTL_SYMEXPORT std::istream& - open_file_or_stdin (path_name&, ifdstream&); - - LIBBUTL_SYMEXPORT std::ostream& - open_file_or_stdout (path_name&, ofdstream&); - - // The std::getline() replacement that provides a workaround for libstdc++'s - // ios::failure ABI fiasco (#66145) by throwing ios::failure, as it is - // defined at libbutl build time (new ABI on recent distributions) rather - // than libstdc++ build time (still old ABI on most distributions). - // - // Notes: - // - // - This relies of ADL so if the stream is used via the std::istream - // interface, then std::getline() will still be used. To put it another - // way, this is "the best we can do" until GCC folks get their act - // together. - // - // - The fail and eof bits may be left cleared in the stream exception mask - // when the function throws because of badbit. - // - LIBBUTL_SYMEXPORT ifdstream& - getline (ifdstream&, std::string&, char delim = '\n'); - - // Open a file returning an auto_fd that holds its file descriptor on - // success and throwing ios::failure otherwise. - // - // The mode argument should have at least one of the in or out flags set. - // The append and truncate flags are meaningless in the absense of the out - // flag and are ignored without it. The exclusive flag is meaningless in the - // absense of the create flag and is ignored without it. Note also that if - // the exclusive flag is specified then a dangling symbolic link is treated - // as an existing file. - // - // The permissions argument is taken into account only if the file is - // created. Note also that permissions can be adjusted while being set in a - // way specific for the OS. On POSIX systems they are modified with the - // process' umask, so effective permissions are permissions & ~umask. On - // Windows permissions other than ru and wu are unlikelly to have effect. 
- // - // Also note that on POSIX the FD_CLOEXEC flag is set for the file descriptor - // to prevent its leakage into child processes. On Windows, for the same - // purpose, the _O_NOINHERIT flag is set. Note that the process class, that - // passes such a descriptor to the child, makes it inheritable for a while. - // - LIBBUTL_SYMEXPORT auto_fd - fdopen (const char*, - fdopen_mode, - permissions = permissions::ru | permissions::wu | - permissions::rg | permissions::wg | - permissions::ro | permissions::wo); - - LIBBUTL_SYMEXPORT auto_fd - fdopen (const std::string&, - fdopen_mode, - permissions = permissions::ru | permissions::wu | - permissions::rg | permissions::wg | - permissions::ro | permissions::wo); - - LIBBUTL_SYMEXPORT auto_fd - fdopen (const path&, - fdopen_mode, - permissions = permissions::ru | permissions::wu | - permissions::rg | permissions::wg | - permissions::ro | permissions::wo); - - // Duplicate an open file descriptor. Throw ios::failure on the underlying - // OS error. - // - // Note that on POSIX the FD_CLOEXEC flag is set for the new descriptor if it - // is present for the source one. That's in contrast to POSIX dup() that - // doesn't copy file descriptor flags. Also note that duplicating descriptor - // and setting the flag is not an atomic operation generally, but it is in - // regards to child process spawning (to prevent file descriptor leakage into - // a child process). - // - // Note that on Windows the _O_NOINHERIT flag is set for the new descriptor - // if it is present for the source one. That's in contrast to Windows _dup() - // that doesn't copy the flag. Also note that duplicating descriptor and - // setting the flag is not an atomic operation generally, but it is in - // regards to child process spawning (to prevent file descriptor leakage into - // a child process). - // - LIBBUTL_SYMEXPORT auto_fd - fddup (int fd); - - // Set the translation and/or blocking modes for the file descriptor. 
Throw - // invalid_argument for an invalid combination of flags. Return the previous - // mode on success, throw ios::failure otherwise. - // - // The text and binary flags are mutually exclusive on Windows. On POSIX - // system the two modes are the same and so no check is performed. - // - // The blocking and non-blocking flags are mutually exclusive. In contrast - // to POSIX systems, on Windows the non-blocking mode is only supported for - // pipes, with the blocking mode assumed for other file descriptors - // regardless of the flags. - // - // Note that on Wine currently pipes always behave as blocking regardless of - // the mode set. - // - LIBBUTL_SYMEXPORT fdstream_mode - fdmode (int, fdstream_mode); - - // Portable functions for obtaining file descriptors of standard streams. - // Throw ios::failure on the underlying OS error. - // - // Note that you normally wouldn't want to close them using fddup() to - // convert them to auto_fd, for example: - // - // ifdstream is (fddup (stdin_fd ())); - // - LIBBUTL_SYMEXPORT int stdin_fd (); - LIBBUTL_SYMEXPORT int stdout_fd (); - LIBBUTL_SYMEXPORT int stderr_fd (); - - // Convenience functions for setting the translation mode for standard - // streams. - // - LIBBUTL_SYMEXPORT fdstream_mode stdin_fdmode (fdstream_mode); - LIBBUTL_SYMEXPORT fdstream_mode stdout_fdmode (fdstream_mode); - LIBBUTL_SYMEXPORT fdstream_mode stderr_fdmode (fdstream_mode); - - // Low-level, nothrow file descriptor API. - // - - // Close the file descriptor. Return true on success, set errno and return - // false otherwise. - // - LIBBUTL_SYMEXPORT bool - fdclose (int) noexcept; - - // Open the null device (e.g., /dev/null) that discards all data written to - // it and provides no data for read operations (i.e., yelds EOF on read). - // Return an auto_fd that holds its file descriptor on success and throwing - // ios::failure otherwise. 
- // - // On Windows the null device is NUL and writing anything substantial to it - // (like redirecting a process' output) is extremely slow, as in, an order - // of magnitude slower than writing to disk. If you are using the descriptor - // yourself this can be mitigated by setting the binary mode (already done - // by fdopen()) and using a buffer of around 64K. However, sometimes you - // have no control of how the descriptor will be used. For instance, it can - // be used to redirect a child's stdout and the way the child sets up its - // stdout is out of your control (on Windows). For such cases, there is an - // emulation via a temporary file. Mostly it functions as a proper null - // device with the file automatically removed once the descriptor is - // closed. One difference, however, would be if you were to both write to - // and read from the descriptor. - // - // Note that on POSIX the FD_CLOEXEC flag is set for the file descriptor to - // prevent its leakage into child processes. On Windows, for the same - // purpose, the _O_NOINHERIT flag is set. - // -#ifndef _WIN32 - LIBBUTL_SYMEXPORT auto_fd - fdopen_null (); -#else - LIBBUTL_SYMEXPORT auto_fd - fdopen_null (bool temp = false); -#endif - - struct fdpipe - { - auto_fd in; - auto_fd out; - - void - close () - { - in.close (); - out.close (); - } - }; - - // Create a pipe. Throw ios::failure on the underlying OS error. By default - // both ends of the pipe are opened in the text mode. Pass the binary flag - // to instead open them in the binary mode. Passing a mode other than none - // or binary is illegal. - // - // Note that on Windows both ends of the created pipe are not inheritable. - // In particular, the process class that uses fdpipe underneath makes the - // appropriate end (the one being passed to the child) inheritable. 
- // - // Note that on POSIX the FD_CLOEXEC flag is set for both ends, so they get - // automatically closed by the child process to prevent undesired behaviors - // (such as child deadlock on read from a pipe due to the write-end leakage - // into the child process). Opening a pipe and setting the flag is not an - // atomic operation generally, but it is in regards to child process spawning - // (to prevent file descriptor leakage into child processes spawned from - // other threads). Also note that you don't need to reset the flag for a pipe - // end being passed to the process class ctor. - // - LIBBUTL_SYMEXPORT fdpipe - fdopen_pipe (fdopen_mode = fdopen_mode::none); - - // Seeking. - // - enum class fdseek_mode {set, cur, end}; - - LIBBUTL_SYMEXPORT std::uint64_t - fdseek (int, std::int64_t, fdseek_mode); - - // Truncate or expand the file to the specified size. Throw ios::failure on - // the underlying OS error. - // - LIBBUTL_SYMEXPORT void - fdtruncate (int, std::uint64_t); - - // Return filesystem entry stat from file descriptor. Throw ios::failure on - // the underlying OS error. - // - // See also path_entry() in filesystem. - // - LIBBUTL_SYMEXPORT entry_stat - fdstat (int); - - // Test whether a file descriptor refers to a terminal. Throw ios::failure - // on the underlying OS error. - // - LIBBUTL_SYMEXPORT bool - fdterm (int); - - // Wait until one or more file descriptors becomes ready for input (reading) - // or output (writing). Return the pair of numbers of descriptors that are - // ready. Throw std::invalid_argument if anything is wrong with arguments - // (both sets are empty, invalid fd, etc). Throw ios::failure on the - // underlying OS error. - // - // Note that the function clears all the previously-ready entries on each - // call. Entries with nullfd are ignored. - // - // On Windows only pipes and only their input (read) ends are supported. 
- // - struct fdselect_state - { - int fd; - bool ready; - - // Note: intentionally non-explicit to allow implicit initialization when - // pushing to fdselect_set. - // - fdselect_state (int fd): fd (fd), ready (false) {} - }; - - using fdselect_set = small_vector; - - LIBBUTL_SYMEXPORT std::pair - fdselect (fdselect_set& ifds, fdselect_set& ofds); - - inline std::size_t - ifdselect (fdselect_set& ifds) - { - fdselect_set ofds; - return fdselect (ifds, ofds).first; - } - - inline std::size_t - ofdselect (fdselect_set& ofds) - { - fdselect_set ifds; - return fdselect (ifds, ofds).second; - } - - // As above but wait up to the specified timeout returning a pair of zeroes - // if none of the descriptors became ready. - // - template - std::pair - fdselect (fdselect_set&, fdselect_set&, const std::chrono::duration&); - - template - inline std::size_t - ifdselect (fdselect_set& ifds, const std::chrono::duration& timeout) - { - fdselect_set ofds; - return fdselect (ifds, ofds, timeout).first; - } - - template - inline std::size_t - ofdselect (fdselect_set& ofds, const std::chrono::duration& timeout) - { - fdselect_set ifds; - return fdselect (ifds, ofds, timeout).second; - } - - // POSIX read() function wrapper. In particular, it supports the semantics - // of non-blocking read for pipes on Windows. - // - // Note that on Wine currently pipes always behave as blocking regardless of - // the mode. 
- // - LIBBUTL_SYMEXPORT std::streamsize - fdread (int, void*, std::size_t); -} - -#include diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 3427ee9..0a3d260 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -1,9 +1,7 @@ // file : libbutl/filesystem.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include // errno, E* @@ -34,46 +32,17 @@ # include // strncmp() #endif -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include -#include - +#include #include #include // unique_ptr +#include #include // find(), copy() #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -module butl.filesystem; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.timestamp; -import butl.path_pattern; -#endif - -import butl.utility; // throw_generic_error() -import butl.fdstream; -import butl.small_vector; -#else -#include -#include -#include -#include -#endif +#include +#include // throw_generic_error() +#include +#include #ifndef _WIN32 # ifndef PATH_MAX diff --git a/libbutl/filesystem.hxx b/libbutl/filesystem.hxx new file mode 100644 index 0000000..8804b04 --- /dev/null +++ b/libbutl/filesystem.hxx @@ -0,0 +1,855 @@ +// file : libbutl/filesystem.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // E* + +#ifndef _WIN32 +# include // DIR +#else +# include // intptr_t +#endif + +// VC's sys/types.h header file doesn't define mode_t type. So we define it +// ourselves according to the POSIX specification. 
+// +#ifndef _MSC_VER +# include // mode_t +#else + using mode_t = int; +#endif + +#include +#include // ptrdiff_t +#include // uint16_t, etc +#include // move(), pair +#include // input_iterator_tag +#include + +#include +#include +#include +#include // path_match_flags + +#include + +namespace butl +{ + // Return true if the path is to an existing regular file. Note that by + // default this function follows symlinks. Underlying OS errors are reported + // by throwing std::system_error, unless ignore_error is true (in which case + // erroneous entries are treated as non-existent). + // + LIBBUTL_SYMEXPORT bool + file_exists (const char*, + bool follow_symlinks = true, + bool ignore_error = false); + + inline bool + file_exists (const path& p, bool fs = true, bool ie = false) { + return file_exists (p.string ().c_str (), fs, ie);} + + // Return true if the path is to an existing directory. Note that this + // function follows symlinks. Underlying OS errors are reported by throwing + // std::system_error, unless ignore_error is true (in which case erroneous + // entries are treated as non-existent). + // + LIBBUTL_SYMEXPORT bool + dir_exists (const char*, bool ignore_error = false); + + inline bool + dir_exists (const path& p, bool ie = false) { + return dir_exists (p.string ().c_str (), ie);} + + // Return true if the path is to an existing file system entry. Note that by + // default this function doesn't follow symlinks. Underlying OS errors are + // reported by throwing std::system_error, unless ignore_error is true (in + // which case erroneous entries are treated as non-existent). + // + LIBBUTL_SYMEXPORT bool + entry_exists (const char*, + bool follow_symlinks = false, + bool ignore_error = false); + + inline bool + entry_exists (const path& p, bool fs = false, bool ie = false) { + return entry_exists (p.string ().c_str (), fs, ie);} + + // Filesystem entry type. 
+ // + enum class entry_type + { + unknown, + regular, + directory, + symlink, + other + }; + + // Filesystem entry info. The size is only meaningful for regular files. + // + struct entry_stat + { + entry_type type; + std::uint64_t size; + }; + + // Return a flag indicating if the path is to an existing filesystem entry + // and its info if so. Note that by default this function doesn't follow + // symlinks. Underlying OS errors are reported by throwing + // std::system_error, unless ignore_error is true (in which case erroneous + // entries are treated as non-existent). + // + // See also fdstat() in fdstream. + // + LIBBUTL_SYMEXPORT std::pair + path_entry (const char*, + bool follow_symlinks = false, + bool ignore_error = false); + + inline std::pair + path_entry (const path& p, bool fs = false, bool ie = false) { + return path_entry (p.string ().c_str (), fs, ie);} + + // Return true if the directory is empty. Note that the path must exist + // and be a directory. This function follows symlinks. + // + bool + dir_empty (const dir_path&); + + // Return true if the file is empty. Note that the path must exist and be a + // regular file. This function follows symlinks. + // + bool + file_empty (const path&); + + // Set the file access and modification times to the current time. If the + // file does not exist and create is true, create it and fail otherwise. + // Return true if the file was created and false otherwise. Errors are + // reported by throwing std::system_error. + // + LIBBUTL_SYMEXPORT bool + touch_file (const path&, bool create = true); + + // Try to create a directory unless it already exists. If you expect + // the directory to exist and performance is important, then you + // should first call dir_exists() above since that's what this + // implementation will do to make sure the path is actually a + // directory. + // + // You should also probably use the default mode 0777 and let the + // umask mechanism adjust it to the user's preferences. 
+ // + // Errors are reported by throwing std::system_error. + // + enum class mkdir_status {success, already_exists}; + + LIBBUTL_SYMEXPORT mkdir_status + try_mkdir (const dir_path&, mode_t = 0777); + + // The '-p' version of the above (i.e., it creates the parent + // directories if necessary). + // + LIBBUTL_SYMEXPORT mkdir_status + try_mkdir_p (const dir_path&, mode_t = 0777); + + // Try to remove the directory returning not_exist if it does not exist + // and not_empty if it is not empty. Unless ignore_error is true, all + // other errors are reported by throwing std::system_error. + // + enum class rmdir_status {success, not_exist, not_empty}; + + LIBBUTL_SYMEXPORT rmdir_status + try_rmdir (const dir_path&, bool ignore_error = false); + + // The '-r' (recursive) version of the above. Note that it will + // never return not_empty. + // + LIBBUTL_SYMEXPORT rmdir_status + try_rmdir_r (const dir_path&, bool ignore_error = false); + + // As above but throws rather than returns not_exist if the directory + // does not exist (unless ignore_error is true), so check before calling. + // If the second argument is false, then the directory itself is not removed. + // + LIBBUTL_SYMEXPORT void + rmdir_r (const dir_path&, bool dir = true, bool ignore_error = false); + + // Try to remove the file (or symlink) returning not_exist if it does not + // exist. Unless ignore_error is true, all other errors are reported by + // throwing std::system_error. + // + // Note that if it is known that the path refers to a symlink, then usage of + // try_rmsymlink() function must be preferred, as a more efficient one. + // + enum class rmfile_status {success, not_exist}; + + // Note that on Windows the read-only attribute is reset prior to the file + // removal (as it can't otherwise be deleted). In such a case the operation + // is not atomic. It is also not atomic for the directory-type reparse point + // removal. 
+ // + rmfile_status + try_rmfile (const path&, bool ignore_error = false); + + optional + try_rmfile_ignore_error (const path&); + + // Automatically try to remove a non-empty path on destruction unless + // cancelled. Since the non-cancelled destruction will normally happen as a + // result of an exception, the failure to remove the path is silently + // ignored. + // + template + struct auto_rm + { + P path; + bool active; + + explicit + auto_rm (P p = P (), bool a = true): path (std::move (p)), active (a) {} + + void + cancel () {active = false;} + + // Movable-only type. Move-assignment cancels the lhs object. + // + auto_rm (auto_rm&&); + auto_rm& operator= (auto_rm&&); + auto_rm (const auto_rm&) = delete; + auto_rm& operator= (const auto_rm&) = delete; + + ~auto_rm (); + }; + + using auto_rmfile = auto_rm; + using auto_rmdir = auto_rm; // Note: recursive (rm_r). + + // Create a symbolic link to a file (default) or directory (third argument + // is true). Assume a relative target to be relative to the link's + // directory. Throw std::system_error on failures. + // + // Note that on Windows symlinks are supported partially: + // + // - File symlinks are implemented via the Windows symlink mechanism and may + // only be created on Windows 10 Build 14972 and above with either the + // Developer Mode enabled or if the process runs in the elevated command + // prompt. + // + // - Directory symlinks are implemented via the Windows symlink mechanism if + // possible (see above) and via the Windows junction mechanism otherwise. + // Note that creating a junction doesn't require a process to have + // administrative privileges and so succeeds regardless of the Windows + // version and mode. Also note that junctions, in contrast to symlinks, + // may only store target absolute paths. Thus, when create a junction with + // a relative target we complete it using the link directory and, if the + // latter is also relative, using the process' current working directory. 
+ // This makes it impossible for a mksymlink() caller to rely on the target + // path staying relative. Note that we also normalize the junction target + // path regardless if we complete it or not. + // + // - Functions other than mksymlink() fully support Windows reparse points + // and treat them as follows: + // + // - consider the file symlink entries (file-type reparse points tagged + // as IO_REPARSE_TAG_SYMLINK and referring to files) as regular file + // symlinks (having the entry_type::symlink type). + // + // - consider the directory symlink entries (same as above but refer to + // directories) and junctions (directory-type reparse points tagged as + // IO_REPARSE_TAG_MOUNT_POINT and referring to directories) as directory + // symlinks (having the entry_type::symlink type). + // + // - consider all other reparse point types (volume mount points, Unix + // domain sockets, etc) as other entries (having the entry_type::other + // type). + // + // Also note that symlinks are currently not supported properly on Wine due + // to some differences in the underlying API behavior. + // + LIBBUTL_SYMEXPORT void + mksymlink (const path& target, const path& link, bool dir = false); + + // Create a symbolic link to a directory. Throw std::system_error on + // failures. + // + inline void + mksymlink (const dir_path& target, const dir_path& link) + { + mksymlink (target, link, true); + } + + // Return the symbolic link target. Throw std::system_error on failures. + // + // Note that this function doesn't follow symlinks, so if a symlink refers + // to another symlink then the second link's path is returned. + // + // Also note that the function returns the exact target path as it is stored + // in the symlink filesystem entry, without completing or normalizing it. + // + LIBBUTL_SYMEXPORT path + readsymlink (const path&); + + // Follow a symbolic link chain until non-symlink filesystem entry is + // encountered and return its path. 
Throw std::system_error on failures, + // including on encountering a non-existent filesystem entry anywhere in the + // chain (but see try_followsymlink() below). + // + // The resulting path is constructed by starting with the specified path and + // then by sequentially resolving the symlink chain rebasing a relative + // target path over the current resulting path and resetting it to the path + // itself on encountering an absolute target path. For example: + // + // for a/b/c -> ../d/e the result is a/d/e + // for a/b/c -> /x/y/z -> ../d/e the result is /x/d/e + // + path + followsymlink (const path&); + + // As above but instead of failing on the dangling symlink return its path + // (first) as well as an indication of this condition (false as second). + // + LIBBUTL_SYMEXPORT std::pair + try_followsymlink (const path&); + + // Remove a symbolic link to a file (default) or directory (third argument + // is true). Throw std::system_error on failures. + // + LIBBUTL_SYMEXPORT rmfile_status + try_rmsymlink (const path&, bool dir = false, bool ignore_error = false); + + // Remove a symbolic link to a directory. Throw std::system_error on + // failures. + // + inline rmfile_status + try_rmsymlink (const dir_path& link, bool ignore_error = false) + { + return try_rmsymlink (link, true /* dir */, ignore_error); + } + + // Create a hard link to a file (default) or directory (third argument is + // true). Throw std::system_error on failures. + // + // Note that on Linux, FreeBSD, Windows and some other platforms the target + // cannot be a directory. + // + LIBBUTL_SYMEXPORT void + mkhardlink (const path& target, const path& link, bool dir = false); + + // Create a hard link to a directory. Throw std::system_error on failures. 
+ // + inline void + mkhardlink (const dir_path& target, const dir_path& link) + { + mkhardlink (target, link, true /* dir */); + } + + // Make a symlink, hardlink, or, if `copy` is true, a copy of a file (note: + // no directories, only files), whichever is possible in that order. If + // `relative` is true, then make the symlink target relative to the link + // directory (note: it is the caller's responsibility to make sure this is + // possible). Otherwise, assume a relative target to be relative to the + // link directory and complete it accordingly when create a hardlink or a + // copy. + // + // On success, return the type of entry created: `regular` for copy, + // `symlink` for symlink, and `other` for hardlink. On failure, throw a + // `pair` with the first half indicating the part + // of the logic that caused the error. + // + LIBBUTL_SYMEXPORT entry_type + mkanylink (const path& target, + const path& link, + bool copy, + bool relative = false); + + // File copy flags. + // + enum class cpflags: std::uint16_t + { + overwrite_content = 0x1, // Overwrite content of destination. + overwrite_permissions = 0x2, // Overwrite permissions of destination. + + copy_timestamps = 0x4, // Copy timestamps from source. + + none = 0 + }; + + inline cpflags operator& (cpflags, cpflags); + inline cpflags operator| (cpflags, cpflags); + inline cpflags operator&= (cpflags&, cpflags); + inline cpflags operator|= (cpflags&, cpflags); + + // Copy a regular file, including its permissions, and optionally timestamps. + // Throw std::system_error on failure. Fail if the destination file exists + // and the overwrite_content flag is not set. Leave permissions of an + // existing destination file intact unless the overwrite_permissions flag is + // set. Delete incomplete copies before throwing. + // + // Note that in case of overwriting, the existing destination file gets + // truncated (not deleted) prior to being overwritten. 
As a side-effect, + // hard link to the destination file will still reference the same file + // system node after the copy. + // + // Also note that if the overwrite_content flag is not set and the + // destination is a dangling symbolic link, then this function will still + // fail. + // + LIBBUTL_SYMEXPORT void + cpfile (const path& from, const path& to, cpflags = cpflags::none); + + // Copy a regular file into (inside) an existing directory. + // + inline void + cpfile_into (const path& from, + const dir_path& into, + cpflags fl = cpflags::none) + { + cpfile (from, into / from.leaf (), fl); + } + + // Rename a filesystem entry (file, symlink, or directory). Throw + // std::system_error on failure. + // + // If the source path refers to a directory, then the destination path must + // either not exist, or refer to an empty directory. If the source path + // refers to an entry that is not a directory, then the destination path must + // not exist or not refer to a directory. + // + // If the source path refers to a symlink, then the link is renamed. If the + // destination path refers to a symlink, then the link will be overwritten. + // + // If the source and destination paths are on different file systems (or + // different drives on Windows) and the underlying OS does not support move + // for the source entry, then fail unless the source paths refers to a file + // or a file symlink. In this case fall back to copying the source file + // (content, permissions, access and modification times) and removing the + // source entry afterwards. + // + // Note that the operation is atomic only on POSIX, only if source and + // destination paths are on the same file system, and only if the + // overwrite_content flag is specified. + // + LIBBUTL_SYMEXPORT void + mventry (const path& from, + const path& to, + cpflags = cpflags::overwrite_permissions); + + // Move a filesystem entry into (inside) an existing directory. 
+ // + inline void + mventry_into (const path& from, + const dir_path& into, + cpflags f = cpflags::overwrite_permissions) + { + mventry (from, into / from.leaf (), f); + } + + // Rename file or file symlink. + // + inline void + mvfile (const path& from, + const path& to, + cpflags f = cpflags::overwrite_permissions) + { + mventry (from, to, f); + } + + inline void + mvfile_into (const path& from, + const dir_path& into, + cpflags f = cpflags::overwrite_permissions) + { + mventry_into (from, into, f); + } + + // Rename directory or directory symlink. + // + inline void + mvdir (const dir_path& from, + const dir_path& to, + cpflags f = cpflags::overwrite_permissions) + { + mventry (from, to, f); + } + + inline void + mvdir_into (const path& from, + const dir_path& into, + cpflags f = cpflags::overwrite_permissions) + { + mventry_into (from, into, f); + } + + struct entry_time + { + timestamp modification; + timestamp access; + }; + + // Return timestamp_nonexistent for the modification and access times if the + // entry at the specified path does not exist or is not a regular file. All + // other errors are reported by throwing std::system_error. Note that these + // functions resolve symlinks. + // + LIBBUTL_SYMEXPORT entry_time + file_time (const char*); + + inline entry_time + file_time (const path& p) {return file_time (p.string ().c_str ());} + + inline timestamp + file_mtime (const char* p) {return file_time (p).modification;} + + inline timestamp + file_mtime (const path& p) {return file_mtime (p.string ().c_str ());} + + inline timestamp + file_atime (const char* p) {return file_time (p).access;} + + inline timestamp + file_atime (const path& p) {return file_atime (p.string ().c_str ());} + + // As above but return the directory times. 
+ // + LIBBUTL_SYMEXPORT entry_time + dir_time (const char*); + + inline entry_time + dir_time (const dir_path& p) {return dir_time (p.string ().c_str ());} + + inline timestamp + dir_mtime (const char* p) {return dir_time (p).modification;} + + inline timestamp + dir_mtime (const dir_path& p) {return dir_mtime (p.string ().c_str ());} + + inline timestamp + dir_atime (const char* p) {return dir_time (p).access;} + + inline timestamp + dir_atime (const dir_path& p) {return dir_atime (p.string ().c_str ());} + + // Set a regular file modification and access times. If a time value is + // timestamp_nonexistent then it is left unchanged. All errors are reported + // by throwing std::system_error. + // + // Note: use touch_file() instead of file_mtime(system_clock::now()). + // + LIBBUTL_SYMEXPORT void + file_time (const char*, const entry_time&); + + inline void + file_time (const path& p, const entry_time& t) + { + return file_time (p.string ().c_str (), t); + } + + inline void + file_mtime (const char* p, timestamp t) + { + return file_time (p, {t, timestamp_nonexistent}); + } + + inline void + file_mtime (const path& p, timestamp t) + { + return file_mtime (p.string ().c_str (), t); + } + + inline void + file_atime (const char* p, timestamp t) + { + return file_time (p, {timestamp_nonexistent, t}); + } + + inline void + file_atime (const path& p, timestamp t) + { + return file_atime (p.string ().c_str (), t); + } + + // As above but set the directory times. 
+ // + LIBBUTL_SYMEXPORT void + dir_time (const char*, const entry_time&); + + inline void + dir_time (const dir_path& p, const entry_time& t) + { + return dir_time (p.string ().c_str (), t); + } + + inline void + dir_mtime (const char* p, timestamp t) + { + return dir_time (p, {t, timestamp_nonexistent}); + } + + inline void + dir_mtime (const dir_path& p, timestamp t) + { + return dir_mtime (p.string ().c_str (), t); + } + + inline void + dir_atime (const char* p, timestamp t) + { + return dir_time (p, {timestamp_nonexistent, t}); + } + + inline void + dir_atime (const dir_path& p, timestamp t) + { + return dir_atime (p.string ().c_str (), t); + } + + // Path permissions. + // + enum class permissions: std::uint16_t + { + // Note: matching POSIX values. + // + xo = 0001, + wo = 0002, + ro = 0004, + + xg = 0010, + wg = 0020, + rg = 0040, + + xu = 0100, + wu = 0200, + ru = 0400, + + none = 0 + }; + + inline permissions operator& (permissions, permissions); + inline permissions operator| (permissions, permissions); + inline permissions operator&= (permissions&, permissions); + inline permissions operator|= (permissions&, permissions); + + // Get path permissions. Throw std::system_error on failure. Note that this + // function resolves symlinks. + // + LIBBUTL_SYMEXPORT permissions + path_permissions (const path&); + + // Set path permissions. Throw std::system_error on failure. Note that this + // function resolves symlinks. + // + LIBBUTL_SYMEXPORT void + path_permissions (const path&, permissions); + + // Directory entry iteration. + // + class LIBBUTL_SYMEXPORT dir_entry + { + public: + using path_type = butl::path; + + // Symlink target type in case of the symlink, ltype() otherwise. + // + entry_type + type () const; + + entry_type + ltype () const; + + // Entry path (excluding the base). To get the full path, do + // base () / path (). 
+ // + const path_type& + path () const {return p_;} + + const dir_path& + base () const {return b_;} + + dir_entry () = default; + dir_entry (entry_type t, path_type p, dir_path b) + : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {} + + private: + entry_type + type (bool follow_symlinks) const; + + private: + friend class dir_iterator; + + mutable entry_type t_ = entry_type::unknown; // Lazy evaluation. + mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation. + path_type p_; + dir_path b_; + }; + + class LIBBUTL_SYMEXPORT dir_iterator + { + public: + using value_type = dir_entry; + using pointer = const dir_entry*; + using reference = const dir_entry&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + ~dir_iterator (); + dir_iterator () = default; + + // If it is requested to ignore dangling symlinks, then the increment + // operator will skip symlinks that refer to non-existing or inaccessible + // targets. That implies that it will always try to stat() symlinks. + // + explicit + dir_iterator (const dir_path&, bool ignore_dangling); + + dir_iterator (const dir_iterator&) = delete; + dir_iterator& operator= (const dir_iterator&) = delete; + + dir_iterator (dir_iterator&&) noexcept; + dir_iterator& operator= (dir_iterator&&); + + dir_iterator& operator++ () {next (); return *this;} + + reference operator* () const {return e_;} + pointer operator-> () const {return &e_;} + + friend bool operator== (const dir_iterator&, const dir_iterator&); + friend bool operator!= (const dir_iterator&, const dir_iterator&); + + private: + void + next (); + + private: + dir_entry e_; + +#ifndef _WIN32 + DIR* h_ = nullptr; +#else + intptr_t h_ = -1; +#endif + + bool ignore_dangling_ = false; + }; + + // Range-based for loop support. + // + // for (const auto& de: dir_iterator (dir_path ("/tmp"))) ... + // + // Note that the "range" (which is the "begin" iterator), is no + // longer usable. 
In other words, don't do this: + // + // dir_iterator i (...); + // for (...: i) ... + // ++i; // Invalid. + // + inline dir_iterator begin (dir_iterator&); + inline dir_iterator end (const dir_iterator&); + + // MSVC in the strict mode (/permissive-), which we enable by default from + // 15.5, needs this declaration to straighten its brains out. + // +#if defined(_MSC_VER) && _MSC_VER >= 1912 + inline dir_iterator begin (dir_iterator&&); +#endif + + // Wildcard pattern search (aka glob). + // + // For details on the wildcard patterns see + + // Search for paths matching the pattern calling the specified function for + // each matching path (see below for details). + // + // If the pattern is relative, then search in the start directory. If the + // start directory is empty, then search in the current working directory. + // Searching in non-existent directories is not an error. Throw + // std::system_error in case of a failure (insufficient permissions, etc). + // + // The pattern may contain multiple components that include wildcards. On + // Windows the drive letter may not be a wildcard. + // + // In addition to the wildcard characters, path_search() also recognizes the + // ** and *** wildcard sequences. If a path component contains **, then it + // is matched just like * but in all the subdirectories, recursively. The + // *** wildcard behaves like ** but also matches the start directory itself. + // Note that if the first pattern component contains ***, then the start + // directory must be empty or be terminated with a "meaningful" component + // (e.g., probably not '.' or '..'). + // + // So, for example, foo/bar-**.txt will return all the files matching the + // bar-*.txt pattern in all the subdirectories of foo/. And foo/f***/ will + // return all the subdirectories matching the f*/ pattern plus foo/ itself. 
+ // + // Note that having multiple recursive components in the pattern we can end + // up with calling func() multiple times (once per such a component) for the + // same path. For example the search with pattern f***/b**/ starting in + // directory foo, that has the foo/fox/box/ structure, will result in + // calling func(foo/fox/box/) twice: first time for being a child of fox/, + // second time for being a child of foo/. + // + // The callback function is called for both intermediate matches (interm is + // true) and final matches (interm is false). Pattern is what matched the + // last component in the path and is empty if the last component is not a + // pattern (final match only; say as in */foo.txt). + // + // If the callback function returns false for an intermediate path, then no + // further search is performed at or below this path. If false is returned + // for a final match, then the entire search is stopped. + // + // The path can be moved for the final match or for an intermediate match + // but only if false is returned. + // + // As an example, consider pattern f*/bar/b*/*.txt and path + // foo/bar/baz/x.txt. The sequence of calls in this case will be: + // + // (foo/, f*/, true) + // (foo/bar/baz/, b*/, true) + // (foo/bar/baz/x.txt, *.txt, false) + // + // If the pattern contains a recursive wildcard, then the callback function + // can be called for the same directory twice: first time as an intermediate + // match with */ pattern to decide if to recursively traverse the directory, + // and the second time if the directory matches the pattern component (either + // as an intermediate or a final match). As an example, consider pattern + // b**/c* and directory tree a/b/c/. 
The sequence of calls in this case will + // be: + // + // (a/, */, true) + // (a/b/, */, true) + // (a/b/c/, */, true) + // (a/b/, b*/, true) + // (a/b/c/, c*/, false) + // + // Note that recursive iterating through directories currently goes + // depth-first which makes sense for the cleanup use cases. In future we may + // want to make it controllable. + // + LIBBUTL_SYMEXPORT void + path_search (const path& pattern, + const std::function&, + const dir_path& start = dir_path (), + path_match_flags = path_match_flags::follow_symlinks); + + // Same as above, but behaves as if the directory tree being searched + // through contains only the specified entry. The start directory is used if + // the first pattern component is a self-matching wildcard (see above). + // + // If pattern or entry is relative, then it is assumed to be relative to the + // start directory (which, if relative itself, is assumed to be relative to + // the current directory). Note that the implementation can optimize the + // case when pattern and entry are both non-empty and relative. 
+ // + LIBBUTL_SYMEXPORT void + path_search (const path& pattern, + const path& entry, + const std::function&, + const dir_path& start = dir_path (), + path_match_flags = path_match_flags::none); +} + +#include diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx index 8cd8456..763d311 100644 --- a/libbutl/filesystem.ixx +++ b/libbutl/filesystem.ixx @@ -1,6 +1,9 @@ // file : libbutl/filesystem.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include // operator<<(ostream,exception), + // throw_generic_error() + namespace butl { inline bool diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.mxx deleted file mode 100644 index eb03ab2..0000000 --- a/libbutl/filesystem.mxx +++ /dev/null @@ -1,879 +0,0 @@ -// file : libbutl/filesystem.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include // E* - -#ifndef _WIN32 -# include // DIR -#else -# include // intptr_t -#endif - -// VC's sys/types.h header file doesn't define mode_t type. So we define it -// ourselves according to the POSIX specification. -// -#ifndef _MSC_VER -# include // mode_t -#else - using mode_t = int; -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include // ptrdiff_t -#include // uint16_t, etc -#include // move(), pair -#include // input_iterator_tag -#include - -#include //@@ MOD needed by timestamp module (no re-export). -#endif - -// Other includes. -#ifdef __cpp_modules_ts -export module butl.filesystem; - -#ifdef __cpp_lib_modules_ts -import std.core; -#endif - -import butl.path; -import butl.optional; -import butl.timestamp; -import butl.path_pattern; // path_match_flags - -import butl.utility; // operator<<(ostream,exception), throw_generic_error() -#else -#include -#include -#include -#include - -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Return true if the path is to an existing regular file. Note that by - // default this function follows symlinks. 
Underlying OS errors are reported - // by throwing std::system_error, unless ignore_error is true (in which case - // erroneous entries are treated as non-existent). - // - LIBBUTL_SYMEXPORT bool - file_exists (const char*, - bool follow_symlinks = true, - bool ignore_error = false); - - inline bool - file_exists (const path& p, bool fs = true, bool ie = false) { - return file_exists (p.string ().c_str (), fs, ie);} - - // Return true if the path is to an existing directory. Note that this - // function follows symlinks. Underlying OS errors are reported by throwing - // std::system_error, unless ignore_error is true (in which case erroneous - // entries are treated as non-existent). - // - LIBBUTL_SYMEXPORT bool - dir_exists (const char*, bool ignore_error = false); - - inline bool - dir_exists (const path& p, bool ie = false) { - return dir_exists (p.string ().c_str (), ie);} - - // Return true if the path is to an existing file system entry. Note that by - // default this function doesn't follow symlinks. Underlying OS errors are - // reported by throwing std::system_error, unless ignore_error is true (in - // which case erroneous entries are treated as non-existent). - // - LIBBUTL_SYMEXPORT bool - entry_exists (const char*, - bool follow_symlinks = false, - bool ignore_error = false); - - inline bool - entry_exists (const path& p, bool fs = false, bool ie = false) { - return entry_exists (p.string ().c_str (), fs, ie);} - - // Filesystem entry type. - // - enum class entry_type - { - unknown, - regular, - directory, - symlink, - other - }; - - // Filesystem entry info. The size is only meaningful for regular files. - // - struct entry_stat - { - entry_type type; - std::uint64_t size; - }; - - // Return a flag indicating if the path is to an existing filesystem entry - // and its info if so. Note that by default this function doesn't follow - // symlinks. 
Underlying OS errors are reported by throwing - // std::system_error, unless ignore_error is true (in which case erroneous - // entries are treated as non-existent). - // - // See also fdstat() in fdstream. - // - LIBBUTL_SYMEXPORT std::pair - path_entry (const char*, - bool follow_symlinks = false, - bool ignore_error = false); - - inline std::pair - path_entry (const path& p, bool fs = false, bool ie = false) { - return path_entry (p.string ().c_str (), fs, ie);} - - // Return true if the directory is empty. Note that the path must exist - // and be a directory. This function follows symlinks. - // - bool - dir_empty (const dir_path&); - - // Return true if the file is empty. Note that the path must exist and be a - // regular file. This function follows symlinks. - // - bool - file_empty (const path&); - - // Set the file access and modification times to the current time. If the - // file does not exist and create is true, create it and fail otherwise. - // Return true if the file was created and false otherwise. Errors are - // reported by throwing std::system_error. - // - LIBBUTL_SYMEXPORT bool - touch_file (const path&, bool create = true); - - // Try to create a directory unless it already exists. If you expect - // the directory to exist and performance is important, then you - // should first call dir_exists() above since that's what this - // implementation will do to make sure the path is actually a - // directory. - // - // You should also probably use the default mode 0777 and let the - // umask mechanism adjust it to the user's preferences. - // - // Errors are reported by throwing std::system_error. - // - enum class mkdir_status {success, already_exists}; - - LIBBUTL_SYMEXPORT mkdir_status - try_mkdir (const dir_path&, mode_t = 0777); - - // The '-p' version of the above (i.e., it creates the parent - // directories if necessary). 
- // - LIBBUTL_SYMEXPORT mkdir_status - try_mkdir_p (const dir_path&, mode_t = 0777); - - // Try to remove the directory returning not_exist if it does not exist - // and not_empty if it is not empty. Unless ignore_error is true, all - // other errors are reported by throwing std::system_error. - // - enum class rmdir_status {success, not_exist, not_empty}; - - LIBBUTL_SYMEXPORT rmdir_status - try_rmdir (const dir_path&, bool ignore_error = false); - - // The '-r' (recursive) version of the above. Note that it will - // never return not_empty. - // - LIBBUTL_SYMEXPORT rmdir_status - try_rmdir_r (const dir_path&, bool ignore_error = false); - - // As above but throws rather than returns not_exist if the directory - // does not exist (unless ignore_error is true), so check before calling. - // If the second argument is false, then the directory itself is not removed. - // - LIBBUTL_SYMEXPORT void - rmdir_r (const dir_path&, bool dir = true, bool ignore_error = false); - - // Try to remove the file (or symlink) returning not_exist if it does not - // exist. Unless ignore_error is true, all other errors are reported by - // throwing std::system_error. - // - // Note that if it is known that the path refers to a symlink, then usage of - // try_rmsymlink() function must be preferred, as a more efficient one. - // - enum class rmfile_status {success, not_exist}; - - // Note that on Windows the read-only attribute is reset prior to the file - // removal (as it can't otherwise be deleted). In such a case the operation - // is not atomic. It is also not atomic for the directory-type reparse point - // removal. - // - rmfile_status - try_rmfile (const path&, bool ignore_error = false); - - optional - try_rmfile_ignore_error (const path&); - - // Automatically try to remove a non-empty path on destruction unless - // cancelled. Since the non-cancelled destruction will normally happen as a - // result of an exception, the failure to remove the path is silently - // ignored. 
- // - template - struct auto_rm - { - P path; - bool active; - - explicit - auto_rm (P p = P (), bool a = true): path (std::move (p)), active (a) {} - - void - cancel () {active = false;} - - // Movable-only type. Move-assignment cancels the lhs object. - // - auto_rm (auto_rm&&); - auto_rm& operator= (auto_rm&&); - auto_rm (const auto_rm&) = delete; - auto_rm& operator= (const auto_rm&) = delete; - - ~auto_rm (); - }; - - using auto_rmfile = auto_rm; - using auto_rmdir = auto_rm; // Note: recursive (rm_r). - - // Create a symbolic link to a file (default) or directory (third argument - // is true). Assume a relative target to be relative to the link's - // directory. Throw std::system_error on failures. - // - // Note that on Windows symlinks are supported partially: - // - // - File symlinks are implemented via the Windows symlink mechanism and may - // only be created on Windows 10 Build 14972 and above with either the - // Developer Mode enabled or if the process runs in the elevated command - // prompt. - // - // - Directory symlinks are implemented via the Windows symlink mechanism if - // possible (see above) and via the Windows junction mechanism otherwise. - // Note that creating a junction doesn't require a process to have - // administrative privileges and so succeeds regardless of the Windows - // version and mode. Also note that junctions, in contrast to symlinks, - // may only store target absolute paths. Thus, when create a junction with - // a relative target we complete it using the link directory and, if the - // latter is also relative, using the process' current working directory. - // This makes it impossible for a mksymlink() caller to rely on the target - // path staying relative. Note that we also normalize the junction target - // path regardless if we complete it or not. 
- // - // - Functions other than mksymlink() fully support Windows reparse points - // and treat them as follows: - // - // - consider the file symlink entries (file-type reparse points tagged - // as IO_REPARSE_TAG_SYMLINK and referring to files) as regular file - // symlinks (having the entry_type::symlink type). - // - // - consider the directory symlink entries (same as above but refer to - // directories) and junctions (directory-type reparse points tagged as - // IO_REPARSE_TAG_MOUNT_POINT and referring to directories) as directory - // symlinks (having the entry_type::symlink type). - // - // - consider all other reparse point types (volume mount points, Unix - // domain sockets, etc) as other entries (having the entry_type::other - // type). - // - // Also note that symlinks are currently not supported properly on Wine due - // to some differences in the underlying API behavior. - // - LIBBUTL_SYMEXPORT void - mksymlink (const path& target, const path& link, bool dir = false); - - // Create a symbolic link to a directory. Throw std::system_error on - // failures. - // - inline void - mksymlink (const dir_path& target, const dir_path& link) - { - mksymlink (target, link, true); - } - - // Return the symbolic link target. Throw std::system_error on failures. - // - // Note that this function doesn't follow symlinks, so if a symlink refers - // to another symlink then the second link's path is returned. - // - // Also note that the function returns the exact target path as it is stored - // in the symlink filesystem entry, without completing or normalizing it. - // - LIBBUTL_SYMEXPORT path - readsymlink (const path&); - - // Follow a symbolic link chain until non-symlink filesystem entry is - // encountered and return its path. Throw std::system_error on failures, - // including on encountering a non-existent filesystem entry anywhere in the - // chain (but see try_followsymlink() below). 
- // - // The resulting path is constructed by starting with the specified path and - // then by sequentially resolving the symlink chain rebasing a relative - // target path over the current resulting path and resetting it to the path - // itself on encountering an absolute target path. For example: - // - // for a/b/c -> ../d/e the result is a/d/e - // for a/b/c -> /x/y/z -> ../d/e the result is /x/d/e - // - path - followsymlink (const path&); - - // As above but instead of failing on the dangling symlink return its path - // (first) as well as as an indication of this condition (false as second). - // - LIBBUTL_SYMEXPORT std::pair - try_followsymlink (const path&); - - // Remove a symbolic link to a file (default) or directory (third argument - // is true). Throw std::system_error on failures. - // - LIBBUTL_SYMEXPORT rmfile_status - try_rmsymlink (const path&, bool dir = false, bool ignore_error = false); - - // Remove a symbolic link to a directory. Throw std::system_error on - // failures. - // - inline rmfile_status - try_rmsymlink (const dir_path& link, bool ignore_error = false) - { - return try_rmsymlink (link, true /* dir */, ignore_error); - } - - // Create a hard link to a file (default) or directory (third argument is - // true). Throw std::system_error on failures. - // - // Note that on Linux, FreeBSD, Windows and some other platforms the target - // cannot be a directory. - // - LIBBUTL_SYMEXPORT void - mkhardlink (const path& target, const path& link, bool dir = false); - - // Create a hard link to a directory. Throw std::system_error on failures. - // - inline void - mkhardlink (const dir_path& target, const dir_path& link) - { - mkhardlink (target, link, true /* dir */); - } - - // Make a symlink, hardlink, or, if `copy` is true, a copy of a file (note: - // no directories, only files), whichever is possible in that order. 
If - // `relative` is true, then make the symlink target relative to the link - // directory (note: it is the caller's responsibility to make sure this is - // possible). Otherwise, assume a relative target to be relative to the - // link directory and complete it accordingly when create a hardlink or a - // copy. - // - // On success, return the type of entry created: `regular` for copy, - // `symlink` for symlink, and `other` for hardlink. On failure, throw a - // `pair` with the first half indicating the part - // of the logic that caused the error. - // - LIBBUTL_SYMEXPORT entry_type - mkanylink (const path& target, - const path& link, - bool copy, - bool relative = false); - - // File copy flags. - // - enum class cpflags: std::uint16_t - { - overwrite_content = 0x1, // Overwrite content of destination. - overwrite_permissions = 0x2, // Overwrite permissions of destination. - - copy_timestamps = 0x4, // Copy timestamps from source. - - none = 0 - }; - - inline cpflags operator& (cpflags, cpflags); - inline cpflags operator| (cpflags, cpflags); - inline cpflags operator&= (cpflags&, cpflags); - inline cpflags operator|= (cpflags&, cpflags); - - // Copy a regular file, including its permissions, and optionally timestamps. - // Throw std::system_error on failure. Fail if the destination file exists - // and the overwrite_content flag is not set. Leave permissions of an - // existing destination file intact unless the overwrite_permissions flag is - // set. Delete incomplete copies before throwing. - // - // Note that in case of overwriting, the existing destination file gets - // truncated (not deleted) prior to being overwritten. As a side-effect, - // hard link to the destination file will still reference the same file - // system node after the copy. - // - // Also note that if the overwrite_content flag is not set and the - // destination is a dangling symbolic link, then this function will still - // fail. 
- // - LIBBUTL_SYMEXPORT void - cpfile (const path& from, const path& to, cpflags = cpflags::none); - - // Copy a regular file into (inside) an existing directory. - // - inline void - cpfile_into (const path& from, - const dir_path& into, - cpflags fl = cpflags::none) - { - cpfile (from, into / from.leaf (), fl); - } - - // Rename a filesystem entry (file, symlink, or directory). Throw - // std::system_error on failure. - // - // If the source path refers to a directory, then the destination path must - // either not exist, or refer to an empty directory. If the source path - // refers to an entry that is not a directory, then the destination path must - // not exist or not refer to a directory. - // - // If the source path refers to a symlink, then the link is renamed. If the - // destination path refers to a symlink, then the link will be overwritten. - // - // If the source and destination paths are on different file systems (or - // different drives on Windows) and the underlying OS does not support move - // for the source entry, then fail unless the source paths refers to a file - // or a file symlink. In this case fall back to copying the source file - // (content, permissions, access and modification times) and removing the - // source entry afterwards. - // - // Note that the operation is atomic only on POSIX, only if source and - // destination paths are on the same file system, and only if the - // overwrite_content flag is specified. - // - LIBBUTL_SYMEXPORT void - mventry (const path& from, - const path& to, - cpflags = cpflags::overwrite_permissions); - - // Move a filesystem entry into (inside) an existing directory. - // - inline void - mventry_into (const path& from, - const dir_path& into, - cpflags f = cpflags::overwrite_permissions) - { - mventry (from, into / from.leaf (), f); - } - - // Raname file or file symlink. 
- // - inline void - mvfile (const path& from, - const path& to, - cpflags f = cpflags::overwrite_permissions) - { - mventry (from, to, f); - } - - inline void - mvfile_into (const path& from, - const dir_path& into, - cpflags f = cpflags::overwrite_permissions) - { - mventry_into (from, into, f); - } - - // Raname directory or directory symlink. - // - inline void - mvdir (const dir_path& from, - const dir_path& to, - cpflags f = cpflags::overwrite_permissions) - { - mventry (from, to, f); - } - - inline void - mvdir_into (const path& from, - const dir_path& into, - cpflags f = cpflags::overwrite_permissions) - { - mventry_into (from, into, f); - } - - struct entry_time - { - timestamp modification; - timestamp access; - }; - - // Return timestamp_nonexistent for the modification and access times if the - // entry at the specified path does not exist or is not a regular file. All - // other errors are reported by throwing std::system_error. Note that these - // functions resolves symlinks. - // - LIBBUTL_SYMEXPORT entry_time - file_time (const char*); - - inline entry_time - file_time (const path& p) {return file_time (p.string ().c_str ());} - - inline timestamp - file_mtime (const char* p) {return file_time (p).modification;} - - inline timestamp - file_mtime (const path& p) {return file_mtime (p.string ().c_str ());} - - inline timestamp - file_atime (const char* p) {return file_time (p).access;} - - inline timestamp - file_atime (const path& p) {return file_atime (p.string ().c_str ());} - - // As above but return the directory times. 
- // - LIBBUTL_SYMEXPORT entry_time - dir_time (const char*); - - inline entry_time - dir_time (const dir_path& p) {return dir_time (p.string ().c_str ());} - - inline timestamp - dir_mtime (const char* p) {return dir_time (p).modification;} - - inline timestamp - dir_mtime (const dir_path& p) {return dir_mtime (p.string ().c_str ());} - - inline timestamp - dir_atime (const char* p) {return dir_time (p).access;} - - inline timestamp - dir_atime (const dir_path& p) {return dir_atime (p.string ().c_str ());} - - // Set a regular file modification and access times. If a time value is - // timestamp_nonexistent then it is left unchanged. All errors are reported - // by throwing std::system_error. - // - // Note: use touch_file() instead of file_mtime(system_clock::now()). - // - LIBBUTL_SYMEXPORT void - file_time (const char*, const entry_time&); - - inline void - file_time (const path& p, const entry_time& t) - { - return file_time (p.string ().c_str (), t); - } - - inline void - file_mtime (const char* p, timestamp t) - { - return file_time (p, {t, timestamp_nonexistent}); - } - - inline void - file_mtime (const path& p, timestamp t) - { - return file_mtime (p.string ().c_str (), t); - } - - inline void - file_atime (const char* p, timestamp t) - { - return file_time (p, {timestamp_nonexistent, t}); - } - - inline void - file_atime (const path& p, timestamp t) - { - return file_atime (p.string ().c_str (), t); - } - - // As above but set the directory times. 
- // - LIBBUTL_SYMEXPORT void - dir_time (const char*, const entry_time&); - - inline void - dir_time (const dir_path& p, const entry_time& t) - { - return dir_time (p.string ().c_str (), t); - } - - inline void - dir_mtime (const char* p, timestamp t) - { - return dir_time (p, {t, timestamp_nonexistent}); - } - - inline void - dir_mtime (const dir_path& p, timestamp t) - { - return dir_mtime (p.string ().c_str (), t); - } - - inline void - dir_atime (const char* p, timestamp t) - { - return dir_time (p, {timestamp_nonexistent, t}); - } - - inline void - dir_atime (const dir_path& p, timestamp t) - { - return dir_atime (p.string ().c_str (), t); - } - - // Path permissions. - // - enum class permissions: std::uint16_t - { - // Note: matching POSIX values. - // - xo = 0001, - wo = 0002, - ro = 0004, - - xg = 0010, - wg = 0020, - rg = 0040, - - xu = 0100, - wu = 0200, - ru = 0400, - - none = 0 - }; - - inline permissions operator& (permissions, permissions); - inline permissions operator| (permissions, permissions); - inline permissions operator&= (permissions&, permissions); - inline permissions operator|= (permissions&, permissions); - - // Get path permissions. Throw std::system_error on failure. Note that this - // function resolves symlinks. - // - LIBBUTL_SYMEXPORT permissions - path_permissions (const path&); - - // Set path permissions. Throw std::system_error on failure. Note that this - // function resolves symlinks. - // - LIBBUTL_SYMEXPORT void - path_permissions (const path&, permissions); - - // Directory entry iteration. - // - class LIBBUTL_SYMEXPORT dir_entry - { - public: - using path_type = butl::path; - - // Symlink target type in case of the symlink, ltype() otherwise. - // - entry_type - type () const; - - entry_type - ltype () const; - - // Entry path (excluding the base). To get the full path, do - // base () / path (). 
- // - const path_type& - path () const {return p_;} - - const dir_path& - base () const {return b_;} - - dir_entry () = default; - dir_entry (entry_type t, path_type p, dir_path b) - : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {} - - private: - entry_type - type (bool follow_symlinks) const; - - private: - friend class dir_iterator; - - mutable entry_type t_ = entry_type::unknown; // Lazy evaluation. - mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation. - path_type p_; - dir_path b_; - }; - - class LIBBUTL_SYMEXPORT dir_iterator - { - public: - using value_type = dir_entry; - using pointer = const dir_entry*; - using reference = const dir_entry&; - using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; - - ~dir_iterator (); - dir_iterator () = default; - - // If it is requested to ignore dangling symlinks, then the increment - // operator will skip symlinks that refer to non-existing or inaccessible - // targets. That implies that it will always try to stat() symlinks. - // - explicit - dir_iterator (const dir_path&, bool ignore_dangling); - - dir_iterator (const dir_iterator&) = delete; - dir_iterator& operator= (const dir_iterator&) = delete; - - dir_iterator (dir_iterator&&) noexcept; - dir_iterator& operator= (dir_iterator&&); - - dir_iterator& operator++ () {next (); return *this;} - - reference operator* () const {return e_;} - pointer operator-> () const {return &e_;} - - friend bool operator== (const dir_iterator&, const dir_iterator&); - friend bool operator!= (const dir_iterator&, const dir_iterator&); - - private: - void - next (); - - private: - dir_entry e_; - -#ifndef _WIN32 - DIR* h_ = nullptr; -#else - intptr_t h_ = -1; -#endif - - bool ignore_dangling_ = false; - }; - - // Range-based for loop support. - // - // for (const auto& de: dir_iterator (dir_path ("/tmp"))) ... - // - // Note that the "range" (which is the "begin" iterator), is no - // longer usable. 
In other words, don't do this: - // - // dir_iterator i (...); - // for (...: i) ... - // ++i; // Invalid. - // - inline dir_iterator begin (dir_iterator&); - inline dir_iterator end (const dir_iterator&); - - // MSVC in the strict mode (/permissive-), which we enable by default from - // 15.5, needs this declaration to straighten its brains out. - // -#if defined(_MSC_VER) && _MSC_VER >= 1912 - inline dir_iterator begin (dir_iterator&&); -#endif - - // Wildcard pattern search (aka glob). - // - // For details on the wildcard patterns see - - // Search for paths matching the pattern calling the specified function for - // each matching path (see below for details). - // - // If the pattern is relative, then search in the start directory. If the - // start directory is empty, then search in the current working directory. - // Searching in non-existent directories is not an error. Throw - // std::system_error in case of a failure (insufficient permissions, etc). - // - // The pattern may contain multiple components that include wildcards. On - // Windows the drive letter may not be a wildcard. - // - // In addition to the wildcard characters, path_search() also recognizes the - // ** and *** wildcard sequences. If a path component contains **, then it - // is matched just like * but in all the subdirectories, recursively. The - // *** wildcard behaves like ** but also matches the start directory itself. - // Note that if the first pattern component contains ***, then the start - // directory must be empty or be terminated with a "meaningful" component - // (e.g., probably not '.' or '..'). - // - // So, for example, foo/bar-**.txt will return all the files matching the - // bar-*.txt pattern in all the subdirectoris of foo/. And foo/f***/ will - // return all the subdirectories matching the f*/ pattern plus foo/ itself. 
- // - // Note that having multiple recursive components in the pattern we can end - // up with calling func() multiple times (once per such a component) for the - // same path. For example the search with pattern f***/b**/ starting in - // directory foo, that has the foo/fox/box/ structure, will result in - // calling func(foo/fox/box/) twice: first time for being a child of fox/, - // second time for being a child of foo/. - // - // The callback function is called for both intermediate matches (interm is - // true) and final matches (interm is false). Pattern is what matched the - // last component in the path and is empty if the last component is not a - // pattern (final match only; say as in */foo.txt). - // - // If the callback function returns false for an intermediate path, then no - // further search is performed at or below this path. If false is returned - // for a final match, then the entire search is stopped. - // - // The path can be moved for the final match or for an intermediate match - // but only if false is returned. - // - // As an example, consider pattern f*/bar/b*/*.txt and path - // foo/bar/baz/x.txt. The sequence of calls in this case will be: - // - // (foo/, f*/, true) - // (foo/bar/baz/, b*/, true) - // (foo/bar/baz/x.txt, *.txt, false) - // - // If the pattern contains a recursive wildcard, then the callback function - // can be called for the same directory twice: first time as an intermediate - // match with */ pattern to decide if to recursively traverse the directory, - // and the second time if the directory matches the pattern component (either - // as an intermediate or a final match). As an example, consider pattern - // b**/c* and directory tree a/b/c/. 
The sequence of calls in this case will - // be: - // - // (a/, */, true) - // (a/b/, */ true) - // (a/b/c/, */, true) - // (a/b/, b*/, true) - // (a/b/c/, c*/, false) - // - // Note that recursive iterating through directories currently goes - // depth-first which make sense for the cleanup use cases. In future we may - // want to make it controllable. - // - LIBBUTL_SYMEXPORT void - path_search (const path& pattern, - const std::function&, - const dir_path& start = dir_path (), - path_match_flags = path_match_flags::follow_symlinks); - - // Same as above, but behaves as if the directory tree being searched - // through contains only the specified entry. The start directory is used if - // the first pattern component is a self-matching wildcard (see above). - // - // If pattern or entry is relative, then it is assumed to be relative to the - // start directory (which, if relative itself, is assumed to be relative to - // the current directory). Note that the implementation can optimize the - // case when pattern and entry are both non-empty and relative. - // - LIBBUTL_SYMEXPORT void - path_search (const path& pattern, - const path& entry, - const std::function&, - const dir_path& start = dir_path (), - path_match_flags = path_match_flags::none); -} - -#include diff --git a/libbutl/git.cxx b/libbutl/git.cxx index b9dd9bc..cc10c91 100644 --- a/libbutl/git.cxx +++ b/libbutl/git.cxx @@ -1,43 +1,11 @@ // file : libbutl/git.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include -// C includes. - -#include - -#ifndef __cpp_lib_modules_ts -#include - -#include // size_t -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.git; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.optional; -import butl.semantic_version -#endif - -import butl.utility; // digit() -import butl.filesystem; // entry_exists() -#else -#include -#include -#include -#include -#endif +#include +#include // entry_exists() +#include using namespace std; diff --git a/libbutl/git.hxx b/libbutl/git.hxx new file mode 100644 index 0000000..add721e --- /dev/null +++ b/libbutl/git.hxx @@ -0,0 +1,27 @@ +// file : libbutl/git.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include + +#include +#include +#include + +#include + +namespace butl +{ + // Return true if the specified directory is a git repository root (contains + // the .git filesystem entry). + // + LIBBUTL_SYMEXPORT bool + git_repository (const dir_path&); + + // Try to parse the line printed by the 'git --version' command. Return git + // version if succeed, nullopt otherwise. + // + LIBBUTL_SYMEXPORT optional + git_version (const std::string&); +} diff --git a/libbutl/git.mxx b/libbutl/git.mxx deleted file mode 100644 index 3f003be..0000000 --- a/libbutl/git.mxx +++ /dev/null @@ -1,45 +0,0 @@ -// file : libbutl/git.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.git; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.optional; -import butl.semantic_version; -#else -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Return true if the specified directory is a git repository root (contains - // the .git filesystem entry). - // - LIBBUTL_SYMEXPORT bool - git_repository (const dir_path&); - - // Try to parse the line printed by the 'git --version' command. 
Return git - // version if succeed, nullopt otherwise. - // - LIBBUTL_SYMEXPORT optional - git_version (const std::string&); -} diff --git a/libbutl/lz4-stream.cxx b/libbutl/lz4-stream.cxx index 9d0ac99..8001770 100644 --- a/libbutl/lz4-stream.cxx +++ b/libbutl/lz4-stream.cxx @@ -6,7 +6,7 @@ #include // memcpy() #include // invalid_argument -#include // eof() +#include // eof() using namespace std; diff --git a/libbutl/lz4-stream.hxx b/libbutl/lz4-stream.hxx index 5d25670..b11c0a2 100644 --- a/libbutl/lz4-stream.hxx +++ b/libbutl/lz4-stream.hxx @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/libbutl/lz4.cxx b/libbutl/lz4.cxx index a627b06..2db7af2 100644 --- a/libbutl/lz4.cxx +++ b/libbutl/lz4.cxx @@ -26,7 +26,7 @@ #include #include // invalid_argument, logic_error -#include // eos() +#include // eos() #if 0 #include diff --git a/libbutl/lz4.hxx b/libbutl/lz4.hxx index cfe9967..7886788 100644 --- a/libbutl/lz4.hxx +++ b/libbutl/lz4.hxx @@ -6,8 +6,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx index 9514bbd..c208eb1 100644 --- a/libbutl/manifest-parser.cxx +++ b/libbutl/manifest-parser.cxx @@ -1,39 +1,10 @@ // file : libbutl/manifest-parser.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include - #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.manifest_parser; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -import butl.char_scanner; -import butl.manifest_types; -#endif - -#endif using namespace std; diff --git a/libbutl/manifest-parser.hxx b/libbutl/manifest-parser.hxx new file mode 100644 index 0000000..d53eb42 --- /dev/null +++ b/libbutl/manifest-parser.hxx @@ -0,0 +1,160 @@ +// file : libbutl/manifest-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // uint64_t +#include // pair, move() +#include // runtime_error +#include + +#include +#include +#include +#include + +#include + +namespace butl +{ + class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error + { + public: + manifest_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + manifest_parsing (const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + class LIBBUTL_SYMEXPORT manifest_parser: + protected char_scanner + { + public: + // The filter, if specified, is called by next() prior to returning the + // pair to the caller. If the filter returns false, then the pair is + // discarded. + // + // Note that the filter should handle the end-of-manifest pairs (see + // below) carefully, so next() doesn't end up with an infinite cycle. + // + using filter_function = bool (manifest_name_value&); + + manifest_parser (std::istream& is, + const std::string& name, + std::function filter = {}) + : char_scanner (is, + utf8_validator (codepoint_types::graphic, U"\n\r\t")), + name_ (name), + filter_ (std::move (filter)) {} + + const std::string& + name () const {return name_;} + + // The first returned pair is special "start-of-manifest" with empty name + // and value being the format version: {"", ""}. After that we have a + // sequence of ordinary pairs which are the manifest. 
At the end of the + // manifest we have the special "end-of-manifest" pair with empty name and + // value: {"", ""}. After that we can either get another start-of-manifest + // pair (in which case the whole sequence repeats from the beginning) or + // we get another end-of-manifest-like pair which signals the end of + // stream (aka EOF) and which we will call the end-of-stream pair. To put + // it another way, the parse sequence always has the following form: + // + // ({"", ""} {"", ""}* {"", ""})* {"", ""} + // + manifest_name_value + next (); + + // Split the manifest value, optionally followed by ';' character and a + // comment into the value/comment pair. Note that ';' characters in the + // value must be escaped by the backslash. + // + static std::pair + split_comment (const std::string&); + + private: + using base = char_scanner; + + void + parse_next (manifest_name_value&); + + void + parse_name (manifest_name_value&); + + void + parse_value (manifest_name_value&); + + // Skip spaces and return the first peeked non-space character and the + // starting position of the line it belongs to. If the later is not + // available (skipped spaces are all in the middle of a line, we are at + // eos, etc.), then fallback to the first peeked character position. + // + std::pair + skip_spaces (); + + // As base::get() but in case of an invalid character throws + // manifest_parsing. + // + xchar + get (const char* what); + + // Get previously peeked character (faster). + // + void + get (const xchar&); + + // As base::peek() but in case of an invalid character throws + // manifest_parsing. + // + xchar + peek (const char* what); + + private: + const std::string name_; + const std::function filter_; + + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + // Buffer for a get()/peek() potential error. + // + std::string ebuf_; + }; + + // Parse and return a single manifest. Throw manifest_parsing in case of an + // error. 
+ // + // Note that the returned manifest doesn't contain the format version nor + // the end-of-manifest/stream pairs. + // + LIBBUTL_SYMEXPORT std::vector + parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list. + // + LIBBUTL_SYMEXPORT void + parse_manifest (manifest_parser&, std::vector&); + + // As above but return nullopt if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT optional> + try_parse_manifest (manifest_parser&); + + // As above but append the manifest values to an existing list returning + // false if eos is reached before reading any values. + // + LIBBUTL_SYMEXPORT bool + try_parse_manifest (manifest_parser&, std::vector&); +} + +#include diff --git a/libbutl/manifest-parser.mxx b/libbutl/manifest-parser.mxx deleted file mode 100644 index 77addff..0000000 --- a/libbutl/manifest-parser.mxx +++ /dev/null @@ -1,180 +0,0 @@ -// file : libbutl/manifest-parser.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include // uint64_t -#include // pair, move() -#include // runtime_error -#include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.manifest_parser; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utf8; -import butl.optional; -import butl.char_scanner; -import butl.manifest_types; -#else -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - class LIBBUTL_SYMEXPORT manifest_parsing: public std::runtime_error - { - public: - manifest_parsing (const std::string& name, - std::uint64_t line, - std::uint64_t column, - const std::string& description); - - manifest_parsing (const std::string& description); - - std::string name; - std::uint64_t line; - std::uint64_t column; - std::string description; - }; - - class LIBBUTL_SYMEXPORT manifest_parser: - protected char_scanner - { - public: - // The filter, if specified, is called by next() prior to returning the - // pair to the caller. If the filter returns false, then the pair is - // discarded. - // - // Note that the filter should handle the end-of-manifest pairs (see - // below) carefully, so next() doesn't end up with an infinite cycle. - // - using filter_function = bool (manifest_name_value&); - - manifest_parser (std::istream& is, - const std::string& name, - std::function filter = {}) - : char_scanner (is, - utf8_validator (codepoint_types::graphic, U"\n\r\t")), - name_ (name), - filter_ (std::move (filter)) {} - - const std::string& - name () const {return name_;} - - // The first returned pair is special "start-of-manifest" with empty name - // and value being the format version: {"", ""}. After that we have a - // sequence of ordinary pairs which are the manifest. At the end of the - // manifest we have the special "end-of-manifest" pair with empty name and - // value: {"", ""}. 
After that we can either get another start-of-manifest - // pair (in which case the whole sequence repeats from the beginning) or - // we get another end-of-manifest-like pair which signals the end of - // stream (aka EOF) and which we will call the end-of-stream pair. To put - // it another way, the parse sequence always has the following form: - // - // ({"", ""} {"", ""}* {"", ""})* {"", ""} - // - manifest_name_value - next (); - - // Split the manifest value, optionally followed by ';' character and a - // comment into the value/comment pair. Note that ';' characters in the - // value must be escaped by the backslash. - // - static std::pair - split_comment (const std::string&); - - private: - using base = char_scanner; - - void - parse_next (manifest_name_value&); - - void - parse_name (manifest_name_value&); - - void - parse_value (manifest_name_value&); - - // Skip spaces and return the first peeked non-space character and the - // starting position of the line it belongs to. If the later is not - // available (skipped spaces are all in the middle of a line, we are at - // eos, etc.), then fallback to the first peeked character position. - // - std::pair - skip_spaces (); - - // As base::get() but in case of an invalid character throws - // manifest_parsing. - // - xchar - get (const char* what); - - // Get previously peeked character (faster). - // - void - get (const xchar&); - - // As base::peek() but in case of an invalid character throws - // manifest_parsing. - // - xchar - peek (const char* what); - - private: - const std::string name_; - const std::function filter_; - - enum {start, body, end} s_ = start; - std::string version_; // Current format version. - - // Buffer for a get()/peek() potential error. - // - std::string ebuf_; - }; - - // Parse and return a single manifest. Throw manifest_parsing in case of an - // error. - // - // Note that the returned manifest doesn't contain the format version nor - // the end-of-manifest/stream pairs. 
- // - LIBBUTL_SYMEXPORT std::vector - parse_manifest (manifest_parser&); - - // As above but append the manifest values to an existing list. - // - LIBBUTL_SYMEXPORT void - parse_manifest (manifest_parser&, std::vector&); - - // As above but return nullopt if eos is reached before reading any values. - // - LIBBUTL_SYMEXPORT optional> - try_parse_manifest (manifest_parser&); - - // As above but append the manifest values to an existing list returning - // false if eos is reached before reading any values. - // - LIBBUTL_SYMEXPORT bool - try_parse_manifest (manifest_parser&, std::vector&); -} - -#include diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx index 46bf239..3bddd37 100644 --- a/libbutl/manifest-rewriter.cxx +++ b/libbutl/manifest-rewriter.cxx @@ -1,41 +1,15 @@ // file : libbutl/manifest-rewriter.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include -#include - -// C includes. - -#ifndef __cpp_lib_modules_ts #include +#include #include // uint64_t #include // size_t -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.manifest_rewriter; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.fdstream; -import butl.manifest_types; -#endif - -import butl.utility; // utf8_length() -import butl.manifest_serializer; -#else -#include -#include -#endif + +#include // utf8_length() +#include using namespace std; diff --git a/libbutl/manifest-rewriter.hxx b/libbutl/manifest-rewriter.hxx new file mode 100644 index 0000000..02a533a --- /dev/null +++ b/libbutl/manifest-rewriter.hxx @@ -0,0 +1,60 @@ +// file : libbutl/manifest-rewriter.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include + +#include + +namespace butl +{ + // Rewrite a hand-written manifest file preserving formatting, comments, + // etc., of the unaffected parts. The general workflow is as follows: + // + // 1. Parse the manifest file using manifest_parser into a sequence of + // name/value pairs and their positions. + // + // 2. Create an instance of manifest_rewriter for the manifest file. This + // opens the file in read/write mode with exclusive access. + // + // 3. Iterate over this sequence in reverse and apply changes to the desired + // name/value pairs using the below API. Doing this in reverse makes sure + // the positions obtained in step 1 remain valid. + // + // Note that if an exception is thrown by replace() or insert(), then the + // writer is no longer usable and there are no guarantees that the file is + // left in a consistent state. + // + class LIBBUTL_SYMEXPORT manifest_rewriter + { + public: + // Unless long_lines is true, break lines in values (see + // manifest_serializer for details). + // + manifest_rewriter (path, bool long_lines = false); + + // Replace the existing value at the specified position (specifically, + // between colon_pos and end_pos) with the specified new value. The new + // value is serialized as if by manifest_serializer.
+ // + void + replace (const manifest_name_value&); + + // Insert a new name/value after the specified position (specifically, + // after end_pos). To insert before the first value, use the special + // start-of-manifest value as position. The new name/value is serialized + // as if by manifest_serializer. Throw manifest_serialization exception + // on error. + // + void + insert (const manifest_name_value& pos, const manifest_name_value&); + + private: + path path_; + bool long_lines_; + auto_fd fd_; + }; +} diff --git a/libbutl/manifest-rewriter.mxx b/libbutl/manifest-rewriter.mxx deleted file mode 100644 index 907c990..0000000 --- a/libbutl/manifest-rewriter.mxx +++ /dev/null @@ -1,78 +0,0 @@ -// file : libbutl/manifest-rewriter.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.manifest_rewriter; -#ifdef __cpp_lib_modules_ts -#endif -import butl.path; -import butl.fdstream; -import butl.manifest_types; -#else -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Rewrite a hand-written manifest file preserving formatting, comments, - // etc., of the unaffected parts. The general workflow is as follows: - // - // 1. Parse the manifest file using manifest_parser into a sequence of - // name/value pairs and their positions. - // - // 2. Create an instance of manifest_rewriter for the manifest file. This - // opens the file in read/write mode with exclusive access. - // - // 3. Iterate over this sequence in reverse and apply changes to the desired - // name/value pairs using the below API. Doing this in reverse makes sure - // the positions obtained on step 1 remain valid. 
- // - // Note that if an exception is thrown by replace() or insert(), then the - // writer is no longer usable and there is no guarantees that the file is - // left in a consistent state. - // - class LIBBUTL_SYMEXPORT manifest_rewriter - { - public: - // Unless long_lines is true, break lines in values (see - // manifest_serializer for details). - // - manifest_rewriter (path, bool long_lines = false); - - // Replace the existing value at the specified position (specifically, - // between colon_pos and end_pos) with the specified new value. The new - // value is serialized as if by manifest_serializer. - // - void - replace (const manifest_name_value&); - - // Insert a new name/value after the specified position (specifically, - // after end_pos). To insert before the first value, use the special - // start-of-manifest value as position. The new name/value is serialized - // as if by manifest_serializer. Throw manifest_serialization exception - // on error. - // - void - insert (const manifest_name_value& pos, const manifest_name_value&); - - private: - path path_; - bool long_lines_; - auto_fd fd_; - }; -} diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx index 6a26a15..05cdae5 100644 --- a/libbutl/manifest-serializer.cxx +++ b/libbutl/manifest-serializer.cxx @@ -1,41 +1,12 @@ // file : libbutl/manifest-serializer.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include +#include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.manifest_serializer; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.manifest_types; -#endif - -import butl.utf8; -import butl.utility; -#else -#include -#include -#endif + +#include +#include using namespace std; diff --git a/libbutl/manifest-serializer.hxx b/libbutl/manifest-serializer.hxx new file mode 100644 index 0000000..dfe37da --- /dev/null +++ b/libbutl/manifest-serializer.hxx @@ -0,0 +1,136 @@ +// file : libbutl/manifest-serializer.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // size_t +#include // runtime_error +#include + +#include + +#include + +namespace butl +{ + class LIBBUTL_SYMEXPORT manifest_serialization: public std::runtime_error + { + public: + manifest_serialization (const std::string& name, + const std::string& description); + + std::string name; + std::string description; + }; + + class LIBBUTL_SYMEXPORT manifest_serializer + { + public: + // The filter, if specified, is called by next() prior to serializing the + // pair into the stream. If the filter returns false, then the pair is + // discarded. + // + // Note that currently there is no way for the filter to modify the name + // or value. If we ever need this functionality, then we can add an + // "extended" filter alternative with two "receiving" arguments: + // + // bool (..., optional& n, optional& v); + // + using filter_function = bool (const std::string& name, + const std::string& value); + + // Unless long_lines is true, break lines in values (including multi-line) + // so that their length does not exceed 78 codepoints (including '\n'). 
+ // + manifest_serializer (std::ostream& os, + const std::string& name, + bool long_lines = false, + std::function filter = {}) + : os_ (os), + name_ (name), + long_lines_ (long_lines), + filter_ (std::move (filter)) + { + } + + const std::string& + name () const {return name_;} + + // The first name-value pair should be the special "start-of-manifest" + // with empty name and value being the format version. After that we + // have a sequence of ordinary pairs which are the manifest. At the + // end of the manifest we have the special "end-of-manifest" pair + // with empty name and value. After that we can either have another + // start-of-manifest pair (in which case the whole sequence repeats + // from the beginning) or we get another end-of-manifest pair which + // signals the end of stream. The end-of-manifest pair can be omitted + // if it is followed by the start-of-manifest pair. + // + void + next (const std::string& name, const std::string& value); + + // Write a comment. The supplied text is prefixed with "# " and + // terminated with a newline. + // + void + comment (const std::string&); + + // Merge the manifest value and a comment into the single string, having + // the '<value>; <comment>' form. Escape ';' characters in the value with + // the backslash. + // + static std::string + merge_comment (const std::string& value, const std::string& comment); + + private: + friend class manifest_rewriter; + + void + write_next (const std::string& name, const std::string& value); + + // Validate and write a name and return its length in codepoints. + // + size_t + write_name (const std::string&); + + // Write a value assuming the current line already has the specified + // codepoint offset. If the resulting line length would be too large then + // the multi-line representation will be used. It is assumed that the + // name, followed by the colon, is already written.
+ // + void + write_value (const std::string&, std::size_t offset); + + // Write the specified number of characters from the specified string + // (assuming there are no newlines) split into multiple lines at or near + // the 78 codepoints boundary. Assume the current line already has the + // specified codepoint offset. + // + void + write_value (const char* s, std::size_t n, std::size_t offset); + + private: + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + private: + std::ostream& os_; + const std::string name_; + bool long_lines_; + const std::function filter_; + }; + + // Serialize a manifest to a stream adding the leading format version pair + // and the trailing end-of-manifest pair. Unless eos is false, then also + // write the end-of-stream pair. + // + LIBBUTL_SYMEXPORT void + serialize_manifest (manifest_serializer&, + const std::vector&, + bool eos = true); +} + +#include diff --git a/libbutl/manifest-serializer.mxx b/libbutl/manifest-serializer.mxx deleted file mode 100644 index b73c255..0000000 --- a/libbutl/manifest-serializer.mxx +++ /dev/null @@ -1,153 +0,0 @@ -// file : libbutl/manifest-serializer.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include // size_t -#include // runtime_error -#include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.manifest_serializer; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.manifest_types; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - class LIBBUTL_SYMEXPORT manifest_serialization: public std::runtime_error - { - public: - manifest_serialization (const std::string& name, - const std::string& description); - - std::string name; - std::string description; - }; - - class LIBBUTL_SYMEXPORT manifest_serializer - { - public: - // The filter, if specified, is called by next() prior to serializing the - // pair into the stream. If the filter returns false, then the pair is - // discarded. - // - // Note that currently there is no way for the filter to modify the name - // or value. If we ever need this functionality, then we can add an - // "extended" filter alternative with two "receiving" arguments: - // - // bool (..., optional& n, optional& v); - // - using filter_function = bool (const std::string& name, - const std::string& value); - - // Unless long_lines is true, break lines in values (including multi-line) - // so that their length does not exceed 78 codepoints (including '\n'). - // - manifest_serializer (std::ostream& os, - const std::string& name, - bool long_lines = false, - std::function filter = {}) - : os_ (os), - name_ (name), - long_lines_ (long_lines), - filter_ (std::move (filter)) - { - } - - const std::string& - name () const {return name_;} - - // The first name-value pair should be the special "start-of-manifest" - // with empty name and value being the format version. After that we - // have a sequence of ordinary pairs which are the manifest. At the - // end of the manifest we have the special "end-of-manifest" pair - // with empty name and value. 
After that we can either have another - // start-of-manifest pair (in which case the whole sequence repeats - // from the beginning) or we get another end-of-manifest pair which - // signals the end of stream. The end-of-manifest pair can be omitted - // if it is followed by the start-of-manifest pair. - // - void - next (const std::string& name, const std::string& value); - - // Write a comment. The supplied text is prefixed with "# " and - // terminated with a newline. - // - void - comment (const std::string&); - - // Merge the manifest value and a comment into the single string, having - // the '; ' form. Escape ';' characters in the value with - // the backslash. - // - static std::string - merge_comment (const std::string& value, const std::string& comment); - - private: - friend class manifest_rewriter; - - void - write_next (const std::string& name, const std::string& value); - - // Validate and write a name and return its length in codepoints. - // - size_t - write_name (const std::string&); - - // Write a value assuming the current line already has the specified - // codepoint offset. If the resulting line length would be too large then - // the multi-line representation will be used. It is assumed that the - // name, followed by the colon, is already written. - // - void - write_value (const std::string&, std::size_t offset); - - // Write the specified number of characters from the specified string - // (assuming there are no newlines) split into multiple lines at or near - // the 78 codepoints boundary. Assume the current line already has the - // specified codepoint offset. - // - void - write_value (const char* s, std::size_t n, std::size_t offset); - - private: - enum {start, body, end} s_ = start; - std::string version_; // Current format version. 
- - private: - std::ostream& os_; - const std::string name_; - bool long_lines_; - const std::function filter_; - }; - - // Serialize a manifest to a stream adding the leading format version pair - // and the trailing end-of-manifest pair. Unless eos is false, then also - // write the end-of-stream pair. - // - LIBBUTL_SYMEXPORT void - serialize_manifest (manifest_serializer&, - const std::vector&, - bool eos = true); -} - -#include diff --git a/libbutl/manifest-types.hxx b/libbutl/manifest-types.hxx new file mode 100644 index 0000000..23318f0 --- /dev/null +++ b/libbutl/manifest-types.hxx @@ -0,0 +1,32 @@ +// file : libbutl/manifest-types.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // uint64_t + +#include + +namespace butl +{ + class manifest_name_value + { + public: + std::string name; + std::string value; + + std::uint64_t name_line; + std::uint64_t name_column; + + std::uint64_t value_line; + std::uint64_t value_column; + + std::uint64_t start_pos; // Position of name/value-starting character. + std::uint64_t colon_pos; // Position of name/value-separating ':'. + std::uint64_t end_pos; // Position of name/value-terminating '\n' or EOF. + + bool + empty () const {return name.empty () && value.empty ();} + }; +} diff --git a/libbutl/manifest-types.mxx b/libbutl/manifest-types.mxx deleted file mode 100644 index 93f6fc6..0000000 --- a/libbutl/manifest-types.mxx +++ /dev/null @@ -1,48 +0,0 @@ -// file : libbutl/manifest-types.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // uint64_t -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.manifest_types; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#else -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - class manifest_name_value - { - public: - std::string name; - std::string value; - - std::uint64_t name_line; - std::uint64_t name_column; - - std::uint64_t value_line; - std::uint64_t value_column; - - std::uint64_t start_pos; // Position of name/value-starting character. - std::uint64_t colon_pos; // Position of name/value-separating ':'. - std::uint64_t end_pos; // Position of name/value-terminating '\n' or EOF. - - bool - empty () const {return name.empty () && value.empty ();} - }; -} diff --git a/libbutl/multi-index.hxx b/libbutl/multi-index.hxx new file mode 100644 index 0000000..a6754cd --- /dev/null +++ b/libbutl/multi-index.hxx @@ -0,0 +1,57 @@ +// file : libbutl/multi-index.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // declval() +#include // hash + +#include + +namespace butl +{ + // Google the "Emulating Boost.MultiIndex with Standard Containers" blog + // post for details. 
+ // + + template + struct map_key + { + mutable const T* p; + + map_key (const T* v = 0): p (v) {} + bool operator< (const map_key& x) const {return *p < *x.p;} + bool operator== (const map_key& x) const {return *p == *x.p;} + }; + + template + struct map_iterator_adapter: I + { + typedef const typename I::value_type::second_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + + map_iterator_adapter () {} + map_iterator_adapter (I i): I (i) {} + + map_iterator_adapter& + operator= (I i) {static_cast (*this) = i; return *this;} + + reference operator* () const {return I::operator* ().second;} + pointer operator-> () const {return &I::operator-> ()->second;} + }; +} + +namespace std +{ + template + struct hash>: hash + { + size_t + operator() (butl::map_key x) const + noexcept (noexcept (declval> () (*x.p))) + { + return hash::operator() (*x.p); + } + }; +} diff --git a/libbutl/multi-index.mxx b/libbutl/multi-index.mxx deleted file mode 100644 index d51bdfc..0000000 --- a/libbutl/multi-index.mxx +++ /dev/null @@ -1,72 +0,0 @@ -// file : libbutl/multi-index.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include // declval() -#include // hash -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.multi_index; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Google the "Emulating Boost.MultiIndex with Standard Containers" blog - // post for details. 
- // - - template - struct map_key - { - mutable const T* p; - - map_key (const T* v = 0): p (v) {} - bool operator< (const map_key& x) const {return *p < *x.p;} - bool operator== (const map_key& x) const {return *p == *x.p;} - }; - - template - struct map_iterator_adapter: I - { - typedef const typename I::value_type::second_type value_type; - typedef value_type* pointer; - typedef value_type& reference; - - map_iterator_adapter () {} - map_iterator_adapter (I i): I (i) {} - - map_iterator_adapter& - operator= (I i) {static_cast (*this) = i; return *this;} - - reference operator* () const {return I::operator* ().second;} - pointer operator-> () const {return &I::operator-> ()->second;} - }; -} - -LIBBUTL_MODEXPORT namespace std -{ - template - struct hash>: hash - { - size_t - operator() (butl::map_key x) const - noexcept (noexcept (declval> () (*x.p))) - { - return hash::operator() (*x.p); - } - }; -} diff --git a/libbutl/openssl.cxx b/libbutl/openssl.cxx index 8741b35..f9df2e7 100644 --- a/libbutl/openssl.cxx +++ b/libbutl/openssl.cxx @@ -1,35 +1,10 @@ // file : libbutl/openssl.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include - -#ifndef __cpp_lib_modules_ts -#include - #include // move() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.openssl; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.process; -import butl.fdstream; -import butl.small_vector; -#endif - -#endif using namespace std; diff --git a/libbutl/openssl.hxx b/libbutl/openssl.hxx new file mode 100644 index 0000000..58e38f8 --- /dev/null +++ b/libbutl/openssl.hxx @@ -0,0 +1,161 @@ +// file : libbutl/openssl.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include + +namespace butl +{ + // Perform a crypto operation using the openssl(1) program. Throw + // process_error and io_error (both derive from system_error) in case of + // errors. + // + // The I (in) and O (out) can be of the following types/values: + // + // nullfd Signal that no input/output is expected. + // + // path Read input/write output from/to a file. If the special "-" + // value is used, then instead input is connected to the + // openssl::out ofdstream member and output -- to the + // openssl::in ifdstream member. Note that the argument type + // should be path, not string (i.e., pass path("-")). Also + // note that the streams are opened in the binary mode. To + // change that, use fdstream_mode::text instead (see below). + // + // fdstream_mode Only text and binary values are meaningful. Same as + // path("-"), but also specifies the translation mode. + // + // other Forwarded as is to process_start(). Normally either int or + // auto_fd. + // + // For example: + // + // openssl os (path ("key.pub.pem"), // Read key from file, + // path ("-"), // Write result to openssl::in. + // 2, + // "openssl", "pkey", + // "-pubin", "-outform", "DER"); + // + // Typical usage: + // + // try + // { + // openssl os (nullfd, // No input expected. + // path ("-"), // Output to openssl::in. + // 2, // Diagnostics to stderr. + // path ("openssl"), // Program path. + // "rand", // Command. + // 64); // Command options. 
+ // + // vector r (os.in.read_binary ()); + // os.in.close (); + // + // if (!os.wait ()) + // ... // openssl returned non-zero status. + // } + // catch (const system_error& e) + // { + // cerr << "openssl error: " << e << endl; + // } + // + // Notes: + // + // 1. If opened, in stream is in the skip mode (see fdstream_mode). + // + // 2. If opened, in/out must be explicitly closed before calling wait(). + // + // 3. Normally the order of options is not important (unless they override + // each other). However, openssl 1.0.1 seems to have bugs in that + // department (that were apparently fixed in 1.0.2). To work around these + // bugs pass user-supplied options first. + // + class LIBBUTL_SYMEXPORT openssl: public process + { + public: + ifdstream in; + ofdstream out; + + template + openssl (I&& in, + O&& out, + E&& err, + const process_env&, + const std::string& command, + A&&... options); + + // Version with the command line callback (see process_run_callback() for + // details). + // + template + openssl (const C&, + I&& in, + O&& out, + E&& err, + const process_env&, + const std::string& command, + A&&... 
options); + + private: + template + struct is_other + { + using type = typename std::remove_reference< + typename std::remove_cv::type>::type; + + static const bool value = !(std::is_same::value || + std::is_same::value || + std::is_same::value); + }; + + struct io_data + { + fdpipe pipe; + small_vector options; + }; + + pipe + map_in (nullfd_t, io_data&); + + pipe + map_in (const path&, io_data&); + + pipe + map_in (fdstream_mode, io_data&); + + template + typename std::enable_if::value, I>::type + map_in (I&&, io_data&); + + pipe + map_out (nullfd_t, io_data&); + + pipe + map_out (const path&, io_data&); + + pipe + map_out (fdstream_mode, io_data&); + + template + typename std::enable_if::value, O>::type + map_out (O&&, io_data&); + }; +} + +#include +#include diff --git a/libbutl/openssl.ixx b/libbutl/openssl.ixx index c685b65..1435dcb 100644 --- a/libbutl/openssl.ixx +++ b/libbutl/openssl.ixx @@ -1,7 +1,10 @@ // file : libbutl/openssl.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // size_t +#include // forward() + +namespace butl { template -#include - -#include // size_t -#include // move(), forward() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.openssl; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.process; //@@ MOD TODO: should we re-export? -import butl.fdstream; -import butl.small_vector; -#else -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Perform a crypto operation using the openssl(1) program. Throw - // process_error and io_error (both derive from system_error) in case of - // errors. - // - // The I (in) and O (out) can be of the following types/values: - // - // nullfd Signal that no input/output is expected. - // - // path Read input/write output from/to a file. 
If the special "-" - // value is used, then instead input is connected to the - // openssl::out ofdstream member and output -- to the - // openssl::in ifdstream member. Note that the argument type - // should be path, not string (i.e., pass path("-")). Also - // note that the streams are opened in the binary mode. To - // change that, use fdstream_mode::text instead (see below). - // - // fdstream_mode Only text and binary values are meaningful. Same as - // path("-"), but also specifies the translation mode. - // - // other Forwarded as is to process_start(). Normally either int or - // auto_fd. - // - // For example: - // - // openssl os (path ("key.pub.pem"), // Read key from file, - // path ("-"), // Write result to openssl::in. - // 2, - // "openssl", "pkey", - // "-pubin", "-outform", "DER"); - // - // Typical usage: - // - // try - // { - // openssl os (nullfd, // No input expected. - // path ("-"), // Output to openssl::in. - // 2, // Diagnostics to stderr. - // path ("openssl"), // Program path. - // "rand", // Command. - // 64); // Command options. - // - // vector r (os.in.read_binary ()); - // os.in.close (); - // - // if (!os.wait ()) - // ... // openssl returned non-zero status. - // } - // catch (const system_error& e) - // { - // cerr << "openssl error: " << e << endl; - // } - // - // Notes: - // - // 1. If opened, in stream is in the skip mode (see fdstream_mode). - // - // 2. If opened, in/out must be explicitly closed before calling wait(). - // - // 3. Normally the order of options is not important (unless they override - // each other). However, openssl 1.0.1 seems to have bugs in that - // department (that were apparently fixed in 1.0.2). To work around these - // bugs pass user-supplied options first. - // - class LIBBUTL_SYMEXPORT openssl: public process - { - public: - ifdstream in; - ofdstream out; - - template - openssl (I&& in, - O&& out, - E&& err, - const process_env&, - const std::string& command, - A&&... 
options); - - // Version with the command line callback (see process_run_callback() for - // details). - // - template - openssl (const C&, - I&& in, - O&& out, - E&& err, - const process_env&, - const std::string& command, - A&&... options); - - private: - template - struct is_other - { - using type = typename std::remove_reference< - typename std::remove_cv::type>::type; - - static const bool value = !(std::is_same::value || - std::is_same::value || - std::is_same::value); - }; - - struct io_data - { - fdpipe pipe; - small_vector options; - }; - - pipe - map_in (nullfd_t, io_data&); - - pipe - map_in (const path&, io_data&); - - pipe - map_in (fdstream_mode, io_data&); - - template - typename std::enable_if::value, I>::type - map_in (I&&, io_data&); - - pipe - map_out (nullfd_t, io_data&); - - pipe - map_out (const path&, io_data&); - - pipe - map_out (fdstream_mode, io_data&); - - template - typename std::enable_if::value, O>::type - map_out (O&&, io_data&); - }; -} - -#include -#include diff --git a/libbutl/openssl.txx b/libbutl/openssl.txx index 3a2c579..f198c22 100644 --- a/libbutl/openssl.txx +++ b/libbutl/openssl.txx @@ -1,7 +1,9 @@ // file : libbutl/openssl.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // forward() + +namespace butl { template typename std::enable_if::value, I>::type openssl:: diff --git a/libbutl/optional.hxx b/libbutl/optional.hxx new file mode 100644 index 0000000..28aa95a --- /dev/null +++ b/libbutl/optional.hxx @@ -0,0 +1,343 @@ +// file : libbutl/optional.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +// Note: the Clang check must come before GCC since it also defines __GNUC__. 
+// +#if defined(ODB_COMPILER) + // + // Make sure we use butl::optional during ODB compilation (has to be this + // way until we completely switch to std::optional since we use the same + // generated code for all compilers). + // +#elif defined(_MSC_VER) + // + // Available from 19.10 (15.0). Except it (or the compiler) doesn't seem to + // be constexpr-correct. Things appear to be fixed in 19.20 (16.0) but + // optional is now only available in the C++17 mode or later. + // +# if _MSC_VER >= 1920 +# if defined(_MSVC_LANG) && _MSVC_LANG >= 201703L // See /Zc:__cplusplus +# define LIBBUTL_STD_OPTIONAL +# endif +# endif +#elif defined(__clang__) + // + // Clang's libc++ has it since 4 but we might also be using libstdc++. For + // the latter we will check for the presence of the header which + // only appeared in GCC 7. Also assume both are only available in C++17. + // + // Note that on Mac OS it can still be . + // +# if __cplusplus >= 201703L +# if __has_include(<__config>) +# include <__config> // _LIBCPP_VERSION +# if _LIBCPP_VERSION >= 4000 && __has_include() +# define LIBBUTL_STD_OPTIONAL +# endif +# elif __has_include() +# define LIBBUTL_STD_OPTIONAL +# endif +# endif +#elif defined(__GNUC__) + // + // Available from 7 but only in the C++17 mode. Note also that from 8 + // defines __cpp_lib_optional. + // +# if __GNUC__ >= 7 && __cplusplus >= 201703L +# define LIBBUTL_STD_OPTIONAL +# endif +#endif + +#ifdef LIBBUTL_STD_OPTIONAL +# include +#else +# include // move() +# include // hash +# include // is_* +#endif + +#include + +#ifdef LIBBUTL_STD_OPTIONAL +namespace butl +{ + template + using optional = std::optional; + + using std::nullopt_t; + using std::nullopt; +} +#else + +namespace butl +{ + // Simple optional class template while waiting for std::optional. 
+ // + struct nullopt_t {constexpr explicit nullopt_t (int) {}}; + constexpr nullopt_t nullopt (1); + + namespace details + { + template ::value> + struct optional_data; + + template + struct optional_data + { + struct empty {}; + + union + { + empty e_; + T d_; + }; + bool v_; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional_data (): e_ (), v_ (false) {} + constexpr optional_data (nullopt_t): e_ (), v_ (false) {} + constexpr optional_data (const T& v): d_ (v), v_ (true) {} + constexpr optional_data (T&& v): d_ (std::move (v)), v_ (true) {} +#else + optional_data (): e_ (), v_ (false) {} + optional_data (nullopt_t): e_ (), v_ (false) {} + optional_data (const T& v): d_ (v), v_ (true) {} + optional_data (T&& v): d_ (std::move (v)), v_ (true) {} +#endif + +#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) + constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} + constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} +#else + optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} + optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} +#endif + + optional_data& operator= (nullopt_t); + optional_data& operator= (const T&); + optional_data& operator= (T&&); + + optional_data& operator= (const optional_data&); + optional_data& operator= (optional_data&&); + + ~optional_data (); + }; + + template + struct optional_data + { + struct empty {}; + + union + { + empty e_; + T d_; + }; + bool v_; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional_data (): e_ (), v_ (false) {} + constexpr optional_data (nullopt_t): e_ (), v_ (false) {} + constexpr optional_data (const T& v): d_ (v), v_ (true) {} + constexpr optional_data (T&& v): d_ (std::move (v)), v_ (true) {} +#else + optional_data (): e_ (), v_ (false) {} + optional_data (nullopt_t): e_ (), v_ (false) 
{} + optional_data (const T& v): d_ (v), v_ (true) {} + optional_data (T&& v): d_ (std::move (v)), v_ (true) {} +#endif + +#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) + constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} + constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} +#else + optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} + optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} +#endif + + optional_data& operator= (nullopt_t); + optional_data& operator= (const T&); + optional_data& operator= (T&&); + + optional_data& operator= (const optional_data&); + optional_data& operator= (optional_data&&); + }; + + template ::value, + bool = std::is_move_constructible::value> + struct optional_ctors: optional_data + { + using optional_data::optional_data; + }; + + template + struct optional_ctors: optional_ctors + { + using optional_ctors::optional_ctors; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional_ctors () = default; +#else + optional_ctors () = default; +#endif + + optional_ctors (const optional_ctors&) = delete; + +#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) + constexpr optional_ctors (optional_ctors&&) = default; +#else + optional_ctors (optional_ctors&&) = default; +#endif + + optional_ctors& operator= (const optional_ctors&) = default; + optional_ctors& operator= (optional_ctors&&) = default; + }; + + template + struct optional_ctors: optional_ctors + { + using optional_ctors::optional_ctors; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional_ctors () = default; +#else + optional_ctors () = default; +#endif + +#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) + constexpr optional_ctors (const 
optional_ctors&) = default; +#else + optional_ctors (const optional_ctors&) = default; +#endif + + optional_ctors (optional_ctors&&) = delete; + + optional_ctors& operator= (const optional_ctors&) = default; + optional_ctors& operator= (optional_ctors&&) = default; + }; + + template + struct optional_ctors: optional_ctors + { + using optional_ctors::optional_ctors; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional_ctors () = default; +#else + optional_ctors () = default; +#endif + + optional_ctors (const optional_ctors&) = delete; + optional_ctors (optional_ctors&&) = delete; + + optional_ctors& operator= (const optional_ctors&) = default; + optional_ctors& operator= (optional_ctors&&) = default; + }; + } + + template + class optional: private details::optional_ctors + { + using base = details::optional_ctors; + + public: + using value_type = T; + +#if !defined(_MSC_VER) || _MSC_VER > 1900 + constexpr optional () {} + constexpr optional (nullopt_t) {} + constexpr optional (const T& v): base (v) {} + constexpr optional (T&& v): base (std::move (v)) {} +#else + optional () {} + optional (nullopt_t) {} + optional (const T& v): base (v) {} + optional (T&& v): base (std::move (v)) {} +#endif + +#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ + (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) + constexpr optional (const optional&) = default; + constexpr optional (optional&&) = default; +#else + optional (const optional&) = default; + optional (optional&&) = default; +#endif + + optional& operator= (nullopt_t v) {static_cast (*this) = v; return *this;} + optional& operator= (const T& v) {static_cast (*this) = v; return *this;} + optional& operator= (T&& v) {static_cast (*this) = std::move (v); return *this;} + + optional& operator= (const optional&) = default; + optional& operator= (optional&&) = default; + + T& value () {return this->d_;} + const T& value () const {return this->d_;} + + T* operator-> () {return &this->d_;} + const T* operator-> 
() const {return &this->d_;} + + T& operator* () {return this->d_;} + const T& operator* () const {return this->d_;} + + bool has_value () const {return this->v_;} + explicit operator bool () const {return this->v_;} + }; + + template + inline auto + operator== (const optional& x, const optional& y) + { + bool px (x), py (y); + return px == py && (!px || *x == *y); + } + + template + inline auto + operator!= (const optional& x, const optional& y) + { + return !(x == y); + } + + template + inline auto + operator< (const optional& x, const optional& y) + { + bool px (x), py (y); + return px < py || (px && py && *x < *y); + } + + template + inline auto + operator> (const optional& x, const optional& y) + { + return y < x; + } +} + +namespace std +{ + template + struct hash>: hash + { + using argument_type = butl::optional; + + size_t + operator() (const butl::optional& o) const + noexcept (noexcept (hash {} (*o))) + { + return o ? hash::operator() (*o) : static_cast (-3333); + } + }; +} + +#include + +#endif // !LIBBUTL_STD_OPTIONAL diff --git a/libbutl/optional.mxx b/libbutl/optional.mxx deleted file mode 100644 index d32e14b..0000000 --- a/libbutl/optional.mxx +++ /dev/null @@ -1,358 +0,0 @@ -// file : libbutl/optional.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -// Note: the Clang check must come before GCC since it also defines __GNUC__. -// -#if defined(ODB_COMPILER) - // - // Make sure we use butl::optional during ODB compilation (has to be this - // way until we completely switch to std::optional since we use the same - // generated code for all compilers). - // -#elif defined(_MSC_VER) - // - // Available from 19.10 (15.0). Except it (or the compiler) doesn't seem to - // be constexpr-correct. Things appear to be fixed in 19.20 (16.0) but - // optional is now only available in the C++17 mode or later. 
- // -# if _MSC_VER >= 1920 -# if defined(_MSVC_LANG) && _MSVC_LANG >= 201703L // See /Zc:__cplusplus -# define LIBBUTL_STD_OPTIONAL -# endif -# endif -#elif defined(__clang__) - // - // Clang's libc++ has it since 4 but we might also be using libstdc++. For - // the latter we will check for the presence of the header which - // only appeared in GCC 7. Also assume both are only available in C++17. - // - // Note that on Mac OS it can still be . - // -# if __cplusplus >= 201703L -# if __has_include(<__config>) -# include <__config> // _LIBCPP_VERSION -# if _LIBCPP_VERSION >= 4000 && __has_include() -# define LIBBUTL_STD_OPTIONAL -# endif -# elif __has_include() -# define LIBBUTL_STD_OPTIONAL -# endif -# endif -#elif defined(__GNUC__) - // - // Available from 7 but only in the C++17 mode. Note also that from 8 - // defines __cpp_lib_optional. - // -# if __GNUC__ >= 7 && __cplusplus >= 201703L -# define LIBBUTL_STD_OPTIONAL -# endif -#endif - -#ifndef __cpp_lib_modules_ts -#ifdef LIBBUTL_STD_OPTIONAL -# include -#else -# include // move() -# include // hash -# include // is_* -#endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.optional; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -#ifdef LIBBUTL_STD_OPTIONAL -LIBBUTL_MODEXPORT namespace butl -{ - template - using optional = std::optional; - - using std::nullopt_t; - using std::nullopt; -} -#else - -LIBBUTL_MODEXPORT namespace butl -{ - // Simple optional class template while waiting for std::optional. 
- // - struct nullopt_t {constexpr explicit nullopt_t (int) {}}; - constexpr nullopt_t nullopt (1); - - namespace details - { - template ::value> - struct optional_data; - - template - struct optional_data - { - struct empty {}; - - union - { - empty e_; - T d_; - }; - bool v_; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional_data (): e_ (), v_ (false) {} - constexpr optional_data (nullopt_t): e_ (), v_ (false) {} - constexpr optional_data (const T& v): d_ (v), v_ (true) {} - constexpr optional_data (T&& v): d_ (std::move (v)), v_ (true) {} -#else - optional_data (): e_ (), v_ (false) {} - optional_data (nullopt_t): e_ (), v_ (false) {} - optional_data (const T& v): d_ (v), v_ (true) {} - optional_data (T&& v): d_ (std::move (v)), v_ (true) {} -#endif - -#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) - constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} -#else - optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} -#endif - - optional_data& operator= (nullopt_t); - optional_data& operator= (const T&); - optional_data& operator= (T&&); - - optional_data& operator= (const optional_data&); - optional_data& operator= (optional_data&&); - - ~optional_data (); - }; - - template - struct optional_data - { - struct empty {}; - - union - { - empty e_; - T d_; - }; - bool v_; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional_data (): e_ (), v_ (false) {} - constexpr optional_data (nullopt_t): e_ (), v_ (false) {} - constexpr optional_data (const T& v): d_ (v), v_ (true) {} - constexpr optional_data (T&& v): d_ (std::move (v)), v_ (true) {} -#else - optional_data (): e_ (), v_ (false) {} - optional_data (nullopt_t): e_ (), v_ (false) 
{} - optional_data (const T& v): d_ (v), v_ (true) {} - optional_data (T&& v): d_ (std::move (v)), v_ (true) {} -#endif - -#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) - constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} -#else - optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} -#endif - - optional_data& operator= (nullopt_t); - optional_data& operator= (const T&); - optional_data& operator= (T&&); - - optional_data& operator= (const optional_data&); - optional_data& operator= (optional_data&&); - }; - - template ::value, - bool = std::is_move_constructible::value> - struct optional_ctors: optional_data - { - using optional_data::optional_data; - }; - - template - struct optional_ctors: optional_ctors - { - using optional_ctors::optional_ctors; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional_ctors () = default; -#else - optional_ctors () = default; -#endif - - optional_ctors (const optional_ctors&) = delete; - -#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) - constexpr optional_ctors (optional_ctors&&) = default; -#else - optional_ctors (optional_ctors&&) = default; -#endif - - optional_ctors& operator= (const optional_ctors&) = default; - optional_ctors& operator= (optional_ctors&&) = default; - }; - - template - struct optional_ctors: optional_ctors - { - using optional_ctors::optional_ctors; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional_ctors () = default; -#else - optional_ctors () = default; -#endif - -#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) - constexpr optional_ctors (const 
optional_ctors&) = default; -#else - optional_ctors (const optional_ctors&) = default; -#endif - - optional_ctors (optional_ctors&&) = delete; - - optional_ctors& operator= (const optional_ctors&) = default; - optional_ctors& operator= (optional_ctors&&) = default; - }; - - template - struct optional_ctors: optional_ctors - { - using optional_ctors::optional_ctors; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional_ctors () = default; -#else - optional_ctors () = default; -#endif - - optional_ctors (const optional_ctors&) = delete; - optional_ctors (optional_ctors&&) = delete; - - optional_ctors& operator= (const optional_ctors&) = default; - optional_ctors& operator= (optional_ctors&&) = default; - }; - } - - template - class optional: private details::optional_ctors - { - using base = details::optional_ctors; - - public: - using value_type = T; - -#if !defined(_MSC_VER) || _MSC_VER > 1900 - constexpr optional () {} - constexpr optional (nullopt_t) {} - constexpr optional (const T& v): base (v) {} - constexpr optional (T&& v): base (std::move (v)) {} -#else - optional () {} - optional (nullopt_t) {} - optional (const T& v): base (v) {} - optional (T&& v): base (std::move (v)) {} -#endif - -#if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) - constexpr optional (const optional&) = default; - constexpr optional (optional&&) = default; -#else - optional (const optional&) = default; - optional (optional&&) = default; -#endif - - optional& operator= (nullopt_t v) {static_cast (*this) = v; return *this;} - optional& operator= (const T& v) {static_cast (*this) = v; return *this;} - optional& operator= (T&& v) {static_cast (*this) = std::move (v); return *this;} - - optional& operator= (const optional&) = default; - optional& operator= (optional&&) = default; - - T& value () {return this->d_;} - const T& value () const {return this->d_;} - - T* operator-> () {return &this->d_;} - const T* operator-> 
() const {return &this->d_;} - - T& operator* () {return this->d_;} - const T& operator* () const {return this->d_;} - - bool has_value () const {return this->v_;} - explicit operator bool () const {return this->v_;} - }; - - template - inline auto - operator== (const optional& x, const optional& y) - { - bool px (x), py (y); - return px == py && (!px || *x == *y); - } - - template - inline auto - operator!= (const optional& x, const optional& y) - { - return !(x == y); - } - - template - inline auto - operator< (const optional& x, const optional& y) - { - bool px (x), py (y); - return px < py || (px && py && *x < *y); - } - - template - inline auto - operator> (const optional& x, const optional& y) - { - return y < x; - } -} - -namespace std -{ - template - struct hash>: hash - { - using argument_type = butl::optional; - - size_t - operator() (const butl::optional& o) const - noexcept (noexcept (hash {} (*o))) - { - return o ? hash::operator() (*o) : static_cast (-3333); - } - }; -} - -#include - -#endif // !LIBBUTL_STD_OPTIONAL diff --git a/libbutl/pager.cxx b/libbutl/pager.cxx index 44aa83e..e647948 100644 --- a/libbutl/pager.cxx +++ b/libbutl/pager.cxx @@ -1,9 +1,7 @@ // file : libbutl/pager.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include // E* @@ -14,46 +12,20 @@ # include #endif -#ifndef __cpp_lib_modules_ts #include #include -#include - +#include // size_t #include // strchr() #include // move() + #ifndef _WIN32 # include # include // this_thread::sleep_for() #endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.pager; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.process; -import butl.fdstream; -#endif -#ifndef _WIN32 -import std.threading; -#endif - -import butl.utility; // operator<<(ostream, exception), throw_generic_error() -import butl.optional; -import butl.fdstream; // fdclose() -#else -#include -#include -#include -#endif +#include +#include +#include using namespace std; diff --git a/libbutl/pager.hxx b/libbutl/pager.hxx new file mode 100644 index 0000000..12a6670 --- /dev/null +++ b/libbutl/pager.hxx @@ -0,0 +1,84 @@ +// file : libbutl/pager.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +namespace butl +{ + // Try to run the output through a pager program, such as more or less (no + // pun intended, less is used by default). If the default pager program is + // used, then the output is indented so that 80-character long lines will + // appear centered in the terminal. If the default pager program fails to + // start, then the output is sent directly to STDOUT. + // + // If the pager program is specified and is empty, then no pager is used + // and the output is sent directly to STDOUT. + // + // Throw std::system_error if there are problems with the pager program. + // + // Typical usage: + // + // try + // { + // pager p ("help for foo"); + // ostream& os (p.stream ()); + // + // os << "Foo is such and so ..."; + // + // if (!p.wait ()) + // ... // Pager program returned non-zero status. + // } + // catch (const std::system_error& e) + // { + // cerr << "pager error: " << e << endl; + // } + // + class LIBBUTL_SYMEXPORT pager: protected std::streambuf + { + public: + ~pager () {wait (true);} + + // If verbose is true, then print (to STDERR) the pager command line. 
+ // + pager (const std::string& name, + bool verbose = false, + const std::string* pager = nullptr, + const std::vector* pager_options = nullptr); + + std::ostream& + stream () {return os_.is_open () ? os_ : std::cout;} + + bool + wait (bool ignore_errors = false); + + // The streambuf output interface that implements indentation. You can + // override it to implement custom output pre-processing. + // + protected: + using int_type = std::streambuf::int_type; + using traits_type = std::streambuf::traits_type; + + virtual int_type + overflow (int_type); + + virtual int + sync (); + + private: + process p_; + ofdstream os_; + + std::string indent_; + int_type prev_ = '\n'; // Previous character. + std::streambuf* buf_ = nullptr; + }; +} diff --git a/libbutl/pager.mxx b/libbutl/pager.mxx deleted file mode 100644 index a1f640f..0000000 --- a/libbutl/pager.mxx +++ /dev/null @@ -1,102 +0,0 @@ -// file : libbutl/pager.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.pager; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.process; -import butl.fdstream; -#else -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Try to run the output through a pager program, such as more or less (no - // pun intended, less is used by default). If the default pager program is - // used, then the output is indented so that 80-character long lines will - // appear centered in the terminal. If the default pager program fails to - // start, then the output is sent directly to STDOUT. - // - // If the pager program is specified and is empty, then no pager is used - // and the output is sent directly to STDOUT. - // - // Throw std::system_error if there are problems with the pager program. 
- // - // Typical usage: - // - // try - // { - // pager p ("help for foo"); - // ostream& os (p.stream ()); - // - // os << "Foo is such and so ..."; - // - // if (!p.wait ()) - // ... // Pager program returned non-zero status. - // } - // catch (const std::system_error& e) - // { - // cerr << "pager error: " << e << endl; - // } - // - class LIBBUTL_SYMEXPORT pager: protected std::streambuf - { - public: - ~pager () {wait (true);} - - // If verbose is true, then print (to STDERR) the pager command line. - // - pager (const std::string& name, - bool verbose = false, - const std::string* pager = nullptr, - const std::vector* pager_options = nullptr); - - std::ostream& - stream () {return os_.is_open () ? os_ : std::cout;} - - bool - wait (bool ignore_errors = false); - - // The streambuf output interface that implements indentation. You can - // override it to implement custom output pre-processing. - // - protected: - using int_type = std::streambuf::int_type; - using traits_type = std::streambuf::traits_type; - - virtual int_type - overflow (int_type); - - virtual int - sync (); - - private: - process p_; - ofdstream os_; - - std::string indent_; - int_type prev_ = '\n'; // Previous character. - std::streambuf* buf_ = nullptr; - }; -} diff --git a/libbutl/path-io.hxx b/libbutl/path-io.hxx new file mode 100644 index 0000000..a60527d --- /dev/null +++ b/libbutl/path-io.hxx @@ -0,0 +1,36 @@ +// file : libbutl/path-io.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include + +#include + +#include + +namespace butl +{ + // This is the default path IO implementation. It is separate to allow + // custom implementations. For example, we may want to print paths as + // relative to the working directory. Or we may want to print '~' for the + // home directory prefix. Or we may want to print dir_path with a trailing + // '/'. 
+ // + template + inline std::basic_ostream& + operator<< (std::basic_ostream& os, const basic_path& p) + { + return to_stream (os, p, false /* representation */); + } + + template + inline std::basic_ostream& + operator<< (std::basic_ostream& os, const basic_path_name_view

& v) + { + assert (!v.null ()); + + return v.name != nullptr && *v.name ? (os << **v.name) : (os << *v.path); + } +} diff --git a/libbutl/path-io.mxx b/libbutl/path-io.mxx deleted file mode 100644 index 6b6dbcf..0000000 --- a/libbutl/path-io.mxx +++ /dev/null @@ -1,54 +0,0 @@ -// file : libbutl/path-io.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#include - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.path_io; -#ifdef __cpp_lib_modules_ts -import std.core; //@@ MOD TMP (should not be needed). -import std.io; -#endif -import butl.path; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // This is the default path IO implementation. It is separate to allow - // custom implementations. For example, we may want to print paths as - // relative to the working directory. Or we may want to print '~' for the - // home directory prefix. Or we may want to print dir_path with a trailing - // '/'. - // - template - inline std::basic_ostream& - operator<< (std::basic_ostream& os, const basic_path& p) - { - return to_stream (os, p, false /* representation */); - } - - template - inline std::basic_ostream& - operator<< (std::basic_ostream& os, const basic_path_name_view

& v) - { - assert (!v.null ()); - - return v.name != nullptr && *v.name ? (os << **v.name) : (os << *v.path); - } -} diff --git a/libbutl/path-map.hxx b/libbutl/path-map.hxx new file mode 100644 index 0000000..a7b3870 --- /dev/null +++ b/libbutl/path-map.hxx @@ -0,0 +1,128 @@ +// file : libbutl/path-map.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // min() + +#include +#include + +#include + +namespace butl +{ + // prefix_map for filesystem paths + // + // Important: the paths should be normalized but can use different directory + // separators and different case on case-insensitive platforms. + // + // Note that the path's representation of POSIX root ('/') is inconsistent + // in that we have a trailing delimiter at the end of the path (its "proper" + // representation would have been an empty string but that would have + // clashed with empty paths). To work around this snag, this implementation, + // during key comparison, detects '/' and treats it as empty. Note that the + // map will still store the key as you have first inserted it. So if you + // want a particular representation (i.e., empty or '/'), pre- populate the + // map with it. + // + template + struct compare_prefix> + { + typedef basic_path key_type; + + typedef C delimiter_type; + typedef typename key_type::string_type string_type; + typedef typename key_type::size_type size_type; + typedef typename key_type::traits_type traits_type; + + explicit + compare_prefix (delimiter_type) {} + + bool + operator() (const key_type& x, const key_type& y) const + { + const string_type& xs (x.string ()); + const string_type& ys (y.string ()); + + return compare (xs.c_str (), + root (xs) ? 0 : xs.size (), + ys.c_str (), + root (ys) ? 0 : ys.size ()) < 0; + } + + bool + prefix (const key_type& p, const key_type& k) const + { + const string_type& ps (p.string ()); + const string_type& ks (k.string ()); + + return prefix (root (ps) ? 
string_type () : ps, + root (ks) ? string_type () : ks); + } + + bool + prefix (key_type& k) const + { + if (k.empty ()) + return false; + + k.make_directory (); + return true; + } + + protected: + bool + prefix (const string_type& p, const string_type& k) const + { + // The same code as in prefix_map but using our compare(). + // + size_type pn (p.size ()), kn (k.size ()); + return pn == 0 || // Empty key is always a prefix. + (pn <= kn && + compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); + } + + int + compare (const C* x, size_type xn, + const C* y, size_type yn) const + { + size_type n (std::min (xn, yn)); + int r (traits_type::compare (x, n, y, n)); + + if (r == 0) + { + // Pretend there is a delimiter characters at the end of the + // shorter string. + // + char xc (xn > n ? x[n] : (xn++, traits_type::directory_separator)); + char yc (yn > n ? y[n] : (yn++, traits_type::directory_separator)); + r = traits_type::compare (&xc, 1, &yc, 1); + + // If we are still equal, then compare the lengths. + // + if (r == 0) + r = (xn == yn ? 0 : (xn < yn ? -1 : 1)); + } + + return r; + } + + static bool + root (const string_type& p) + { + return p.size () == 1 && key_type::traits_type::is_separator (p[0]); + } + }; + + // Note that the delimiter character is not used (is_delimiter() from + // path_traits is used instead). + // + template + using path_map = + prefix_map; + + template + using dir_path_map = + prefix_map; +} diff --git a/libbutl/path-map.mxx b/libbutl/path-map.mxx deleted file mode 100644 index daaf0a4..0000000 --- a/libbutl/path-map.mxx +++ /dev/null @@ -1,145 +0,0 @@ -// file : libbutl/path-map.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include // min() -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.path_map; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.prefix_map; -#else -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // prefix_map for filesystem paths - // - // Important: the paths should be normalized but can use different directory - // separators and different case on case-insensitive platforms. - // - // Note that the path's representation of POSIX root ('/') is inconsistent - // in that we have a trailing delimiter at the end of the path (its "proper" - // representation would have been an empty string but that would have - // clashed with empty paths). To work around this snag, this implementation, - // during key comparison, detects '/' and treats it as empty. Note that the - // map will still store the key as you have first inserted it. So if you - // want a particular representation (i.e., empty or '/'), pre- populate the - // map with it. - // - template - struct compare_prefix> - { - typedef basic_path key_type; - - typedef C delimiter_type; - typedef typename key_type::string_type string_type; - typedef typename key_type::size_type size_type; - typedef typename key_type::traits_type traits_type; - - explicit - compare_prefix (delimiter_type) {} - - bool - operator() (const key_type& x, const key_type& y) const - { - const string_type& xs (x.string ()); - const string_type& ys (y.string ()); - - return compare (xs.c_str (), - root (xs) ? 0 : xs.size (), - ys.c_str (), - root (ys) ? 0 : ys.size ()) < 0; - } - - bool - prefix (const key_type& p, const key_type& k) const - { - const string_type& ps (p.string ()); - const string_type& ks (k.string ()); - - return prefix (root (ps) ? string_type () : ps, - root (ks) ? 
string_type () : ks); - } - - bool - prefix (key_type& k) const - { - if (k.empty ()) - return false; - - k.make_directory (); - return true; - } - - protected: - bool - prefix (const string_type& p, const string_type& k) const - { - // The same code as in prefix_map but using our compare(). - // - size_type pn (p.size ()), kn (k.size ()); - return pn == 0 || // Empty key is always a prefix. - (pn <= kn && - compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); - } - - int - compare (const C* x, size_type xn, - const C* y, size_type yn) const - { - size_type n (std::min (xn, yn)); - int r (traits_type::compare (x, n, y, n)); - - if (r == 0) - { - // Pretend there is a delimiter characters at the end of the - // shorter string. - // - char xc (xn > n ? x[n] : (xn++, traits_type::directory_separator)); - char yc (yn > n ? y[n] : (yn++, traits_type::directory_separator)); - r = traits_type::compare (&xc, 1, &yc, 1); - - // If we are still equal, then compare the lengths. - // - if (r == 0) - r = (xn == yn ? 0 : (xn < yn ? -1 : 1)); - } - - return r; - } - - static bool - root (const string_type& p) - { - return p.size () == 1 && key_type::traits_type::is_separator (p[0]); - } - }; - - // Note that the delimiter character is not used (is_delimiter() from - // path_traits is used instead). - // - template - using path_map = - prefix_map; - - template - using dir_path_map = - prefix_map; -} diff --git a/libbutl/path-pattern.cxx b/libbutl/path-pattern.cxx index cea5aa7..ed36eb5 100644 --- a/libbutl/path-pattern.cxx +++ b/libbutl/path-pattern.cxx @@ -1,41 +1,14 @@ // file : libbutl/path-pattern.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include #include // reverse_iterator - #include // find() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.path_pattern; -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.optional; -#endif - -import butl.utility; // lcase()[_WIN32] -import butl.filesystem; // path_search() -#else -#include -#include -#endif +#include // lcase()[_WIN32] +#include // path_search() using namespace std; diff --git a/libbutl/path-pattern.hxx b/libbutl/path-pattern.hxx new file mode 100644 index 0000000..f6e01be --- /dev/null +++ b/libbutl/path-pattern.hxx @@ -0,0 +1,224 @@ +// file : libbutl/path-pattern.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // uint16_t +#include // ptrdiff_t, size_t +#include // input_iterator_tag + +#include +#include + +#include + +namespace butl +{ + // Wildcard pattern match (aka glob). + // + // The wildcard pattern contains the literal characters that match + // themselves and the wildcard characters that match a single or multiple + // characters. Currently the following wildcards are supported: + // + // * - match any number of characters (including zero) + // ? - match any single character + // [...] - match a character with a "bracket expression"; currently we only + // support literal characters and ranges (no character/equivalence + // classes, etc; see Pattern Matching Notation section of the Shell + // Command Language POSIX specification for details) + // + // Note also that currently we don't support the special characters + // backslash-escaping (as mandated by POSIX). + + // Path match/search flags. + // + enum class path_match_flags: std::uint16_t + { + // Follow symlinks. This only applies to symlinks that are matched against + // the rightmost component of the pattern. In particular, this mean that + // such symlinks will never match a directory pattern and some results can + // be missing for the recursive rightmost component. + // + // Note that this flag is only used for path_search(). 
+ // + follow_symlinks = 0x1, + + // Make wildcard-only pattern component (e.g., `*/...`, `.../*/...`, or + // `.../*`) match absent path component. For example, with this flag + // set, the `a/*/b` pattern matches not only `a/x/b` path, but also `a/b`. + // + // Note that this does not apply to single-component patterns and the + // pattern type is always preserved. In particular, the `a/*/` pattern + // matches `a/` but not `a`. + // + // Finally, keep in mind that only absent directory components can be + // matched this way. In particular, pattern `a*/*` does not match `ab` + // (but `a*/*/` matches `ab/`). + // + match_absent = 0x2, + + none = 0 + }; + + inline path_match_flags operator& (path_match_flags, path_match_flags); + inline path_match_flags operator| (path_match_flags, path_match_flags); + inline path_match_flags operator&= (path_match_flags&, path_match_flags); + inline path_match_flags operator|= (path_match_flags&, path_match_flags); + + // Return true if name matches pattern. Both must be single path components, + // possibly with a trailing directory separator to indicate a directory. + // + // If the pattern ends with a directory separator, then it only matches a + // directory name (i.e., ends with a directory separator, but potentially + // different). Otherwise, it only matches a non-directory name (no trailing + // directory separator). + // + LIBBUTL_SYMEXPORT bool + path_match (const std::string& name, const std::string& pattern); + + // Return true if path entry matches pattern. Note that the match is + // performed literally, with no paths normalization being performed. The + // start directory is used if the first pattern component is a self-matching + // wildcard (see below for the start directory and wildcard semantics). + // + // In addition to the wildcard characters, it also recognizes the ** and *** + // wildcard sequences (see path_search() for details). 
+ // + LIBBUTL_SYMEXPORT bool + path_match (const path& entry, + const path& pattern, + const dir_path& start = dir_path (), + path_match_flags = path_match_flags::none); + + // Return true if a name contains the wildcard characters. + // + bool + path_pattern (const std::string&); + + // Return true if a name contains the ** wildcard sequences. + // + bool + path_pattern_recursive (const std::string&); + + // Return true if a name contains the *** wildcard sequences. + // + bool + path_pattern_self_matching (const std::string&); + + // Return true if a path contains the pattern components. + // + bool + path_pattern (const path&); + + // Return the number of recursive pattern components. + // + // Knowing the number of such components allows us to make some assumptions + // regarding the search result. For example, if it is zero or one, then the + // result contains no duplicates. + // + // Also note that the result can be used as bool. + // + std::size_t + path_pattern_recursive (const path&); + + // Return true if the path is not empty and its first component is a self- + // matching pattern. + // + bool + path_pattern_self_matching (const path&); + + // Iteration over pattern terminals. + // + enum class path_pattern_term_type + { + literal, // Literal character. + question, // Question mark wildcard. + star, // Star wildcard. + bracket // Bracket expression wildcard. + }; + + class path_pattern_term + { + public: + path_pattern_term_type type; + std::string::const_iterator begin; + std::string::const_iterator end; + + std::size_t + size () const {return end - begin;} + + // Predicates. + // + bool literal () const {return type == path_pattern_term_type::literal;} + bool question () const {return type == path_pattern_term_type::question;} + bool star () const {return type == path_pattern_term_type::star;} + bool bracket () const {return type == path_pattern_term_type::bracket;} + }; + + // Return the literal terminal character. 
+ // + char + get_literal (const path_pattern_term&); + + // Match a character against the bracket expression terminal. + // + LIBBUTL_SYMEXPORT bool + match_bracket (char, const path_pattern_term&); + + class LIBBUTL_SYMEXPORT path_pattern_iterator + { + public: + using value_type = path_pattern_term; + using pointer = const path_pattern_term*; + using reference = const path_pattern_term&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + explicit + path_pattern_iterator (const std::string&); + + path_pattern_iterator (std::string::const_iterator begin, + std::string::const_iterator end); + + path_pattern_iterator () = default; // Create the end iterator. + + path_pattern_iterator& operator++ () {assert (t_); next (); return *this;} + + reference operator* () const {assert (t_); return *t_;} + pointer operator-> () const {assert (t_); return &*t_;} + + friend bool + operator== (const path_pattern_iterator&, const path_pattern_iterator&); + + friend bool + operator!= (const path_pattern_iterator&, const path_pattern_iterator&); + + private: + void + next (); + + private: + // nullopt denotes the end iterator. + // + // Note that the default-constructed i_ and e_ iterators (having singular + // values) may not represent the end iterator as are not comparable for + // equality. That's why we use an absent term to represent such an + // iterator. + // + optional t_; + + std::string::const_iterator i_; + std::string::const_iterator e_; + }; + + // Range-based for loop support. + // + // for (const path_pattern_term& t: path_pattern_iterator (pattern)) ... 
+ // + path_pattern_iterator begin (const path_pattern_iterator&); + path_pattern_iterator end (const path_pattern_iterator&); +} + +#include diff --git a/libbutl/path-pattern.mxx b/libbutl/path-pattern.mxx deleted file mode 100644 index 6d9684a..0000000 --- a/libbutl/path-pattern.mxx +++ /dev/null @@ -1,241 +0,0 @@ -// file : libbutl/path-pattern.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include // uint16_t -#include // ptrdiff_t, size_t -#include // input_iterator_tag -#endif - -// Other includes. -#ifdef __cpp_modules_ts -export module butl.path_pattern; - -#ifdef __cpp_lib_modules_ts -import std.core; -#endif - -import butl.path; -import butl.optional; -#else -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Wildcard pattern match (aka glob). - // - // The wildcard pattern contains the literal characters that match - // themselves and the wildcard characters that match a single or multiple - // characters. Currently the following wildcards are supported: - // - // * - match any number of characters (including zero) - // ? - match any single character - // [...] - match a character with a "bracket expression"; currently we only - // support literal characters and ranges (no character/equivalence - // classes, etc; see Pattern Matching Notation section of the Shell - // Command Language POSIX specification for details) - // - // Note also that currently we don't support the special characters - // backslash-escaping (as mandated by POSIX). - - // Path match/search flags. - // - enum class path_match_flags: std::uint16_t - { - // Follow symlinks. This only applies to symlinks that are matched against - // the rightmost component of the pattern. In particular, this mean that - // such symlinks will never match a directory pattern and some results can - // be missing for the recursive rightmost component. 
- // - // Note that this flag is only used for path_search(). - // - follow_symlinks = 0x1, - - // Make wildcard-only pattern component (e.g., `*/...`, `.../*/...`, or - // `.../*`) match absent path component. For example, with this flag - // set, the `a/*/b` pattern matches not only `a/x/b` path, but also `a/b`. - // - // Note that this does not apply to single-component patterns and the - // pattern type is always preserved. In particular, the `a/*/` pattern - // matches `a/` but not `a`. - // - // Finally, keep in mind that only absent directory components can be - // matched this way. In particular, pattern `a*/*` does not match `ab` - // (but `a*/*/` matches `ab/`). - // - match_absent = 0x2, - - none = 0 - }; - - inline path_match_flags operator& (path_match_flags, path_match_flags); - inline path_match_flags operator| (path_match_flags, path_match_flags); - inline path_match_flags operator&= (path_match_flags&, path_match_flags); - inline path_match_flags operator|= (path_match_flags&, path_match_flags); - - // Return true if name matches pattern. Both must be single path components, - // possibly with a trailing directory separator to indicate a directory. - // - // If the pattern ends with a directory separator, then it only matches a - // directory name (i.e., ends with a directory separator, but potentially - // different). Otherwise, it only matches a non-directory name (no trailing - // directory separator). - // - LIBBUTL_SYMEXPORT bool - path_match (const std::string& name, const std::string& pattern); - - // Return true if path entry matches pattern. Note that the match is - // performed literally, with no paths normalization being performed. The - // start directory is used if the first pattern component is a self-matching - // wildcard (see below for the start directory and wildcard semantics). - // - // In addition to the wildcard characters, it also recognizes the ** and *** - // wildcard sequences (see path_search() for details). 
- // - LIBBUTL_SYMEXPORT bool - path_match (const path& entry, - const path& pattern, - const dir_path& start = dir_path (), - path_match_flags = path_match_flags::none); - - // Return true if a name contains the wildcard characters. - // - bool - path_pattern (const std::string&); - - // Return true if a name contains the ** wildcard sequences. - // - bool - path_pattern_recursive (const std::string&); - - // Return true if a name contains the *** wildcard sequences. - // - bool - path_pattern_self_matching (const std::string&); - - // Return true if a path contains the pattern components. - // - bool - path_pattern (const path&); - - // Return the number of recursive pattern components. - // - // Knowing the number of such components allows us to make some assumptions - // regarding the search result. For example, if it is zero or one, then the - // result contains no duplicates. - // - // Also note that the result can be used as bool. - // - std::size_t - path_pattern_recursive (const path&); - - // Return true if the path is not empty and its first component is a self- - // matching pattern. - // - bool - path_pattern_self_matching (const path&); - - // Iteration over pattern terminals. - // - enum class path_pattern_term_type - { - literal, // Literal character. - question, // Question mark wildcard. - star, // Star wildcard. - bracket // Bracket expression wildcard. - }; - - class path_pattern_term - { - public: - path_pattern_term_type type; - std::string::const_iterator begin; - std::string::const_iterator end; - - std::size_t - size () const {return end - begin;} - - // Predicates. - // - bool literal () const {return type == path_pattern_term_type::literal;} - bool question () const {return type == path_pattern_term_type::question;} - bool star () const {return type == path_pattern_term_type::star;} - bool bracket () const {return type == path_pattern_term_type::bracket;} - }; - - // Return the literal terminal character. 
- // - char - get_literal (const path_pattern_term&); - - // Match a character against the bracket expression terminal. - // - LIBBUTL_SYMEXPORT bool - match_bracket (char, const path_pattern_term&); - - class LIBBUTL_SYMEXPORT path_pattern_iterator - { - public: - using value_type = path_pattern_term; - using pointer = const path_pattern_term*; - using reference = const path_pattern_term&; - using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; - - explicit - path_pattern_iterator (const std::string&); - - path_pattern_iterator (std::string::const_iterator begin, - std::string::const_iterator end); - - path_pattern_iterator () = default; // Create the end iterator. - - path_pattern_iterator& operator++ () {assert (t_); next (); return *this;} - - reference operator* () const {assert (t_); return *t_;} - pointer operator-> () const {assert (t_); return &*t_;} - - friend bool - operator== (const path_pattern_iterator&, const path_pattern_iterator&); - - friend bool - operator!= (const path_pattern_iterator&, const path_pattern_iterator&); - - private: - void - next (); - - private: - // nullopt denotes the end iterator. - // - // Note that the default-constructed i_ and e_ iterators (having singular - // values) may not represent the end iterator as are not comparable for - // equality. That's why we use an absent term to represent such an - // iterator. - // - optional t_; - - std::string::const_iterator i_; - std::string::const_iterator e_; - }; - - // Range-based for loop support. - // - // for (const path_pattern_term& t: path_pattern_iterator (pattern)) ... 
- // - path_pattern_iterator begin (const path_pattern_iterator&); - path_pattern_iterator end (const path_pattern_iterator&); -} - -#include diff --git a/libbutl/path.cxx b/libbutl/path.cxx index 3b04730..e4f373e 100644 --- a/libbutl/path.cxx +++ b/libbutl/path.cxx @@ -1,9 +1,7 @@ // file : libbutl/path.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #ifdef _WIN32 # include @@ -25,32 +23,11 @@ #endif #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include - #include #include // strcpy() -#endif - -#ifdef __cpp_modules_ts -module butl.path; -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -import butl.utility; // throw_*_error() -import butl.process; // process::current_id() -#else -#include -#include -#endif +#include // throw_*_error() +#include // process::current_id() #include diff --git a/libbutl/path.hxx b/libbutl/path.hxx new file mode 100644 index 0000000..8276130 --- /dev/null +++ b/libbutl/path.hxx @@ -0,0 +1,1536 @@ +// file : libbutl/path.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // ptrdiff_t +#include // uint16_t +#include // str*() +#include // move(), swap() +#include +#include // invalid_argument +#include // hash + +#ifdef _WIN32 +#include // replace() +#endif + +#include +#include + +#ifdef _WIN32 +#include // *case*() +#endif + +#include + +namespace butl +{ + // Wish list/ideas for improvements. + // + // - Ability to convert to directory/leaf/base in-place, without dynamic + // allocation. One idea is something like this: + // + // p -= "/*"; // directory + // p -= "*/"; // leaf + // p -= ".*"; // base + // + // - Faster normalize() implementation. In many cases (e.g., in build2) + // the path is either already normal or the difference is just slashes + // (i.e., there are no '.' or '..' components). 
So a fast path case + // might be in order. + // + + // @@ This should probably be called invalid_path_argument + // + struct LIBBUTL_SYMEXPORT invalid_path_base: public std::invalid_argument + { + invalid_path_base (); + }; + + template + struct invalid_basic_path: invalid_path_base + { + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + + string_type path; + + explicit + invalid_basic_path (const string_type& p): path (p) {} + explicit + invalid_basic_path (const C* p): path (p) {} + invalid_basic_path (const C* p, size_type n): path (p, n) {} + }; + + enum class path_abnormality: std::uint16_t + { + none = 0x00, // Path is normal. + separator = 0x01, // Wrong or multiple consequitive directory separators. + current = 0x02, // Contains current directory (`.`) component. + parent = 0x04 // Contains parent directory (`..`) component. + }; + + inline path_abnormality operator& (path_abnormality, path_abnormality); + inline path_abnormality operator| (path_abnormality, path_abnormality); + inline path_abnormality operator&= (path_abnormality&, path_abnormality); + inline path_abnormality operator|= (path_abnormality&, path_abnormality); + + // The only currently available specialization is for the char type. + // + template + struct path_traits + { + using string_type = std::basic_string; + using char_traits_type = typename string_type::traits_type; + using size_type = typename string_type::size_type; + + // Canonical directory and path seperators. + // +#ifdef _WIN32 + static constexpr const C directory_separator = '\\'; + static constexpr const C path_separator = ';'; +#else + static constexpr const C directory_separator = '/'; + static constexpr const C path_separator = ':'; +#endif + + // Canonical and alternative directory separators. Canonical should be + // first. 
+ // +#ifdef _WIN32 + static constexpr const char* const directory_separators = "\\/"; +#else + static constexpr const char* const directory_separators = "/"; +#endif + + // Directory separator tests. On some platforms there could be multiple + // seperators. For example, on Windows we check for both '/' and '\'. + // + static bool + is_separator (C c) + { +#ifdef _WIN32 + return c == '\\' || c == '/'; +#else + return c == '/'; +#endif + } + + // Return 1-based index in directory_separators string or 0 if not a + // separator. + // + static size_type + separator_index (C c) + { +#ifdef _WIN32 + return c == '\\' ? 1 : c == '/' ? 2 : 0; +#else + return c == '/' ? 1 : 0; +#endif + } + + static bool + absolute (const string_type& s) + { + return absolute (s.c_str (), s.size ()); + } + + static bool + absolute (const C* s) + { + return absolute (s, char_traits_type::length (s)); + } + + static bool + absolute (const C* s, size_type n) + { +#ifdef _WIN32 + return n > 1 && s[1] == ':'; +#else + return n != 0 && is_separator (s[0]); +#endif + } + + static bool + current (const string_type& s) + { + return current (s.c_str (), s.size ()); + } + + static bool + current (const C* s) + { + return current (s, char_traits_type::length (s)); + } + + static bool + current (const C* s, size_type n) + { + return n == 1 && s[0] == '.'; + } + + static bool + parent (const string_type& s) + { + return parent (s.c_str (), s.size ()); + } + + static bool + parent (const C* s) + { + return parent (s, char_traits_type::length (s)); + } + + static bool + parent (const C* s, size_type n) + { + return n == 2 && s[0] == '.' 
&& s[1] == '.'; + } + + static bool + normalized (const string_type& s, bool sep) + { + return normalized (s.c_str (), s.size (), sep); + } + + static bool + normalized (const C* s, bool sep) + { + return normalized (s, char_traits_type::length (s), sep); + } + + static bool + normalized (const C*, size_type, bool); + + static path_abnormality + abnormalities (const string_type& s) + { + return abnormalities (s.c_str (), s.size ()); + } + + static path_abnormality + abnormalities (const C* s) + { + return abnormalities (s, char_traits_type::length (s)); + } + + static path_abnormality + abnormalities (const C*, size_type); + + static bool + root (const string_type& s) + { + return root (s.c_str (), s.size ()); + } + + static bool + root (const C* s) + { + return root (s, char_traits_type::length (s)); + } + + static bool + root (const C* s, size_type n) + { +#ifdef _WIN32 + return n == 2 && s[1] == ':'; +#else + return n == 1 && is_separator (s[0]); +#endif + } + + static size_type + find_separator (string_type const& s, + size_type pos = 0, + size_type n = string_type::npos) + { + if (n == string_type::npos) + n = s.size (); + + const C* r (find_separator (s.c_str () + pos, n - pos)); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_separator (const C* s) + { + return find_separator (s, char_traits_type::length (s)); + } + + static const C* + find_separator (const C* s, size_type n) + { + for (const C* e (s + n); s != e; ++s) + { + if (is_separator (*s)) + return s; + } + + return nullptr; + } + + static size_type + rfind_separator (string_type const& s, size_type pos = string_type::npos) + { + if (pos == string_type::npos) + pos = s.size (); + else + pos++; + + const C* r (rfind_separator (s.c_str (), pos)); + return r != nullptr ? 
r - s.c_str () : string_type::npos; + } + + static const C* + rfind_separator (const C* s) + { + return rfind_separator (s, char_traits_type::length (s)); + } + + static const C* + rfind_separator (const C* s, size_type n) + { + for (; n != 0; --n) + { + if (is_separator (s[n - 1])) + return s + n - 1; + } + + return nullptr; + } + + // Return the position of '.' or npos if there is no extension. + // + static size_type + find_extension (string_type const& s, size_type n = string_type::npos) + { + if (n == string_type::npos) + n = s.size (); + + const C* r (find_extension (s.c_str (), n)); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_extension (const C* s) + { + return find_extension (s, char_traits_type::length (s)); + } + + static const C* + find_extension (const C* s, size_type n) + { + size_type i (n); + + for (; i > 0; --i) + { + C c (s[i - 1]); + + if (c == '.') + break; + + if (is_separator (c)) + { + i = 0; + break; + } + } + + // Weed out paths like ".txt" (and "/.txt") and "txt.". + // + if (i > 1 && !is_separator (s[i - 2]) && i != n) + return s + i - 1; + else + return nullptr; + } + + // Return the start of the leaf (last path component) in the path. Note + // that the leaf will include the trailing separator, if any (i.e., the + // leaf of /tmp/bar/ is bar/). + // + static size_type + find_leaf (string_type const& s) + { + const C* r (find_leaf (s.c_str (), s.size ())); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_leaf (const C* s) + { + return find_leaf (s, char_traits_type::length (s)); + } + + static const C* + find_leaf (const C* s, size_type n) + { + const C* p; + return n == 0 + ? nullptr + : (p = rfind_separator (s, n - 1)) == nullptr ? s : ++p; + } + + static int + compare (string_type const& l, + string_type const& r, + size_type n = string_type::npos) + { + return compare (l.c_str (), n < l.size () ? n : l.size (), + r.c_str (), n < r.size () ? 
n : r.size ()); + } + + // @@ Currently for case-insensitive filesystems (Windows) compare() + // works properly only for ASCII. + // + static int + compare (const C* l, size_type ln, const C* r, size_type rn) + { + //@@ TODO: would be nice to ignore difference in trailing slashes + // (except for POSIX root). + + for (size_type i (0), n (ln < rn ? ln : rn); i != n; ++i) + { +#ifdef _WIN32 + C lc (lcase (l[i])), rc (lcase (r[i])); +#else + C lc (l[i]), rc (r[i]); +#endif + if (is_separator (lc) && is_separator (rc)) + continue; + + if (lc < rc) return -1; + if (lc > rc) return 1; + } + + return ln < rn ? -1 : (ln > rn ? 1 : 0); + } + + static void + canonicalize (string_type& s, char ds = '\0') + { + //canonicalize (s.data (), s.size ()); // C++17 + + if (ds == '\0') + ds = directory_separator; + + for (size_t i (0), n (s.size ()); i != n; ++i) + if (is_separator (s[i]) && s[i] != ds) + s[i] = ds; + } + + static void + canonicalize (C* s, size_type n, char ds = '\0') + { + if (ds == '\0') + ds = directory_separator; + + for (const C* e (s + n); s != e; ++s) + if (is_separator (*s) && *s != ds) + *s = ds; + } + + // Get/set current working directory. Throw std::system_error to report + // underlying OS errors. + // + static string_type + current_directory (); + + static void + current_directory (string_type const&); + + // Return the user home directory. Throw std::system_error to report + // underlying OS errors. + // + static string_type + home_directory (); + + // Return the temporary directory. Throw std::system_error to report + // underlying OS errors. + // + static string_type + temp_directory (); + + // Return a temporary name. The name is constructed by starting with the + // prefix followed by the process id following by a unique counter value + // inside the process (MT-safe). Throw std::system_error to report + // underlying OS errors. + // + static string_type + temp_name (string_type const& prefix); + + // Make the path real (by calling realpath(3)). 
Throw invalid_basic_path + // if the path is invalid (e.g., some components do not exist) and + // std::system_error to report other underlying OS errors. + // +#ifndef _WIN32 + static void + realize (string_type&); +#endif + + // Utilities. + // +#ifdef _WIN32 + static C + tolower (C); + + static C + toupper (C); +#endif + }; + + // This implementation of a filesystem path has two types: path, which can + // represent any path (file, directory, etc) and dir_path, which is derived + // from path. The internal representation of directories maintains a + // trailing directory separator (slash). However, it is ignored in path + // comparison, size, and string spelling. For example: + // + // path p1 ("foo"); // File path. + // path p2 ("bar/"); // Directory path. + // + // path p3 (p1 / p2); // Throw: p1 is not a directory. + // path p4 (p2 / p1); // Ok, file "bar/foo". + // path p5 (p2 / p2); // Ok, directory "bar/bar/". + // + // dir_path d1 ("foo"); // Directory path "foo/". + // dir_path d2 ("bar\\"); // Directory path "bar\". + // + // dir_path d3 (d2 / d1); // "bar\\foo/" + // + // (p4 == d3); // true + // d3.string (); // "bar\\foo" + // d3.representation (); // "bar\\foo/" + // + template + class basic_path; + + template struct any_path_kind; + template struct dir_path_kind; + + using path = basic_path>; + using dir_path = basic_path>; + using invalid_path = invalid_basic_path; + + // Cast from one path kind to another. Note that no checking is performed + // (e.g., that there is a trailing slash if casting to dir_path) but the + // representation is adjusted if necessary (e.g., the trailing slash is + // added to dir_path if missing). + // + template P path_cast (const basic_path&); + template P path_cast (basic_path&&); + + // In certain cases we may need to translate a special path (e.g., `-`) to a + // name that may not be a valid path (e.g., `` or ``), for + // example, for diagnostics. 
In this case we can use path_name which + // contains the original path plus an optional translation as a string. Note + // that this is a view-like type with the original path shallow-referenced + // rather than copied. + // + template + struct basic_path_name; + + using path_name = basic_path_name; + using dir_path_name = basic_path_name; + + // The copying version of the above that derives from the view (and thus can + // be passed down as a view). + // + template + struct basic_path_name_value; + + using path_name_value = basic_path_name_value; + using dir_name_value = basic_path_name_value; + + // A "full" view version of the above that also shallow-references the + // optional name. The "partial" view derives from this "full" view. + // + template + struct basic_path_name_view; + + using path_name_view = basic_path_name_view; + using dir_name_view = basic_path_name_view; + + // Low-level path data storage. It is also used by the implementation to + // pass around initialized/valid paths. + // + template + struct path_data + { + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + + // The idea is as follows: path_ is always the "traditional" form; that + // is, "/" for the root directory and "/tmp" (no trailing slash) for the + // rest. This means we can return/store references to path_. + // + // Then we have tsep_ ("trailing separator") which is the size difference + // between path_ and its "pure" part, that is, without any trailing + // slashes, even for "/". So: + // + // tsep_ == -1 -- trailing slash in path_ (the "/" case) + // tsep_ == 0 -- no trailing slash + // + // Finally, to represent non-root ("/") trailing slashes we use positive + // tsep_ values. In this case tsep_ is interpreted as a 1-based index in + // the path_traits::directory_separators string. + // + // Notes: + // - If path_ is empty, then tsep_ can only be 0. 
+ // - We could have used a much narrower integer for tsep_. + // - We could give the rest of tsep_ to the user to use as flags, etc. + // + string_type path_; + difference_type tsep_; + + size_type + _size () const {return path_.size () + (tsep_ < 0 ? -1 : 0);} + + void + _swap (path_data& d) {path_.swap (d.path_); std::swap (tsep_, d.tsep_);} + + void + _clear () {path_.clear (); tsep_ = 0;} + + // Constructors. + // + path_data () + : tsep_ (0) {} + + path_data (string_type&& p, difference_type ts) + : path_ (std::move (p)), tsep_ (path_.empty () ? 0 : ts) {} + + explicit + path_data (string_type&& p) + : path_ (std::move (p)) { _init (); } + + void + _init () + { + size_type n (path_.size ()), i; + + if (n != 0 && (i = path_traits::separator_index (path_[n - 1])) != 0) + { + if (n == 1) // The "/" case. + tsep_ = -1; + else + { + tsep_ = i; + path_.pop_back (); + } + } + else + tsep_ = 0; + } + }; + + template + struct any_path_kind + { + class base_type: public path_data // In essence protected path_data. + { + protected: + using path_data::path_data; + + base_type () = default; + base_type (path_data&& d): path_data (std::move (d)) {} + }; + + using dir_type = basic_path>; + + // Init and cast. + // + // If exact is true, return the path if the initialization was successful, + // that is, the passed string is a valid path and no modifications were + // necessary. Otherwise, return the empty object and leave the passed + // string untouched. + // + // If extact is false, throw invalid_path if the string is not a valid + // path (e.g., uses an unsupported path notation on Windows). + // + using data_type = path_data; + using string_type = std::basic_string; + + static data_type + init (string_type&&, bool exact = false); + + static void + cast (data_type&) {} + }; + + template + struct dir_path_kind + { + using base_type = basic_path>; + using dir_type = basic_path>; + + // Init and cast. 
+ // + using data_type = path_data; + using string_type = std::basic_string; + + static data_type + init (string_type&&, bool exact = false); + + static void + cast (data_type&); + }; + + template + class basic_path: public K::base_type + { + public: + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + using traits_type = path_traits; + + struct iterator; + using reverse_iterator = std::reverse_iterator; + + using base_type = typename K::base_type; + using dir_type = typename K::dir_type; + + // Create a special empty path. Note that we have to provide our own + // implementation rather than using '=default' to make Clang allow + // default-initialized const instances of this type. + // + basic_path () {} + + // Constructors that initialize a path from a string argument throw the + // invalid_path exception if the string is not a valid path (e.g., uses + // unsupported path notations on Windows). Note that an empty string + // initializes an empty path. + // + explicit + basic_path (C const* s): base_type (K::init (s)) {} + + basic_path (C const* s, size_type n) + : base_type (K::init (string_type (s, n))) {} + + explicit + basic_path (string_type s): base_type (K::init (std::move (s))) {} + + basic_path (const string_type& s, size_type n) + : base_type (K::init (string_type (s, 0, n))) {} + + basic_path (const string_type& s, size_type p, size_type n) + : base_type (K::init (string_type (s, p, n))) {} + + // Create a path using the exact string representation. If the string is + // not a valid path or if it would require a modification, then empty path + // is created instead and the passed string rvalue-reference is left + // untouched. Note that no exception is thrown if the path is invalid. See + // also representation()&& below. 
+ // + enum exact_type {exact}; + basic_path (string_type&& s, exact_type) + : base_type (K::init (std::move (s), true)) {} + + // Create a path as a sub-path identified by the [begin, end) range of + // components. + // + basic_path (const iterator& begin, const iterator& end); + + basic_path (const reverse_iterator& rbegin, const reverse_iterator& rend) + : basic_path (rend.base (), rbegin.base ()) {} + + void + swap (basic_path& p) {this->_swap (p);} + + void + clear () {this->_clear ();} + + // Get/set current working directory. Throw std::system_error to report + // underlying OS errors. + // + static dir_type + current_directory () { + return dir_type (traits_type::current_directory ());} + + static void + current_directory (basic_path const&); + + // Return the user home directory. Throw std::system_error to report + // underlying OS errors. + // + static dir_type + home_directory () {return dir_type (traits_type::home_directory ());} + + // Return the temporary directory. Throw std::system_error to report + // underlying OS errors. + // + static dir_type + temp_directory () {return dir_type (traits_type::temp_directory ());} + + // Return a temporary path. The path is constructed by starting with the + // temporary directory and then appending a path component consisting of + // the specified prefix followed by the process id followed by a unique + // counter value inside the process (all separated with `-`). Throw + // std::system_error to report underlying OS errors. + // + static basic_path + temp_path (const string_type& prefix) + { + basic_path r (temp_directory ()); + r /= traits_type::temp_name (prefix); + return r; + } + + public: + bool + empty () const {return this->path_.empty ();} + + // Note that size does not include the trailing separator except for + // the POSIX root case. + // + size_type + size () const {return this->path_.size ();} + + // Return true if this path doesn't have any directories.
Note that `/foo` + // is not a simple path (it is `foo` in root directory) while `/` is (it + // is the root directory). + // + bool + simple () const; + + bool + absolute () const; + + bool + relative () const {return !absolute ();} + + bool + root () const; + + // The following predicates return true for the `.` and `..` paths, + // respectively. Note that the result doesn't depend on the presence or + // spelling of the trailing directory separator. + // + // Also note that the path must literally match the specified values rather + // than be semantically current or parent. For example for paths `foo/..` + // or `bar/../..` these predicates return false. + // + bool + current () const; + + bool + parent () const; + + // Return true if the path is normalized, that is, does not contain any + // current or parent directory components or multiple consecutive and, + // unless sep is false, non-canonical directory separators. Empty path + // is considered normalized. + // + // Note that for a relative path normalize() may produce a path for which + // normalized() will still return false (for example, ../foo/../ which + // will be normalized to ../). + // + bool + normalized (bool sep = true) const; + + // Similar to normalized() but return details on what renders the path + // abnormal. + // + path_abnormality + abnormalities () const; + + // Test, based on the presence/absence of the trailing separator, if the + // path is to a directory. + // + bool + to_directory () const {return this->tsep_ != 0;} + + // Return true if *this is a sub-path of the specified path (i.e., + // the specified path is a prefix). Expects both paths to be + // normalized. Note that this function returns true if the paths + // are equal. Empty path is considered a prefix of any path. + // + bool + sub (const basic_path&) const; + + // Return true if *this is a super-path of the specified path (i.e., + // the specified path is a suffix). Expects both paths to be + // normalized. 
Note that this function returns true if the paths + // are equal. Empty path is considered a suffix of any path. + // + bool + sup (const basic_path&) const; + + public: + // Return the path without the directory part. Leaf of a directory is + // itself a directory (contains trailing slash). Leaf of a root is the + // path itself. + // + basic_path + leaf () const; + + // As above but make the instance itself the leaf. Return *this. + // + basic_path& + make_leaf (); + + // Return the path without the specified directory part. Returns empty + // path if the paths are the same. Throws invalid_path if the directory is + // not a prefix of *this. Expects both paths to be normalized. + // + basic_path + leaf (basic_path const&) const; + + // Return the directory part of the path or empty path if there is no + // directory. Directory of a root is an empty path. + // + dir_type + directory () const; + + // As above but make the instance itself the directory. Return *this. + // + basic_path& + make_directory (); + + // Return the directory part of the path without the specified leaf part. + // Throws invalid_path if the leaf is not a suffix of *this. Expects both + // paths to be normalized. + // + dir_type + directory (basic_path const&) const; + + // Return the root directory of the path or empty path if the directory is + // not absolute. + // + dir_type + root_directory () const; + + // Return the path without the extension, if any. + // + basic_path + base () const; + + // As above but make the instance itself the base. Return *this. + // + basic_path& + make_base (); + + // Return the extension or empty string if not present. If not empty, then + // the result starts with the character past the dot. + // + string_type + extension () const; + + // Return the in-place pointer to extension or NULL if not present. If not + // NULL, then the result points to the character past the dot but it is + // legal to decrement it once to obtain the value with the dot. 
+ // + const C* + extension_cstring () const; + + // Return a path relative to the specified path that is equivalent + // to *this. Throws invalid_path if a relative path cannot be derived + // (e.g., paths are on different drives on Windows). + // + basic_path + relative (basic_path) const; + + // As above but return nullopt rather than throw if a relative path cannot + // be derived. + // + optional + try_relative (basic_path) const; + + // Iteration over path components. + // + // Note that for an absolute POSIX path the first component is empty, + // not `/`. Which means recombining a path with operator/= is not going + // to work. Instead, do something along these lines: + // + // dir_path r; + // for (auto i (d.begin ()); i != d.end (); ++i) + // r.combine (*i, i.separator ()); + // + // @@ TODO: would be nice to skip consecutive separators (foo//bar). + // + public: + struct iterator + { + using value_type = string_type ; + using pointer = string_type*; + using reference = string_type ; + using size_type = typename string_type::size_type; + using difference_type = std::ptrdiff_t ; + using iterator_category = std::bidirectional_iterator_tag ; + + using data_type = path_data; + + iterator (): p_ (nullptr) {} + iterator (const data_type* p, size_type b, size_type e) + : p_ (p), b_ (b), e_ (e) {} + + // Create an iterator by "rebasing" an old iterator onto a new path + // object. Can, for example, be used to "move" an iterator when moving + // the path object. Note: potentially dangerous if the old iterator used + // to point to a different path. + // + iterator (const basic_path& p, const iterator& i) + : p_ (&p), b_ (i.b_), e_ (i.e_) {} + + iterator& + operator++ () + { + const string_type& s (p_->path_); + + // Position past trailing separator, if any. + // + b_ = e_ != string_type::npos && ++e_ != s.size () + ? e_ + : string_type::npos; + + // Find next trailing separator. + // + e_ = b_ != string_type::npos + ? 
traits_type::find_separator (s, b_) + : b_; + + return *this; + } + + iterator& + operator-- () + { + const string_type& s (p_->path_); + + // Find the new end. + // + e_ = b_ == string_type::npos // Past end? + ? (traits_type::is_separator (s.back ()) // Have trailing slash? + ? s.size () - 1 + : string_type::npos) + : b_ - 1; + + // Find the new begin. + // + b_ = e_ == 0 // Empty component? + ? string_type::npos + : traits_type::rfind_separator (s, + e_ != string_type::npos + ? e_ - 1 + : e_); + + b_ = b_ == string_type::npos // First component? + ? 0 + : b_ + 1; + + return *this; + } + + iterator + operator++ (int) {iterator r (*this); operator++ (); return r;} + + iterator + operator-- (int) {iterator r (*this); operator-- (); return r;} + + // @@ TODO: this should return string_view. + // + string_type + operator* () const + { + return string_type (p_->path_, + b_, + e_ != string_type::npos ? e_ - b_ : e_); + } + + // Return the directory separator after this component or '\0' if there + // is none. This, for example, can be used to determine if the last + // component is a directory. + // + C + separator () const + { + return e_ != string_type::npos + ? p_->path_[e_] + : (p_->tsep_ > 0 + ? 
path_traits::directory_separators[p_->tsep_ - 1] + : 0); + } + + pointer operator-> () const = delete; + + friend bool + operator== (const iterator& x, const iterator& y) + { + return x.p_ == y.p_ && x.b_ == y.b_ && x.e_ == y.e_; + } + + friend bool + operator!= (const iterator& x, const iterator& y) {return !(x == y);} + + private: + friend class basic_path; + + // b - first character of component + // e - separator after component (or npos if none) + // b == npos && e == npos - one past last component (end) + // + const data_type* p_; + size_type b_; + size_type e_; + }; + + iterator begin () const; + iterator end () const; + + reverse_iterator rbegin () const {return reverse_iterator (end ());} + reverse_iterator rend () const {return reverse_iterator (begin ());} + + public: + // Canonicalize the path and return *this. Canonicalization involves + // converting all directory separators to the canonical form (or to the + // alternative separator if specified). Note that multiple directory + // separators are not collapsed. + // + // Note that the alternative separator must be listed in path_traits:: + // directory_separators. + // + basic_path& + canonicalize (char dir_sep = '\0'); + + // Normalize the path and return *this. Normalization involves collapsing + // the '.' and '..' directories if possible, collapsing multiple + // directory separators, and converting all directory separators to the + // canonical form. If cur_empty is true then collapse relative paths + // representing the current directory (for example, '.', './', 'foo/..') + // to an empty path. Otherwise convert it to the canonical form (./ on + // POSIX systems). Note that a non-empty path cannot become an empty one + // in the latter case. + // + // If actual is true, then for case-insensitive filesystems obtain the + // actual spelling of the path. Only an absolute path can be actualized. + // If a path component does not exist, then its (and all subsequent) + // spelling is unchanged.
This is a potentially expensive operation. + // Normally one can assume that "well-known" directories (current, home, + // etc.) are returned in their actual spelling. + // + // Note that for a relative path normalize() may produce a path for which + // normalized() will still return false (for example, ../foo/../ which + // will be normalized to ../). + // + // Note also that on POSIX the parent directory ('..') components are + // resolved relative to a symlink target. As a result, it's possible to + // construct a valid path that this function will either consider as + // invalid or produce a path that points to an incorrect filesystem entry + // (it's also possible that it returns the correct path by accident). For + // example: + // + // /tmp/sym/../../../ -> (should be /tmp) + // | + // /tmp/sub1/sub2/tgt + // + // /tmp/sym/../../ -> / (should be /tmp/sub1) + // | + // /tmp/sub1/sub2/tgt + // + // The common property of such paths is '..' crossing symlink boundaries + // and it's impossible to normalize them without touching the filesystem + // *and* resolving their symlink components (see realize() below). + // + basic_path& + normalize (bool actual = false, bool cur_empty = false); + + // Make the path absolute using the current directory unless it is already + // absolute. Return *this. + // + basic_path& + complete (); + + // Make the path real, that is, absolute, normalized, and with resolved + // symlinks. On POSIX systems this is accomplished with the call to + // realpath(3). On Windows -- complete() and normalize(). Return *this. + // + basic_path& + realize (); + + public: + // Combine two paths. Note: empty path on RHS has no effect. + // + basic_path& + operator/= (basic_path const&); + + // Combine a single path component (must not contain directory separators) + // as a string, without first constructing the path object. Note: empty + // string has no effect. 
+ // + basic_path& + operator/= (string_type const&); + + basic_path& + operator/= (const C*); + + // As above but with an optional separator after the component. Note that + // if the LHS is empty and the string is empty but the separator is not + // '\0', then on POSIX this is treated as a root component. + // + void + combine (string_type const&, C separator); + + void + combine (const C*, C separator); + + void + combine (const C*, size_type, C separator); + + // Append to the end of the path (normally an extension, etc). + // + basic_path& + operator+= (string_type const&); + + basic_path& + operator+= (const C*); + + basic_path& + operator+= (C); + + void + append (const C*, size_type); + + // Note that comparison is case-insensitive if the filesystem is not + // case-sensitive (e.g., Windows). And it ignores trailing slashes + // except for the root case. + // + template + int + compare (const basic_path& x) const { + return traits_type::compare (this->path_, x.path_);} + + public: + // Path string and representation. The string does not contain the + // trailing slash except for the root case. In other words, it is the + // "traditional" spelling of the path that can be passed to system calls, + // etc. Representation, on the other hand is the "precise" spelling that + // includes the trailing slash, if any. One cannot always round-trip a + // path using string() but can using representation(). Note also that + // representation() returns a copy while string() returns a (tracking) + // reference. + // + const string_type& + string () const& {return this->path_;} + + string_type + representation () const&; + + // Moves the underlying path string out of the path object. The path + // object becomes empty. Usage: std::move (p).string (). + // + string_type + string () && {string_type r; r.swap (this->path_); return r;} + + string_type + representation () &&; + + // Trailing directory separator or '\0' if there is none.
+ // + C + separator () const; + + // As above but return it as a (potentially empty) string. + // + string_type + separator_string () const; + + // If possible, return a POSIX version of the path. For example, for a + // Windows path in the form foo\bar this function will return foo/bar. If + // it is not possible to create a POSIX version for this path (e.g., + // c:\foo), this function will throw the invalid_path exception. + // + string_type + posix_string () const&; + + string_type + posix_representation () const&; + + string_type + posix_string () &&; + + string_type + posix_representation () &&; + + // Implementation details. + // + protected: + using data_type = path_data; + + // Direct initialization without init()/cast(). + // + explicit + basic_path (data_type&& d): base_type (std::move (d)) {} + + using base_type::_size; + using base_type::_init; + + // Common implementation for operator/=. + // + void + combine_impl (const C*, size_type, difference_type); + + void + combine_impl (const C*, size_type); + + // Friends. 
+ // + template + friend class basic_path; + + template + friend basic_path + path_cast_impl (const basic_path&, basic_path*); + + template + friend basic_path + path_cast_impl (basic_path&&, basic_path*); + }; + + template + inline basic_path + operator/ (const basic_path& x, const basic_path& y) + { + basic_path r (x); + r /= y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, const std::basic_string& y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, const C* y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, C y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline bool + operator== (const basic_path& x, const basic_path& y) + { + return x.compare (y) == 0; + } + + template + inline bool + operator!= (const basic_path& x, const basic_path& y) + { + return !(x == y); + } + + template + inline bool + operator< (const basic_path& x, const basic_path& y) + { + return x.compare (y) < 0; + } + + // Additional operators for certain path kind combinations. + // + template + inline basic_path> + operator/ (const basic_path>& x, + const basic_path>& y) + { + basic_path> r (x); + r /= y; + return r; + } + + // Note that the result of (foo / "bar") is always a path, even if foo + // is dir_path. An idiom to force it to dir_path is: + // + // dir_path foo_bar (dir_path (foo) /= "bar"); + // + template + inline basic_path> + operator/ (const basic_path& x, const std::basic_string& y) + { + basic_path> r (x); + r /= y; + return r; + } + + template + inline basic_path> + operator/ (const basic_path& x, const C* y) + { + basic_path> r (x); + r /= y; + return r; + } + + template + std::basic_ostream& + to_stream (std::basic_ostream&, + const basic_path&, + bool representation); + + // For operator<< (ostream) see the path-io header. 
+ + // path_name + // + + template <typename P> + struct basic_path_name_view + { + using path_type = P; + using string_type = typename path_type::string_type; + + const path_type* path; + const optional<string_type>* name; + + explicit + basic_path_name_view (const basic_path_name<P>& v) + : path (v.path), name (&v.name) {} + + basic_path_name_view (const path_type* p, const optional<string_type>* n) + : path (p), name (n) {} + + basic_path_name_view () // Create empty/NULL path name. + : path (nullptr), name (nullptr) {} + + + bool + null () const + { + return path == nullptr && (name == nullptr || !*name); + } + + bool + empty () const + { + // assert (!null ()); + return name != nullptr && *name ? (*name)->empty () : path->empty (); + } + };
+ + template <typename P> + struct basic_path_name: basic_path_name_view<P> + { + using base = basic_path_name_view<P>; + + using path_type = typename base::path_type; + using string_type = typename base::string_type; + + optional<string_type> name; + + // Note that a NULL name is converted to absent. + // + explicit + basic_path_name (const basic_path_name_view<P>& v) + : base (v.path, &name), + name (v.name != nullptr ? *v.name : nullopt) {} + + explicit + basic_path_name (const path_type& p, optional<string_type> n = nullopt) + : base (&p, &name), name (std::move (n)) {} + + explicit + basic_path_name (path_type&&, optional<string_type> = nullopt) = delete; + + explicit + basic_path_name (string_type n) + : base (nullptr, &name), name (std::move (n)) {} + + explicit + basic_path_name (const path_type* p, optional<string_type> n = nullopt) + : base (p, &name), name (std::move (n)) {} + + basic_path_name (): // Create empty/NULL path name. + base (nullptr, &name) {} + + basic_path_name (basic_path_name&&); + basic_path_name (const basic_path_name&); + basic_path_name& operator= (basic_path_name&&); + basic_path_name& operator= (const basic_path_name&); + };
+ + template <typename P> + struct basic_path_name_value: basic_path_name<P> + { + using base = basic_path_name<P>; + + using path_type = typename base::path_type; + using string_type = typename base::string_type; + + path_type path; + + // Note that a NULL path/name is converted to empty/absent. + // + explicit + basic_path_name_value (const basic_path_name_view<P>& v) + : base (&path, v.name != nullptr ? *v.name : nullopt), + path (v.path != nullptr ? *v.path : path_type ()) {} + + explicit + basic_path_name_value (path_type p, optional<string_type> n = nullopt) + : base (&path, std::move (n)), path (std::move (p)) {} + + basic_path_name_value (): base (&path) {} // Create empty/NULL path name. + + basic_path_name_value (basic_path_name_value&&); + basic_path_name_value (const basic_path_name_value&); + basic_path_name_value& operator= (basic_path_name_value&&); + basic_path_name_value& operator= (const basic_path_name_value&); + }; +} + +namespace std +{ + template <typename C, typename K> + struct hash<butl::basic_path<C, K>>: hash<basic_string<C>> + { + using argument_type = butl::basic_path<C, K>; + + size_t + operator() (const butl::basic_path<C, K>& p) const noexcept + { +#ifndef _WIN32 + return hash<basic_string<C>>::operator() (p.string ()); +#else + // Case-insensitive FNV hash. + // + const auto& s (p.string ()); + + size_t hash (static_cast<size_t> (2166136261UL)); + for (size_t i (0), n (s.size ()); i != n; ++i) + { + hash ^= static_cast<size_t> (butl::lcase (s[i])); + + // We are using C-style cast to suppress VC warning for 32-bit target + // (the value is compiled but not used). + // + hash *= sizeof (size_t) == 4 + ? static_cast<size_t> (16777619UL) + : (size_t) 1099511628211ULL; + } + return hash; +#endif + } + }; +} + +#include <libbutl/path.ixx> +#include <libbutl/path.txx> diff --git a/libbutl/path.ixx b/libbutl/path.ixx index 9c96cfc..2e4df2c 100644 --- a/libbutl/path.ixx +++ b/libbutl/path.ixx @@ -1,7 +1,7 @@ // file : libbutl/path.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
+namespace butl { // path_abnormality // diff --git a/libbutl/path.mxx b/libbutl/path.mxx deleted file mode 100644 index 5a41ddc..0000000 --- a/libbutl/path.mxx +++ /dev/null @@ -1,1555 +0,0 @@ -// file : libbutl/path.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // ptrdiff_t -#include // uint16_t -#include // str*() -#include // move(), swap() -#include -#include // invalid_argument -#include // hash - -#ifdef _WIN32 -#include // replace() -#endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.path; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.optional; -import butl.small_vector; -#ifdef _WIN32 -import butl.utility; -#endif -#else -#include -#include -#ifdef _WIN32 -#include // *case*() -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Wish list/ideas for improvements. - // - // - Ability to convert to directory/leaf/base in-place, without dynamic - // allocation. One idea is something like this: - // - // p -= "/*"; // directory - // p -= "*/"; // leaf - // p -= ".*"; // base - // - // - Faster normalize() implementation. In many cases (e.g., in build2) - // the path is either already normal or the difference is just slashes - // (i.e., there are no '.' or '..' components). So a fast path case - // might be in order. 
- // - - // @@ This should probably be called invalid_path_argument - // - struct LIBBUTL_SYMEXPORT invalid_path_base: public std::invalid_argument - { - invalid_path_base (); - }; - - template - struct invalid_basic_path: invalid_path_base - { - using string_type = std::basic_string; - using size_type = typename string_type::size_type; - - string_type path; - - explicit - invalid_basic_path (const string_type& p): path (p) {} - explicit - invalid_basic_path (const C* p): path (p) {} - invalid_basic_path (const C* p, size_type n): path (p, n) {} - }; - - enum class path_abnormality: std::uint16_t - { - none = 0x00, // Path is normal. - separator = 0x01, // Wrong or multiple consequitive directory separators. - current = 0x02, // Contains current directory (`.`) component. - parent = 0x04 // Contains parent directory (`..`) component. - }; - - inline path_abnormality operator& (path_abnormality, path_abnormality); - inline path_abnormality operator| (path_abnormality, path_abnormality); - inline path_abnormality operator&= (path_abnormality&, path_abnormality); - inline path_abnormality operator|= (path_abnormality&, path_abnormality); - - // The only currently available specialization is for the char type. - // - template - struct path_traits - { - using string_type = std::basic_string; - using char_traits_type = typename string_type::traits_type; - using size_type = typename string_type::size_type; - - // Canonical directory and path seperators. - // -#ifdef _WIN32 - static constexpr const C directory_separator = '\\'; - static constexpr const C path_separator = ';'; -#else - static constexpr const C directory_separator = '/'; - static constexpr const C path_separator = ':'; -#endif - - // Canonical and alternative directory separators. Canonical should be - // first. 
- // -#ifdef _WIN32 - static constexpr const char* const directory_separators = "\\/"; -#else - static constexpr const char* const directory_separators = "/"; -#endif - - // Directory separator tests. On some platforms there could be multiple - // seperators. For example, on Windows we check for both '/' and '\'. - // - static bool - is_separator (C c) - { -#ifdef _WIN32 - return c == '\\' || c == '/'; -#else - return c == '/'; -#endif - } - - // Return 1-based index in directory_separators string or 0 if not a - // separator. - // - static size_type - separator_index (C c) - { -#ifdef _WIN32 - return c == '\\' ? 1 : c == '/' ? 2 : 0; -#else - return c == '/' ? 1 : 0; -#endif - } - - static bool - absolute (const string_type& s) - { - return absolute (s.c_str (), s.size ()); - } - - static bool - absolute (const C* s) - { - return absolute (s, char_traits_type::length (s)); - } - - static bool - absolute (const C* s, size_type n) - { -#ifdef _WIN32 - return n > 1 && s[1] == ':'; -#else - return n != 0 && is_separator (s[0]); -#endif - } - - static bool - current (const string_type& s) - { - return current (s.c_str (), s.size ()); - } - - static bool - current (const C* s) - { - return current (s, char_traits_type::length (s)); - } - - static bool - current (const C* s, size_type n) - { - return n == 1 && s[0] == '.'; - } - - static bool - parent (const string_type& s) - { - return parent (s.c_str (), s.size ()); - } - - static bool - parent (const C* s) - { - return parent (s, char_traits_type::length (s)); - } - - static bool - parent (const C* s, size_type n) - { - return n == 2 && s[0] == '.' 
&& s[1] == '.'; - } - - static bool - normalized (const string_type& s, bool sep) - { - return normalized (s.c_str (), s.size (), sep); - } - - static bool - normalized (const C* s, bool sep) - { - return normalized (s, char_traits_type::length (s), sep); - } - - static bool - normalized (const C*, size_type, bool); - - static path_abnormality - abnormalities (const string_type& s) - { - return abnormalities (s.c_str (), s.size ()); - } - - static path_abnormality - abnormalities (const C* s) - { - return abnormalities (s, char_traits_type::length (s)); - } - - static path_abnormality - abnormalities (const C*, size_type); - - static bool - root (const string_type& s) - { - return root (s.c_str (), s.size ()); - } - - static bool - root (const C* s) - { - return root (s, char_traits_type::length (s)); - } - - static bool - root (const C* s, size_type n) - { -#ifdef _WIN32 - return n == 2 && s[1] == ':'; -#else - return n == 1 && is_separator (s[0]); -#endif - } - - static size_type - find_separator (string_type const& s, - size_type pos = 0, - size_type n = string_type::npos) - { - if (n == string_type::npos) - n = s.size (); - - const C* r (find_separator (s.c_str () + pos, n - pos)); - return r != nullptr ? r - s.c_str () : string_type::npos; - } - - static const C* - find_separator (const C* s) - { - return find_separator (s, char_traits_type::length (s)); - } - - static const C* - find_separator (const C* s, size_type n) - { - for (const C* e (s + n); s != e; ++s) - { - if (is_separator (*s)) - return s; - } - - return nullptr; - } - - static size_type - rfind_separator (string_type const& s, size_type pos = string_type::npos) - { - if (pos == string_type::npos) - pos = s.size (); - else - pos++; - - const C* r (rfind_separator (s.c_str (), pos)); - return r != nullptr ? 
r - s.c_str () : string_type::npos; - } - - static const C* - rfind_separator (const C* s) - { - return rfind_separator (s, char_traits_type::length (s)); - } - - static const C* - rfind_separator (const C* s, size_type n) - { - for (; n != 0; --n) - { - if (is_separator (s[n - 1])) - return s + n - 1; - } - - return nullptr; - } - - // Return the position of '.' or npos if there is no extension. - // - static size_type - find_extension (string_type const& s, size_type n = string_type::npos) - { - if (n == string_type::npos) - n = s.size (); - - const C* r (find_extension (s.c_str (), n)); - return r != nullptr ? r - s.c_str () : string_type::npos; - } - - static const C* - find_extension (const C* s) - { - return find_extension (s, char_traits_type::length (s)); - } - - static const C* - find_extension (const C* s, size_type n) - { - size_type i (n); - - for (; i > 0; --i) - { - C c (s[i - 1]); - - if (c == '.') - break; - - if (is_separator (c)) - { - i = 0; - break; - } - } - - // Weed out paths like ".txt" (and "/.txt") and "txt.". - // - if (i > 1 && !is_separator (s[i - 2]) && i != n) - return s + i - 1; - else - return nullptr; - } - - // Return the start of the leaf (last path component) in the path. Note - // that the leaf will include the trailing separator, if any (i.e., the - // leaf of /tmp/bar/ is bar/). - // - static size_type - find_leaf (string_type const& s) - { - const C* r (find_leaf (s.c_str (), s.size ())); - return r != nullptr ? r - s.c_str () : string_type::npos; - } - - static const C* - find_leaf (const C* s) - { - return find_leaf (s, char_traits_type::length (s)); - } - - static const C* - find_leaf (const C* s, size_type n) - { - const C* p; - return n == 0 - ? nullptr - : (p = rfind_separator (s, n - 1)) == nullptr ? s : ++p; - } - - static int - compare (string_type const& l, - string_type const& r, - size_type n = string_type::npos) - { - return compare (l.c_str (), n < l.size () ? n : l.size (), - r.c_str (), n < r.size () ? 
n : r.size ()); - } - - // @@ Currently for case-insensitive filesystems (Windows) compare() - // works properly only for ASCII. - // - static int - compare (const C* l, size_type ln, const C* r, size_type rn) - { - //@@ TODO: would be nice to ignore difference in trailing slashes - // (except for POSIX root). - - for (size_type i (0), n (ln < rn ? ln : rn); i != n; ++i) - { -#ifdef _WIN32 - C lc (lcase (l[i])), rc (lcase (r[i])); -#else - C lc (l[i]), rc (r[i]); -#endif - if (is_separator (lc) && is_separator (rc)) - continue; - - if (lc < rc) return -1; - if (lc > rc) return 1; - } - - return ln < rn ? -1 : (ln > rn ? 1 : 0); - } - - static void - canonicalize (string_type& s, char ds = '\0') - { - //canonicalize (s.data (), s.size ()); // C++17 - - if (ds == '\0') - ds = directory_separator; - - for (size_t i (0), n (s.size ()); i != n; ++i) - if (is_separator (s[i]) && s[i] != ds) - s[i] = ds; - } - - static void - canonicalize (C* s, size_type n, char ds = '\0') - { - if (ds == '\0') - ds = directory_separator; - - for (const C* e (s + n); s != e; ++s) - if (is_separator (*s) && *s != ds) - *s = ds; - } - - // Get/set current working directory. Throw std::system_error to report - // underlying OS errors. - // - static string_type - current_directory (); - - static void - current_directory (string_type const&); - - // Return the user home directory. Throw std::system_error to report - // underlying OS errors. - // - static string_type - home_directory (); - - // Return the temporary directory. Throw std::system_error to report - // underlying OS errors. - // - static string_type - temp_directory (); - - // Return a temporary name. The name is constructed by starting with the - // prefix followed by the process id following by a unique counter value - // inside the process (MT-safe). Throw std::system_error to report - // underlying OS errors. - // - static string_type - temp_name (string_type const& prefix); - - // Make the path real (by calling realpath(3)). 
Throw invalid_basic_path - // if the path is invalid (e.g., some components do not exist) and - // std::system_error to report other underlying OS errors. - // -#ifndef _WIN32 - static void - realize (string_type&); -#endif - - // Utilities. - // -#ifdef _WIN32 - static C - tolower (C); - - static C - toupper (C); -#endif - }; - - // This implementation of a filesystem path has two types: path, which can - // represent any path (file, directory, etc) and dir_path, which is derived - // from path. The internal representation of directories maintains a - // trailing directory separator (slash). However, it is ignored in path - // comparison, size, and string spelling. For example: - // - // path p1 ("foo"); // File path. - // path p2 ("bar/"); // Directory path. - // - // path p3 (p1 / p2); // Throw: p1 is not a directory. - // path p4 (p2 / p1); // Ok, file "bar/foo". - // path p5 (p2 / p2); // Ok, directory "bar/bar/". - // - // dir_path d1 ("foo"); // Directory path "foo/". - // dir_path d2 ("bar\\"); // Directory path "bar\". - // - // dir_path d3 (d2 / d1); // "bar\\foo/" - // - // (p4 == d3); // true - // d3.string (); // "bar\\foo" - // d3.representation (); // "bar\\foo/" - // - template - class basic_path; - - template struct any_path_kind; - template struct dir_path_kind; - - using path = basic_path>; - using dir_path = basic_path>; - using invalid_path = invalid_basic_path; - - // Cast from one path kind to another. Note that no checking is performed - // (e.g., that there is a trailing slash if casting to dir_path) but the - // representation is adjusted if necessary (e.g., the trailing slash is - // added to dir_path if missing). - // - template P path_cast (const basic_path&); - template P path_cast (basic_path&&); - - // In certain cases we may need to translate a special path (e.g., `-`) to a - // name that may not be a valid path (e.g., `` or ``), for - // example, for diagnostics. 
In this case we can use path_name which - // contains the original path plus an optional translation as a string. Note - // that this is a view-like type with the original path shallow-referenced - // rather than copied. - // - template - struct basic_path_name; - - using path_name = basic_path_name; - using dir_path_name = basic_path_name; - - // The copying version of the above that derives from the view (and thus can - // be passed down as a view). - // - template - struct basic_path_name_value; - - using path_name_value = basic_path_name_value; - using dir_name_value = basic_path_name_value; - - // A "full" view version of the above that also shallow-references the - // optional name. The "partial" view derives from this "full" view. - // - template - struct basic_path_name_view; - - using path_name_view = basic_path_name_view; - using dir_name_view = basic_path_name_view; - - // Low-level path data storage. It is also used by the implementation to - // pass around initialized/valid paths. - // - template - struct path_data - { - using string_type = std::basic_string; - using size_type = typename string_type::size_type; - using difference_type = typename string_type::difference_type; - - // The idea is as follows: path_ is always the "traditional" form; that - // is, "/" for the root directory and "/tmp" (no trailing slash) for the - // rest. This means we can return/store references to path_. - // - // Then we have tsep_ ("trailing separator") which is the size difference - // between path_ and its "pure" part, that is, without any trailing - // slashes, even for "/". So: - // - // tsep_ == -1 -- trailing slash in path_ (the "/" case) - // tsep_ == 0 -- no trailing slash - // - // Finally, to represent non-root ("/") trailing slashes we use positive - // tsep_ values. In this case tsep_ is interpreted as a 1-based index in - // the path_traits::directory_separators string. - // - // Notes: - // - If path_ is empty, then tsep_ can only be 0. 
- // - We could have used a much narrower integer for tsep_. - // - We could give the rest of tsep_ to the user to use as flags, etc. - // - string_type path_; - difference_type tsep_; - - size_type - _size () const {return path_.size () + (tsep_ < 0 ? -1 : 0);} - - void - _swap (path_data& d) {path_.swap (d.path_); std::swap (tsep_, d.tsep_);} - - void - _clear () {path_.clear (); tsep_ = 0;} - - // Constructors. - // - path_data () - : tsep_ (0) {} - - path_data (string_type&& p, difference_type ts) - : path_ (std::move (p)), tsep_ (path_.empty () ? 0 : ts) {} - - explicit - path_data (string_type&& p) - : path_ (std::move (p)) { _init (); } - - void - _init () - { - size_type n (path_.size ()), i; - - if (n != 0 && (i = path_traits::separator_index (path_[n - 1])) != 0) - { - if (n == 1) // The "/" case. - tsep_ = -1; - else - { - tsep_ = i; - path_.pop_back (); - } - } - else - tsep_ = 0; - } - }; - - template - struct any_path_kind - { - class base_type: public path_data // In essence protected path_data. - { - protected: - using path_data::path_data; - - base_type () = default; - base_type (path_data&& d): path_data (std::move (d)) {} - }; - - using dir_type = basic_path>; - - // Init and cast. - // - // If exact is true, return the path if the initialization was successful, - // that is, the passed string is a valid path and no modifications were - // necessary. Otherwise, return the empty object and leave the passed - // string untouched. - // - // If extact is false, throw invalid_path if the string is not a valid - // path (e.g., uses an unsupported path notation on Windows). - // - using data_type = path_data; - using string_type = std::basic_string; - - static data_type - init (string_type&&, bool exact = false); - - static void - cast (data_type&) {} - }; - - template - struct dir_path_kind - { - using base_type = basic_path>; - using dir_type = basic_path>; - - // Init and cast. 
- // - using data_type = path_data; - using string_type = std::basic_string; - - static data_type - init (string_type&&, bool exact = false); - - static void - cast (data_type&); - }; - - template - class basic_path: public K::base_type - { - public: - using string_type = std::basic_string; - using size_type = typename string_type::size_type; - using difference_type = typename string_type::difference_type; - using traits_type = path_traits; - - struct iterator; - using reverse_iterator = std::reverse_iterator; - - using base_type = typename K::base_type; - using dir_type = typename K::dir_type; - - // Create a special empty path. Note that we have to provide our own - // implementation rather than using '=default' to make Clang allow - // default-initialized const instances of this type. - // - basic_path () {} - - // Constructors that initialize a path from a string argument throw the - // invalid_path exception if the string is not a valid path (e.g., uses - // unsupported path notations on Windows). Note that an empty string - // initializes an empty path. - // - explicit - basic_path (C const* s): base_type (K::init (s)) {} - - basic_path (C const* s, size_type n) - : base_type (K::init (string_type (s, n))) {} - - explicit - basic_path (string_type s): base_type (K::init (std::move (s))) {} - - basic_path (const string_type& s, size_type n) - : base_type (K::init (string_type (s, 0, n))) {} - - basic_path (const string_type& s, size_type p, size_type n) - : base_type (K::init (string_type (s, p, n))) {} - - // Create a path using the exact string representation. If the string is - // not a valid path or if it would require a modification, then empty path - // is created instead and the passed string rvalue-reference is left - // untouched. Note that no exception is thrown if the path is invalid. See - // also representation()&& below. 
- // - enum exact_type {exact}; - basic_path (string_type&& s, exact_type) - : base_type (K::init (std::move (s), true)) {} - - // Create a path as a sub-path identified by the [begin, end) range of - // components. - // - basic_path (const iterator& begin, const iterator& end); - - basic_path (const reverse_iterator& rbegin, const reverse_iterator& rend) - : basic_path (rend.base (), rbegin.base ()) {} - - void - swap (basic_path& p) {this->_swap (p);} - - void - clear () {this->_clear ();} - - // Get/set current working directory. Throw std::system_error to report - // underlying OS errors. - // - static dir_type - current_directory () { - return dir_type (traits_type::current_directory ());} - - static void - current_directory (basic_path const&); - - // Return the user home directory. Throw std::system_error to report - // underlying OS errors. - // - static dir_type - home_directory () {return dir_type (traits_type::home_directory ());} - - // Return the temporary directory. Throw std::system_error to report - // underlying OS errors. - // - static dir_type - temp_directory () {return dir_type (traits_type::temp_directory ());} - - // Return a temporary path. The path is constructed by starting with the - // temporary directory and then appending a path component consisting of - // the specified prefix followed by the process id following by a unique - // counter value inside the process (all separated with `-`). Throw - // std::system_error to report underlying OS errors. - // - static basic_path - temp_path (const string_type& prefix) - { - basic_path r (temp_directory ()); - r /= traits_type::temp_name (prefix); - return r; - } - - public: - bool - empty () const {return this->path_.empty ();} - - // Note that size does not include the trailing separator except for - // the POSIX root case. - // - size_type - size () const {return this->path_.size ();} - - // Return true if this path doesn't have any directories. 
Note that `/foo` - // is not a simple path (it is `foo` in root directory) while `/` is (it - // is the root directory). - // - bool - simple () const; - - bool - absolute () const; - - bool - relative () const {return !absolute ();} - - bool - root () const; - - // The following predicates return true for the `.` and `..` paths, - // respectively. Note that the result doesn't depend on the presence or - // spelling of the trailing directory separator. - // - // Also note that the path must literally match the specified values rather - // than be semantically current or parent. For example for paths `foo/..` - // or `bar/../..` these predicates return false. - // - bool - current () const; - - bool - parent () const; - - // Return true if the path is normalized, that is, does not contain any - // current or parent directory components or multiple consecutive and, - // unless sep is false, non-canonical directory separators. Empty path - // is considered normalized. - // - // Note that for a relative path normalize() may produce a path for which - // normalized() will still return false (for example, ../foo/../ which - // will be normalized to ../). - // - bool - normalized (bool sep = true) const; - - // Similar to normalized() but return details on what renders the path - // abnormal. - // - path_abnormality - abnormalities () const; - - // Test, based on the presence/absence of the trailing separator, if the - // path is to a directory. - // - bool - to_directory () const {return this->tsep_ != 0;} - - // Return true if *this is a sub-path of the specified path (i.e., - // the specified path is a prefix). Expects both paths to be - // normalized. Note that this function returns true if the paths - // are equal. Empty path is considered a prefix of any path. - // - bool - sub (const basic_path&) const; - - // Return true if *this is a super-path of the specified path (i.e., - // the specified path is a suffix). Expects both paths to be - // normalized. 
Note that this function returns true if the paths - // are equal. Empty path is considered a suffix of any path. - // - bool - sup (const basic_path&) const; - - public: - // Return the path without the directory part. Leaf of a directory is - // itself a directory (contains trailing slash). Leaf of a root is the - // path itself. - // - basic_path - leaf () const; - - // As above but make the instance itself the leaf. Return *this. - // - basic_path& - make_leaf (); - - // Return the path without the specified directory part. Returns empty - // path if the paths are the same. Throws invalid_path if the directory is - // not a prefix of *this. Expects both paths to be normalized. - // - basic_path - leaf (basic_path const&) const; - - // Return the directory part of the path or empty path if there is no - // directory. Directory of a root is an empty path. - // - dir_type - directory () const; - - // As above but make the instance itself the directory. Return *this. - // - basic_path& - make_directory (); - - // Return the directory part of the path without the specified leaf part. - // Throws invalid_path if the leaf is not a suffix of *this. Expects both - // paths to be normalized. - // - dir_type - directory (basic_path const&) const; - - // Return the root directory of the path or empty path if the directory is - // not absolute. - // - dir_type - root_directory () const; - - // Return the path without the extension, if any. - // - basic_path - base () const; - - // As above but make the instance itself the base. Return *this. - // - basic_path& - make_base (); - - // Return the extension or empty string if not present. If not empty, then - // the result starts with the character past the dot. - // - string_type - extension () const; - - // Return the in-place pointer to extension or NULL if not present. If not - // NULL, then the result points to the character past the dot but it is - // legal to decrement it once to obtain the value with the dot. 
- // - const C* - extension_cstring () const; - - // Return a path relative to the specified path that is equivalent - // to *this. Throws invalid_path if a relative path cannot be derived - // (e.g., paths are on different drives on Windows). - // - basic_path - relative (basic_path) const; - - // As above but return nullopt rather than throw if a relative path cannot - // be derived. - // - optional - try_relative (basic_path) const; - - // Iteration over path components. - // - // Note that for an absolute POSIX path the first component is empty, - // not `/`. Which means recombining a path with operator/= is not going - // to work. Instead, do something along these lines: - // - // dir_path r; - // for (auto i (d.begin ()); i != d.end (); ++i) - // r.combine (*i, i.separator ()); - // - // @@ TODO: would be nice to skip consecutive separators (foo//bar). - // - public: - struct iterator - { - using value_type = string_type ; - using pointer = string_type*; - using reference = string_type ; - using size_type = typename string_type::size_type; - using difference_type = std::ptrdiff_t ; - using iterator_category = std::bidirectional_iterator_tag ; - - using data_type = path_data; - - iterator (): p_ (nullptr) {} - iterator (const data_type* p, size_type b, size_type e) - : p_ (p), b_ (b), e_ (e) {} - - // Create an iterator by "rebasing" an old iterator onto a new path - // object. Can, for example, be used to "move" an iterator when moving - // the path object. Note: potentially dangerous if the old iterator used - // to point to a different path. - // - iterator (const basic_path& p, const iterator& i) - : p_ (&p), b_ (i.b_), e_ (i.e_) {} - - iterator& - operator++ () - { - const string_type& s (p_->path_); - - // Position past trailing separator, if any. - // - b_ = e_ != string_type::npos && ++e_ != s.size () - ? e_ - : string_type::npos; - - // Find next trailing separator. - // - e_ = b_ != string_type::npos - ? 
traits_type::find_separator (s, b_) - : b_; - - return *this; - } - - iterator& - operator-- () - { - const string_type& s (p_->path_); - - // Find the new end. - // - e_ = b_ == string_type::npos // Past end? - ? (traits_type::is_separator (s.back ()) // Have trailing slash? - ? s.size () - 1 - : string_type::npos) - : b_ - 1; - - // Find the new begin. - // - b_ = e_ == 0 // Empty component? - ? string_type::npos - : traits_type::rfind_separator (s, - e_ != string_type::npos - ? e_ - 1 - : e_); - - b_ = b_ == string_type::npos // First component? - ? 0 - : b_ + 1; - - return *this; - } - - iterator - operator++ (int) {iterator r (*this); operator++ (); return r;} - - iterator - operator-- (int) {iterator r (*this); operator-- (); return r;} - - // @@ TODO: this should return string_view. - // - string_type - operator* () const - { - return string_type (p_->path_, - b_, - e_ != string_type::npos ? e_ - b_ : e_); - } - - // Return the directory separator after this component or '\0' if there - // is none. This, for example, can be used to determine if the last - // component is a directory. - // - C - separator () const - { - return e_ != string_type::npos - ? p_->path_[e_] - : (p_->tsep_ > 0 - ? 
path_traits::directory_separators[p_->tsep_ - 1] - : 0); - } - - pointer operator-> () const = delete; - - friend bool - operator== (const iterator& x, const iterator& y) - { - return x.p_ == y.p_ && x.b_ == y.b_ && x.e_ == y.e_; - } - - friend bool - operator!= (const iterator& x, const iterator& y) {return !(x == y);} - - private: - friend class basic_path; - - // b - first character of component - // e - separator after component (or npos if none) - // b == npos && e == npos - one past last component (end) - // - const data_type* p_; - size_type b_; - size_type e_; - }; - - iterator begin () const; - iterator end () const; - - reverse_iterator rbegin () const {return reverse_iterator (end ());} - reverse_iterator rend () const {return reverse_iterator (begin ());} - - public: - // Canonicalize the path and return *this. Canonicalization involves - // converting all directory separators to the canonical form (or to the - // alternative separator if specified). Note that multiple directory - // separators are not collapsed. - // - // Note that the alternative separator must be listed in path_trait:: - // directory_separators. - // - basic_path& - canonicalize (char dir_sep = '\0'); - - // Normalize the path and return *this. Normalization involves collapsing - // the '.' and '..' directories if possible, collapsing multiple - // directory separators, and converting all directory separators to the - // canonical form. If cur_empty is true then collapse relative paths - // representing the current directory (for example, '.', './', 'foo/..') - // to an empty path. Otherwise convert it to the canonical form (./ on - // POSIX systems). Note that a non-empty path cannot become an empty one - // in the latter case. - // - // If actual is true, then for case-insensitive filesystems obtain the - // actual spelling of the path. Only an absolute path can be actualized. - // If a path component does not exist, then its (and all subsequent) - // spelling is unchanged. 
This is a potentially expensive operation. - // Normally one can assume that "well-known" directories (current, home, - // etc.) are returned in their actual spelling. - // - // Note that for a relative path normalize() may produce a path for which - // normalized() will still return false (for example, ../foo/../ which - // will be normalized to ../). - // - // Note also that on POSIX the parent directory ('..') components are - // resolved relative to a symlink target. As a result, it's possible to - // construct a valid path that this function will either consider as - // invalid or produce a path that points to an incorrect filesystem entry - // (it's also possible that it returns the correct path by accident). For - // example: - // - // /tmp/sym/../../../ -> (should be /tmp) - // | - // /tmp/sub1/sub2/tgt - // - // /tmp/sym/../../ -> / (should be /tmp/sub1) - // | - // /tmp/sub1/sub2/tgt - // - // The common property of such paths is '..' crossing symlink boundaries - // and it's impossible to normalize them without touching the filesystem - // *and* resolving their symlink components (see realize() below). - // - basic_path& - normalize (bool actual = false, bool cur_empty = false); - - // Make the path absolute using the current directory unless it is already - // absolute. Return *this. - // - basic_path& - complete (); - - // Make the path real, that is, absolute, normalized, and with resolved - // symlinks. On POSIX systems this is accomplished with the call to - // realpath(3). On Windows -- complete() and normalize(). Return *this. - // - basic_path& - realize (); - - public: - // Combine two paths. Note: empty path on RHS has no effect. - // - basic_path& - operator/= (basic_path const&); - - // Combine a single path component (must not contain directory separators) - // as a string, without first constructing the path object. Note: empty - // string has no effect. 
- // - basic_path& - operator/= (string_type const&); - - basic_path& - operator/= (const C*); - - // As above but with an optional separator after the component. Note that - // if the LHS is empty and the string is empty but the separator is not - // '\0', then on POSIX this is treated as a root component. - // - void - combine (string_type const&, C separator); - - void - combine (const C*, C separator); - - void - combine (const C*, size_type, C separator); - - // Append to the end of the path (normally an extension, etc). - // - basic_path& - operator+= (string_type const&); - - basic_path& - operator+= (const C*); - - basic_path& - operator+= (C); - - void - append (const C*, size_type); - - // Note that comparison is case-insensitive if the filesystem is not - // case-sensitive (e.g., Windows). And it ignored trailing slashes - // except for the root case. - // - template - int - compare (const basic_path& x) const { - return traits_type::compare (this->path_, x.path_);} - - public: - // Path string and representation. The string does not contain the - // trailing slash except for the root case. In other words, it is the - // "traditional" spelling of the path that can be passed to system calls, - // etc. Representation, on the other hand is the "precise" spelling that - // includes the trailing slash, if any. One cannot always round-trip a - // path using string() but can using representation(). Note also that - // representation() returns a copy while string() returns a (tracking) - // reference. - // - const string_type& - string () const& {return this->path_;} - - string_type - representation () const&; - - // Moves the underlying path string out of the path object. The path - // object becomes empty. Usage: std::move (p).string (). - // - string_type - string () && {string_type r; r.swap (this->path_); return r;} - - string_type - representation () &&; - - // Trailing directory separator or '\0' if there is none. 
- // - C - separator () const; - - // As above but return it as a (potentially empty) string. - // - string_type - separator_string () const; - - // If possible, return a POSIX version of the path. For example, for a - // Windows path in the form foo\bar this function will return foo/bar. If - // it is not possible to create a POSIX version for this path (e.g., - // c:\foo), this function will throw the invalid_path exception. - // - string_type - posix_string () const&; - - string_type - posix_representation () const&; - - string_type - posix_string () &&; - - string_type - posix_representation () &&; - - // Implementation details. - // - protected: - using data_type = path_data; - - // Direct initialization without init()/cast(). - // - explicit - basic_path (data_type&& d): base_type (std::move (d)) {} - - using base_type::_size; - using base_type::_init; - - // Common implementation for operator/=. - // - void - combine_impl (const C*, size_type, difference_type); - - void - combine_impl (const C*, size_type); - - // Friends. 
- // - template - friend class basic_path; - - template - friend basic_path - path_cast_impl (const basic_path&, basic_path*); - - template - friend basic_path - path_cast_impl (basic_path&&, basic_path*); - }; - - template - inline basic_path - operator/ (const basic_path& x, const basic_path& y) - { - basic_path r (x); - r /= y; - return r; - } - - template - inline basic_path - operator+ (const basic_path& x, const std::basic_string& y) - { - basic_path r (x); - r += y; - return r; - } - - template - inline basic_path - operator+ (const basic_path& x, const C* y) - { - basic_path r (x); - r += y; - return r; - } - - template - inline basic_path - operator+ (const basic_path& x, C y) - { - basic_path r (x); - r += y; - return r; - } - - template - inline bool - operator== (const basic_path& x, const basic_path& y) - { - return x.compare (y) == 0; - } - - template - inline bool - operator!= (const basic_path& x, const basic_path& y) - { - return !(x == y); - } - - template - inline bool - operator< (const basic_path& x, const basic_path& y) - { - return x.compare (y) < 0; - } - - // Additional operators for certain path kind combinations. - // - template - inline basic_path> - operator/ (const basic_path>& x, - const basic_path>& y) - { - basic_path> r (x); - r /= y; - return r; - } - - // Note that the result of (foo / "bar") is always a path, even if foo - // is dir_path. An idiom to force it to dir_path is: - // - // dir_path foo_bar (dir_path (foo) /= "bar"); - // - template - inline basic_path> - operator/ (const basic_path& x, const std::basic_string& y) - { - basic_path> r (x); - r /= y; - return r; - } - - template - inline basic_path> - operator/ (const basic_path& x, const C* y) - { - basic_path> r (x); - r /= y; - return r; - } - - template - std::basic_ostream& - to_stream (std::basic_ostream&, - const basic_path&, - bool representation); - - // For operator<< (ostream) see the path-io header. 
- - // path_name - // - - template - struct basic_path_name_view - { - using path_type = P; - using string_type = typename path_type::string_type; - - const path_type* path; - const optional* name; - - explicit - basic_path_name_view (const basic_path_name

& v) - : path (v.path), name (&v.name) {} - - basic_path_name_view (const path_type* p, const optional* n) - : path (p), name (n) {} - - basic_path_name_view () // Create empty/NULL path name. - : path (nullptr), name (nullptr) {} - - - bool - null () const - { - return path == nullptr && (name == nullptr || !*name); - } - - bool - empty () const - { - // assert (!null ()); - return name != nullptr && *name ? (*name)->empty () : path->empty (); - } - }; - - template - struct basic_path_name: basic_path_name_view

- { - using base = basic_path_name_view

; - - using path_type = typename base::path_type; - using string_type = typename base::string_type; - - optional name; - - // Note that a NULL name is converted to absent. - // - explicit - basic_path_name (const basic_path_name_view

& v) - : base (v.path, &name), - name (v.name != nullptr ? *v.name : nullopt) {} - - explicit - basic_path_name (const path_type& p, optional n = nullopt) - : base (&p, &name), name (std::move (n)) {} - - explicit - basic_path_name (path_type&&, optional = nullopt) = delete; - - explicit - basic_path_name (string_type n) - : base (nullptr, &name), name (std::move (n)) {} - - explicit - basic_path_name (const path_type* p, optional n = nullopt) - : base (p, &name), name (std::move (n)) {} - - basic_path_name (): // Create empty/NULL path name. - base (nullptr, &name) {} - - basic_path_name (basic_path_name&&); - basic_path_name (const basic_path_name&); - basic_path_name& operator= (basic_path_name&&); - basic_path_name& operator= (const basic_path_name&); - }; - - template - struct basic_path_name_value: basic_path_name

- { - using base = basic_path_name

; - - using path_type = typename base::path_type; - using string_type = typename base::string_type; - - path_type path; - - // Note that a NULL path/name is converted to empty/absent. - // - explicit - basic_path_name_value (const basic_path_name_view

& v) - : base (&path, v.name != nullptr ? *v.name : nullopt), - path (v.path != nullptr ? *v.path : path_type ()) {} - - explicit - basic_path_name_value (path_type p, optional n = nullopt) - : base (&path, std::move (n)), path (std::move (p)) {} - - basic_path_name_value (): base (&path) {} // Create empty/NULL path name. - - basic_path_name_value (basic_path_name_value&&); - basic_path_name_value (const basic_path_name_value&); - basic_path_name_value& operator= (basic_path_name_value&&); - basic_path_name_value& operator= (const basic_path_name_value&); - }; -} - -LIBBUTL_MODEXPORT namespace std -{ - template - struct hash>: hash> - { - using argument_type = butl::basic_path; - - size_t - operator() (const butl::basic_path& p) const noexcept - { -#ifndef _WIN32 - return hash>::operator() (p.string ()); -#else - // Case-insensitive FNV hash. - // - const auto& s (p.string ()); - - size_t hash (static_cast (2166136261UL)); - for (size_t i (0), n (s.size ()); i != n; ++i) - { - hash ^= static_cast (butl::lcase (s[i])); - - // We are using C-style cast to suppress VC warning for 32-bit target - // (the value is compiled but not used). - // - hash *= sizeof (size_t) == 4 - ? static_cast (16777619UL) - : (size_t) 1099511628211ULL; - } - return hash; -#endif - } - }; -} - -#include -#include diff --git a/libbutl/path.txx b/libbutl/path.txx index 84fc038..60e0f1a 100644 --- a/libbutl/path.txx +++ b/libbutl/path.txx @@ -1,7 +1,7 @@ // file : libbutl/path.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
+namespace butl { template basic_path basic_path:: diff --git a/libbutl/prefix-map.hxx b/libbutl/prefix-map.hxx new file mode 100644 index 0000000..9706ebd --- /dev/null +++ b/libbutl/prefix-map.hxx @@ -0,0 +1,173 @@ +// file : libbutl/prefix-map.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // move() +#include // min() + +#include + +namespace butl +{ + // A map of hierarchical "paths", e.g., 'foo.bar' or 'foo/bar' with the + // ability to retrieve a range of entries that have a specific prefix as + // well as finding the most qualified entry for specific path. The '.' and + // '/' above are the delimiter characters. + // + // Note that as a special rule, the default implementation of compare_prefix + // treats empty key as everyone's prefix even if the paths don't start with + // the delimiter (useful to represent a "root path"). + // + // Implementation-wise, the idea is to pretend that each key ends with the + // delimiter. This way we automatically avoid matching 'foobar' as having a + // prefix 'foo'. + // + template + struct compare_prefix; + + template + struct compare_prefix> + { + typedef std::basic_string K; + + typedef C delimiter_type; + typedef typename K::size_type size_type; + typedef typename K::traits_type traits_type; + + explicit + compare_prefix (delimiter_type d): d_ (d) {} + + bool + operator() (const K& x, const K& y) const + { + return compare (x.c_str (), x.size (), y.c_str (), y.size ()) < 0; + } + + bool + prefix (const K& p, const K& k) const + { + size_type pn (p.size ()), kn (k.size ()); + return pn == 0 || // Empty key is always a prefix. + (pn <= kn && + compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); + } + + // If the key is not empty, convert the key to its prefix and return + // true. Return false otherwise. + // + bool + prefix (K& k) const + { + if (k.empty ()) + return false; + + size_type p (k.rfind (d_)); + k.resize (p != K::npos ? 
p : 0); + return true; + } + + protected: + int + compare (const C* x, size_type xn, + const C* y, size_type yn) const + { + size_type n (std::min (xn, yn)); + int r (traits_type::compare (x, y, n)); + + if (r == 0) + { + // Pretend there is the delimiter characters at the end of the + // shorter string. + // + char xc (xn > n ? x[n] : (xn++, d_)); + char yc (yn > n ? y[n] : (yn++, d_)); + r = traits_type::compare (&xc, &yc, 1); + + // If we are still equal, then compare the lengths. + // + if (r == 0) + r = (xn == yn ? 0 : (xn < yn ? -1 : 1)); + } + + return r; + } + + private: + delimiter_type d_; + }; + + template + struct prefix_map_common: M + { + typedef M map_type; + typedef typename map_type::key_type key_type; + typedef typename map_type::value_type value_type; + typedef typename map_type::key_compare compare_type; + typedef typename compare_type::delimiter_type delimiter_type; + + typedef typename map_type::iterator iterator; + typedef typename map_type::const_iterator const_iterator; + + explicit + prefix_map_common (delimiter_type d) + : map_type (compare_type (d)) {} + + prefix_map_common (std::initializer_list i, delimiter_type d) + : map_type (std::move (i), compare_type (d)) {} + + // Find all the entries that are sub-prefixes of the specified prefix. + // + std::pair + find_sub (const key_type&); + + std::pair + find_sub (const key_type&) const; + + // Find the most qualified entry that is a super-prefix of the specified + // prefix. + // + iterator + find_sup (const key_type&); + + const_iterator + find_sup (const key_type&) const; + + + // As above but additionally evaluate a predicate on each matching entry + // returning the one for which it returns true. 
+ // + template + iterator + find_sup_if (const key_type&, P); + + template + const_iterator + find_sup_if (const key_type&, P) const; + }; + + template ::delimiter_type D> + struct prefix_map_impl: prefix_map_common + { + typedef typename prefix_map_common::value_type value_type; + + prefix_map_impl (): prefix_map_common (D) {} + prefix_map_impl (std::initializer_list i) + : prefix_map_common (std::move (i), D) {} + }; + + template ::delimiter_type D> + using prefix_map = prefix_map_impl>, D>; + + template ::delimiter_type D> + using prefix_multimap = + prefix_map_impl>, D>; +} + +#include diff --git a/libbutl/prefix-map.mxx b/libbutl/prefix-map.mxx deleted file mode 100644 index 634b8da..0000000 --- a/libbutl/prefix-map.mxx +++ /dev/null @@ -1,188 +0,0 @@ -// file : libbutl/prefix-map.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // move() -#include // min() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.prefix_map; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // A map of hierarchical "paths", e.g., 'foo.bar' or 'foo/bar' with the - // ability to retrieve a range of entries that have a specific prefix as - // well as finding the most qualified entry for specific path. The '.' and - // '/' above are the delimiter characters. - // - // Note that as a special rule, the default implementation of compare_prefix - // treats empty key as everyone's prefix even if the paths don't start with - // the delimiter (useful to represent a "root path"). - // - // Implementation-wise, the idea is to pretend that each key ends with the - // delimiter. This way we automatically avoid matching 'foobar' as having a - // prefix 'foo'. 
- // - template - struct compare_prefix; - - template - struct compare_prefix> - { - typedef std::basic_string K; - - typedef C delimiter_type; - typedef typename K::size_type size_type; - typedef typename K::traits_type traits_type; - - explicit - compare_prefix (delimiter_type d): d_ (d) {} - - bool - operator() (const K& x, const K& y) const - { - return compare (x.c_str (), x.size (), y.c_str (), y.size ()) < 0; - } - - bool - prefix (const K& p, const K& k) const - { - size_type pn (p.size ()), kn (k.size ()); - return pn == 0 || // Empty key is always a prefix. - (pn <= kn && - compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); - } - - // If the key is not empty, convert the key to its prefix and return - // true. Return false otherwise. - // - bool - prefix (K& k) const - { - if (k.empty ()) - return false; - - size_type p (k.rfind (d_)); - k.resize (p != K::npos ? p : 0); - return true; - } - - protected: - int - compare (const C* x, size_type xn, - const C* y, size_type yn) const - { - size_type n (std::min (xn, yn)); - int r (traits_type::compare (x, y, n)); - - if (r == 0) - { - // Pretend there is the delimiter characters at the end of the - // shorter string. - // - char xc (xn > n ? x[n] : (xn++, d_)); - char yc (yn > n ? y[n] : (yn++, d_)); - r = traits_type::compare (&xc, &yc, 1); - - // If we are still equal, then compare the lengths. - // - if (r == 0) - r = (xn == yn ? 0 : (xn < yn ? 
-1 : 1)); - } - - return r; - } - - private: - delimiter_type d_; - }; - - template - struct prefix_map_common: M - { - typedef M map_type; - typedef typename map_type::key_type key_type; - typedef typename map_type::value_type value_type; - typedef typename map_type::key_compare compare_type; - typedef typename compare_type::delimiter_type delimiter_type; - - typedef typename map_type::iterator iterator; - typedef typename map_type::const_iterator const_iterator; - - explicit - prefix_map_common (delimiter_type d) - : map_type (compare_type (d)) {} - - prefix_map_common (std::initializer_list i, delimiter_type d) - : map_type (std::move (i), compare_type (d)) {} - - // Find all the entries that are sub-prefixes of the specified prefix. - // - std::pair - find_sub (const key_type&); - - std::pair - find_sub (const key_type&) const; - - // Find the most qualified entry that is a super-prefix of the specified - // prefix. - // - iterator - find_sup (const key_type&); - - const_iterator - find_sup (const key_type&) const; - - - // As above but additionally evaluate a predicate on each matching entry - // returning the one for which it returns true. 
- // - template - iterator - find_sup_if (const key_type&, P); - - template - const_iterator - find_sup_if (const key_type&, P) const; - }; - - template ::delimiter_type D> - struct prefix_map_impl: prefix_map_common - { - typedef typename prefix_map_common::value_type value_type; - - prefix_map_impl (): prefix_map_common (D) {} - prefix_map_impl (std::initializer_list i) - : prefix_map_common (std::move (i), D) {} - }; - - template ::delimiter_type D> - using prefix_map = prefix_map_impl>, D>; - - template ::delimiter_type D> - using prefix_multimap = - prefix_map_impl>, D>; -} - -#include diff --git a/libbutl/prefix-map.txx b/libbutl/prefix-map.txx index edab8e1..502119a 100644 --- a/libbutl/prefix-map.txx +++ b/libbutl/prefix-map.txx @@ -1,7 +1,7 @@ // file : libbutl/prefix-map.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +namespace butl { template auto prefix_map_common:: diff --git a/libbutl/process-details.hxx b/libbutl/process-details.hxx index cf7624d..571c750 100644 --- a/libbutl/process-details.hxx +++ b/libbutl/process-details.hxx @@ -5,15 +5,10 @@ #include -#ifdef __cpp_lib_modules_ts -import std.core; //@@ MOD TMP (dummy std.threading). -import std.threading; -#else #include #if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex) # include #endif -#endif namespace butl { diff --git a/libbutl/process-io.cxx b/libbutl/process-io.cxx index c29bbc0..0be3a77 100644 --- a/libbutl/process-io.cxx +++ b/libbutl/process-io.cxx @@ -1,36 +1,11 @@ // file : libbutl/process-io.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include +#include #include // strchr() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.process_io; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.process; -#endif -import butl.path-io; -#else -#include -#endif +#include using namespace std; diff --git a/libbutl/process-io.hxx b/libbutl/process-io.hxx new file mode 100644 index 0000000..29d6d8b --- /dev/null +++ b/libbutl/process-io.hxx @@ -0,0 +1,50 @@ +// file : libbutl/process-io.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include + +#include + +#include + +namespace butl +{ + inline std::ostream& + operator<< (std::ostream& o, const process_path& p) + { + return o << p.recall_string (); + } + + inline std::ostream& + operator<< (std::ostream& o, const process_args& a) + { + process::print (o, a.argv, a.argc); + return o; + } + + // Print the environment variables and the current working directory (if + // specified) in a POSIX shell command line notation. The process path + // itself is not printed. For example: + // + // LC_ALL=C + // + // If an environment variable is in the `name` rather than in the + // `name=value` form, then it is considered unset. Since there is no POSIX + // way to unset a variable on the command line, this information is printed + // as `name=` (ambiguous with assigning an empty value but the two cases are + // normally handled in the same way). For example: + // + // PATH= LC_ALL=C + // + // Note that since there is no POSIX way to change the current working + // directory of a command to be executed, this information is printed in a + // pseudo-notation by assigning to PWD (which, according to POSIX, would result + // in the undefined behavior of the pwd utility).
For example: + // + // PWD=/tmp LC_ALL=C + // + LIBBUTL_SYMEXPORT std::ostream& + operator<< (std::ostream&, const process_env&); +} diff --git a/libbutl/process-io.mxx b/libbutl/process-io.mxx deleted file mode 100644 index d07a212..0000000 --- a/libbutl/process-io.mxx +++ /dev/null @@ -1,67 +0,0 @@ -// file : libbutl/process-io.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.process_io; -#ifdef __cpp_lib_modules_ts -import std.core; //@@ MOD TMP (should not be needed). -import std.io; -#endif -import butl.process; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - inline std::ostream& - operator<< (std::ostream& o, const process_path& p) - { - return o << p.recall_string (); - } - - inline std::ostream& - operator<< (std::ostream& o, const process_args& a) - { - process::print (o, a.argv, a.argc); - return o; - } - - // Print the environment variables and the current working directory (if - // specified) in a POSIX shell command line notation. The process path - // itself is not printed. For example: - // - // LC_ALL=C - // - // If an environment variable is in the `name` rather than in the - // `name=value` form, then it is considered unset. Since there is no POSIX - // way to unset a variable on the command line, this information is printed - // as `name=` (ambiguous with assigning an empty value but the two cases are - // normally handled in the same way). For example: - // - // PATH= LC_ALL=C - // - // Note that since there is no POSIX way to change the current working - // directory of a command to be executed, this information is printed in a - // pseudo-notation by assigning to PWD (which, according POSIX, would result - // in the undefined behavior of the cwd utility). 
For example: - // - // PWD=/tmp LC_ALL=C - // - LIBBUTL_SYMEXPORT std::ostream& - operator<< (std::ostream&, const process_env&); -} diff --git a/libbutl/process-run.cxx b/libbutl/process-run.cxx index c26c20d..a5014f6 100644 --- a/libbutl/process-run.cxx +++ b/libbutl/process-run.cxx @@ -1,35 +1,12 @@ // file : libbutl/process-run.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include -// C includes. - -#ifndef __cpp_lib_modules_ts #include // exit() #include // cerr -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.process; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -#endif -import butl.utility; // operator<<(ostream,exception) -#else -#include -#endif +#include // operator<<(ostream,exception) using namespace std; diff --git a/libbutl/process-run.txx b/libbutl/process-run.txx index aa1e381..8e6ca57 100644 --- a/libbutl/process-run.txx +++ b/libbutl/process-run.txx @@ -1,7 +1,9 @@ // file : libbutl/process-run.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
+#include // forward(), index_sequence + +namespace butl { template void process_env:: diff --git a/libbutl/process.cxx b/libbutl/process.cxx index f6433fb..92a70c4 100644 --- a/libbutl/process.cxx +++ b/libbutl/process.cxx @@ -1,9 +1,7 @@ // file : libbutl/process.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include @@ -87,29 +85,19 @@ # endif // _MSC_VER #endif -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include -#include - #include // ios_base::failure #include // strlen(), strchr(), strpbrk(), strncmp() #include // move() #include +#include #ifndef _WIN32 -#include // this_thread::sleep_for() +# include // this_thread::sleep_for() #else -#include -#include // milli -#include // __argv[] -#include // find() -#endif +# include +# include // milli +# include // __argv[] +# include // find() #endif #include @@ -119,32 +107,8 @@ namespace butl shared_mutex process_spawn_mutex; // Out of module purview. } -#ifdef __cpp_modules_ts -module butl.process; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.threading; // Clang wants it in purview (see process-details.hxx). -#endif -import butl.path; -import butl.fdstream; -import butl.vector_view; -import butl.small_vector; -#endif - -#ifndef _WIN32 -import std.threading; -#endif - -import butl.utility; // icasecmp() -import butl.fdstream; // fdopen_null() -#else -#include -#include -#endif +#include // icasecmp() +#include // fdopen_null() using namespace std; @@ -1898,7 +1862,7 @@ namespace butl return PeekNamedPipe (h, &c, 1, &n, nullptr, nullptr) && n == 1; }; - // Hidden by butl::duration that is introduced via fdstream.mxx. + // Hidden by butl::duration that is introduced via fdstream.hxx. 
// using milli_duration = chrono::duration; diff --git a/libbutl/process.hxx b/libbutl/process.hxx new file mode 100644 index 0000000..47cc507 --- /dev/null +++ b/libbutl/process.hxx @@ -0,0 +1,832 @@ +// file : libbutl/process.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#ifndef _WIN32 +# include // pid_t +#endif + +#include +#include +#include +#include +#include // size_t +#include // uint32_t +#include + +#include +#include +#include // auto_fd, fdpipe +#include +#include + +#include + +namespace butl +{ + struct process_error: std::system_error + { + const bool child; + + process_error (int e, bool child = false) + : system_error (e, std::generic_category ()), child (child) {} + +#ifdef _WIN32 + process_error (const std::string& d, int fallback_errno_code = 0) + : system_error (fallback_errno_code, std::system_category (), d), + child (false) {} +#endif + }; + + struct process_child_error: process_error + { + explicit + process_child_error (int e): process_error (e, true) {} + }; + + // Process arguments (i.e., the command line). The first must be an + // executable name and the last element should be NULL. Can also be the + // multi-process piped command line (see process::print() for details). + // + struct process_args + { + const char* const* argv; + std::size_t argc; + }; + + // A process executable has three paths: initial, recall, and effective. + // Initial is the original "command" that you specify in argv[0] and on + // POSIX that's what ends up in the child's argv[0]. But not on Windows. On + // Windows the command is first searched for in the parent executable's + // directory and if found then that's what should end up in child's argv[0]. + // So this is the recall path. It is called recall because this is what the + // caller of the parent process will be able to execute if you printed the + // command line (provided you haven't changed the CWD). 
Finally, effective + // is the absolute path to the executable that will include the directory + // part if found in PATH, the .exe extension if one is missing, etc. + // + // As an example, let's say we run foo\foo.exe that itself spawns bar which + // is found as foo\bar.exe. The paths will then be: + // + // initial: bar + // recall: foo\bar + // effective: c:\...\foo\bar.exe + // + // In most cases, at least on POSIX, the first two paths will be the same. + // As an optimization, if the recall path is empty, then it means it is the + // same as initial. Similarly, if the effective path is empty, then it is + // the same as recall (and if that is empty, as initial). + // + // Note that the call to path_search() below adjusts args[0] to point to the + // recall path which brings up lifetime issues. To address this, this class + // also implements an optional RAII-based auto-restore of args[0] to its + // initial value. + // + class process_path + { + public: + const char* initial = nullptr; + path recall; + path effect; + + // Handle empty recall/effect. + // + const char* recall_string () const; + const char* effect_string () const; + + bool + empty () const + { + return (initial == nullptr || *initial == '\0') && + recall.empty () && effect.empty (); + } + + // Clear recall making it the same as effective. + // + void + clear_recall (); + + // Make all three paths the same. + // + explicit + process_path (path effect); + + process_path (const char* initial, path&& recall, path&& effect); + process_path () = default; + + // Moveable-only type. + // + process_path (process_path&&); + process_path& operator= (process_path&&); + + process_path (const process_path&) = delete; + process_path& operator= (const process_path&) = delete; + + ~process_path (); + + // Manual copying. Should not use args[0] RAII. See path_search() for the + // init semantics.
+ // + process_path (const process_path&, bool init); + + private: + friend class process; + const char** args0_ = nullptr; + }; + + // Process exit information. + // + struct LIBBUTL_SYMEXPORT process_exit + { + // Status type is the raw exit value as returned by GetExitCodeProcess() + // (NTSTATUS value that represents exit or error codes; MSDN refers to the + // error code as "value of the exception that caused the termination") or + // waitpid(1). Code type is the return value if the process exited + // normally. + // +#ifndef _WIN32 + using status_type = int; + using code_type = std::uint8_t; +#else + using status_type = std::uint32_t; // Win32 DWORD + using code_type = std::uint16_t; // Win32 WORD +#endif + + status_type status; + + process_exit () = default; + + explicit + process_exit (code_type); + + enum as_status_type {as_status}; + process_exit (status_type s, as_status_type): status (s) {} + + // Return false if the process exited abnormally. + // + bool + normal () const; + + // C/C++ don't apply constraints on program exit code other than it being + // of type int. + // + // POSIX specifies that only the least significant 8 bits shall be + // available from wait() and waitpid(); the full value shall be available + // from waitid() (read more at _Exit, _exit Open Group spec). + // + // While the Linux man page for waitid() doesn't mention any deviations + // from the standard, the FreeBSD implementation (as of version 11.0) only + // returns 8 bits like the other wait*() calls. + // + // Windows supports 32-bit exit codes. + // + // Note that in shells some exit values can have special meaning so using + // them can be a source of confusion. For bash, values in the [126, 255] + // range are such special ones (see Appendix E, "Exit Codes With Special + // Meanings" in the Advanced Bash-Scripting Guide). + // + // So [0, 125] appears to be the usable exit code range.
+ // + code_type + code () const; + + explicit operator bool () const {return normal () && code () == 0;} + + // Abnormal termination information. + // + + // Return the signal number that caused the termination or 0 if no such + // information is available. + // + int + signal () const; + + // Return true if the core file was generated. + // + bool + core () const; + + // Return a description of the reason that caused the process to terminate + // abnormally. On POSIX this is the signal name, on Windows -- the summary + // produced from the corresponding error identifier defined in ntstatus.h. + // + std::string + description () const; + }; + + // Canonical exit status description: + // + // "terminated abnormally: <...> (core dumped)" + // "exited with code <...>" + // + // So you would normally do: + // + // cerr << "process " << args[0] << " " << *pr.exit << endl; + // + LIBBUTL_SYMEXPORT std::string + to_string (process_exit); + + inline std::ostream& + operator<< (std::ostream& os, process_exit pe) + { + return os << to_string (pe); + } + + class LIBBUTL_SYMEXPORT process + { + public: +#ifndef _WIN32 + using handle_type = pid_t; + using id_type = pid_t; +#else + using handle_type = void*; // Win32 HANDLE + using id_type = std::uint32_t; // Win32 DWORD +#endif + + // Start another process using the specified command line. The default + // values to the in, out and err arguments indicate that the child process + // should inherit the parent process stdin, stdout, and stderr, + // respectively. If -1 is passed instead, then the corresponding child + // process descriptor is connected (via a pipe) to out_fd for stdin, + // in_ofd for stdout, and in_efd for stderr (see data members below). If + // -2 is passed, then the corresponding child process descriptor is + // replaced with the null device descriptor (e.g., /dev/null). 
This + // results in the child process not being able to read anything from stdin + // (gets immediate EOF) and all data written to stdout/stderr being + // discarded. + // + // On Windows parent process pipe descriptors are set to text mode to be + // consistent with the default (text) mode of standard file descriptors of + // the child process. When reading in the text mode the sequence of 0xD, + // 0xA characters is translated into the single 0xA character and 0x1A is + // interpreted as EOF. When writing in the text mode the 0xA character is + // translated into the 0xD, 0xA sequence. Use the fdmode() function to + // change the mode, if required. + // + // Instead of passing -1, -2 or the default value, you can also pass your + // own descriptors. Note, however, that in this case they are not closed by + // the parent. So you should do this yourself, if required. For example, + // to redirect the child process stdout to stderr, you can do: + // + // process p (..., 0, 2); + // + // The cwd argument allows to change the current working directory of the + // child process. NULL and empty arguments are ignored. + // + // The envvars argument allows to set and unset environment variables in + // the child process. If not NULL, it must contain strings in the + // "name=value" (set) or "name" (unset) forms and be terminated with + // NULL. Note that all other variables are inherited from the parent + // process. + // + // Throw process_error if anything goes wrong. Note that some of the + // exceptions (e.g., if exec() failed) can be thrown in the child + // version of us (as process_child_error). + // + // Note that the versions without the process_path argument may + // temporarily change args[0] (see path_search() for details).
+ // + process (const char* [], + int in = 0, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* [], + int in = 0, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + // If the descriptors are pipes that you have created, then you should use + // this constructor instead to communicate this information. + // + // For generality, if the "other" end of the pipe is -1, then assume this + // is not a pipe. + // + struct pipe + { + int in = -1; + int out = -1; + + pipe () = default; + pipe (int i, int o): in (i), out (o) {} + + explicit + pipe (const fdpipe& p): in (p.in.get ()), out (p.out.get ()) {} + }; + + process (const process_path&, const char* [], + pipe in, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + // The "piping" constructor, for example: + // + // process lhs (..., 0, -1); // Redirect stdout to a pipe. + // process rhs (..., lhs); // Redirect stdin to lhs's pipe. + // + // rhs.wait (); // Wait for last first. + // lhs.wait (); + // + process (const char* [], + process&, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* [], + process&, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + // Wait for the process to terminate. Return true if the process + // terminated normally and with the zero exit code. Unless ignore_errors + // is true, throw process_error if anything goes wrong. This function can + // be called multiple times with subsequent calls simply returning the + // status. + // + bool + wait (bool ignore_errors = false); + + // Return the same result as wait() if the process has already terminated + // and nullopt otherwise.
+ // + optional + try_wait (); + + // Wait for the process to terminate for up to the specified time + // duration. Return the same result as wait() if the process has + // terminated in this timeframe and nullopt otherwise. + // + template + optional + timed_wait (const std::chrono::duration&); + + // Note that the destructor will wait for the process but will ignore + // any errors and the exit status. + // + ~process () {if (handle != 0) wait (true);} + + // Process termination. + // + + // Send SIGKILL to the process on POSIX and call TerminateProcess() with + // DBG_TERMINATE_PROCESS exit code on Windows. Noop for an already + // terminated process. + // + // Note that if the process is killed, it terminates as if it has called + // abort() (functions registered with atexit() are not called, etc). + // + // Also note that on Windows calling this function for a terminating + // process results in the EPERM process_error exception. + // + void + kill (); + + // Send SIGTERM to the process on POSIX and call kill() on Windows (where + // there is no general way to terminate a console process gracefully). + // Noop for an already terminated process. + // + void + term (); + + // Moveable-only type. + // + process (process&&); + process& operator= (process&&); + + process (const process&) = delete; + process& operator= (const process&) = delete; + + // Create an empty or "already terminated" process. By default the + // termination status is unknown but you can change that. + // + explicit + process (optional = nullopt); + + // Resolve process' paths based on the initial path in args0. If recall + // differs from initial, adjust args0 to point to the recall path. If + // resolution fails, throw process_error. Normally, you will use this + // function like this: + // + // const char* args[] = {"foo", ..., nullptr}; + // + // process_path pp (process::path_search (args[0])) + // + // ... // E.g., print args[0]. 
+ // + // process p (pp, args); + // + // You can also specify the fallback directory which will be tried last. + // This, for example, can be used to implement the Windows "search in the + // parent executable's directory" semantics across platforms. + // + // If path_only is true then only search in the PATH environment variable + // (or in CWD if there is a directory component) ignoring other places + // (like calling process' directory and, gasp, CWD on Windows). + // + // If the paths argument is not NULL, search in this list of paths rather + // than in the PATH environment variable. Note that in this case you may + // want to clear the recall path (process_path::clear_recall()) since the + // path won't be "recallable" (unless you've passed a cache of the PATH + // environment variable or some such). + // + static process_path + path_search (const char*& args0, + const dir_path& fallback = dir_path (), + bool path_only = false, + const char* paths = nullptr); + + // This version is primarily useful when you want to pre-search the + // executable before creating the args[] array. In this case you will + // use the recall path for args[0]. + // + // The init argument determines whether to initialize the initial path to + // the shallow copy of file. If it is true, then initial is the same as + // file and recall is either empty or contain a different path. If it is + // false then initial contains a shallow copy of recall, and recall is + // either a different path or a deep copy of file. Normally you don't care + // about initial once you got recall and the main reason to pass true to + // this argument is to save a copy (since initial and recall are usually + // the same). 
+ // + static process_path + path_search (const char* file, bool init, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + static process_path + path_search (const std::string&, bool, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + static process_path + path_search (const path&, bool, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + // As above but if not found return empty process_path instead of + // throwing. + // + static process_path + try_path_search (const char*, bool, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + static process_path + try_path_search (const std::string&, bool, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + static process_path + try_path_search (const path&, bool, + const dir_path& = dir_path (), + bool = false, + const char* = nullptr); + + // Print process command line. If the number of elements is specified, + // then it will print the piped multi-process command line, if present. + // In this case, the expected format is as follows: + // + // name1 arg arg ... nullptr + // name2 arg arg ... nullptr + // ... + // nameN arg arg ... nullptr nullptr + // + static void + print (std::ostream&, const char* const args[], size_t n = 0); + + // Quote and escape the specified command line argument. If batch is true + // then also quote the equal (`=`), comma (`,`) and semicolon (`;`) + // characters which are treated as argument separators in batch files. + // Return the original string if neither is necessary and a pointer to the + // provided buffer string containing the escaped version otherwise. + // +#ifdef _WIN32 + static const char* + quote_argument (const char*, std::string& buffer, bool batch); +#endif + + public: + id_type + id () const; + + static id_type + current_id (); + + public: + handle_type handle; + + // Absence means that the exit information is not (yet) known.
This can be + // because you haven't called wait() yet or because wait() failed. + // + optional exit; + + // Use the following file descriptors to communicate with the new process's + // standard streams. + // + auto_fd out_fd; // Write to it to send to stdin. + auto_fd in_ofd; // Read from it to receive from stdout. + auto_fd in_efd; // Read from it to receive from stderr. + }; + + // Higher-level process running interface that aims to make executing a + // process for the common cases as simple as calling a function. Normally + // it is further simplified by project-specific wrapper functions that + // handle the process_error exception as well as abnormal and/or non-zero + // exit status. + // + // The I/O/E arguments determine the child's stdin/stdout/stderr. They can + // be of type int, auto_fd, fdpipe and process::pipe (and, in the future, + // perhaps also string, buffer, etc). For example, the following call will + // make stdin read from /dev/null, stdout redirect to stderr, and inherit + // the parent's stderr. + // + // process_run (fdopen_null (), 2, 2, ...) + // + // The P argument is the program path. It can be anything that can be passed + // to process::path_search() (const char*, std::string, path) or the + // process_path itself. + // + // The A arguments can be anything convertible to const char* via the + // overloaded process_arg_as() (see below). Out of the box you can use const + // char* (with NULL values ignored), std::string, path/dir_path, (as well as + // [small_]vector[_view] of these), numeric types, as well as optional of + // all the above with absent arguments ignored. + // + struct process_env + { + const process_path* path; + const dir_path* cwd = nullptr; + const char* const* vars = nullptr; + + // Return true if there is an "environment", that is, either the current + // working directory or environment variables.
+ // + bool + env () const + { + return (cwd != nullptr && !cwd->empty ()) || + (vars != nullptr && *vars != nullptr); + } + + process_env (): path (nullptr) {} + + process_env (const process_path& p, + const dir_path& c = dir_path (), + const char* const* v = nullptr) + : path (&p), + + // Note that this is not just an optimization. It is required when + // the ctor is called with the default arguments (not to keep the + // temporary object pointer). + // + cwd (!c.empty () ? &c : nullptr), + + vars (v) {} + + process_env (const process_path& p, const char* const* v) + : path (&p), cwd (nullptr), vars (v) {} + + template + process_env (const process_path& p, const dir_path& c, const V& v) + : process_env (p, v) {cwd = &c;} + + template + process_env (const process_path& p, const V& v) + : process_env (p) {init_vars (v);} + + process_env (const char* p, + const dir_path& c = dir_path (), + const char* const* v = nullptr) + : process_env (path_, c, v) {path_ = process::path_search (p, true);} + + process_env (const std::string& p, + const dir_path& c = dir_path (), + const char* const* v = nullptr) + : process_env (p.c_str (), c, v) {} + + process_env (const butl::path& p, + const dir_path& c = dir_path (), + const char* const* v = nullptr) + : process_env (p.string (), c, v) {} + + template + process_env (const char* p, const dir_path& c, const V& v) + : process_env (path_, c, v) {path_ = process::path_search (p, true);} + + template + process_env (const std::string& p, const dir_path& c, const V& v) + : process_env (p.c_str (), c, v) {} + + template + process_env (const butl::path& p, const dir_path& c, const V& v) + : process_env (p.string (), c, v) {} + + template + process_env (const char* p, const V& v) + : process_env (path_, v) {path_ = process::path_search (p, true);} + + template + process_env (const std::string& p, const V& v) + : process_env (p.c_str (), v) {} + + template + process_env (const butl::path& p, const V& v) + : process_env (p.string (), v) 
{} + + // Moveable-only type. + // + process_env (process_env&&); + process_env& operator= (process_env&&); + + process_env (const process_env&) = delete; + process_env& operator= (const process_env&) = delete; + + private: + template + void + init_vars (const V&); + + template + void + init_vars (const char* const (&v)[N]) + { + vars = v; + } + + process_path path_; + small_vector vars_; + }; + + template + process_exit + process_run (I&& in, + O&& out, + E&& err, + const process_env&, + A&&... args); + + // The version with the command callback that can be used for printing the + // command line or similar. It should be callable with the following + // signature: + // + // void (const char*[], std::size_t) + // + template + process_exit + process_run_callback (const C&, + I&& in, + O&& out, + E&& err, + const process_env&, + A&&... args); + + // Versions that start the process without waiting. + // + template + process + process_start (I&& in, + O&& out, + E&& err, + const process_env&, + A&&... args); + + template + process + process_start_callback (const C&, + I&& in, + O&& out, + E&& err, + const process_env&, + A&&... args); + + // Conversion of types to their C string representations. Can be overloaded + // (including via ADL) for custom types. The default implementation calls + // to_string() which covers all the numeric values via std::to_string () and + // also any type that defines to_string() (via ADL). 
+ // + template + inline const char* + process_arg_as (const T& x, std::string& storage) + { + return (storage = std::to_string (x)).c_str (); + } + + inline const char* + process_arg_as (const std::string& s, std::string&) {return s.c_str ();} + + template + inline const char* + process_arg_as (const basic_path& p, std::string&) + { + return p.string ().c_str (); + } + + // char[N] + // + template + inline void + process_args_as (V& v, const char* s, std::string&) + { + if (s != nullptr) + v.push_back (s); + } + + template + inline const char* + process_arg_as (char (&s)[N], std::string&) {return s;} + + template + inline const char* + process_arg_as (const char (&s)[N], std::string&) {return s;} + + template + inline void + process_args_as (V& v, const T& x, std::string& storage) + { + v.push_back (process_arg_as (x, storage)); + } + + template + inline void + process_args_as (V& v, const optional& x, std::string& storage) + { + if (x) + process_args_as (v, *x, storage); + } + + // [small_]vector[_view]<> + // + template + inline void + process_args_as (V& v, const std::vector& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const small_vector& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const vector_view& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const std::vector& vs, std::string&) + { + for (const char* s: vs) + if (s != nullptr) + v.push_back (s); + } + + template + inline void + process_args_as (V& v, const small_vector& vs, std::string&) + { + for (const char* s: vs) + if (s != nullptr) + v.push_back (s); + } + + template + inline void + process_args_as (V& v, const vector_view& vs, std::string&) + { + for (const char* s: vs) + if (s != nullptr) + v.push_back (s); + } +} + 
+#include +#include diff --git a/libbutl/process.ixx b/libbutl/process.ixx index 7676ce3..256454b 100644 --- a/libbutl/process.ixx +++ b/libbutl/process.ixx @@ -1,6 +1,9 @@ // file : libbutl/process.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include +#include // move() + namespace butl { // process_path diff --git a/libbutl/process.mxx b/libbutl/process.mxx deleted file mode 100644 index 0677525..0000000 --- a/libbutl/process.mxx +++ /dev/null @@ -1,855 +0,0 @@ -// file : libbutl/process.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef _WIN32 -# include // pid_t -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include -#include // size_t -#include // uint32_t -#include - -#include // move(), forward(), index_sequence -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.process; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.optional; -import butl.fdstream; // auto_fd, fdpipe -import butl.vector_view; -import butl.small_vector; -#else -#include -#include -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - struct process_error: std::system_error - { - const bool child; - - process_error (int e, bool child = false) - : system_error (e, std::generic_category ()), child (child) {} - -#ifdef _WIN32 - process_error (const std::string& d, int fallback_errno_code = 0) - : system_error (fallback_errno_code, std::system_category (), d), - child (false) {} -#endif - }; - - struct process_child_error: process_error - { - explicit - process_child_error (int e): process_error (e, true) {} - }; - - // Process arguments (i.e., the command line). The first must be an - // executable name and the last element should be NULL. Can also be the - // multi-process piped command line (see process::print() for details). 
- // - struct process_args - { - const char* const* argv; - std::size_t argc; - }; - - // A process executable has three paths: initial, recall, and effective. - // Initial is the original "command" that you specify in argv[0] and on - // POSIX that's what ends up in the child's argv[0]. But not on Windows. On - // Windows the command is first searched for in the parent executable's - // directory and if found then that's what should end up in child's argv[0]. - // So this is the recall path. It is called recall because this is what the - // caller of the parent process will be able to execute if you printed the - // command line (provided you haven't changed the CWD). Finally, effective - // is the absolute path to the executable that will include the directory - // part if found in PATH, the .exe extension if one is missing, etc. - // - // As an example, let's say we run foo\foo.exe that itself spawns bar which - // is found as foo\bar.exe. The paths will then be: - // - // initial: bar - // recall: foo\bar - // effective: c:\...\foo\bar.exe - // - // In most cases, at least on POSIX, the first two paths will be the same. - // As an optimization, if the recall path is empty, then it means it is the - // same as initial. Similarly, if the effective path is empty then, it is - // the same as recall (and if that is empty, as initial). - // - // Note that the call to path_search() below adjust args[0] to point to the - // recall path which brings up lifetime issues. To address this this class - // also implements an optional RAII-based auto-restore of args[0] to its - // initial value. - // - class process_path - { - public: - const char* initial = nullptr; - path recall; - path effect; - - // Handle empty recall/effect. 
- // - const char* recall_string () const; - const char* effect_string () const; - - bool - empty () const - { - return (initial == nullptr || *initial == '\0') && - recall.empty () && effect.empty (); - } - - // Clear recall making it the same as effective. - // - void - clear_recall (); - - // Make all three paths the same. - // - explicit - process_path (path effect); - - process_path (const char* initial, path&& recall, path&& effect); - process_path () = default; - - // Moveable-only type. - // - process_path (process_path&&); - process_path& operator= (process_path&&); - - process_path (const process_path&) = delete; - process_path& operator= (const process_path&) = delete; - - ~process_path (); - - // Manual copying. Should not use args[0] RAII. See path_search() for the - // init semantics. - // - process_path (const process_path&, bool init); - - private: - friend class process; - const char** args0_ = nullptr; - }; - - // Process exit information. - // - struct LIBBUTL_SYMEXPORT process_exit - { - // Status type is the raw exit value as returned by GetExitCodeProcess() - // (NTSTATUS value that represents exit or error codes; MSDN refers to the - // error code as "value of the exception that caused the termination") or - // waitpid(1). Code type is the return value if the process exited - // normally. - // -#ifndef _WIN32 - using status_type = int; - using code_type = std::uint8_t; -#else - using status_type = std::uint32_t; // Win32 DWORD - using code_type = std::uint16_t; // Win32 WORD -#endif - - status_type status; - - process_exit () = default; - - explicit - process_exit (code_type); - - enum as_status_type {as_status}; - process_exit (status_type s, as_status_type): status (s) {} - - // Return false if the process exited abnormally. - // - bool - normal () const; - - // C/C++ don't apply constraints on program exit code other than it being - // of type int. 
- // - // POSIX specifies that only the least significant 8 bits shall be - // available from wait() and waitpid(); the full value shall be available - // from waitid() (read more at _Exit, _exit Open Group spec). - // - // While the Linux man page for waitid() doesn't mention any deviations - // from the standard, the FreeBSD implementation (as of version 11.0) only - // returns 8 bits like the other wait*() calls. - // - // Windows supports 32-bit exit codes. - // - // Note that in shells some exit values can have special meaning so using - // them can be a source of confusion. For bash values in the [126, 255] - // range are such a special ones (see Appendix E, "Exit Codes With Special - // Meanings" in the Advanced Bash-Scripting Guide). - // - // So [0, 125] appears to be the usable exit code range. - // - code_type - code () const; - - explicit operator bool () const {return normal () && code () == 0;} - - // Abnormal termination information. - // - - // Return the signal number that caused the termination or 0 if no such - // information is available. - // - int - signal () const; - - // Return true if the core file was generated. - // - bool - core () const; - - // Return a description of the reason that caused the process to terminate - // abnormally. On POSIX this is the signal name, on Windows -- the summary - // produced from the corresponding error identifier defined in ntstatus.h. 
- // - std::string - description () const; - }; - - // Canonical exit status description: - // - // "terminated abnormally: <...> (core dumped)" - // "exited with code <...>" - // - // So you would normally do: - // - // cerr << "process " << args[0] << " " << *pr.exit << endl; - // - LIBBUTL_SYMEXPORT std::string - to_string (process_exit); - - inline std::ostream& - operator<< (std::ostream& os, process_exit pe) - { - return os << to_string (pe); - } - - class LIBBUTL_SYMEXPORT process - { - public: -#ifndef _WIN32 - using handle_type = pid_t; - using id_type = pid_t; -#else - using handle_type = void*; // Win32 HANDLE - using id_type = std::uint32_t; // Win32 DWORD -#endif - - // Start another process using the specified command line. The default - // values to the in, out and err arguments indicate that the child process - // should inherit the parent process stdin, stdout, and stderr, - // respectively. If -1 is passed instead, then the corresponding child - // process descriptor is connected (via a pipe) to out_fd for stdin, - // in_ofd for stdout, and in_efd for stderr (see data members below). If - // -2 is passed, then the corresponding child process descriptor is - // replaced with the null device descriptor (e.g., /dev/null). This - // results in the child process not being able to read anything from stdin - // (gets immediate EOF) and all data written to stdout/stderr being - // discarded. - // - // On Windows parent process pipe descriptors are set to text mode to be - // consistent with the default (text) mode of standard file descriptors of - // the child process. When reading in the text mode the sequence of 0xD, - // 0xA characters is translated into the single OxA character and 0x1A is - // interpreted as EOF. When writing in the text mode the OxA character is - // translated into the 0xD, 0xA sequence. Use the fdmode() function to - // change the mode, if required. 
- // - // Instead of passing -1, -2 or the default value, you can also pass your - // own descriptors. Note, however, that in this case they are not closed by - // the parent. So you should do this yourself, if required. For example, - // to redirect the child process stdout to stderr, you can do: - // - // process p (..., 0, 2); - // - // The cwd argument allows to change the current working directory of the - // child process. NULL and empty arguments are ignored. - // - // The envvars argument allows to set and unset environment variables in - // the child process. If not NULL, it must contain strings in the - // "name=value" (set) or "name" (unset) forms and be terminated with - // NULL. Note that all other variables are inherited from the parent - // process. - // - // Throw process_error if anything goes wrong. Note that some of the - // exceptions (e.g., if exec() failed) can be thrown in the child - // version of us (as process_child_error). - // - // Note that the versions without the the process_path argument may - // temporarily change args[0] (see path_search() for details). - // - process (const char* [], - int in = 0, int out = 1, int err = 2, - const char* cwd = nullptr, - const char* const* envvars = nullptr); - - process (const process_path&, const char* [], - int in = 0, int out = 1, int err = 2, - const char* cwd = nullptr, - const char* const* envvars = nullptr); - - // If the descriptors are pipes that you have created, then you should use - // this constructor instead to communicate this information. - // - // For generality, if the "other" end of the pipe is -1, then assume this - // is not a pipe. 
- // - struct pipe - { - int in = -1; - int out = -1; - - pipe () = default; - pipe (int i, int o): in (i), out (o) {} - - explicit - pipe (const fdpipe& p): in (p.in.get ()), out (p.out.get ()) {} - }; - - process (const process_path&, const char* [], - pipe in, pipe out, pipe err, - const char* cwd = nullptr, - const char* const* envvars = nullptr); - - // The "piping" constructor, for example: - // - // process lhs (..., 0, -1); // Redirect stdout to a pipe. - // process rhs (..., lhs); // Redirect stdin to lhs's pipe. - // - // rhs.wait (); // Wait for last first. - // lhs.wait (); - // - process (const char* [], - process&, int out = 1, int err = 2, - const char* cwd = nullptr, - const char* const* envvars = nullptr); - - process (const process_path&, const char* [], - process&, int out = 1, int err = 2, - const char* cwd = nullptr, - const char* const* envvars = nullptr); - - // Wait for the process to terminate. Return true if the process - // terminated normally and with the zero exit code. Unless ignore_error - // is true, throw process_error if anything goes wrong. This function can - // be called multiple times with subsequent calls simply returning the - // status. - // - bool - wait (bool ignore_errors = false); - - // Return the same result as wait() if the process has already terminated - // and nullopt otherwise. - // - optional - try_wait (); - - // Wait for the process to terminate for up to the specified time - // duration. Return the same result as wait() if the process has - // terminated in this timeframe and nullopt otherwise. - // - template - optional - timed_wait (const std::chrono::duration&); - - // Note that the destructor will wait for the process but will ignore - // any errors and the exit status. - // - ~process () {if (handle != 0) wait (true);} - - // Process termination. - // - - // Send SIGKILL to the process on POSIX and call TerminateProcess() with - // DBG_TERMINATE_PROCESS exit code on Windows. 
Noop for an already - // terminated process. - // - // Note that if the process is killed, it terminates as if it has called - // abort() (functions registered with atexit() are not called, etc). - // - // Also note that on Windows calling this function for a terminating - // process results in the EPERM process_error exception. - // - void - kill (); - - // Send SIGTERM to the process on POSIX and call kill() on Windows (where - // there is no general way to terminate a console process gracefully). - // Noop for an already terminated process. - // - void - term (); - - // Moveable-only type. - // - process (process&&); - process& operator= (process&&); - - process (const process&) = delete; - process& operator= (const process&) = delete; - - // Create an empty or "already terminated" process. By default the - // termination status is unknown but you can change that. - // - explicit - process (optional = nullopt); - - // Resolve process' paths based on the initial path in args0. If recall - // differs from initial, adjust args0 to point to the recall path. If - // resolution fails, throw process_error. Normally, you will use this - // function like this: - // - // const char* args[] = {"foo", ..., nullptr}; - // - // process_path pp (process::path_search (args[0])) - // - // ... // E.g., print args[0]. - // - // process p (pp, args); - // - // You can also specify the fallback directory which will be tried last. - // This, for example, can be used to implement the Windows "search in the - // parent executable's directory" semantics across platforms. - // - // If path_only is true then only search in the PATH environment variable - // (or in CWD if there is a directory component) ignoring other places - // (like calling process' directory and, gasp, CWD on Windows). - // - // If the paths argument is not NULL, search in this list of paths rather - // than in the PATH environment variable. 
Note that in this case you may - // want to clear the recall path (process_path::clear_recall()) since the - // path won't be "recallable" (unless you've passed a cache of the PATH - // environment variable or some such). - // - static process_path - path_search (const char*& args0, - const dir_path& fallback = dir_path (), - bool path_only = false, - const char* paths = nullptr); - - // This version is primarily useful when you want to pre-search the - // executable before creating the args[] array. In this case you will - // use the recall path for args[0]. - // - // The init argument determines whether to initialize the initial path to - // the shallow copy of file. If it is true, then initial is the same as - // file and recall is either empty or contain a different path. If it is - // false then initial contains a shallow copy of recall, and recall is - // either a different path or a deep copy of file. Normally you don't care - // about initial once you got recall and the main reason to pass true to - // this argument is to save a copy (since initial and recall are usually - // the same). - // - static process_path - path_search (const char* file, bool init, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - static process_path - path_search (const std::string&, bool, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - static process_path - path_search (const path&, bool, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - // As above but if not found return empty process_path instead of - // throwing. 
- // - static process_path - try_path_search (const char*, bool, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - static process_path - try_path_search (const std::string&, bool, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - static process_path - try_path_search (const path&, bool, - const dir_path& = dir_path (), - bool = false, - const char* = nullptr); - - // Print process commmand line. If the number of elements is specified, - // then it will print the piped multi-process command line, if present. - // In this case, the expected format is as follows: - // - // name1 arg arg ... nullptr - // name2 arg arg ... nullptr - // ... - // nameN arg arg ... nullptr nullptr - // - static void - print (std::ostream&, const char* const args[], size_t n = 0); - - // Quote and escape the specified command line argument. If batch is true - // then also quote the equal (`=`), comma (`,`) and semicolon (`;`) - // characters which are treated as argument separators in batch file. - // Return the original string if neither is necessary and a pointer to the - // provided buffer string containing the escaped version otherwise. - // -#ifdef _WIN32 - static const char* - quote_argument (const char*, std::string& buffer, bool batch); -#endif - - public: - id_type - id () const; - - static id_type - current_id (); - - public: - handle_type handle; - - // Absence means that the exit information is not (yet) known. This can be - // because you haven't called wait() yet or because wait() failed. - // - optional exit; - - // Use the following file descriptors to communicate with the new process's - // standard streams. - // - auto_fd out_fd; // Write to it to send to stdin. - auto_fd in_ofd; // Read from it to receive from stdout. - auto_fd in_efd; // Read from it to receive from stderr. 
- }; - - // Higher-level process running interface that aims to make executing a - // process for the common cases as simple as calling a functions. Normally - // it is further simplified by project-specific wrapper functions that - // handle the process_error exception as well as abnormal and/or non-zero - // exit status. - // - // The I/O/E arguments determine the child's stdin/stdout/stderr. They can - // be of type int, auto_fd, fd_pipe and process::pipe (and, in the future, - // perhaps also string, buffer, etc). For example, the following call will - // make stdin read from /dev/null, stdout redirect to stderr, and inherit - // the parent's stderr. - // - // process_run (fdopen_null (), 2, 2, ...) - // - // The P argument is the program path. It can be anything that can be passed - // to process::path_search() (const char*, std::string, path) or the - // process_path itself. - // - // The A arguments can be anything convertible to const char* via the - // overloaded process_arg_as() (see below). Out of the box you can use const - // char* (with NULL values ignored), std::string, path/dir_path, (as well as - // [small_]vector[_view] of these), numeric types, as well as optional of - // all the above with absent arguments ignored. - // - struct process_env - { - const process_path* path; - const dir_path* cwd = nullptr; - const char* const* vars = nullptr; - - // Return true if there is an "environment", that is, either the current - // working directory or environment variables. - // - bool - env () const - { - return (cwd != nullptr && !cwd->empty ()) || - (vars != nullptr && *vars != nullptr); - } - - process_env (): path (nullptr) {} - - process_env (const process_path& p, - const dir_path& c = dir_path (), - const char* const* v = nullptr) - : path (&p), - - // Note that this is not just an optimization. It is required when - // the ctor is called with the default arguments (not to keep the - // temporary object pointer). - // - cwd (!c.empty () ? 
&c : nullptr), - - vars (v) {} - - process_env (const process_path& p, const char* const* v) - : path (&p), cwd (nullptr), vars (v) {} - - template - process_env (const process_path& p, const dir_path& c, const V& v) - : process_env (p, v) {cwd = &c;} - - template - process_env (const process_path& p, const V& v) - : process_env (p) {init_vars (v);} - - process_env (const char* p, - const dir_path& c = dir_path (), - const char* const* v = nullptr) - : process_env (path_, c, v) {path_ = process::path_search (p, true);} - - process_env (const std::string& p, - const dir_path& c = dir_path (), - const char* const* v = nullptr) - : process_env (p.c_str (), c, v) {} - - process_env (const butl::path& p, - const dir_path& c = dir_path (), - const char* const* v = nullptr) - : process_env (p.string (), c, v) {} - - template - process_env (const char* p, const dir_path& c, const V& v) - : process_env (path_, c, v) {path_ = process::path_search (p, true);} - - template - process_env (const std::string& p, const dir_path& c, const V& v) - : process_env (p.c_str (), c, v) {} - - template - process_env (const butl::path& p, const dir_path& c, const V& v) - : process_env (p.string (), c, v) {} - - template - process_env (const char* p, const V& v) - : process_env (path_, v) {path_ = process::path_search (p, true);} - - template - process_env (const std::string& p, const V& v) - : process_env (p.c_str (), v) {} - - template - process_env (const butl::path& p, const V& v) - : process_env (p.string (), v) {} - - // Moveable-only type. 
- // - process_env (process_env&&); - process_env& operator= (process_env&&); - - process_env (const process_env&) = delete; - process_env& operator= (const process_env&) = delete; - - private: - template - void - init_vars (const V&); - - template - void - init_vars (const char* const (&v)[N]) - { - vars = v; - } - - process_path path_; - small_vector vars_; - }; - - template - process_exit - process_run (I&& in, - O&& out, - E&& err, - const process_env&, - A&&... args); - - // The version with the command callback that can be used for printing the - // command line or similar. It should be callable with the following - // signature: - // - // void (const char*[], std::size_t) - // - template - process_exit - process_run_callback (const C&, - I&& in, - O&& out, - E&& err, - const process_env&, - A&&... args); - - // Versions that start the process without waiting. - // - template - process - process_start (I&& in, - O&& out, - E&& err, - const process_env&, - A&&... args); - - template - process - process_start_callback (const C&, - I&& in, - O&& out, - E&& err, - const process_env&, - A&&... args); - - // Conversion of types to their C string representations. Can be overloaded - // (including via ADL) for custom types. The default implementation calls - // to_string() which covers all the numeric values via std::to_string () and - // also any type that defines to_string() (via ADL). 
- // - template - inline const char* - process_arg_as (const T& x, std::string& storage) - { - return (storage = std::to_string (x)).c_str (); - } - - inline const char* - process_arg_as (const std::string& s, std::string&) {return s.c_str ();} - - template - inline const char* - process_arg_as (const basic_path& p, std::string&) - { - return p.string ().c_str (); - } - - // char[N] - // - template - inline void - process_args_as (V& v, const char* s, std::string&) - { - if (s != nullptr) - v.push_back (s); - } - - template - inline const char* - process_arg_as (char (&s)[N], std::string&) {return s;} - - template - inline const char* - process_arg_as (const char (&s)[N], std::string&) {return s;} - - template - inline void - process_args_as (V& v, const T& x, std::string& storage) - { - v.push_back (process_arg_as (x, storage)); - } - - template - inline void - process_args_as (V& v, const optional& x, std::string& storage) - { - if (x) - process_args_as (v, *x, storage); - } - - // [small_]vector[_view]<> - // - template - inline void - process_args_as (V& v, const std::vector& vs, std::string&) - { - for (const std::string& s: vs) - v.push_back (s.c_str ()); - } - - template - inline void - process_args_as (V& v, const small_vector& vs, std::string&) - { - for (const std::string& s: vs) - v.push_back (s.c_str ()); - } - - template - inline void - process_args_as (V& v, const vector_view& vs, std::string&) - { - for (const std::string& s: vs) - v.push_back (s.c_str ()); - } - - template - inline void - process_args_as (V& v, const std::vector& vs, std::string&) - { - for (const char* s: vs) - if (s != nullptr) - v.push_back (s); - } - - template - inline void - process_args_as (V& v, const small_vector& vs, std::string&) - { - for (const char* s: vs) - if (s != nullptr) - v.push_back (s); - } - - template - inline void - process_args_as (V& v, const vector_view& vs, std::string&) - { - for (const char* s: vs) - if (s != nullptr) - v.push_back (s); - } -} - 
-#include -#include diff --git a/libbutl/project-name.cxx b/libbutl/project-name.cxx index 7a14b49..a7ed8a8 100644 --- a/libbutl/project-name.cxx +++ b/libbutl/project-name.cxx @@ -1,38 +1,16 @@ // file : libbutl/project-name.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include -#ifndef __cpp_lib_modules_ts #include #include #include // move() #include // find() #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.project_name; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; -#endif - -import butl.path; // path::traits -import butl.utility; // alpha(), alnum() -#else -#include -#include -#endif + +#include // path::traits +#include // alpha(), alnum() using namespace std; diff --git a/libbutl/project-name.hxx b/libbutl/project-name.hxx new file mode 100644 index 0000000..6e1f925 --- /dev/null +++ b/libbutl/project-name.hxx @@ -0,0 +1,216 @@ +// file : libbutl/project-name.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // move() +#include + +#include // icasecmp(), sanitize_identifier() + +#include + +namespace butl +{ + // Build system project name. + // + // Since a build system project is often a package, it is also used as a + // package name by the package dependency manager. And since a package is + // often a project (in the "collection of related packages" sense), it is + // also used as a project name by the project dependency manager. + // + class LIBBUTL_SYMEXPORT project_name + { + public: + // Create project name from string verifying that it complies with the + // specification and throw std::invalid_argument if that's not the + // case. Note that in this case the passed value is guaranteed to be + // unchanged. 
+ // + explicit + project_name (const std::string& s): project_name (std::string (s)) {} + + explicit + project_name (std::string&&); + + // Create a special empty project name. + // + project_name () {} // For Clang 3.8 (const initialization). + + // Create an arbitrary string that can be used in contexts that expect a + // project name. For example, a project name pattern for use in ODB query + // expressions. + // + enum raw_string_type {raw_string}; + project_name (std::string s, raw_string_type): value_ (std::move (s)) {} + + bool + empty () const noexcept {return value_.empty ();} + + const std::string& + string () const& noexcept {return value_;} + + // Moves the underlying project name string out of the project name object. + // The object becomes empty. Usage: std::move (name).string (). + // + std::string + string () && {std::string r; r.swap (this->value_); return r;} + + // Project name base and extension (without the dot). If there is no + // extension, then the base name is the same as the full name and the + // returned extension is empty. + // + // If the ext argument is not NULL, then only remove the specified + // extension. Note that the extension should not include the dot and the + // comparison is always case-insensitive. + // + std::string + base (const char* ext = nullptr) const; + + std::string + extension () const; + + // Project name sanitized to a canonical variable name. Specifically, + // '.', '-', and '+' are replaced with '_'. + // + std::string + variable () const {return sanitize_identifier (value_);} + + // Compare ignoring case. Note that a string is not checked to be a valid + // project name. 
+ // + int compare (const project_name& n) const {return compare (n.value_);} + int compare (const std::string& n) const {return compare (n.c_str ());} + int compare (const char* n) const {return icasecmp (value_, n);} + + private: + std::string value_; + }; + + inline bool + operator< (const project_name& x, const project_name& y) + { + return x.compare (y) < 0; + } + + inline bool + operator> (const project_name& x, const project_name& y) + { + return x.compare (y) > 0; + } + + inline bool + operator== (const project_name& x, const project_name& y) + { + return x.compare (y) == 0; + } + + inline bool + operator<= (const project_name& x, const project_name& y) + { + return x.compare (y) <= 0; + } + + inline bool + operator>= (const project_name& x, const project_name& y) + { + return x.compare (y) >= 0; + } + + inline bool + operator!= (const project_name& x, const project_name& y) + { + return x.compare (y) != 0; + } + + template + inline auto + operator< (const project_name& x, const T& y) + { + return x.compare (y) < 0; + } + + template + inline auto + operator> (const project_name& x, const T& y) + { + return x.compare (y) > 0; + } + + template + inline auto + operator== (const project_name& x, const T& y) + { + return x.compare (y) == 0; + } + + template + inline auto + operator<= (const project_name& x, const T& y) + { + return x.compare (y) <= 0; + } + + template + inline auto + operator>= (const project_name& x, const T& y) + { + return x.compare (y) >= 0; + } + + template + inline auto + operator!= (const project_name& x, const T& y) + { + return x.compare (y) != 0; + } + + template + inline auto + operator< (const T& x, const project_name& y) + { + return y > x; + } + + template + inline auto + operator> (const T& x, const project_name& y) + { + return y < x; + } + + template + inline auto + operator== (const T& x, const project_name& y) + { + return y == x; + } + + template + inline auto + operator<= (const T& x, const project_name& y) + { + return y >= 
x; + } + + template + inline auto + operator>= (const T& x, const project_name& y) + { + return y <= x; + } + + template + inline auto + operator!= (const T& x, const project_name& y) + { + return y != x; + } + + inline std::ostream& + operator<< (std::ostream& os, const project_name& v) + { + return os << v.string (); + } +} diff --git a/libbutl/project-name.mxx b/libbutl/project-name.mxx deleted file mode 100644 index 1117e28..0000000 --- a/libbutl/project-name.mxx +++ /dev/null @@ -1,233 +0,0 @@ -// file : libbutl/project-name.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // move() -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.project_name; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // icasecmp(), sanitize_identifier() -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Build system project name. - // - // Since a build system project is often a package, it is also used as a - // package name by the package dependency manager. And since a package is - // often a project (in the "collection of related packages" sense), it is - // also used as a project name by the project dependency manager. - // - class LIBBUTL_SYMEXPORT project_name - { - public: - // Create project name from string verifying that it complies with the - // specification and throw std::invalid_argument if that's not the - // case. Note that in this case the passed value is guaranteed to be - // unchanged. - // - explicit - project_name (const std::string& s): project_name (std::string (s)) {} - - explicit - project_name (std::string&&); - - // Create a special empty project name. - // - project_name () {} // For Clang 3.8 (const initialization). - - // Create an arbitrary string that can be used in contexts that expect a - // project name. 
For example, a project name pattern for use in ODB query - // expressions. - // - enum raw_string_type {raw_string}; - project_name (std::string s, raw_string_type): value_ (std::move (s)) {} - - bool - empty () const noexcept {return value_.empty ();} - - const std::string& - string () const& noexcept {return value_;} - - // Moves the underlying project name string out of the project name object. - // The object becomes empty. Usage: std::move (name).string (). - // - std::string - string () && {std::string r; r.swap (this->value_); return r;} - - // Project name base and extension (without the dot). If there is no - // extension, then the base name is the same as the full name and the - // returned extension is empty. - // - // If the ext argument is not NULL, then only remove the specified - // extension. Note that the extension should not include the dot and the - // comparison is always case-insensitive. - // - std::string - base (const char* ext = nullptr) const; - - std::string - extension () const; - - // Project name sanitized to a canonical variable name. Specifically, - // '.', '-', and '+' are replaced with '_'. - // - std::string - variable () const {return sanitize_identifier (value_);} - - // Compare ignoring case. Note that a string is not checked to be a valid - // project name. 
- // - int compare (const project_name& n) const {return compare (n.value_);} - int compare (const std::string& n) const {return compare (n.c_str ());} - int compare (const char* n) const {return icasecmp (value_, n);} - - private: - std::string value_; - }; - - inline bool - operator< (const project_name& x, const project_name& y) - { - return x.compare (y) < 0; - } - - inline bool - operator> (const project_name& x, const project_name& y) - { - return x.compare (y) > 0; - } - - inline bool - operator== (const project_name& x, const project_name& y) - { - return x.compare (y) == 0; - } - - inline bool - operator<= (const project_name& x, const project_name& y) - { - return x.compare (y) <= 0; - } - - inline bool - operator>= (const project_name& x, const project_name& y) - { - return x.compare (y) >= 0; - } - - inline bool - operator!= (const project_name& x, const project_name& y) - { - return x.compare (y) != 0; - } - - template - inline auto - operator< (const project_name& x, const T& y) - { - return x.compare (y) < 0; - } - - template - inline auto - operator> (const project_name& x, const T& y) - { - return x.compare (y) > 0; - } - - template - inline auto - operator== (const project_name& x, const T& y) - { - return x.compare (y) == 0; - } - - template - inline auto - operator<= (const project_name& x, const T& y) - { - return x.compare (y) <= 0; - } - - template - inline auto - operator>= (const project_name& x, const T& y) - { - return x.compare (y) >= 0; - } - - template - inline auto - operator!= (const project_name& x, const T& y) - { - return x.compare (y) != 0; - } - - template - inline auto - operator< (const T& x, const project_name& y) - { - return y > x; - } - - template - inline auto - operator> (const T& x, const project_name& y) - { - return y < x; - } - - template - inline auto - operator== (const T& x, const project_name& y) - { - return y == x; - } - - template - inline auto - operator<= (const T& x, const project_name& y) - { - return y >= 
x; - } - - template - inline auto - operator>= (const T& x, const project_name& y) - { - return y <= x; - } - - template - inline auto - operator!= (const T& x, const project_name& y) - { - return y != x; - } - - inline std::ostream& - operator<< (std::ostream& os, const project_name& v) - { - return os << v.string (); - } -} diff --git a/libbutl/prompt.cxx b/libbutl/prompt.cxx index 1c0820a..2e42dd5 100644 --- a/libbutl/prompt.cxx +++ b/libbutl/prompt.cxx @@ -1,33 +1,11 @@ // file : libbutl/prompt.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#ifndef __cpp_lib_modules_ts -#include +#include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.prompt; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif -import butl.diagnostics; -#else -#include // diag_stream -#endif +#include // diag_stream using namespace std; diff --git a/libbutl/prompt.hxx b/libbutl/prompt.hxx new file mode 100644 index 0000000..90b8dbf --- /dev/null +++ b/libbutl/prompt.hxx @@ -0,0 +1,20 @@ +// file : libbutl/prompt.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include + +#include + +namespace butl +{ + // The Y/N prompt. The def argument, if specified, should be either 'y' or + // 'n'. It is used as the default answer, in case the user just hits enter. + // + // Write the prompt to diag_stream. Throw ios_base::failure if no answer + // could be extracted from stdin (for example, because it was closed). 
+ // + LIBBUTL_SYMEXPORT bool + yn_prompt (const std::string&, char def = '\0'); +} diff --git a/libbutl/prompt.mxx b/libbutl/prompt.mxx deleted file mode 100644 index 2489b2f..0000000 --- a/libbutl/prompt.mxx +++ /dev/null @@ -1,35 +0,0 @@ -// file : libbutl/prompt.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.prompt; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // The Y/N prompt. The def argument, if specified, should be either 'y' or - // 'n'. It is used as the default answer, in case the user just hits enter. - // - // Write the prompt to diag_stream. Throw ios_base::failure if no answer - // could be extracted from stdin (for example, because it was closed). - // - LIBBUTL_SYMEXPORT bool - yn_prompt (const std::string&, char def = '\0'); -} diff --git a/libbutl/regex.cxx b/libbutl/regex.cxx index 83e296c..34536f2 100644 --- a/libbutl/regex.cxx +++ b/libbutl/regex.cxx @@ -1,42 +1,17 @@ // file : libbutl/regex.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include +#include #include #include #include // runtime_error + #if defined(_MSC_VER) && _MSC_VER < 2000 # include // strstr() #endif -#endif - -// Other includes. -#ifdef __cpp_modules_ts -module butl.regex; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.regex; -#endif -#endif - -import butl.utility; // operator<<(ostream, exception) -#else -#include -#endif +#include // operator<<(ostream, exception) namespace std { diff --git a/libbutl/regex.hxx b/libbutl/regex.hxx new file mode 100644 index 0000000..9b31075 --- /dev/null +++ b/libbutl/regex.hxx @@ -0,0 +1,133 @@ +// file : libbutl/regex.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // pair +#include // size_t + +#if defined(__clang__) +# if __has_include(<__config>) +# include <__config> // _LIBCPP_VERSION +# endif +#endif + +#include + +namespace butl +{ + // The regex semantics for the following functions is like that of + // std::regex_replace() extended the standard ECMA-262 substitution escape + // sequences with a subset of Perl sequences: + // + // \\, \n, \u, \l, \U, \L, \E, \1, ..., \9 + // + // Notes and limitations: + // + // - The only valid regex_constants flags are match_default, + // format_first_only and format_no_copy. + // + // - If backslash doesn't start any of the listed sequences then it is + // silently dropped and the following character is copied as is. + // + // - The character case conversion is performed according to the global + // C++ locale (which is, unless changed, is the same as C locale and + // both default to the POSIX locale aka "C"). + // + + // Call specified append() function for non-matched substrings and matched + // substring replacements returning true if search succeeded. 
The function + // must be callable with the following signature: + // + // void + // append(basic_string::iterator begin, basic_string::iterator end); + // + template + bool + regex_replace_search (const std::basic_string&, + const std::basic_regex&, + const std::basic_string& fmt, + F&& append, + std::regex_constants::match_flag_type = + std::regex_constants::match_default); + + // As above but concatenate non-matched substrings and matched substring + // replacements into a string returning it as well as whether the search + // succeeded. + // + template + std::pair, bool> + regex_replace_search (const std::basic_string&, + const std::basic_regex&, + const std::basic_string& fmt, + std::regex_constants::match_flag_type = + std::regex_constants::match_default); + + // Match the entire string and, if it matches, return the string replacement. + // + template + std::pair, bool> + regex_replace_match (const std::basic_string&, + const std::basic_regex&, + const std::basic_string& fmt); + + // As above but using match_results. + // + template + std::basic_string + regex_replace_match_results ( + const std::match_results::const_iterator>&, + const std::basic_string& fmt); + + template + std::basic_string + regex_replace_match_results ( + const std::match_results::const_iterator>&, + const C* fmt, std::size_t fmt_n); + + // Parse the '///' replacement string into the regex/format + // pair. Other character can be used as a delimiter instead of '/'. Throw + // std::invalid_argument or std::regex_error on parsing error. + // + // Note: escaping of the delimiter character is not (yet) supported. 
+ // + template + std::pair, std::basic_string> + regex_replace_parse (const std::basic_string&, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + template + std::pair, std::basic_string> + regex_replace_parse (const C*, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + template + std::pair, std::basic_string> + regex_replace_parse (const C*, size_t, + std::regex_constants::syntax_option_type = + std::regex_constants::ECMAScript); + + // As above but return string instead of regex and do not fail if there is + // text after the last delimiter instead returning its position. + // + template + std::pair, std::basic_string> + regex_replace_parse (const C*, size_t, size_t& end); +} + +namespace std +{ + // Print regex error description but only if it is meaningful (this is also + // why we have to print leading colon). + // + LIBBUTL_SYMEXPORT ostream& + operator<< (ostream&, const regex_error&); +} + +#include +#include diff --git a/libbutl/regex.ixx b/libbutl/regex.ixx index f55cb07..08962cf 100644 --- a/libbutl/regex.ixx +++ b/libbutl/regex.ixx @@ -1,7 +1,9 @@ // file : libbutl/regex.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // move(), make_pair() + +namespace butl { template inline std::pair, bool> diff --git a/libbutl/regex.mxx b/libbutl/regex.mxx deleted file mode 100644 index b1ba1b9..0000000 --- a/libbutl/regex.mxx +++ /dev/null @@ -1,154 +0,0 @@ -// file : libbutl/regex.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. 
- -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include // pair - -#include -#include // size_t -#include // move(), make_pair() -#include // invalid_argument -#endif - -#if defined(__clang__) -# if __has_include(<__config>) -# include <__config> // _LIBCPP_VERSION -# endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.regex; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.regex; // @@ MOD TODO should probably be re-exported. -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // The regex semantics for the following functions is like that of - // std::regex_replace() extended the standard ECMA-262 substitution escape - // sequences with a subset of Perl sequences: - // - // \\, \n, \u, \l, \U, \L, \E, \1, ..., \9 - // - // Notes and limitations: - // - // - The only valid regex_constants flags are match_default, - // format_first_only and format_no_copy. - // - // - If backslash doesn't start any of the listed sequences then it is - // silently dropped and the following character is copied as is. - // - // - The character case conversion is performed according to the global - // C++ locale (which is, unless changed, is the same as C locale and - // both default to the POSIX locale aka "C"). - // - - // Call specified append() function for non-matched substrings and matched - // substring replacements returning true if search succeeded. The function - // must be callable with the following signature: - // - // void - // append(basic_string::iterator begin, basic_string::iterator end); - // - template - bool - regex_replace_search (const std::basic_string&, - const std::basic_regex&, - const std::basic_string& fmt, - F&& append, - std::regex_constants::match_flag_type = - std::regex_constants::match_default); - - // As above but concatenate non-matched substrings and matched substring - // replacements into a string returning it as well as whether the search - // succeeded. 
- // - template - std::pair, bool> - regex_replace_search (const std::basic_string&, - const std::basic_regex&, - const std::basic_string& fmt, - std::regex_constants::match_flag_type = - std::regex_constants::match_default); - - // Match the entire string and, if it matches, return the string replacement. - // - template - std::pair, bool> - regex_replace_match (const std::basic_string&, - const std::basic_regex&, - const std::basic_string& fmt); - - // As above but using match_results. - // - template - std::basic_string - regex_replace_match_results ( - const std::match_results::const_iterator>&, - const std::basic_string& fmt); - - template - std::basic_string - regex_replace_match_results ( - const std::match_results::const_iterator>&, - const C* fmt, std::size_t fmt_n); - - // Parse the '///' replacement string into the regex/format - // pair. Other character can be used as a delimiter instead of '/'. Throw - // std::invalid_argument or std::regex_error on parsing error. - // - // Note: escaping of the delimiter character is not (yet) supported. - // - template - std::pair, std::basic_string> - regex_replace_parse (const std::basic_string&, - std::regex_constants::syntax_option_type = - std::regex_constants::ECMAScript); - - template - std::pair, std::basic_string> - regex_replace_parse (const C*, - std::regex_constants::syntax_option_type = - std::regex_constants::ECMAScript); - - template - std::pair, std::basic_string> - regex_replace_parse (const C*, size_t, - std::regex_constants::syntax_option_type = - std::regex_constants::ECMAScript); - - // As above but return string instead of regex and do not fail if there is - // text after the last delimiter instead returning its position. - // - template - std::pair, std::basic_string> - regex_replace_parse (const C*, size_t, size_t& end); -} - -LIBBUTL_MODEXPORT namespace std -{ - // Print regex error description but only if it is meaningful (this is also - // why we have to print leading colon). 
- // - LIBBUTL_SYMEXPORT ostream& - operator<< (ostream&, const regex_error&); -} - -#include -#include diff --git a/libbutl/regex.txx b/libbutl/regex.txx index 8fe5308..214d949 100644 --- a/libbutl/regex.txx +++ b/libbutl/regex.txx @@ -1,7 +1,10 @@ // file : libbutl/regex.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include +#include // invalid_argument + +namespace butl { template std::basic_string diff --git a/libbutl/semantic-version.cxx b/libbutl/semantic-version.cxx index 445890d..3be382f 100644 --- a/libbutl/semantic-version.cxx +++ b/libbutl/semantic-version.cxx @@ -1,38 +1,11 @@ // file : libbutl/semantic-version.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include +#include #include // strchr() #include // move() #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.semantic_version; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -#endif -#else -#endif using namespace std; diff --git a/libbutl/semantic-version.hxx b/libbutl/semantic-version.hxx new file mode 100644 index 0000000..16f3d56 --- /dev/null +++ b/libbutl/semantic-version.hxx @@ -0,0 +1,175 @@ +// file : libbutl/semantic-version.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // size_t +#include // uint*_t +#include // move() +#include + +#include + +#include + +// FreeBSD defines these macros in its . +// +#ifdef major +# undef major +#endif + +#ifdef minor +# undef minor +#endif + +namespace butl +{ + // Semantic or semantic-like version. + // + // .[.][] + // + // If the patch component is absent, then it defaults to 0. 
+ // + // @@ Currently there is no way to enforce the three-component version. + // Supporting this will require changing allow_build to a bit-wise + // flag. See parse_semantic_version_impl() for some sketched code. + // We may also want to pass these flags to string() to not print + // 0 patch. + // + // By default, a version containing the component is considered + // valid only if separated from with '-' (semver pre-release) or '+' + // (semver build metadata). However, as discussed below, the list of valid + // separators can be customized to recognize other semver-like formats. + // + // Note also that the format of semver pre-release and build metadata are + // not validated. + // + struct LIBBUTL_SYMEXPORT semantic_version + { + std::uint64_t major = 0; + std::uint64_t minor = 0; + std::uint64_t patch = 0; + std::string build; + + // Construct the semantic version from various representations. Throw + // std::invalid_argument if the format is not recognizable or components + // are invalid. + // + semantic_version () = default; + + semantic_version (std::uint64_t major, + std::uint64_t minor, + std::uint64_t patch, + std::string build = ""); + + // The build_separators argument can be NULL (no build component allowed), + // empty (any build component allowed), or a string of characters to allow + // as separators. When allow_build is true build_separators defaults to + // "-+". + // + explicit + semantic_version (const std::string&, bool allow_build = true); + + semantic_version (const std::string&, const char* build_separators); + + // As above but parse from the specified position until the end of the + // string. 
+ // + semantic_version (const std::string&, std::size_t pos, bool = true); + + semantic_version (const std::string&, std::size_t pos, const char*); + + std::string + string (bool ignore_build = false) const; + + // Numeric representation in the AAAAABBBBBCCCCC0000 form, where: + // + // AAAAA - major version number + // BBBBB - minor version number + // CCCCC - patch version number + // + // See standard version for details. + // + explicit + semantic_version (std::uint64_t numeric, std::string build = ""); + + // If any of the major/minor/patch components is greater than 99999, then + // throw std::invalid_argument. The build component is ignored. + // + std::uint64_t + numeric () const; + + // Unless instructed to ignore, the build components are compared + // lexicographically. + // + int + compare (const semantic_version& v, bool ignore_build = false) const + { + return (major != v.major ? (major < v.major ? -1 : 1) : + minor != v.minor ? (minor < v.minor ? -1 : 1) : + patch != v.patch ? (patch < v.patch ? -1 : 1) : + ignore_build ? 0 : build.compare (v.build)); + } + }; + + // Try to parse a string as a semantic version returning nullopt if invalid. + // + optional + parse_semantic_version (const std::string&, bool allow_build = true); + + optional + parse_semantic_version (const std::string&, const char* build_separators); + + optional + parse_semantic_version (const std::string&, std::size_t pos, bool = true); + + optional + parse_semantic_version (const std::string&, std::size_t pos, const char*); + + // NOTE: comparison operators take the build component into account. 
+ // + inline bool + operator< (const semantic_version& x, const semantic_version& y) + { + return x.compare (y) < 0; + } + + inline bool + operator> (const semantic_version& x, const semantic_version& y) + { + return x.compare (y) > 0; + } + + inline bool + operator== (const semantic_version& x, const semantic_version& y) + { + return x.compare (y) == 0; + } + + inline bool + operator<= (const semantic_version& x, const semantic_version& y) + { + return x.compare (y) <= 0; + } + + inline bool + operator>= (const semantic_version& x, const semantic_version& y) + { + return x.compare (y) >= 0; + } + + inline bool + operator!= (const semantic_version& x, const semantic_version& y) + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const semantic_version& x) + { + return o << x.string (); + } +} + +#include diff --git a/libbutl/semantic-version.mxx b/libbutl/semantic-version.mxx deleted file mode 100644 index 566d192..0000000 --- a/libbutl/semantic-version.mxx +++ /dev/null @@ -1,192 +0,0 @@ -// file : libbutl/semantic-version.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // size_t -#include // uint*_t -#include // move() -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.semantic_version; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -#else -#include -#endif - -#include - -// FreeBSD defines these macros in its . -// -#ifdef major -# undef major -#endif - -#ifdef minor -# undef minor -#endif - -LIBBUTL_MODEXPORT namespace butl -{ - // Semantic or semantic-like version. - // - // .[.][] - // - // If the patch component is absent, then it defaults to 0. - // - // @@ Currently there is no way to enforce the three-component version. - // Supporting this will require changing allow_build to a bit-wise - // flag. 
See parse_semantic_version_impl() for some sketched code. - // We may also want to pass these flags to string() to not print - // 0 patch. - // - // By default, a version containing the component is considered - // valid only if separated from with '-' (semver pre-release) or '+' - // (semver build metadata). However, as discussed below, the list of valid - // separators can be customized to recognize other semver-like formats. - // - // Note also that the format of semver pre-release and build metadata are - // not validated. - // - struct LIBBUTL_SYMEXPORT semantic_version - { - std::uint64_t major = 0; - std::uint64_t minor = 0; - std::uint64_t patch = 0; - std::string build; - - // Construct the semantic version from various representations. Throw - // std::invalid_argument if the format is not recognizable or components - // are invalid. - // - semantic_version () = default; - - semantic_version (std::uint64_t major, - std::uint64_t minor, - std::uint64_t patch, - std::string build = ""); - - // The build_separators argument can be NULL (no build component allowed), - // empty (any build component allowed), or a string of characters to allow - // as separators. When allow_build is true build_separators defaults to - // "-+". - // - explicit - semantic_version (const std::string&, bool allow_build = true); - - semantic_version (const std::string&, const char* build_separators); - - // As above but parse from the specified position until the end of the - // string. - // - semantic_version (const std::string&, std::size_t pos, bool = true); - - semantic_version (const std::string&, std::size_t pos, const char*); - - std::string - string (bool ignore_build = false) const; - - // Numeric representation in the AAAAABBBBBCCCCC0000 form, where: - // - // AAAAA - major version number - // BBBBB - minor version number - // CCCCC - patch version number - // - // See standard version for details. 
- // - explicit - semantic_version (std::uint64_t numeric, std::string build = ""); - - // If any of the major/minor/patch components is greater than 99999, then - // throw std::invalid_argument. The build component is ignored. - // - std::uint64_t - numeric () const; - - // Unless instructed to ignore, the build components are compared - // lexicographically. - // - int - compare (const semantic_version& v, bool ignore_build = false) const - { - return (major != v.major ? (major < v.major ? -1 : 1) : - minor != v.minor ? (minor < v.minor ? -1 : 1) : - patch != v.patch ? (patch < v.patch ? -1 : 1) : - ignore_build ? 0 : build.compare (v.build)); - } - }; - - // Try to parse a string as a semantic version returning nullopt if invalid. - // - optional - parse_semantic_version (const std::string&, bool allow_build = true); - - optional - parse_semantic_version (const std::string&, const char* build_separators); - - optional - parse_semantic_version (const std::string&, std::size_t pos, bool = true); - - optional - parse_semantic_version (const std::string&, std::size_t pos, const char*); - - // NOTE: comparison operators take the build component into account. 
- // - inline bool - operator< (const semantic_version& x, const semantic_version& y) - { - return x.compare (y) < 0; - } - - inline bool - operator> (const semantic_version& x, const semantic_version& y) - { - return x.compare (y) > 0; - } - - inline bool - operator== (const semantic_version& x, const semantic_version& y) - { - return x.compare (y) == 0; - } - - inline bool - operator<= (const semantic_version& x, const semantic_version& y) - { - return x.compare (y) <= 0; - } - - inline bool - operator>= (const semantic_version& x, const semantic_version& y) - { - return x.compare (y) >= 0; - } - - inline bool - operator!= (const semantic_version& x, const semantic_version& y) - { - return !(x == y); - } - - inline std::ostream& - operator<< (std::ostream& o, const semantic_version& x) - { - return o << x.string (); - } -} - -#include diff --git a/libbutl/sendmail.cxx b/libbutl/sendmail.cxx index 1038cf4..5fec1a6 100644 --- a/libbutl/sendmail.cxx +++ b/libbutl/sendmail.cxx @@ -1,32 +1,7 @@ // file : libbutl/sendmail.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.sendmail; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.process; -import butl.fdstream; -import butl.small_vector; -#endif - -#endif +#include using namespace std; diff --git a/libbutl/sendmail.hxx b/libbutl/sendmail.hxx new file mode 100644 index 0000000..97a4d82 --- /dev/null +++ b/libbutl/sendmail.hxx @@ -0,0 +1,116 @@ +// file : libbutl/sendmail.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include + +#include +#include +#include + +#include + +namespace butl +{ + // Send email using the sendmail(1) program. + // + // Write the body of the email to out. 
Note that you must explicitly close + // it before calling wait(). Throw process_error and io_error (both derive + // from system_error) in case of errors. + // + // Typical usage: + // + // try + // { + // sendmail sm (2, // Diagnostics to stderr. + // "", // Default From: address. + // "Test subject", + // {"test@example.com"}); + // + // sm.out << "Test body" << endl; + // + // sm.out.close (); + // + // if (!sm.wait ()) + // ... // sendmail returned non-zero status. + // } + // catch (const std::system_error& e) + // { + // cerr << "sendmail error: " << e << endl; + // } + // + class LIBBUTL_SYMEXPORT sendmail: public process + { + public: + ofdstream out; + + // Notes: + // + // - If from is empty then the process user's address is used. + // + // - The to/cc/bcc addressed should already be quoted if required. + // + using recipients_type = small_vector; + + template + sendmail (E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to); + + template + sendmail (E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc); + + template + sendmail (E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc, + O&&... options); + + // Version with the command line callback (see process_run_callback() for + // details). + // + template + sendmail (const C&, + E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to); + + template + sendmail (const C&, + E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc); + + template + sendmail (const C&, + E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc, + O&&... 
options); + + private: + void + headers (const std::string& from, + const std::string& subj, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc); + }; +} + +#include diff --git a/libbutl/sendmail.ixx b/libbutl/sendmail.ixx index 105c1af..35b5c47 100644 --- a/libbutl/sendmail.ixx +++ b/libbutl/sendmail.ixx @@ -1,7 +1,10 @@ // file : libbutl/sendmail.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // size_t +#include // move(), forward() + +namespace butl { template inline sendmail:: diff --git a/libbutl/sendmail.mxx b/libbutl/sendmail.mxx deleted file mode 100644 index 0d5b239..0000000 --- a/libbutl/sendmail.mxx +++ /dev/null @@ -1,137 +0,0 @@ -// file : libbutl/sendmail.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include - -#include // size_t -#include // move(), forward() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.sendmail; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.process; -import butl.fdstream; -import butl.small_vector; -#else -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Send email using the sendmail(1) program. - // - // Write the body of the email to out. Note that you must explicitly close - // it before calling wait(). Throw process_error and io_error (both derive - // from system_error) in case of errors. - // - // Typical usage: - // - // try - // { - // sendmail sm (2, // Diagnostics to stderr. - // "", // Default From: address. - // "Test subject", - // {"test@example.com"}); - // - // sm.out << "Test body" << endl; - // - // sm.out.close (); - // - // if (!sm.wait ()) - // ... // sendmail returned non-zero status. 
- // } - // catch (const std::system_error& e) - // { - // cerr << "sendmail error: " << e << endl; - // } - // - class LIBBUTL_SYMEXPORT sendmail: public process - { - public: - ofdstream out; - - // Notes: - // - // - If from is empty then the process user's address is used. - // - // - The to/cc/bcc addressed should already be quoted if required. - // - using recipients_type = small_vector; - - template - sendmail (E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to); - - template - sendmail (E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to, - const recipients_type& cc); - - template - sendmail (E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to, - const recipients_type& cc, - const recipients_type& bcc, - O&&... options); - - // Version with the command line callback (see process_run_callback() for - // details). - // - template - sendmail (const C&, - E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to); - - template - sendmail (const C&, - E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to, - const recipients_type& cc); - - template - sendmail (const C&, - E&& err, - const std::string& from, - const std::string& subject, - const recipients_type& to, - const recipients_type& cc, - const recipients_type& bcc, - O&&... options); - - private: - void - headers (const std::string& from, - const std::string& subj, - const recipients_type& to, - const recipients_type& cc, - const recipients_type& bcc); - }; -} - -#include diff --git a/libbutl/sha1.cxx b/libbutl/sha1.cxx index f4a6bad..e546922 100644 --- a/libbutl/sha1.cxx +++ b/libbutl/sha1.cxx @@ -1,9 +1,7 @@ // file : libbutl/sha1.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include // C interface for sha1c. 
// @@ -42,29 +40,9 @@ extern "C" #define SHA1_Final(x, y) sha1_result((y), (char(&)[20])(x)) #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include - #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.sha1; -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif -#else #include -#endif using namespace std; diff --git a/libbutl/sha1.hxx b/libbutl/sha1.hxx new file mode 100644 index 0000000..62710f4 --- /dev/null +++ b/libbutl/sha1.hxx @@ -0,0 +1,120 @@ +// file : libbutl/sha1.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // istream +#include +#include // size_t +#include +#include // strlen() + +#include + +namespace butl +{ + // SHA1 checksum calculator. + // + // For a single chunk of data a sum can be obtained in one line, for + // example: + // + // cerr << sha1 ("123").string () << endl; + // + class LIBBUTL_SYMEXPORT sha1 + { + public: + sha1 () {reset ();} + + // Append binary data. + // + void + append (const void*, std::size_t); + + sha1 (const void* b, std::size_t n): sha1 () {append (b, n);} + + // Append string. + // + // Note that the hash includes the '\0' terminator. Failed that, a call + // with an empty string will be indistinguishable from no call at all. + // + void + append (const std::string& s) {append (s.c_str (), s.size () + 1);} + + void + append (const char* s) {append (s, std::strlen (s) + 1);} + + explicit + sha1 (const std::string& s): sha1 () {append (s);} + + explicit + sha1 (const char* s): sha1 () {append (s);} + + // Append stream. + // + // Note that currently the stream is expected to be bufstreambuf-based + // (e.g., ifdstream). + // + void + append (std::istream&); + + explicit + sha1 (std::istream& i): sha1 () {append (i);} + + // Check if any data has been hashed. + // + bool + empty () const {return empty_;} + + // Reset to the default-constructed state. 
+ // + void + reset (); + + // Extract result. + // + // It can be obtained as either a 20-byte binary digest or as a 40- + // character hex-encoded C-string. + // + using digest_type = std::uint8_t[20]; + + const digest_type& + binary () const; + + const char* + string () const; + + std::string + abbreviated_string (std::size_t n) const + { + return std::string (string (), n < 40 ? n : 40); + } + + private: + struct context // Note: identical to SHA1_CTX. + { + union { + std::uint8_t b8[20]; + std::uint32_t b32[5]; + } h; + union { + std::uint8_t b8[8]; + std::uint64_t b64[1]; + } c; + union { + std::uint8_t b8[64]; + std::uint32_t b32[16]; + } m; + std::uint8_t count; + }; + + union + { + mutable context ctx_; + mutable char buf_[sizeof (context)]; // Also used to store string rep. + }; + + mutable digest_type bin_; + mutable bool done_; + bool empty_; + }; +} diff --git a/libbutl/sha1.mxx b/libbutl/sha1.mxx deleted file mode 100644 index f6fafc0..0000000 --- a/libbutl/sha1.mxx +++ /dev/null @@ -1,135 +0,0 @@ -// file : libbutl/sha1.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include // istream -#include -#include // size_t -#include -#include // strlen() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.sha1; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // SHA1 checksum calculator. - // - // For a single chunk of data a sum can be obtained in one line, for - // example: - // - // cerr << sha1 ("123").string () << endl; - // - class LIBBUTL_SYMEXPORT sha1 - { - public: - sha1 () {reset ();} - - // Append binary data. - // - void - append (const void*, std::size_t); - - sha1 (const void* b, std::size_t n): sha1 () {append (b, n);} - - // Append string. - // - // Note that the hash includes the '\0' terminator. 
Failed that, a call - // with an empty string will be indistinguishable from no call at all. - // - void - append (const std::string& s) {append (s.c_str (), s.size () + 1);} - - void - append (const char* s) {append (s, std::strlen (s) + 1);} - - explicit - sha1 (const std::string& s): sha1 () {append (s);} - - explicit - sha1 (const char* s): sha1 () {append (s);} - - // Append stream. - // - // Note that currently the stream is expected to be bufstreambuf-based - // (e.g., ifdstream). - // - void - append (std::istream&); - - explicit - sha1 (std::istream& i): sha1 () {append (i);} - - // Check if any data has been hashed. - // - bool - empty () const {return empty_;} - - // Reset to the default-constructed state. - // - void - reset (); - - // Extract result. - // - // It can be obtained as either a 20-byte binary digest or as a 40- - // character hex-encoded C-string. - // - using digest_type = std::uint8_t[20]; - - const digest_type& - binary () const; - - const char* - string () const; - - std::string - abbreviated_string (std::size_t n) const - { - return std::string (string (), n < 40 ? n : 40); - } - - private: - struct context // Note: identical to SHA1_CTX. - { - union { - std::uint8_t b8[20]; - std::uint32_t b32[5]; - } h; - union { - std::uint8_t b8[8]; - std::uint64_t b64[1]; - } c; - union { - std::uint8_t b8[64]; - std::uint32_t b32[16]; - } m; - std::uint8_t count; - }; - - union - { - mutable context ctx_; - mutable char buf_[sizeof (context)]; // Also used to store string rep. - }; - - mutable digest_type bin_; - mutable bool done_; - bool empty_; - }; -} diff --git a/libbutl/sha256.cxx b/libbutl/sha256.cxx index 8a34402..95987ec 100644 --- a/libbutl/sha256.cxx +++ b/libbutl/sha256.cxx @@ -1,9 +1,7 @@ // file : libbutl/sha256.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include // C interface for sha256c. 
// @@ -26,39 +24,13 @@ extern "C" #include "sha256c.c" } -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include - #include // isxdigit() +#include #include #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.sha256; - -// Only imports additional to interface. -#ifdef __cpp_lib_modules_ts -import std.io; -#endif - -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif -import butl.utility; // *case() -#else -#include +#include // *case() #include -#endif using namespace std; diff --git a/libbutl/sha256.hxx b/libbutl/sha256.hxx new file mode 100644 index 0000000..566068f --- /dev/null +++ b/libbutl/sha256.hxx @@ -0,0 +1,159 @@ +// file : libbutl/sha256.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // istream +#include // size_t +#include +#include // strlen(), memcpy() +#include // enable_if, is_integral + +#include + +namespace butl +{ + // SHA256 checksum calculator. + // + // For a single chunk of data a sum can be obtained in one line, for + // example: + // + // cerr << sha256 ("123").string () << endl; + // + class LIBBUTL_SYMEXPORT sha256 + { + public: + sha256 () {reset ();} + + // Append binary data. + // + void + append (const void*, std::size_t); + + sha256 (const void* b, std::size_t n): sha256 () {append (b, n);} + + // Append string. + // + // Note that the hash includes the '\0' terminator. Failed that, a call + // with an empty string will be indistinguishable from no call at all. + // + void + append (const std::string& s) {append (s.c_str (), s.size () + 1);} + + void + append (const char* s) {append (s, std::strlen (s) + 1);} + + explicit + sha256 (const std::string& s): sha256 () {append (s);} + + explicit + sha256 (const char* s): sha256 () {append (s);} + + // Append an integral type with a fast path optimization (see + // SHA256_Update() for details). 
+ // + void + append (char c) + { + std::uint32_t r ((ctx_.count >> 3) & 0x3f); + + if (1 < 64 - r) + { + ctx_.buf[r] = static_cast (c); + ctx_.count += 8; + } + else + append (&c, 1); + } + + template + typename std::enable_if::value>::type + append (T x) + { + const std::size_t len (sizeof (x)); + std::uint32_t r ((ctx_.count >> 3) & 0x3f); + + if (len < 64 - r) + { + std::memcpy (&ctx_.buf[r], &x, sizeof (x)); + ctx_.count += len << 3; + } + else + append (&x, len); + } + + // Append stream. + // + // Note that currently the stream is expected to be bufstreambuf-based + // (e.g., ifdstream). + // + void + append (std::istream&); + + explicit + sha256 (std::istream& i): sha256 () {append (i);} + + // Check if any data has been hashed. + // + bool + empty () const {return empty_;} + + // Reset to the default-constructed state. + // + void + reset (); + + // Extract result. + // + // It can be obtained as either a 32-byte binary digest or as a 64- + // character hex-encoded C-string. + // + using digest_type = std::uint8_t[32]; + + const digest_type& + binary () const; + + const char* + string () const; + + std::string + abbreviated_string (std::size_t n) const + { + return std::string (string (), n < 64 ? n : 64); + } + + private: + struct context // Note: identical to SHA256_CTX. + { + std::uint32_t state[8]; + std::uint64_t count; + std::uint8_t buf[64]; + }; + + union + { + mutable context ctx_; + mutable char buf_[sizeof (context)]; // Also used to store string rep. + }; + + mutable digest_type bin_; + mutable bool done_; + bool empty_; + }; + + // Convert a SHA256 string representation (64 hex digits) to the fingerprint + // canonical representation (32 colon-separated upper case hex digit pairs, + // like 01:AB:CD:...). Throw invalid_argument if the argument is not a valid + // SHA256 string. 
+ // + LIBBUTL_SYMEXPORT std::string + sha256_to_fingerprint (const std::string&); + + // Convert a fingerprint (32 colon-separated hex digit pairs) to the possibly + // abbreviated SHA256 string representation (up to 64 lower case hex digits). + // Throw invalid_argument if the first argument is not a valid fingerprint. + // + LIBBUTL_SYMEXPORT std::string + fingerprint_to_sha256 (const std::string&, std::size_t = 64); +} diff --git a/libbutl/sha256.mxx b/libbutl/sha256.mxx deleted file mode 100644 index d5128b1..0000000 --- a/libbutl/sha256.mxx +++ /dev/null @@ -1,174 +0,0 @@ -// file : libbutl/sha256.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // istream -#include // size_t -#include -#include // strlen(), memcpy() -#include // enable_if, is_integral -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.sha256; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // SHA256 checksum calculator. - // - // For a single chunk of data a sum can be obtained in one line, for - // example: - // - // cerr << sha256 ("123").string () << endl; - // - class LIBBUTL_SYMEXPORT sha256 - { - public: - sha256 () {reset ();} - - // Append binary data. - // - void - append (const void*, std::size_t); - - sha256 (const void* b, std::size_t n): sha256 () {append (b, n);} - - // Append string. - // - // Note that the hash includes the '\0' terminator. Failed that, a call - // with an empty string will be indistinguishable from no call at all. 
- // - void - append (const std::string& s) {append (s.c_str (), s.size () + 1);} - - void - append (const char* s) {append (s, std::strlen (s) + 1);} - - explicit - sha256 (const std::string& s): sha256 () {append (s);} - - explicit - sha256 (const char* s): sha256 () {append (s);} - - // Append an integral type with a fast path optimization (see - // SHA256_Update() for details). - // - void - append (char c) - { - std::uint32_t r ((ctx_.count >> 3) & 0x3f); - - if (1 < 64 - r) - { - ctx_.buf[r] = static_cast (c); - ctx_.count += 8; - } - else - append (&c, 1); - } - - template - typename std::enable_if::value>::type - append (T x) - { - const std::size_t len (sizeof (x)); - std::uint32_t r ((ctx_.count >> 3) & 0x3f); - - if (len < 64 - r) - { - std::memcpy (&ctx_.buf[r], &x, sizeof (x)); - ctx_.count += len << 3; - } - else - append (&x, len); - } - - // Append stream. - // - // Note that currently the stream is expected to be bufstreambuf-based - // (e.g., ifdstream). - // - void - append (std::istream&); - - explicit - sha256 (std::istream& i): sha256 () {append (i);} - - // Check if any data has been hashed. - // - bool - empty () const {return empty_;} - - // Reset to the default-constructed state. - // - void - reset (); - - // Extract result. - // - // It can be obtained as either a 32-byte binary digest or as a 64- - // character hex-encoded C-string. - // - using digest_type = std::uint8_t[32]; - - const digest_type& - binary () const; - - const char* - string () const; - - std::string - abbreviated_string (std::size_t n) const - { - return std::string (string (), n < 64 ? n : 64); - } - - private: - struct context // Note: identical to SHA256_CTX. - { - std::uint32_t state[8]; - std::uint64_t count; - std::uint8_t buf[64]; - }; - - union - { - mutable context ctx_; - mutable char buf_[sizeof (context)]; // Also used to store string rep. 
- }; - - mutable digest_type bin_; - mutable bool done_; - bool empty_; - }; - - // Convert a SHA256 string representation (64 hex digits) to the fingerprint - // canonical representation (32 colon-separated upper case hex digit pairs, - // like 01:AB:CD:...). Throw invalid_argument if the argument is not a valid - // SHA256 string. - // - LIBBUTL_SYMEXPORT std::string - sha256_to_fingerprint (const std::string&); - - // Convert a fingerprint (32 colon-separated hex digit pairs) to the possibly - // abbreviated SHA256 string representation (up to 64 lower case hex digits). - // Throw invalid_argument if the first argument is not a valid fingerprint. - // - LIBBUTL_SYMEXPORT std::string - fingerprint_to_sha256 (const std::string&, std::size_t = 64); -} diff --git a/libbutl/small-allocator.hxx b/libbutl/small-allocator.hxx new file mode 100644 index 0000000..429ba41 --- /dev/null +++ b/libbutl/small-allocator.hxx @@ -0,0 +1,181 @@ +// file : libbutl/small-allocator.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // size_t +#include // move() +#include // true_type, is_same + +#include + +namespace butl +{ + // Implementation of the allocator (and its buffer) for small containers. + // + template + struct small_allocator_buffer + { + using value_type = T; + + // Note that the names are decorated in order not to conflict with + // the container's interface. + + // If free_ is true then the buffer is not allocated. + // + alignas (alignof (value_type)) char data_[sizeof (value_type) * N]; + bool free_ = true; + + // Note that the buffer should be constructed before the container and + // destroyed after (since the container's destructor will be destroying + // elements potentially residing in the buffer). This means that the + // buffer should be inherited from and before the std:: container. 
+ // + small_allocator_buffer () = default; + + small_allocator_buffer (small_allocator_buffer&&) = delete; + small_allocator_buffer (const small_allocator_buffer&) = delete; + + small_allocator_buffer& operator= (small_allocator_buffer&&) = delete; + small_allocator_buffer& operator= (const small_allocator_buffer&) = delete; + }; + + template > + class small_allocator + { + public: + using buffer_type = B; + + explicit + small_allocator (buffer_type* b) noexcept: buf_ (b) {} + + // Allocator interface. + // + public: + using value_type = T; + + // These shouldn't be required but as usual there are old/broken + // implementations (like std::list in GCC 4.9). + // + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + + static void destroy (T* p) {p->~T ();} + + template + static void construct (T* p, A&&... a) + { + ::new (static_cast (p)) T (std::forward (a)...); + } + + // Allocator rebinding. + // + // We assume that only one of the rebound allocators will actually be + // doing allocations and that its value type is the same as buffer value + // type. This is needed, for instance, for std::list since what actually + // gets allocated is the node type, not T (see small_list for details). + // + template + struct rebind {using other = small_allocator;}; + + template + explicit + small_allocator (const small_allocator& x) noexcept + : buf_ (x.buf_) {} + + T* + allocate (std::size_t n) + { + // An implementation can rebind the allocator to something completely + // different. For example, VC15u3 with _ITERATOR_DEBUG_LEVEL != 0 + // allocates some extra stuff which cannot possibly come from the static + // buffer. + // + if (std::is_same::value) + { + if (buf_->free_) + { + assert (n >= N); // We should never be asked for less than N. + + if (n == N) + { + buf_->free_ = false; + return reinterpret_cast (buf_->data_); + } + // Fall through. 
+ } + } + + return static_cast (::operator new (sizeof (T) * n)); + } + + void + deallocate (void* p, std::size_t) noexcept + { + if (p == buf_->data_) + buf_->free_ = true; + else + ::operator delete (p); + } + + friend bool + operator== (small_allocator x, small_allocator y) noexcept + { + // We can use y to deallocate x's allocations if they use the same small + // buffer or neither uses its small buffer (which means all allocations, + // if any, have been from the shared heap). + // + // Things get trickier with rebinding. If A is allocator and B is its + // rebinding, then the following must hold true: + // + // A a1(a) => a1==a + // A a(b) => B(a)==b && A(b)==a + // + // As a result, the rebinding constructor above always copies the buffer + // pointer and we decide whether to use the small buffer by comparing + // allocator/buffer value types. + // + // We also expect that any copy of the original allocator made by the + // std:: implementation of the container is temporary (that is, it + // doesn't outlive the small buffer). + // + return (x.buf_ == y.buf_) || (x.buf_->free_ && y.buf_->free_); + } + + friend bool + operator!= (small_allocator x, small_allocator y) noexcept + { + return !(x == y); + } + + // It might get instantiated but should not be called. + // + small_allocator + select_on_container_copy_construction () const noexcept + { + assert (false); + return small_allocator (nullptr); + } + + // propagate_on_container_copy_assignment = false + // propagate_on_container_move_assignment = false + + // Swap is not supported (see explanation in small_vector::swap()). + // + using propagate_on_container_swap = std::true_type; + + void + swap (small_allocator&) = delete; + + private: + template + friend class small_allocator; // For buffer access in rebind. + + buffer_type* buf_; // Must not be NULL. 
+ }; +} diff --git a/libbutl/small-allocator.mxx b/libbutl/small-allocator.mxx deleted file mode 100644 index 5ef74be..0000000 --- a/libbutl/small-allocator.mxx +++ /dev/null @@ -1,195 +0,0 @@ -// file : libbutl/small-allocator.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include // size_t -#include // move() -#include // true_type, is_same -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.small_allocator; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Implementation of the allocator (and its buffer) for small containers. - // - template - struct small_allocator_buffer - { - using value_type = T; - - // Note that the names are decorated in order not to conflict with - // the container's interface. - - // If free_ is true then the buffer is not allocated. - // - alignas (alignof (value_type)) char data_[sizeof (value_type) * N]; - bool free_ = true; - - // Note that the buffer should be constructed before the container and - // destroyed after (since the container's destructor will be destroying - // elements potentially residing in the buffer). This means that the - // buffer should be inherited from and before the std:: container. - // - small_allocator_buffer () = default; - - small_allocator_buffer (small_allocator_buffer&&) = delete; - small_allocator_buffer (const small_allocator_buffer&) = delete; - - small_allocator_buffer& operator= (small_allocator_buffer&&) = delete; - small_allocator_buffer& operator= (const small_allocator_buffer&) = delete; - }; - - template > - class small_allocator - { - public: - using buffer_type = B; - - explicit - small_allocator (buffer_type* b) noexcept: buf_ (b) {} - - // Allocator interface. 
- // - public: - using value_type = T; - - // These shouldn't be required but as usual there are old/broken - // implementations (like std::list in GCC 4.9). - // - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - - static void destroy (T* p) {p->~T ();} - - template - static void construct (T* p, A&&... a) - { - ::new (static_cast (p)) T (std::forward (a)...); - } - - // Allocator rebinding. - // - // We assume that only one of the rebound allocators will actually be - // doing allocations and that its value type is the same as buffer value - // type. This is needed, for instance, for std::list since what actually - // gets allocated is the node type, not T (see small_list for details). - // - template - struct rebind {using other = small_allocator;}; - - template - explicit - small_allocator (const small_allocator& x) noexcept - : buf_ (x.buf_) {} - - T* - allocate (std::size_t n) - { - // An implementation can rebind the allocator to something completely - // different. For example, VC15u3 with _ITERATOR_DEBUG_LEVEL != 0 - // allocates some extra stuff which cannot possibly come from the static - // buffer. - // - if (std::is_same::value) - { - if (buf_->free_) - { - assert (n >= N); // We should never be asked for less than N. - - if (n == N) - { - buf_->free_ = false; - return reinterpret_cast (buf_->data_); - } - // Fall through. - } - } - - return static_cast (::operator new (sizeof (T) * n)); - } - - void - deallocate (void* p, std::size_t) noexcept - { - if (p == buf_->data_) - buf_->free_ = true; - else - ::operator delete (p); - } - - friend bool - operator== (small_allocator x, small_allocator y) noexcept - { - // We can use y to deallocate x's allocations if they use the same small - // buffer or neither uses its small buffer (which means all allocations, - // if any, have been from the shared heap). - // - // Things get trickier with rebinding. 
If A is allocator and B is its - // rebinding, then the following must hold true: - // - // A a1(a) => a1==a - // A a(b) => B(a)==b && A(b)==a - // - // As a result, the rebinding constructor above always copies the buffer - // pointer and we decide whether to use the small buffer by comparing - // allocator/buffer value types. - // - // We also expect that any copy of the original allocator made by the - // std:: implementation of the container is temporary (that is, it - // doesn't outlive the small buffer). - // - return (x.buf_ == y.buf_) || (x.buf_->free_ && y.buf_->free_); - } - - friend bool - operator!= (small_allocator x, small_allocator y) noexcept - { - return !(x == y); - } - - // It might get instantiated but should not be called. - // - small_allocator - select_on_container_copy_construction () const noexcept - { - assert (false); - return small_allocator (nullptr); - } - - // propagate_on_container_copy_assignment = false - // propagate_on_container_move_assignment = false - - // Swap is not supported (see explanation in small_vector::swap()). - // - using propagate_on_container_swap = std::true_type; - - void - swap (small_allocator&) = delete; - - private: - template - friend class small_allocator; // For buffer access in rebind. - - buffer_type* buf_; // Must not be NULL. - }; -} diff --git a/libbutl/small-forward-list.hxx b/libbutl/small-forward-list.hxx new file mode 100644 index 0000000..1278dc2 --- /dev/null +++ b/libbutl/small-forward-list.hxx @@ -0,0 +1,145 @@ +// file : libbutl/small-forward-list.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include // size_t +#include // move() +#include + +#include + +#include + +namespace butl +{ + // Issues and limitations. + // + // - Because small_allocator currently expects us to allocate the entire + // small buffer and there is no reserve() in std::forward_list, we + // currently only support N==1 (which we static_assert). 
+ // + // - swap() is deleted (see notes below). + // + // - The implementation doesn't allocate T but rather a "node" that normally + // consists of a pointers (next) and T. + // + template + using small_forward_list_node = +#if defined (_MSC_VER) + std::_Flist_node; +#elif defined (__GLIBCXX__) + std::_Fwd_list_node; +#elif defined (_LIBCPP_VERSION) + std::__forward_list_node; +#else +#error unknown standard library implementation +#endif + + template + using small_forward_list_buffer = + small_allocator_buffer, N>; + + template + class small_forward_list: + private small_forward_list_buffer, + public std::forward_list>> + { + static_assert (N == 1, "only small_forward_list or 1 currently supported"); + + public: + static constexpr const std::size_t small_size = N; + + using buffer_type = small_forward_list_buffer; + using allocator_type = small_allocator; + using base_type = std::forward_list; + + small_forward_list () + : base_type (allocator_type (this)) {} + + small_forward_list (std::initializer_list v) + : base_type (allocator_type (this)) + { + static_cast (*this) = v; + } + + template + small_forward_list (I b, I e) + : base_type (allocator_type (this)) + { + this->assign (b, e); + } + + explicit + small_forward_list (std::size_t n) + : base_type (allocator_type (this)) + { + this->resize (n); + } + + small_forward_list (std::size_t n, const T& x) + : base_type (allocator_type (this)) + { + this->assign (n, x); + } + + small_forward_list (const small_forward_list& v) + : buffer_type (), base_type (allocator_type (this)) + { + static_cast (*this) = v; + } + + small_forward_list& + operator= (const small_forward_list& v) + { + // Note: propagate_on_container_copy_assignment = false + // + static_cast (*this) = v; + return *this; + } + + small_forward_list (small_forward_list&& v) + : base_type (allocator_type (this)) + { + *this = std::move (v); // Delegate to operator=(&&). 
+ } + + small_forward_list& + operator= (small_forward_list&& v) + { + // VC14's implementation of operator=(&&) swaps pointers without regard + // for allocator (fixed in 15). + // +#if defined(_MSC_VER) && _MSC_VER <= 1900 + clear (); + for (T& x: v) + push_front (std::move (x)); + reverse (); + v.clear (); +#else + // Note: propagate_on_container_move_assignment = false + // + static_cast (*this) = std::move (v); +#endif + + return *this; + } + + small_forward_list& + operator= (std::initializer_list v) + { + static_cast (*this) = v; + return *this; + } + + // Implementing swap() under small buffer optimization is not trivial, to + // say the least (think of swapping two such buffers of different sizes). + // One easy option would be to force both in to the heap. + // + void + swap (small_forward_list&) = delete; + }; +} diff --git a/libbutl/small-forward-list.mxx b/libbutl/small-forward-list.mxx deleted file mode 100644 index 6aa4986..0000000 --- a/libbutl/small-forward-list.mxx +++ /dev/null @@ -1,159 +0,0 @@ -// file : libbutl/small-forward-list.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef __cpp_lib_modules_ts -#include // size_t -#include // move() -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.small_forward_list; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.small_allocator; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Issues and limitations. - // - // - Because small_allocator currently expects us to allocate the entire - // small buffer and there is no reserve() in std::forward_list, we - // currently only support N==1 (which we static_assert). - // - // - swap() is deleted (see notes below). - // - // - The implementation doesn't allocate T but rather a "node" that normally - // consists of a pointers (next) and T. 
- // - template - using small_forward_list_node = -#if defined (_MSC_VER) - std::_Flist_node; -#elif defined (__GLIBCXX__) - std::_Fwd_list_node; -#elif defined (_LIBCPP_VERSION) - std::__forward_list_node; -#else -#error unknown standard library implementation -#endif - - template - using small_forward_list_buffer = - small_allocator_buffer, N>; - - template - class small_forward_list: - private small_forward_list_buffer, - public std::forward_list>> - { - static_assert (N == 1, "only small_forward_list or 1 currently supported"); - - public: - static constexpr const std::size_t small_size = N; - - using buffer_type = small_forward_list_buffer; - using allocator_type = small_allocator; - using base_type = std::forward_list; - - small_forward_list () - : base_type (allocator_type (this)) {} - - small_forward_list (std::initializer_list v) - : base_type (allocator_type (this)) - { - static_cast (*this) = v; - } - - template - small_forward_list (I b, I e) - : base_type (allocator_type (this)) - { - this->assign (b, e); - } - - explicit - small_forward_list (std::size_t n) - : base_type (allocator_type (this)) - { - this->resize (n); - } - - small_forward_list (std::size_t n, const T& x) - : base_type (allocator_type (this)) - { - this->assign (n, x); - } - - small_forward_list (const small_forward_list& v) - : buffer_type (), base_type (allocator_type (this)) - { - static_cast (*this) = v; - } - - small_forward_list& - operator= (const small_forward_list& v) - { - // Note: propagate_on_container_copy_assignment = false - // - static_cast (*this) = v; - return *this; - } - - small_forward_list (small_forward_list&& v) - : base_type (allocator_type (this)) - { - *this = std::move (v); // Delegate to operator=(&&). - } - - small_forward_list& - operator= (small_forward_list&& v) - { - // VC14's implementation of operator=(&&) swaps pointers without regard - // for allocator (fixed in 15). 
- // -#if defined(_MSC_VER) && _MSC_VER <= 1900 - clear (); - for (T& x: v) - push_front (std::move (x)); - reverse (); - v.clear (); -#else - // Note: propagate_on_container_move_assignment = false - // - static_cast (*this) = std::move (v); -#endif - - return *this; - } - - small_forward_list& - operator= (std::initializer_list v) - { - static_cast (*this) = v; - return *this; - } - - // Implementing swap() under small buffer optimization is not trivial, to - // say the least (think of swapping two such buffers of different sizes). - // One easy option would be to force both in to the heap. - // - void - swap (small_forward_list&) = delete; - }; -} diff --git a/libbutl/small-list.hxx b/libbutl/small-list.hxx new file mode 100644 index 0000000..aaeef22 --- /dev/null +++ b/libbutl/small-list.hxx @@ -0,0 +1,150 @@ +// file : libbutl/small-list.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // size_t +#include // move() + +#include + +#include + +namespace butl +{ + // Issues and limitations. + // + // - VC's implementation of std::list allocates an extra "headnode" + // (something to do with additional iterator stability guarantees). Which + // means only empty small list will actually be "small". As a result, + // unless you don't care about VC, you should use small_forward_list + // instead. + // + // - Because small_allocator currently expects us to allocate the entire + // small buffer and there is no reserve() in std::list, we currently + // only support N==1 (which we static_assert). + // + // - swap() is deleted (see notes below). + // + // - The implementation doesn't allocate T but rather a "node" that normally + // consists of two pointers (prev/next) and T. 
+ // + template + using small_list_node = +#if defined (_MSC_VER) + std::_List_node; +#elif defined (__GLIBCXX__) + std::_List_node; +#elif defined (_LIBCPP_VERSION) + std::__list_node; +#else +#error unknown standard library implementation +#endif + + template + using small_list_buffer = small_allocator_buffer, N>; + + template + class small_list: + private small_list_buffer, + public std::list>> + { + static_assert (N == 1, "only small_list or 1 currently supported"); + + public: + static constexpr const std::size_t small_size = N; + + using buffer_type = small_list_buffer; + using allocator_type = small_allocator; + using base_type = std::list; + + small_list () + : base_type (allocator_type (this)) {} + + small_list (std::initializer_list v) + : base_type (allocator_type (this)) + { + static_cast (*this) = v; + } + + template + small_list (I b, I e) + : base_type (allocator_type (this)) + { + this->assign (b, e); + } + + explicit + small_list (std::size_t n) + : base_type (allocator_type (this)) + { + this->resize (n); + } + + small_list (std::size_t n, const T& x) + : base_type (allocator_type (this)) + { + this->assign (n, x); + } + + small_list (const small_list& v) + : buffer_type (), base_type (allocator_type (this)) + { + static_cast (*this) = v; + } + + small_list& + operator= (const small_list& v) + { + // Note: propagate_on_container_copy_assignment = false + // + static_cast (*this) = v; + return *this; + } + + small_list (small_list&& v) + : base_type (allocator_type (this)) + { + *this = std::move (v); // Delegate to operator=(&&). + } + + small_list& + operator= (small_list&& v) + { + // libstdc++'s implementation prior to GCC 6 is broken (calls swap()). + // Since there is no easy way to determine this library's version, for + // now this is always enabled. + // + // Similarly, VC14's implementation of operator=(&&) swaps pointers + // without regard for allocator (fixed in 15). 
+ // +#if defined(__GLIBCXX__) || (defined(_MSC_VER) && _MSC_VER <= 1900) + this->clear (); + for (T& x: v) + this->push_back (std::move (x)); + v.clear (); +#else + // Note: propagate_on_container_move_assignment = false + // + static_cast (*this) = std::move (v); +#endif + + return *this; + } + + small_list& + operator= (std::initializer_list v) + { + static_cast (*this) = v; + return *this; + } + + // Implementing swap() under small buffer optimization is not trivial, to + // say the least (think of swapping two such buffers of different sizes). + // One easy option would be to force both in to the heap. + // + void + swap (small_list&) = delete; + }; +} diff --git a/libbutl/small-list.mxx b/libbutl/small-list.mxx deleted file mode 100644 index ff62192..0000000 --- a/libbutl/small-list.mxx +++ /dev/null @@ -1,164 +0,0 @@ -// file : libbutl/small-list.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include // size_t -#include // move() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.small_list; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.small_allocator; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Issues and limitations. - // - // - VC's implementation of std::list allocates an extra "headnode" - // (something to do with additional iterator stability guarantees). Which - // means only empty small list will actually be "small". As a result, - // unless you don't care about VC, you should use small_forward_list - // instead. - // - // - Because small_allocator currently expects us to allocate the entire - // small buffer and there is no reserve() in std::list, we currently - // only support N==1 (which we static_assert). - // - // - swap() is deleted (see notes below). 
- // - // - The implementation doesn't allocate T but rather a "node" that normally - // consists of two pointers (prev/next) and T. - // - template - using small_list_node = -#if defined (_MSC_VER) - std::_List_node; -#elif defined (__GLIBCXX__) - std::_List_node; -#elif defined (_LIBCPP_VERSION) - std::__list_node; -#else -#error unknown standard library implementation -#endif - - template - using small_list_buffer = small_allocator_buffer, N>; - - template - class small_list: - private small_list_buffer, - public std::list>> - { - static_assert (N == 1, "only small_list or 1 currently supported"); - - public: - static constexpr const std::size_t small_size = N; - - using buffer_type = small_list_buffer; - using allocator_type = small_allocator; - using base_type = std::list; - - small_list () - : base_type (allocator_type (this)) {} - - small_list (std::initializer_list v) - : base_type (allocator_type (this)) - { - static_cast (*this) = v; - } - - template - small_list (I b, I e) - : base_type (allocator_type (this)) - { - this->assign (b, e); - } - - explicit - small_list (std::size_t n) - : base_type (allocator_type (this)) - { - this->resize (n); - } - - small_list (std::size_t n, const T& x) - : base_type (allocator_type (this)) - { - this->assign (n, x); - } - - small_list (const small_list& v) - : buffer_type (), base_type (allocator_type (this)) - { - static_cast (*this) = v; - } - - small_list& - operator= (const small_list& v) - { - // Note: propagate_on_container_copy_assignment = false - // - static_cast (*this) = v; - return *this; - } - - small_list (small_list&& v) - : base_type (allocator_type (this)) - { - *this = std::move (v); // Delegate to operator=(&&). - } - - small_list& - operator= (small_list&& v) - { - // libstdc++'s implementation prior to GCC 6 is broken (calls swap()). - // Since there is no easy way to determine this library's version, for - // now this is always enabled. 
- // - // Similarly, VC14's implementation of operator=(&&) swaps pointers - // without regard for allocator (fixed in 15). - // -#if defined(__GLIBCXX__) || (defined(_MSC_VER) && _MSC_VER <= 1900) - this->clear (); - for (T& x: v) - this->push_back (std::move (x)); - v.clear (); -#else - // Note: propagate_on_container_move_assignment = false - // - static_cast (*this) = std::move (v); -#endif - - return *this; - } - - small_list& - operator= (std::initializer_list v) - { - static_cast (*this) = v; - return *this; - } - - // Implementing swap() under small buffer optimization is not trivial, to - // say the least (think of swapping two such buffers of different sizes). - // One easy option would be to force both in to the heap. - // - void - swap (small_list&) = delete; - }; -} diff --git a/libbutl/small-vector-odb.hxx b/libbutl/small-vector-odb.hxx index af9d96c..289ca38 100644 --- a/libbutl/small-vector-odb.hxx +++ b/libbutl/small-vector-odb.hxx @@ -5,7 +5,7 @@ #include -#include +#include #include diff --git a/libbutl/small-vector.hxx b/libbutl/small-vector.hxx new file mode 100644 index 0000000..f0594b1 --- /dev/null +++ b/libbutl/small-vector.hxx @@ -0,0 +1,175 @@ +// file : libbutl/small-vector.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // size_t +#include // move() + +#include + +#include + +namespace butl +{ + // Issues and limitations. + // + // - vector::reserve() may allocate more per the spec. But the three main + // C++ runtimes (libstdc++, libc++, and msvc) all seem to do the right + // thing. + // + // - What if in most cases the vector is empty? How can we avoid initial + // reserve? Provide no_reserve flag or some such? Is it really worth it? + // + // - swap() is deleted (see notes below). 
+ // + template + class small_vector: private small_allocator_buffer, + public std::vector> + { + public: + static constexpr const std::size_t small_size = N; + + using buffer_type = small_allocator_buffer; + using allocator_type = small_allocator; + using base_type = std::vector; + + small_vector () + : base_type (allocator_type (this)) + { + reserve (); + } + + small_vector (std::initializer_list v) + : base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + static_cast (*this) = v; + } + + template + small_vector (I b, I e) + : base_type (allocator_type (this)) + { + // While we could optimize this for random access iterators, N will + // usually be pretty small. Let's hope the compiler sees this and does + // some magic for us. + // + std::size_t n (0); + for (I i (b); i != e && n <= N; ++i) ++n; + + if (n <= N) + reserve (); + + this->assign (b, e); + } + + explicit + small_vector (std::size_t n) + : base_type (allocator_type (this)) + { + if (n <= N) + reserve (); + + this->resize (n); + } + + small_vector (std::size_t n, const T& x) + : base_type (allocator_type (this)) + { + if (n <= N) + reserve (); + + this->assign (n, x); + } + + small_vector (const small_vector& v) + : buffer_type (), base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + static_cast (*this) = v; + } + + small_vector& + operator= (const small_vector& v) + { + // Note: propagate_on_container_copy_assignment = false + // + static_cast (*this) = v; + return *this; + } + + small_vector (small_vector&& v) + : base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + *this = std::move (v); // Delegate to operator=(&&). + + // Note that in contrast to the move assignment operator, the + // constructor must clear the other vector. 
+ // + v.clear (); + } + + small_vector& + operator= (small_vector&& v) + { + // VC's implementation of operator=(&&) (both 14 and 15) frees the + // memory and then reallocated with capacity equal to v.size(). This is + // clearly sub-optimal (the existing buffer could be reused) so we hope + // this will be fixed eventually (VSO#367146; reportedly fixed for + // VC15U1). + // +#if defined(_MSC_VER) && _MSC_VER <= 1910 + if (v.size () <= N) + { + clear (); + for (T& x: v) + push_back (std::move (x)); + v.clear (); + } + else +#endif + + // Note: propagate_on_container_move_assignment = false + // + static_cast (*this) = std::move (v); + + return *this; + } + + small_vector& + operator= (std::initializer_list v) + { + static_cast (*this) = v; + return *this; + } + + // Implementing swap() under small buffer optimization is not trivial, to + // say the least (think of swapping two such buffers of different sizes). + // One easy option would be to force both in to the heap. + // + void + swap (small_vector&) = delete; + + void + reserve (std::size_t n = N) + { + base_type::reserve (n < N ? N : n); + } + + void + shrink_to_fit () + { + if (this->capacity () > N) + base_type::shrink_to_fit (); + } + }; +} diff --git a/libbutl/small-vector.mxx b/libbutl/small-vector.mxx deleted file mode 100644 index 7f9bb1e..0000000 --- a/libbutl/small-vector.mxx +++ /dev/null @@ -1,189 +0,0 @@ -// file : libbutl/small-vector.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include // size_t -#include // move() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.small_vector; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.small_allocator; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Issues and limitations. - // - // - vector::reserve() may allocate more per the spec. 
But the three main - // C++ runtimes (libstdc++, libc++, and msvc) all seem to do the right - // thing. - // - // - What if in most cases the vector is empty? How can we avoid initial - // reserve? Provide no_reserve flag or some such? Is it really worth it? - // - // - swap() is deleted (see notes below). - // - template - class small_vector: private small_allocator_buffer, - public std::vector> - { - public: - static constexpr const std::size_t small_size = N; - - using buffer_type = small_allocator_buffer; - using allocator_type = small_allocator; - using base_type = std::vector; - - small_vector () - : base_type (allocator_type (this)) - { - reserve (); - } - - small_vector (std::initializer_list v) - : base_type (allocator_type (this)) - { - if (v.size () <= N) - reserve (); - - static_cast (*this) = v; - } - - template - small_vector (I b, I e) - : base_type (allocator_type (this)) - { - // While we could optimize this for random access iterators, N will - // usually be pretty small. Let's hope the compiler sees this and does - // some magic for us. - // - std::size_t n (0); - for (I i (b); i != e && n <= N; ++i) ++n; - - if (n <= N) - reserve (); - - this->assign (b, e); - } - - explicit - small_vector (std::size_t n) - : base_type (allocator_type (this)) - { - if (n <= N) - reserve (); - - this->resize (n); - } - - small_vector (std::size_t n, const T& x) - : base_type (allocator_type (this)) - { - if (n <= N) - reserve (); - - this->assign (n, x); - } - - small_vector (const small_vector& v) - : buffer_type (), base_type (allocator_type (this)) - { - if (v.size () <= N) - reserve (); - - static_cast (*this) = v; - } - - small_vector& - operator= (const small_vector& v) - { - // Note: propagate_on_container_copy_assignment = false - // - static_cast (*this) = v; - return *this; - } - - small_vector (small_vector&& v) - : base_type (allocator_type (this)) - { - if (v.size () <= N) - reserve (); - - *this = std::move (v); // Delegate to operator=(&&). 
- - // Note that in contrast to the move assignment operator, the - // constructor must clear the other vector. - // - v.clear (); - } - - small_vector& - operator= (small_vector&& v) - { - // VC's implementation of operator=(&&) (both 14 and 15) frees the - // memory and then reallocated with capacity equal to v.size(). This is - // clearly sub-optimal (the existing buffer could be reused) so we hope - // this will be fixed eventually (VSO#367146; reportedly fixed for - // VC15U1). - // -#if defined(_MSC_VER) && _MSC_VER <= 1910 - if (v.size () <= N) - { - clear (); - for (T& x: v) - push_back (std::move (x)); - v.clear (); - } - else -#endif - - // Note: propagate_on_container_move_assignment = false - // - static_cast (*this) = std::move (v); - - return *this; - } - - small_vector& - operator= (std::initializer_list v) - { - static_cast (*this) = v; - return *this; - } - - // Implementing swap() under small buffer optimization is not trivial, to - // say the least (think of swapping two such buffers of different sizes). - // One easy option would be to force both in to the heap. - // - void - swap (small_vector&) = delete; - - void - reserve (std::size_t n = N) - { - base_type::reserve (n < N ? N : n); - } - - void - shrink_to_fit () - { - if (this->capacity () > N) - base_type::shrink_to_fit (); - } - }; -} diff --git a/libbutl/standard-version.cxx b/libbutl/standard-version.cxx index 863cb29..36f4830 100644 --- a/libbutl/standard-version.cxx +++ b/libbutl/standard-version.cxx @@ -1,41 +1,14 @@ // file : libbutl/standard-version.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include - #include // strtoull() #include // move() #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.standard_version; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -#endif - -import butl.utility; -#else -#include // alnum() -#endif + +#include // alnum() using namespace std; diff --git a/libbutl/standard-version.hxx b/libbutl/standard-version.hxx new file mode 100644 index 0000000..5810e13 --- /dev/null +++ b/libbutl/standard-version.hxx @@ -0,0 +1,340 @@ +// file : libbutl/standard-version.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // uint*_t +#include // size_t +#include + +#include + +#include + +// FreeBSD defines these macros in its . +// +#ifdef major +# undef major +#endif + +#ifdef minor +# undef minor +#endif + +namespace butl +{ + // The build2 "standard version" (normal, earliest, and stub): + // + // [+-]..[-(a|b).[.[.]]][+] + // [+-]..- + // 0[+] + // + // The normal version can be release, final pre-release, or a pre-release + // snapshot (release is naturally always final). Pre-release can be alpha or + // beta. + // + // The numeric version format is AAAAABBBBBCCCCCDDDE where: + // + // AAAAA - major version number + // BBBBB - minor version number + // CCCCC - patch version number + // DDD - alpha / beta (DDD + 500) version number + // E - final (0) / snapshot (1) + // + // When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example: + // + // Version AAAAABBBBBCCCCCDDDE + // + // 0.1.0 0000000001000000000 + // 0.1.2 0000000001000020000 + // 1.2.3 0000100002000030000 + // 2.2.0-a.1 0000200001999990010 + // 3.0.0-b.2 0000299999999995020 + // 2.2.0-a.1.z 0000200001999990011 + // + // Stub is represented as ~0 (but is not considered a pre-release). + // + struct LIBBUTL_SYMEXPORT standard_version + { + // Invariants: + // + // 1. allow_earliest + // ? (E == 1) || (snapshot_sn == 0) + // : (E == 0) == (snapshot_sn == 0) + // + // 2. version != 0 || allow_stub && epoch == 0 && snapshot_sn == 0 + // + // 3. 
snapshot_sn != latest_sn && snapshot_sn != 0 || snapshot_id.empty () + // + static const std::uint64_t latest_sn = std::uint64_t (~0); + + std::uint16_t epoch = 1; // 0 if a stub, 1 if not specified. + std::uint64_t version = 0; // AAAAABBBBBCCCCCDDDE or ~0 for stub. + std::uint64_t snapshot_sn = 0; // 0 if not specifed, latest_sn if 'z'. + std::string snapshot_id; // Empty if not specified. + std::uint16_t revision = 0; // 0 if not specified. + + std::uint32_t major () const noexcept; + std::uint32_t minor () const noexcept; + std::uint32_t patch () const noexcept; + + // Return the alpha/beta version number if pre-release and nullopt + // otherwise. + // + // Can be used as a predicate and also to get the value. + // + optional alpha () const noexcept; + optional beta () const noexcept; + + // Return the DDD version part if a pre-release and nullopt otherwise. + // + // Can be used as a predicate and also to get the value. Note that 0 is + // ambiguous (-[ab].0.z, or earliest version; see below). + // + optional pre_release () const noexcept; + + // String representations. + // + // Note: return empty if the corresponding component is unspecified. + // + std::string string () const; // Package version. + + // Project version (no epoch). + // + std::string string_project (bool revision = false) const; + + std::string string_project_id () const; // Project version id (no snapsn). + std::string string_version () const; // Version only (no snapshot). + std::string string_pre_release () const; // Pre-release part only (a.1). + std::string string_snapshot () const; // Snapshot part only (1234.1f23). + + // Predicates. See also alpha(), beta(), and pre_release() above. + // + // The earliest version is represented as the (otherwise illegal) DDDE + // value 0001 and snapshot_sn 0. Note that the earliest version is a final + // alpha pre-release. 
+ // + bool empty () const noexcept {return version == 0;} + bool stub () const noexcept {return version == std::uint64_t (~0);} + bool earliest () const noexcept; + bool release () const noexcept; + bool snapshot () const noexcept {return snapshot_sn != 0;} + bool latest_snapshot () const noexcept; + bool final () const noexcept; + + // Comparison of empty or stub versions doesn't make sense. + // + int + compare (const standard_version& v, + bool ignore_revision = false) const noexcept + { + if (epoch != v.epoch) + return epoch < v.epoch ? -1 : 1; + + if (version != v.version) + return version < v.version ? -1 : 1; + + if (snapshot_sn != v.snapshot_sn) + return snapshot_sn < v.snapshot_sn ? -1 : 1; + + if (!ignore_revision) + { + if (revision != v.revision) + return revision < v.revision ? -1 : 1; + } + + return 0; + } + + // Parse the version. Throw std::invalid_argument if the format is not + // recognizable or components are invalid. + // + enum flags + { + none = 0, + allow_earliest = 0x01, // Allow ..- form. + allow_stub = 0x02 // Allow 0[+] form. + }; + + explicit + standard_version (const std::string&, flags = none); + + explicit + standard_version (std::uint64_t version, flags = none); + + standard_version (std::uint64_t version, + const std::string& snapshot, + flags = none); + + // Note that the default epoch is 1 for real versions and 0 for stubs. + // + standard_version (std::uint16_t epoch, + std::uint64_t version, + const std::string& snapshot, + std::uint16_t revision, + flags = none); + + standard_version (std::uint16_t epoch, + std::uint64_t version, + std::uint64_t snapshot_sn, + std::string snapshot_id, + std::uint16_t revision, + flags = none); + + // Version as separate major, minor, patch, and pre-release components. + // Note that the pre-release here is in the DDD form, that is, incremented + // by 500 for betas. 
+ // + standard_version (std::uint16_t epoch, + std::uint32_t major, + std::uint32_t minor, + std::uint32_t patch, + std::uint16_t pre_release = 0, + std::uint16_t revision = 0); + + standard_version (std::uint16_t epoch, + std::uint32_t major, + std::uint32_t minor, + std::uint32_t patch, + std::uint16_t pre_release, + std::uint64_t snapshot_sn, + std::string snapshot_id, + std::uint16_t revision = 0); + + // Create empty version. + // + standard_version () {} // = default; @@ MOD VC + }; + + // Try to parse a string as a standard version returning nullopt if invalid. + // + LIBBUTL_SYMEXPORT optional + parse_standard_version (const std::string&, + standard_version::flags = standard_version::none); + + inline bool + operator< (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) < 0; + } + + inline bool + operator> (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) > 0; + } + + inline bool + operator== (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) == 0; + } + + inline bool + operator<= (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) <= 0; + } + + inline bool + operator>= (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) >= 0; + } + + inline bool + operator!= (const standard_version& x, const standard_version& y) noexcept + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const standard_version& x) + { + return o << x.string (); + } + + inline standard_version::flags + operator& (standard_version::flags, standard_version::flags); + + inline standard_version::flags + operator| (standard_version::flags, standard_version::flags); + + inline standard_version::flags + operator&= (standard_version::flags&, standard_version::flags); + + inline standard_version::flags + operator|= (standard_version::flags&, standard_version::flags); + + 
// The build2 "standard version" constraint: + // + // ('==' | '>' | '<' | '>=' | '<=') + // ('^' | '~') + // ('(' | '[') (')' | ']') + // + // The version may be `$` which refers to the dependent package version. + // + struct LIBBUTL_SYMEXPORT standard_version_constraint + { + butl::optional min_version; + butl::optional max_version; + bool min_open; + bool max_open; + + // Parse the version constraint. Throw std::invalid_argument on error. + // + explicit + standard_version_constraint (const std::string&); + + // As above but also completes the special `$` version using the specified + // dependent package version. + // + standard_version_constraint (const std::string&, + const standard_version& dependent_version); + + // Throw std::invalid_argument if the specified version range is invalid. + // + standard_version_constraint ( + butl::optional min_version, bool min_open, + butl::optional max_version, bool max_open); + + explicit + standard_version_constraint (const standard_version& v) + : standard_version_constraint (v, false, v, false) {} + + standard_version_constraint () = default; + + std::string + string () const; + + bool + empty () const noexcept {return !min_version && !max_version;} + + bool + satisfies (const standard_version&) const noexcept; + }; + + inline bool + operator== (const standard_version_constraint& x, + const standard_version_constraint& y) + { + return x.min_version == y.min_version && x.max_version == y.max_version && + x.min_open == y.min_open && x.max_open == y.max_open; + } + + inline bool + operator!= (const standard_version_constraint& x, + const standard_version_constraint& y) + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const standard_version_constraint& x) + { + return o << x.string (); + } +} + +#include diff --git a/libbutl/standard-version.mxx b/libbutl/standard-version.mxx deleted file mode 100644 index b86e3a9..0000000 --- a/libbutl/standard-version.mxx +++ /dev/null @@ -1,357 +0,0 
@@ -// file : libbutl/standard-version.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // uint*_t -#include // size_t -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.standard_version; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -#else -#include -#endif - -#include - -// FreeBSD defines these macros in its . -// -#ifdef major -# undef major -#endif - -#ifdef minor -# undef minor -#endif - -LIBBUTL_MODEXPORT namespace butl -{ - // The build2 "standard version" (normal, earliest, and stub): - // - // [+-]..[-(a|b).[.[.]]][+] - // [+-]..- - // 0[+] - // - // The normal version can be release, final pre-release, or a pre-release - // snapshot (release is naturally always final). Pre-release can be alpha or - // beta. - // - // The numeric version format is AAAAABBBBBCCCCCDDDE where: - // - // AAAAA - major version number - // BBBBB - minor version number - // CCCCC - patch version number - // DDD - alpha / beta (DDD + 500) version number - // E - final (0) / snapshot (1) - // - // When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example: - // - // Version AAAAABBBBBCCCCCDDDE - // - // 0.1.0 0000000001000000000 - // 0.1.2 0000000001000020000 - // 1.2.3 0000100002000030000 - // 2.2.0-a.1 0000200001999990010 - // 3.0.0-b.2 0000299999999995020 - // 2.2.0-a.1.z 0000200001999990011 - // - // Stub is represented as ~0 (but is not considered a pre-release). - // - struct LIBBUTL_SYMEXPORT standard_version - { - // Invariants: - // - // 1. allow_earliest - // ? (E == 1) || (snapshot_sn == 0) - // : (E == 0) == (snapshot_sn == 0) - // - // 2. version != 0 || allow_stub && epoch == 0 && snapshot_sn == 0 - // - // 3. 
snapshot_sn != latest_sn && snapshot_sn != 0 || snapshot_id.empty () - // - static const std::uint64_t latest_sn = std::uint64_t (~0); - - std::uint16_t epoch = 1; // 0 if a stub, 1 if not specified. - std::uint64_t version = 0; // AAAAABBBBBCCCCCDDDE or ~0 for stub. - std::uint64_t snapshot_sn = 0; // 0 if not specifed, latest_sn if 'z'. - std::string snapshot_id; // Empty if not specified. - std::uint16_t revision = 0; // 0 if not specified. - - std::uint32_t major () const noexcept; - std::uint32_t minor () const noexcept; - std::uint32_t patch () const noexcept; - - // Return the alpha/beta version number if pre-release and nullopt - // otherwise. - // - // Can be used as a predicate and also to get the value. - // - optional alpha () const noexcept; - optional beta () const noexcept; - - // Return the DDD version part if a pre-release and nullopt otherwise. - // - // Can be used as a predicate and also to get the value. Note that 0 is - // ambiguous (-[ab].0.z, or earliest version; see below). - // - optional pre_release () const noexcept; - - // String representations. - // - // Note: return empty if the corresponding component is unspecified. - // - std::string string () const; // Package version. - - // Project version (no epoch). - // - std::string string_project (bool revision = false) const; - - std::string string_project_id () const; // Project version id (no snapsn). - std::string string_version () const; // Version only (no snapshot). - std::string string_pre_release () const; // Pre-release part only (a.1). - std::string string_snapshot () const; // Snapshot part only (1234.1f23). - - // Predicates. See also alpha(), beta(), and pre_release() above. - // - // The earliest version is represented as the (otherwise illegal) DDDE - // value 0001 and snapshot_sn 0. Note that the earliest version is a final - // alpha pre-release. 
- // - bool empty () const noexcept {return version == 0;} - bool stub () const noexcept {return version == std::uint64_t (~0);} - bool earliest () const noexcept; - bool release () const noexcept; - bool snapshot () const noexcept {return snapshot_sn != 0;} - bool latest_snapshot () const noexcept; - bool final () const noexcept; - - // Comparison of empty or stub versions doesn't make sense. - // - int - compare (const standard_version& v, - bool ignore_revision = false) const noexcept - { - if (epoch != v.epoch) - return epoch < v.epoch ? -1 : 1; - - if (version != v.version) - return version < v.version ? -1 : 1; - - if (snapshot_sn != v.snapshot_sn) - return snapshot_sn < v.snapshot_sn ? -1 : 1; - - if (!ignore_revision) - { - if (revision != v.revision) - return revision < v.revision ? -1 : 1; - } - - return 0; - } - - // Parse the version. Throw std::invalid_argument if the format is not - // recognizable or components are invalid. - // - enum flags - { - none = 0, - allow_earliest = 0x01, // Allow ..- form. - allow_stub = 0x02 // Allow 0[+] form. - }; - - explicit - standard_version (const std::string&, flags = none); - - explicit - standard_version (std::uint64_t version, flags = none); - - standard_version (std::uint64_t version, - const std::string& snapshot, - flags = none); - - // Note that the default epoch is 1 for real versions and 0 for stubs. - // - standard_version (std::uint16_t epoch, - std::uint64_t version, - const std::string& snapshot, - std::uint16_t revision, - flags = none); - - standard_version (std::uint16_t epoch, - std::uint64_t version, - std::uint64_t snapshot_sn, - std::string snapshot_id, - std::uint16_t revision, - flags = none); - - // Version as separate major, minor, patch, and pre-release components. - // Note that the pre-release here is in the DDD form, that is, incremented - // by 500 for betas. 
- // - standard_version (std::uint16_t epoch, - std::uint32_t major, - std::uint32_t minor, - std::uint32_t patch, - std::uint16_t pre_release = 0, - std::uint16_t revision = 0); - - standard_version (std::uint16_t epoch, - std::uint32_t major, - std::uint32_t minor, - std::uint32_t patch, - std::uint16_t pre_release, - std::uint64_t snapshot_sn, - std::string snapshot_id, - std::uint16_t revision = 0); - - // Create empty version. - // - standard_version () {} // = default; @@ MOD VC - }; - - // Try to parse a string as a standard version returning nullopt if invalid. - // - LIBBUTL_SYMEXPORT optional - parse_standard_version (const std::string&, - standard_version::flags = standard_version::none); - - inline bool - operator< (const standard_version& x, const standard_version& y) noexcept - { - return x.compare (y) < 0; - } - - inline bool - operator> (const standard_version& x, const standard_version& y) noexcept - { - return x.compare (y) > 0; - } - - inline bool - operator== (const standard_version& x, const standard_version& y) noexcept - { - return x.compare (y) == 0; - } - - inline bool - operator<= (const standard_version& x, const standard_version& y) noexcept - { - return x.compare (y) <= 0; - } - - inline bool - operator>= (const standard_version& x, const standard_version& y) noexcept - { - return x.compare (y) >= 0; - } - - inline bool - operator!= (const standard_version& x, const standard_version& y) noexcept - { - return !(x == y); - } - - inline std::ostream& - operator<< (std::ostream& o, const standard_version& x) - { - return o << x.string (); - } - - inline standard_version::flags - operator& (standard_version::flags, standard_version::flags); - - inline standard_version::flags - operator| (standard_version::flags, standard_version::flags); - - inline standard_version::flags - operator&= (standard_version::flags&, standard_version::flags); - - inline standard_version::flags - operator|= (standard_version::flags&, standard_version::flags); - - 
// The build2 "standard version" constraint: - // - // ('==' | '>' | '<' | '>=' | '<=') - // ('^' | '~') - // ('(' | '[') (')' | ']') - // - // The version may be `$` which refers to the dependent package version. - // - struct LIBBUTL_SYMEXPORT standard_version_constraint - { - butl::optional min_version; - butl::optional max_version; - bool min_open; - bool max_open; - - // Parse the version constraint. Throw std::invalid_argument on error. - // - explicit - standard_version_constraint (const std::string&); - - // As above but also completes the special `$` version using the specified - // dependent package version. - // - standard_version_constraint (const std::string&, - const standard_version& dependent_version); - - // Throw std::invalid_argument if the specified version range is invalid. - // - standard_version_constraint ( - butl::optional min_version, bool min_open, - butl::optional max_version, bool max_open); - - explicit - standard_version_constraint (const standard_version& v) - : standard_version_constraint (v, false, v, false) {} - - standard_version_constraint () = default; - - std::string - string () const; - - bool - empty () const noexcept {return !min_version && !max_version;} - - bool - satisfies (const standard_version&) const noexcept; - }; - - inline bool - operator== (const standard_version_constraint& x, - const standard_version_constraint& y) - { - return x.min_version == y.min_version && x.max_version == y.max_version && - x.min_open == y.min_open && x.max_open == y.max_open; - } - - inline bool - operator!= (const standard_version_constraint& x, - const standard_version_constraint& y) - { - return !(x == y); - } - - inline std::ostream& - operator<< (std::ostream& o, const standard_version_constraint& x) - { - return o << x.string (); - } -} - -#include diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx index 5d5ec47..aea1338 100644 --- a/libbutl/string-parser.cxx +++ b/libbutl/string-parser.cxx @@ -1,33 +1,7 @@ // file 
: libbutl/string-parser.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include // move() -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.string_parser; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#endif +#include using namespace std; diff --git a/libbutl/string-parser.hxx b/libbutl/string-parser.hxx new file mode 100644 index 0000000..9fc20c0 --- /dev/null +++ b/libbutl/string-parser.hxx @@ -0,0 +1,51 @@ +// file : libbutl/string-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // size_t +#include // pair +#include // invalid_argument + +#include + +namespace butl +{ + namespace string_parser + { + class LIBBUTL_SYMEXPORT invalid_string: public std::invalid_argument + { + public: + invalid_string (std::size_t p, const std::string& d) + : invalid_argument (d), position (p) {} + + std::size_t position; // Zero-based. + }; + + // Parse a whitespace-separated list of strings. Can contain single or + // double quoted substrings. No escaping is supported. If unquote is true, + // return one-level unquoted values. Throw invalid_string in case of + // invalid quoting. + // + LIBBUTL_SYMEXPORT std::vector + parse_quoted (const std::string&, bool unquote); + + // As above but return a list of string and zero-based position pairs. + // Position is useful for issuing diagnostics about an invalid string + // during second-level parsing. + // + LIBBUTL_SYMEXPORT std::vector> + parse_quoted_position (const std::string&, bool unquote); + + // Remove a single level of quotes. Note that the format or the + // correctness of the quotation is not validated. 
+ // + LIBBUTL_SYMEXPORT std::string + unquote (const std::string&); + + LIBBUTL_SYMEXPORT std::vector + unquote (const std::vector&); + } +} diff --git a/libbutl/string-parser.mxx b/libbutl/string-parser.mxx deleted file mode 100644 index 4ff1590..0000000 --- a/libbutl/string-parser.mxx +++ /dev/null @@ -1,66 +0,0 @@ -// file : libbutl/string-parser.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // size_t -#include // pair -#include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.string_parser; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - namespace string_parser - { - class LIBBUTL_SYMEXPORT invalid_string: public std::invalid_argument - { - public: - invalid_string (std::size_t p, const std::string& d) - : invalid_argument (d), position (p) {} - - std::size_t position; // Zero-based. - }; - - // Parse a whitespace-separated list of strings. Can contain single or - // double quoted substrings. No escaping is supported. If unquote is true, - // return one-level unquoted values. Throw invalid_string in case of - // invalid quoting. - // - LIBBUTL_SYMEXPORT std::vector - parse_quoted (const std::string&, bool unquote); - - // As above but return a list of string and zero-based position pairs. - // Position is useful for issuing diagnostics about an invalid string - // during second-level parsing. - // - LIBBUTL_SYMEXPORT std::vector> - parse_quoted_position (const std::string&, bool unquote); - - // Remove a single level of quotes. Note that the format or the - // correctness of the quotation is not validated. 
- // - LIBBUTL_SYMEXPORT std::string - unquote (const std::string&); - - LIBBUTL_SYMEXPORT std::vector - unquote (const std::vector&); - } -} diff --git a/libbutl/string-table.hxx b/libbutl/string-table.hxx new file mode 100644 index 0000000..010fb01 --- /dev/null +++ b/libbutl/string-table.hxx @@ -0,0 +1,95 @@ +// file : libbutl/string-table.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace butl +{ + // A pool of strings and, optionally, other accompanying data in which each + // entry is assigned an individual index (or id) of type I (e.g., uint8_t, + // uint16_t, etc., depending on how many entries are expected). Index value + // 0 is reserved to indicate the "no entry" condition. + // + template + struct string_table_element + { + const I i; + const D d; + }; + + template + struct string_table_element + { + const I i; + const std::string d; + }; + + // For custom data the options are to call the data member 'key' or to + // specialize this traits. + // + template + struct string_table_traits + { + static const std::string& + key (const D& d) {return d.key;} + }; + + template <> + struct string_table_traits + { + static const std::string& + key (const std::string& d) {return d;} + }; + + template + struct string_table + { + // Insert new entry unless one already exists. + // + I + insert (const D&); + + // Find existing. + // + I + find (const std::string& k) const + { + auto i (map_.find (key_type (&k))); + return i != map_.end () ? i->second.i : 0; + } + + // Reverse lookup. 
+ // + const D& + operator[] (I i) const {assert (i > 0); return vec_[i - 1]->second.d;} + + I + size () const {return static_cast (vec_.size ());} + + bool + empty () const {return vec_.empty ();} + + void + clear () {vec_.clear (); map_.clear ();} + + private: + using key_type = butl::map_key; + using value_type = string_table_element; + using map_type = std::unordered_map; + using traits_type = string_table_traits; + + map_type map_; + std::vector vec_; + }; +} + +#include diff --git a/libbutl/string-table.mxx b/libbutl/string-table.mxx deleted file mode 100644 index 78c6cd6..0000000 --- a/libbutl/string-table.mxx +++ /dev/null @@ -1,113 +0,0 @@ -// file : libbutl/string-table.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include - -#include // numeric_limits -#include // size_t -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.string_table; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.multi_index; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // A pool of strings and, optionally, other accompanying data in which each - // entry is assigned an individual index (or id) of type I (e.g., uint8_t, - // uint16_t, etc., depending on how many entries are expected). Index value - // 0 is reserved to indicate the "no entry" condition. - // - template - struct string_table_element - { - const I i; - const D d; - }; - - template - struct string_table_element - { - const I i; - const std::string d; - }; - - // For custom data the options are to call the data member 'key' or to - // specialize this traits. 
- // - template - struct string_table_traits - { - static const std::string& - key (const D& d) {return d.key;} - }; - - template <> - struct string_table_traits - { - static const std::string& - key (const std::string& d) {return d;} - }; - - template - struct string_table - { - // Insert new entry unless one already exists. - // - I - insert (const D&); - - // Find existing. - // - I - find (const std::string& k) const - { - auto i (map_.find (key_type (&k))); - return i != map_.end () ? i->second.i : 0; - } - - // Reverse lookup. - // - const D& - operator[] (I i) const {assert (i > 0); return vec_[i - 1]->second.d;} - - I - size () const {return static_cast (vec_.size ());} - - bool - empty () const {return vec_.empty ();} - - void - clear () {vec_.clear (); map_.clear ();} - - private: - using key_type = butl::map_key; - using value_type = string_table_element; - using map_type = std::unordered_map; - using traits_type = string_table_traits; - - map_type map_; - std::vector vec_; - }; -} - -#include diff --git a/libbutl/string-table.txx b/libbutl/string-table.txx index 4db0a6b..8416b48 100644 --- a/libbutl/string-table.txx +++ b/libbutl/string-table.txx @@ -1,6 +1,9 @@ // file : libbutl/string-table.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file +#include // numeric_limits +#include // size_t + namespace butl { template diff --git a/libbutl/tab-parser.cxx b/libbutl/tab-parser.cxx index cca2792..d7e5a14 100644 --- a/libbutl/tab-parser.cxx +++ b/libbutl/tab-parser.cxx @@ -1,39 +1,12 @@ // file : libbutl/tab-parser.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include +#include #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.tab_parser; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif -import butl.string_parser; -#else -#include -#endif +#include using namespace std; diff --git a/libbutl/tab-parser.hxx b/libbutl/tab-parser.hxx new file mode 100644 index 0000000..2dc612b --- /dev/null +++ b/libbutl/tab-parser.hxx @@ -0,0 +1,68 @@ +// file : libbutl/tab-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include +#include // uint64_t +#include // runtime_error + +#include + +namespace butl +{ + class LIBBUTL_SYMEXPORT tab_parsing: public std::runtime_error + { + public: + tab_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + // Line and columns are useful for issuing diagnostics about invalid or + // missing fields. + // + struct tab_field + { + std::string value; // Field string (quoting preserved). + std::uint64_t column; // Field start column number (one-based). + }; + + struct tab_fields: std::vector + { + std::uint64_t line; // Line number (one-based). + std::uint64_t end_column; // End-of-line column (line length). + }; + + // Read and parse lines consisting of space-separated fields. Field can + // contain single or double quoted substrings (with spaces) which are + // interpreted but preserved. No escaping of the quote characters is + // supported. Blank lines and lines that start with # (collectively called + // empty lines) are ignored. + // + class LIBBUTL_SYMEXPORT tab_parser + { + public: + tab_parser (std::istream& is, const std::string& name) + : is_ (is), name_ (name) {} + + // Return next line of fields. Skip empty lines. Empty result denotes the + // end of stream. 
+ // + tab_fields + next (); + + private: + std::istream& is_; + const std::string name_; + std::uint64_t line_ = 0; + }; +} diff --git a/libbutl/tab-parser.mxx b/libbutl/tab-parser.mxx deleted file mode 100644 index a7f7e01..0000000 --- a/libbutl/tab-parser.mxx +++ /dev/null @@ -1,84 +0,0 @@ -// file : libbutl/tab-parser.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#include // uint64_t -#include // runtime_error -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.tab_parser; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - class LIBBUTL_SYMEXPORT tab_parsing: public std::runtime_error - { - public: - tab_parsing (const std::string& name, - std::uint64_t line, - std::uint64_t column, - const std::string& description); - - std::string name; - std::uint64_t line; - std::uint64_t column; - std::string description; - }; - - // Line and columns are useful for issuing diagnostics about invalid or - // missing fields. - // - struct tab_field - { - std::string value; // Field string (quoting preserved). - std::uint64_t column; // Field start column number (one-based). - }; - - struct tab_fields: std::vector - { - std::uint64_t line; // Line number (one-based). - std::uint64_t end_column; // End-of-line column (line length). - }; - - // Read and parse lines consisting of space-separated fields. Field can - // contain single or double quoted substrings (with spaces) which are - // interpreted but preserved. No escaping of the quote characters is - // supported. Blank lines and lines that start with # (collectively called - // empty lines) are ignored. 
- // - class LIBBUTL_SYMEXPORT tab_parser - { - public: - tab_parser (std::istream& is, const std::string& name) - : is_ (is), name_ (name) {} - - // Return next line of fields. Skip empty lines. Empty result denotes the - // end of stream. - // - tab_fields - next (); - - private: - std::istream& is_; - const std::string name_; - std::uint64_t line_ = 0; - }; -} diff --git a/libbutl/target-triplet.cxx b/libbutl/target-triplet.cxx index 611b758..209f75e 100644 --- a/libbutl/target-triplet.cxx +++ b/libbutl/target-triplet.cxx @@ -1,33 +1,9 @@ // file : libbutl/target-triplet.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include +#include #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.target_triplet; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#endif using namespace std; diff --git a/libbutl/target-triplet.hxx b/libbutl/target-triplet.hxx new file mode 100644 index 0000000..45db457 --- /dev/null +++ b/libbutl/target-triplet.hxx @@ -0,0 +1,171 @@ +// file : libbutl/target-triplet.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include + +#include + +namespace butl +{ + // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS + // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus + // some fields can sometimes be omitted. This looseness makes it hard to base + // any kind of decisions on the triplet without canonicalizing it and then + // splitting it into components. The way we are going to split it is like + // this: + // + // CPU + // + // This one is reasonably straightforward. Note that we always expect at + // least two components with the first being the CPU. 
In other words, we + // don't try to guess what just 'mingw32' might mean like config.sub does. + // Note that we canonicalize arm64 to aarch64 similar to config.sub. + // + // VENDOR + // + // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor + // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu. + // Just as we think vendor is pretty irrelevant and can be ignored, comes + // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to + // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably + // toolchain) vendor. + // + // Another example where the vendor seems to be reused for something else + // entirely is Intel's MIC architecture: x86_64-k1om-linux. + // + // To make things more regular we also convert the information-free vendor + // names 'pc', 'unknown' and 'none' to the empty name. + // + // OS/KERNEL-OS/OS-ABI + // + // This is where things get really messy and instead of trying to guess, we + // call the entire thing SYSTEM. Except, in certain cases, we factor out the + // trailing version, again, to make SYSTEM easier to compare to. For example, + // *-darwin14.5.0 becomes 'darwin' and '14.5.0'. + // + // Note also that sometimes the first component in SYSTEM can be 'none' (to + // indicate the absence of an operating system) which is ambiguous with the + // vendor (for example, arm-none-eabi). We currently don't try to deal with + // that (that is, you will need to specify arm-unknown-none-eabi). + // + // Values for two-component systems (e.g., linux-gnu) that don't specify + // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of + // SYSTEM? The only way to handle this is to recognize their specific names + // as special cases and this is what we do for some of the more common + // ones.
The alternative would be to first run such names through config.sub + // which adds explicit VENDOR and this could be a reasonable fallback + // strategy for (presumably less common) cases where we don't split things + // correctly. + // + // Note also that the version splitting is only done for certain commonly- + // used targets. + // + // Some examples of canonicalization and splitting: + // + // x86_64-apple-darwin14.5.0 x86_64 apple darwin 14.5.0 + // x86_64-unknown-freebsd10.2 x86_64 freebsd 10.2 + // x86_64-unknown-netbsd9.0 x86_64 netbsd 9.0 + // i686-elf i686 elf + // arm-eabi arm eabi + // arm-none-eabi arm eabi + // arm-none-linux-gnueabi arm linux-gnueabi + // arm-softfloat-linux-gnu arm softfloat linux-gnu + // i686-pc-mingw32 i686 mingw32 + // i686-w64-mingw32 i686 w64 mingw32 + // i686-lfs-linux-gnu i686 lfs linux-gnu + // x86_64-unknown-linux-gnu x86_64 linux-gnu + // x86_64-linux-gnux32 x86_64 linux-gnux32 + // x86_64-microsoft-win32-msvc14.0 x86_64 microsoft win32-msvc 14.0 + // x86_64-pc-windows-msvc x86_64 windows-msvc + // x86_64-pc-windows-msvc19.11.25547 x86_64 windows-msvc 19.11.25547 + // wasm32-unknown-emscripten wasm32 emscripten + // arm64-apple-darwin20.1.0 aarch64 apple darwin 20.1.0 + // arm64-apple-ios14.4 aarch64 apple ios 14.4 + // arm64-apple-ios14.4-simulator aarch64 apple ios-simulator 14.4 + // x86_64-apple-ios14.4-macabi x86_64 apple ios-macabi 14.4 + // + // Similar to version splitting, for certain commonly-used targets we also + // derive the "target class" which can be used as a shorthand, more + // convenient way to identify a target. If the target is not recognized, + // then the special 'other' value is used. Currently the following classes + // are recognized: + // + // linux *-*-linux-* + // macos *-apple-darwin* + // bsd *-*-(freebsd|openbsd|netbsd)* + // windows *-*-win32-* | *-*-windows-* | *-*-mingw32 + // ios *-apple-ios* + // + // References: + // + // 1.
The libtool repository contains the PLATFORM file that lists many known + // triplets. + // + // 2. LLVM has the Triple class with similar goals. + // + struct LIBBUTL_SYMEXPORT target_triplet + { + std::string cpu; + std::string vendor; + std::string system; + std::string version; + std::string class_; + + // Assemble and returning the canonical (i.e., without unknown vendor) + // target triplet string. + // + // Note: not necessarily round-tripp'able, see representation(). + // + std::string + string () const; + + // Return a round-tripp'able target triplet string that always contains + // the vendor. + // + std::string + representation () const; + + bool + empty () const {return cpu.empty ();} + + int + compare (const target_triplet& y) const + { + int r; + return + (r = cpu.compare (y.cpu)) != 0 ? r : + (r = vendor.compare (y.vendor)) != 0 ? r : + (r = system.compare (y.system)) != 0 ? r : + ( version.compare (y.version)); + } + + // Parse the triplet throw std::invalid_argument if the triplet is not + // recognizable. + // + explicit + target_triplet (const std::string&); + + target_triplet () = default; + }; + + inline bool + operator== (const target_triplet& x, const target_triplet& y) + { + return x.compare (y) == 0; + } + + inline bool + operator!= (const target_triplet& x, const target_triplet& y) + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const target_triplet& x) + { + return o << x.string (); + } +} diff --git a/libbutl/target-triplet.mxx b/libbutl/target-triplet.mxx deleted file mode 100644 index 3861809..0000000 --- a/libbutl/target-triplet.mxx +++ /dev/null @@ -1,187 +0,0 @@ -// file : libbutl/target-triplet.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.target_triplet; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS - // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus - // some fields can sometimes be omitted. This looseness makes it hard to base - // any kind of decisions on the triplet without canonicalizing it and then - // splitting it into components. The way we are going to split it is like - // this: - // - // CPU - // - // This one is reasonably straightforward. Note that we always expect at - // least two components with the first being the CPU. In other words, we - // don't try to guess what just 'mingw32' might mean like config.sub does. - // Note that we canonicalize arm64 to aarch64 similar to config.sub. - // - // VENDOR - // - // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor - // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu. - // Just as we think vendor is pretty irrelevant and can be ignored, comes - // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to - // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably - // toolchain) vendor. - // - // Another example where the vendor seems to be reused for something else - // entirely is the Intel's MIC architecture: x86_64-k1om-linux. - // - // To make things more regular we also convert the information-free vendor - // names 'pc', 'unknown' and 'none' to the empty name. - // - // OS/KERNEL-OS/OS-ABI - // - // This is where things get really messy and instead of trying to guess, we - // call the entire thing SYSTEM. Except, in certain cases, we factor out the - // trailing version, again, to make SYSTEM easier to compare to. For example, - // *-darwin14.5.0 becomes 'darwin' and '14.5.0'. 
- // - // Note also that sometimes the first component in SYSTEM can be 'none' (to - // indicate the absence of an operating system) which is ambigous with the - // vendor (for example, arm-none-eabi). We currently don't try to deal with - // that (that is, you will need to specify arm-unknown-none-eabi). - // - // Values for two-component systems (e.g., linux-gnu) that don't specify - // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of - // SYSTEM? The only way to handle this is to recognize their specific names - // as special cases and this is what we do for some of the more common - // ones. The alternative would be to first run such names through config.sub - // which adds explicit VENDOR and this could be a reasonable fallback - // strategy for (presumably less common) cases were we don't split things - // correctly. - // - // Note also that the version splitting is only done for certain commonly- - // used targets. - // - // Some examples of canonicalization and splitting: - // - // x86_64-apple-darwin14.5.0 x86_64 apple darwin 14.5.0 - // x86_64-unknown-freebsd10.2 x86_64 freebsd 10.2 - // x86_64-unknown-netbsd9.0 x86_64 netbsd 9.0 - // i686-elf i686 elf - // arm-eabi arm eabi - // arm-none-eabi arm eabi - // arm-none-linux-gnueabi arm linux-gnueabi - // arm-softfloat-linux-gnu arm softfloat linux-gnu - // i686-pc-mingw32 i686 mingw32 - // i686-w64-mingw32 i686 w64 mingw32 - // i686-lfs-linux-gnu i686 lfs linux-gnu - // x86_64-unknown-linux-gnu x86_64 linux-gnu - // x86_64-linux-gnux32 x86_64 linux-gnux32 - // x86_64-microsoft-win32-msvc14.0 x86_64 microsoft win32-msvc 14.0 - // x86_64-pc-windows-msvc x86_64 windows-msvc - // x86_64-pc-windows-msvc19.11.25547 x86_64 windows-msvc 19.11.25547 - // wasm32-unknown-emscripten wasm32 emscripten - // arm64-apple-darwin20.1.0 aarch64 apple darwin 20.1.0 - // arm64-apple-ios14.4 aarch64 apple ios 14.4 - // arm64-apple-ios14.4-simulator aarch64 apple ios-simulator 14.4 - // 
x86_64-apple-ios14.4-macabi x86_64 apple ios-macabi 14.4 - // - // Similar to version splitting, for certain commonly-used targets we also - // derive the "target class" which can be used as a shorthand, more - // convenient way to identify a targets. If the target is not recognized, - // then the special 'other' value is used. Currently the following classes - // are recognized: - // - // linux *-*-linux-* - // macos *-apple-darwin* - // bsd *-*-(freebsd|openbsd|netbsd)* - // windows *-*-win32-* | *-*-windows-* | *-*-mingw32 - // ios *-apple-ios* - // - // References: - // - // 1. The libtool repository contains the PLATFORM file that lists many known - // triplets. - // - // 2. LLVM has the Triple class with similar goals. - // - struct LIBBUTL_SYMEXPORT target_triplet - { - std::string cpu; - std::string vendor; - std::string system; - std::string version; - std::string class_; - - // Assemble and returning the canonical (i.e., without unknown vendor) - // target triplet string. - // - // Note: not necessarily round-tripp'able, see representation(). - // - std::string - string () const; - - // Return a round-tripp'able target triplet string that always contains - // the vendor. - // - std::string - representation () const; - - bool - empty () const {return cpu.empty ();} - - int - compare (const target_triplet& y) const - { - int r; - return - (r = cpu.compare (y.cpu)) != 0 ? r : - (r = vendor.compare (y.vendor)) != 0 ? r : - (r = system.compare (y.system)) != 0 ? r : - ( version.compare (y.version)); - } - - // Parse the triplet throw std::invalid_argument if the triplet is not - // recognizable. 
- // - explicit - target_triplet (const std::string&); - - target_triplet () {} // = default; @@ MOD VC - }; - - inline bool - operator== (const target_triplet& x, const target_triplet& y) - { - return x.compare (y) == 0; - } - - inline bool - operator!= (const target_triplet& x, const target_triplet& y) - { - return !(x == y); - } - - inline std::ostream& - operator<< (std::ostream& o, const target_triplet& x) - { - return o << x.string (); - } -} diff --git a/libbutl/timestamp.cxx b/libbutl/timestamp.cxx index 589c29c..260fbef 100644 --- a/libbutl/timestamp.cxx +++ b/libbutl/timestamp.cxx @@ -1,9 +1,7 @@ // file : libbutl/timestamp.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #include // localtime_{r,s}(), gmtime_{r,s}(), strptime(), timegm() #include // EINVAL @@ -25,17 +23,13 @@ #ifdef __GLIBCXX__ extern "C" { -#include "strptime.c" +# include "strptime.c" } #else -#include // LC_ALL +# include // LC_ALL #endif #endif -#ifndef __cpp_lib_modules_ts -#include -#include - #include // tm, time_t, mktime(), strftime()[libstdc++] #include // strtoull() #include // ostringstream, stringstream[VC] @@ -49,31 +43,14 @@ extern "C" // #ifdef _WIN32 #ifndef __GLIBCXX__ -#include -#include -#include -#include -#endif -#endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -module butl.timestamp; - -// Only imports additional to interface. 
-#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; +# include +# include +# include +# include #endif #endif -import butl.utility; -#else -#include // throw_generic_error() -#endif +#include // throw_generic_error() using namespace std; diff --git a/libbutl/timestamp.hxx b/libbutl/timestamp.hxx new file mode 100644 index 0000000..2714a0d --- /dev/null +++ b/libbutl/timestamp.hxx @@ -0,0 +1,179 @@ +// file : libbutl/timestamp.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include + +#include + +namespace butl +{ + // On all three main platforms that we target (GNU/Linux, Windows (both + // VC++ and GCC/MinGW64), and MacOS X) with recent C++ runtimes, + // system_clock has nanoseconds resolution and counts from the UNIX + // epoch. The latter is important since struct stat also returns times + // based on UNIX epoch. + // + // The underlying type for nanoseconds duration is signed integer type + // of at least 64 bits (currently int64_t, available as duration::rep). + // Because it is signed, we will overflow in year 2262 but by then the + // underlying type will most likely have changed to something larger + // than 64-bit. + // + // So to support other platforms that could possibly use a different + // system_clock resolutions (e.g., microseconds), we actually not going + // to assume anywhere (except perhaps timestamp.cxx) that we are dealing + // with nanoseconds or the 64-bit underlying type. + // + using std::chrono::system_clock; + + // Note that the default-initialized timestamp has the timestamp_nonexistent + // value. + // + using timestamp = system_clock::time_point; + using duration = system_clock::duration; + + // Generally-useful special values. + // + // Note that unknown is less than nonexistent which in turn is less than + // unreal and all of them are less than any non-special value (strictly + // speaking unreal is no greater (older) than any real value). 
+ // + const timestamp::rep timestamp_unknown_rep = -1; + const timestamp timestamp_unknown = timestamp (duration (-1)); + const timestamp::rep timestamp_nonexistent_rep = 0; + const timestamp timestamp_nonexistent = timestamp (duration (0)); + const timestamp::rep timestamp_unreal_rep = 1; + const timestamp timestamp_unreal = timestamp (duration (1)); + + // Print human-readable representation of the timestamp. + // + // By default the timestamp is converted by localtime_r() to the local + // timezone, so tzset() from should be called prior to using the + // corresponding operator or the to_stream() function (normally from main() + // or equivalent). + // + // The format argument in the to_stream() function is the put_time() format + // string except that it also supports the nanoseconds conversion specifier + // in the form %[N] where is the optional single delimiter character, + // for example '.'. If the nanoseconds part is 0, then it is not printed + // (nor the delimiter character). Otherwise, if necessary, the nanoseconds + // part is padded to 9 characters with leading zeros. + // + // The special argument in the to_stream() function indicates whether the + // special timestamp_{unknown,nonexistent,unreal} values should be printed + // as '', '', and '', respectively. + // + // The local argument in the to_stream() function indicates whether to use + // localtime_r() or gmtime_r(). + // + // Note also that these operators/function may throw std::system_error. + // + // Finally, padding is not fully supported by these operators/function. They + // throw runtime_error if nanoseconds conversion specifier is present and + // the stream's width field has been set to non-zero value before the call. + // + // Potential improvements: + // - add flag to to_stream() to use + // - support %[U] (microseconds) and %[M] (milliseconds). 
+ // - make to_stream() a manipulator, similar to put_time() + // - support %(N) version for non-optional printing + // - support for suffix %[N], for example %[N nsec] + // + LIBBUTL_SYMEXPORT std::ostream& + to_stream (std::ostream&, + const timestamp&, + const char* format, + bool special, + bool local); + + // Same as above, but provide the result as a string. Note that it is + // implemented via to_stream() and std::ostringstream. + // + LIBBUTL_SYMEXPORT std::string + to_string (const timestamp&, + const char* format, + bool special, + bool local); + + inline std::ostream& + operator<< (std::ostream& os, const timestamp& ts) + { + return to_stream (os, ts, "%Y-%m-%d %H:%M:%S%[.N]", true, true); + } + + // Print human-readable representation of the duration. + // + LIBBUTL_SYMEXPORT std::ostream& + to_stream (std::ostream&, const duration&, bool nanoseconds); + + // Same as above, but provide the result as a string. Note that it is + // implemented via to_stream() and std::ostringstream. + // + LIBBUTL_SYMEXPORT std::string + to_string (const duration&, bool nanoseconds); + + inline std::ostream& + operator<< (std::ostream& os, const duration& d) + { + return to_stream (os, d, true); + } + + // Parse human-readable representation of the timestamp. + // + // The format argument is the strptime() format string except that it also + // supports the fraction of a second specifier in the form %[], where + // is the optional single delimiter character, for example '.', and + // is one of the 'N', 'U', 'M' characters, denoting nanoseconds, + // microseconds and milliseconds, respectively. + // + // The delimiter is mandatory. If no such character is encountered at + // the corresponding position of the input string, the function behaves as + // if no %[] specifier were provided. Only single %[] specifier in the + // format string is currently supported. 
+ // + // If the delimiter is present, then it should be followed by 9 (N), 6 (U), + // or 3 (M) digit value padded with leading zeros if necessary. + // + // If the local argument is true, then the input is assumed to be local time + // and the result is returned as local time as well. Otherwise, UTC is used + // in both cases. + // + // If the end argument is not NULL, then it points to the first character + // that was not parsed. Otherwise, throw invalid_argument in case of any + // unparsed characters. + // + // Throw std::system_error on input/format mismatch and underlying time + // conversion function failures. + // + // Note that internally from_string() calls strptime(), which behaves + // according to the process' C locale (set with std::setlocale()) and not + // the C++ locale (set with std::locale::global()). However the behaviour + // can be affected by std::locale::global() as well, as it itself calls + // std::setlocale() for the locale with a name. + // + // Potential improvements: + // - support %() version for non-optional component but with optional + // delimiter + // - ability to parse local, return UTC and vice-versa + // - handle timezone parsing + // + LIBBUTL_SYMEXPORT timestamp + from_string (const char* input, + const char* format, + bool local, + const char** end = nullptr); + + // Rebase a time point from UNIX epoch to midnight in the local time zone + // (so the returned duration is always less than 24 hours). + // + // Specifically, convert the time point from Epoch to the local time and + // return the time elapsed since midnight. Throw std::system_error on + // underlying time conversion function failures.
+ // + LIBBUTL_SYMEXPORT duration + daytime (timestamp); +} diff --git a/libbutl/timestamp.mxx b/libbutl/timestamp.mxx deleted file mode 100644 index 141e13d..0000000 --- a/libbutl/timestamp.mxx +++ /dev/null @@ -1,207 +0,0 @@ -// file : libbutl/timestamp.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.timestamp; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -//@@ MOD TODO: should't we re-export chrono (for somparison operator, etc)? -// or ADL should kick in? - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // On all three main platforms that we target (GNU/Linux, Windows (both - // VC++ and GCC/MinGW64), and MacOS X) with recent C++ runtimes, - // system_clock has nanoseconds resolution and counts from the UNIX - // epoch. The latter is important since struct stat also returns times - // based on UNIX epoch. - // - // The underlying type for nanoseconds duration is signed integer type - // of at least 64 bits (currently int64_t, available as duration::rep). - // Because it is signed, we will overflow in year 2262 but by then the - // underlying type will most likely have changed to something larger - // than 64-bit. - // - // So to support other platforms that could possibly use a different - // system_clock resolutions (e.g., microseconds), we actually not going - // to assume anywhere (except perhaps timestamp.cxx) that we are dealing - // with nanoseconds or the 64-bit underlying type. - // - using std::chrono::system_clock; - - // Note that the default-initialized timestamp has the timestamp_nonexistent - // value. - // - using timestamp = system_clock::time_point; - using duration = system_clock::duration; - - // Generally-useful special values. 
- // - // Note that unknown is less than nonexistent which in turn is less than - // unreal and all of them are less than any non-special value (strictly - // speaking unreal is no greater (older) than any real value). - // -#if defined(__cpp_modules_ts) && defined(__clang__) //@@ MOD Clang duplicate sym. - inline const timestamp::rep timestamp_unknown_rep = -1; - inline const timestamp timestamp_unknown = timestamp (duration (-1)); - inline const timestamp::rep timestamp_nonexistent_rep = 0; - inline const timestamp timestamp_nonexistent = timestamp (duration (0)); - inline const timestamp::rep timestamp_unreal_rep = 1; - inline const timestamp timestamp_unreal = timestamp (duration (1)); -#else - const timestamp::rep timestamp_unknown_rep = -1; - const timestamp timestamp_unknown = timestamp (duration (-1)); - const timestamp::rep timestamp_nonexistent_rep = 0; - const timestamp timestamp_nonexistent = timestamp (duration (0)); - const timestamp::rep timestamp_unreal_rep = 1; - const timestamp timestamp_unreal = timestamp (duration (1)); -#endif - - // Print human-readable representation of the timestamp. - // - // By default the timestamp is converted by localtime_r() to the local - // timezone, so tzset() from should be called prior to using the - // corresponding operator or the to_stream() function (normally from main() - // or equivalent). - // - // The format argument in the to_stream() function is the put_time() format - // string except that it also supports the nanoseconds conversion specifier - // in the form %[N] where is the optional single delimiter character, - // for example '.'. If the nanoseconds part is 0, then it is not printed - // (nor the delimiter character). Otherwise, if necessary, the nanoseconds - // part is padded to 9 characters with leading zeros. 
- // - // The special argument in the to_stream() function indicates whether the - // special timestamp_{unknown,nonexistent,unreal} values should be printed - // as '', '', and '', respectively. - // - // The local argument in the to_stream() function indicates whether to use - // localtime_r() or gmtime_r(). - // - // Note also that these operators/function may throw std::system_error. - // - // Finally, padding is not fully supported by these operators/function. They - // throw runtime_error if nanoseconds conversion specifier is present and - // the stream's width field has been set to non-zero value before the call. - // - // Potential improvements: - // - add flag to to_stream() to use - // - support %[U] (microseconds) and %[M] (milliseconds). - // - make to_stream() a manipulator, similar to put_time() - // - support %(N) version for non-optional printing - // - support for suffix %[N], for example %[N nsec] - // - LIBBUTL_SYMEXPORT std::ostream& - to_stream (std::ostream&, - const timestamp&, - const char* format, - bool special, - bool local); - - // Same as above, but provide the result as a string. Note that it is - // implemented via to_stream() and std::ostringstream. - // - LIBBUTL_SYMEXPORT std::string - to_string (const timestamp&, - const char* format, - bool special, - bool local); - - inline std::ostream& - operator<< (std::ostream& os, const timestamp& ts) - { - return to_stream (os, ts, "%Y-%m-%d %H:%M:%S%[.N]", true, true); - } - - // Print human-readable representation of the duration. - // - LIBBUTL_SYMEXPORT std::ostream& - to_stream (std::ostream&, const duration&, bool nanoseconds); - - // Same as above, but provide the result as a string. Note that it is - // implemented via to_stream() and std::ostringstream. 
- // - LIBBUTL_SYMEXPORT std::string - to_string (const duration&, bool nanoseconds); - - inline std::ostream& - operator<< (std::ostream& os, const duration& d) - { - return to_stream (os, d, true); - } - - // Parse human-readable representation of the timestamp. - // - // The format argument is the strptime() format string except that it also - // supports the fraction of a second specifier in the form %[], where - // is the optional single delimiter character, for example '.', and - // is one of the 'N', 'U', 'M' characters, denoting nanoseconds, - // microseconds and milliseconds, respectively. - // - // The delimiter is mandatory. If no such character is encountered at - // the corresponding position of the input string, the function behaves as - // if no %[] specifier were provided. Only single %[] specifier in the - // format string is currently supported. - // - // If the delimiter is present, then it should be followed by 9 (N), 6 (U), - // or 3 (M) digit value padded with leading zeros if necessary. - // - // If the local argument is true, then the input is assume to be local time - // and the result is returned as local time as well. Otherwise, UCT is used - // in both cases. - // - // If the end argument is not NULL, then it points to the first character - // that was not parsed. Otherwise, throw invalid_argument in case of any - // unparsed characters. - // - // Throw std::system_error on input/format mismatch and underlying time - // conversion function failures. - // - // Note that internally from_string() calls strptime(), which behaves - // according to the process' C locale (set with std::setlocale()) and not - // the C++ locale (set with std::locale::global()). However the behaviour - // can be affected by std::locale::global() as well, as it itself calls - // std::setlocale() for the locale with a name. 
- // - // Potential improvements: - // - support %() version for non-optional component but with optional - // delimiter - // - ability to parse local, return UTC and vice-versa - // - handle timezone parsing - // - LIBBUTL_SYMEXPORT timestamp - from_string (const char* input, - const char* format, - bool local, - const char** end = nullptr); - - // Rebase a time point from UNIX epoch to midnight in the local time zone - // (so the returned duration is always less than 24 hours). - // - // Specifically, convert the time point from Epoch to the local time and - // return the time elapsed since midnight. Throw std::system_error on - // underlying time conversion function failures. - // - LIBBUTL_SYMEXPORT duration - daytime (timestamp); -} diff --git a/libbutl/unicode.cxx b/libbutl/unicode.cxx index 4219846..294bb3f 100644 --- a/libbutl/unicode.cxx +++ b/libbutl/unicode.cxx @@ -1,32 +1,11 @@ // file : libbutl/unicode.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include -#include +#include #include // size_t #include // pair #include // lower_bound() -#endif - -#ifdef __cpp_modules_ts -module butl.unicode; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#endif using namespace std; diff --git a/libbutl/unicode.hxx b/libbutl/unicode.hxx new file mode 100644 index 0000000..8d99d0e --- /dev/null +++ b/libbutl/unicode.hxx @@ -0,0 +1,66 @@ +// file : libbutl/unicode.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // uint16_t + +#include + +namespace butl +{ + // Note that the Unicode Standard requires the surrogates ([D800 DFFF]) to + // only be used in the context of the UTF-16 character encoding form. Thus, + // we omit the surrogate codepoint type and assume surrogates as invalid + // codepoints. 
+ // + enum class codepoint_types: std::uint16_t + { + // Useful to denote invalid codepoints or when building the type set + // incrementally. + // + none = 0x00, + + graphic = 0x01, // L(etter), M(ark), N(number), P(uncturation), + // S(symbol), Zs(separator, space) + format = 0x02, + control = 0x04, + private_use = 0x08, + non_character = 0x10, + reserved = 0x20, + + any = 0x3f + }; + + codepoint_types operator& (codepoint_types, codepoint_types); + codepoint_types operator| (codepoint_types, codepoint_types); + codepoint_types operator&= (codepoint_types&, codepoint_types); + codepoint_types operator|= (codepoint_types&, codepoint_types); + + // Return the codepoint type for a valid codepoint value and none otherwise. + // + // Note that the valid codepoint ranges are [0 D800) and (DFFF 10FFFF]. + // + codepoint_types + codepoint_type (char32_t); + + // Return the type name for a single codepoint type and empty string for + // `none` and `any`. + // + // Potential future improvements: + // - add the none value name parameter ("invalid" by default) + // - produce names for type masks ("graphic, format", "any", etc) + // + std::string + to_string (codepoint_types); + + inline std::ostream& + operator<< (std::ostream& os, codepoint_types ts) + { + return os << to_string (ts); + } +} + +#include diff --git a/libbutl/unicode.mxx b/libbutl/unicode.mxx deleted file mode 100644 index b846476..0000000 --- a/libbutl/unicode.mxx +++ /dev/null @@ -1,82 +0,0 @@ -// file : libbutl/unicode.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include -#include // uint16_t -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -export module butl.unicode; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Note that the Unicode Standard requires the surrogates ([D800 DFFF]) to - // only be used in the context of the UTF-16 character encoding form. Thus, - // we omit the surrogate codepoint type and assume surrogates as invalid - // codepoints. - // - enum class codepoint_types: std::uint16_t - { - // Useful to denote invalid codepoints or when building the type set - // incrementally. - // - none = 0x00, - - graphic = 0x01, // L(etter), M(ark), N(number), P(uncturation), - // S(symbol), Zs(separator, space) - format = 0x02, - control = 0x04, - private_use = 0x08, - non_character = 0x10, - reserved = 0x20, - - any = 0x3f - }; - - codepoint_types operator& (codepoint_types, codepoint_types); - codepoint_types operator| (codepoint_types, codepoint_types); - codepoint_types operator&= (codepoint_types&, codepoint_types); - codepoint_types operator|= (codepoint_types&, codepoint_types); - - // Return the codepoint type for a valid codepoint value and none otherwise. - // - // Note that the valid codepoint ranges are [0 D800) and (DFFF 10FFFF]. - // - codepoint_types - codepoint_type (char32_t); - - // Return the type name for a single codepoint type and empty string for - // `none` and `any`. 
- // - // Potential future improvements: - // - add the none value name parameter ("invalid" by default) - // - produce names for type masks ("graphic, format", "any", etc) - // - std::string - to_string (codepoint_types); - - inline std::ostream& - operator<< (std::ostream& os, codepoint_types ts) - { - return os << to_string (ts); - } -} - -#include diff --git a/libbutl/url.hxx b/libbutl/url.hxx new file mode 100644 index 0000000..5721cfd --- /dev/null +++ b/libbutl/url.hxx @@ -0,0 +1,552 @@ +// file : libbutl/url.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include +#include // size_t +#include // uint*_t +#include // move() +#include +#include // back_inserter + +#include +#include +#include + +#include + +namespace butl +{ + // RFC3986 Uniform Resource Locator (URL). + // + // = :[//[]][/][?][#] | + // :[][?][#] + // + // = [@][:] + // + // Some examples of equivalent URLs to meditate upon: + // + // file://localhost/tmp (localhost authority) + // file:///tmp (empty authority) + // file:/tmp (absent authority) + // + // file://localhost/c:/tmp + // file:///c:/tmp + // file:/c:/tmp + // + // We think of the slash between and as a separator but + // with the path always interpreted as starting from the "root" of the + // authority. Thus: + // + // file://localhost/tmp -> 'file'://'localhost'/'tmp' -> /tmp + // file://localhost/c:/tmp -> 'file'://'localhost'/'c:/tmp' -> c:/tmp + // + // This means that the component is represented as a relative path + // and, in the general case, we cannot use our path type for its storage + // since it assumes the path is for the host platform. In other words, the + // interpretation of the path has to take into account the platform of the + // authority host. Note, however, that a custom url_traits implementation + // can choose to use the path type if local paths are to be interpreted as + // relative to the host. 
+ // + // For authority-less schemes the component is also represented as a + // relative path. Some examples of such URLs (let's call them rootless + // rather than authority-less not to confuse with a case where authority is + // empty/implied): + // + // pkcs11:token=sign;object=SIGN%20key + // pkcs11:id=%02%38%01?pin-value=12345 + // pkcs11: + // + // Note that a scheme can theoretically allow both rootless and "rootfull" + // representations. + // + // Note also that we currently forbid one character schemes to support + // scheme-less (Windows) paths which can be done by + // url_traits::translate_scheme() (see below). (A Windows path that uses + // forward slashes would be parsed as a valid authority-less URL). + + // URL host component can be an IPv4 address (if matches its dotted-decimal + // notation), an IPv6 address (if enclosed in [square brackets]) or + // otherwise a name. + // + // Note that non-ASCII host names are allowed in URLs. They must be + // UTF8-encoded and URL-encoded afterwards. Curently we store the parsed + // host name UTF8-encoded without regards to the template argument string + // type. Later we may add support for more appropriate encodings for + // multi-byte character types. + // + enum class url_host_kind {ipv4, ipv6, name}; + + template + struct basic_url_host + { + using string_type = S; + using kind_type = url_host_kind; + + string_type value; + kind_type kind; + + // Can be treated as const string_type&. + // + operator const string_type& () const noexcept {return value;} + + // Create an empty host. + // + basic_url_host (): kind (kind_type::name) {} + + // Create the host object from its string representation as it appears in + // a URL, throwing std::invalid_argument if invalid. Remove the enclosing + // square brackets for IPv6 addresses, and URL-decode host names. + // + // Note that the 'x:x:x:x:x:x:d.d.d.d' IPv6 address mixed notation is not + // supported. 
+ // + explicit + basic_url_host (string_type); + + basic_url_host (string_type v, kind_type k) + : value (std::move (v)), kind (k) {} + + bool + empty () const + { + assert (kind == kind_type::name || !value.empty ()); + return value.empty (); + } + + // Return string representation of the host as it would appear in a URL. + // + string_type + string () const; + + // Normalize the host value in accordance with its type: + // + // Name - convert to the lower case. Note: only ASCII names are currently + // supported. + // + // IPv4 - strip the leading zeros in its octets. + // + // IPv6 - strip the leading zeros in its groups (hextets), squash the + // longest zero-only hextet sequence, and convert to the lower case + // (as per RFC5952). + // + // Assume that the host value is valid. + // + void + normalize (); + }; + + template + struct basic_url_authority + { + using string_type = S; + using host_type = basic_url_host; + + string_type user; // Empty if not specified. + host_type host; + std::uint16_t port; // Zero if not specified. + + bool + empty () const + { + assert (!host.empty () || (user.empty () && port == 0)); + return host.empty (); + } + + // Return a string representation of the URL authority. String + // representation of an empty instance is the empty string. + // + string_type + string () const; + }; + + template + struct url_traits + { + using scheme_type = H; + using string_type = S; + using path_type = P; + + using authority_type = basic_url_authority; + + // Translate the scheme string representation to its type. May throw + // std::invalid_argument. May change the URL components. Should not return + // nullopt if called with a non-empty scheme. + // + // This function is called with an empty scheme if the URL has no scheme, + // the scheme is invalid, or it could not be parsed into components + // according to the URL syntax. In this case all the passed components + // reference empty/absent/false values. 
If nullopt is returned, the URL is + // considered invalid and the std::invalid_argument exception with an + // appropriate description is thrown by the URL object constructor. This + // can be used to support scheme-less URLs, local paths, etc. + // + static optional + translate_scheme (const string_type& /*url*/, + string_type&& scheme, + optional& /*authority*/, + optional& /*path*/, + optional& /*query*/, + optional& /*fragment*/, + bool& /*rootless*/) + { + return !scheme.empty () + ? optional (std::move (scheme)) + : nullopt; // Leave the URL object constructor to throw. + } + + // Translate scheme type back to its string representation. + // + // Similar to the above, the function is called with an empty string + // representation. If on return this value is no longer empty, then it is + // assumed the URL has been translated in a custom manner (in which case + // the returned scheme value is ignored). + // + static string_type + translate_scheme (string_type&, /*url*/ + const scheme_type& scheme, + const optional& /*authority*/, + const optional& /*path*/, + const optional& /*query*/, + const optional& /*fragment*/, + bool /*rootless*/) + { + return string_type (scheme); + } + + // Translate the URL-encoded path string representation to its type. + // + // Note that encoding for non-ASCII paths is not specified (in contrast + // to the host name), and presumably is local to the referenced authority. + // Furthermore, for some schemes, the path component can contain encoded + // binary data, for example for pkcs11. + // + static path_type + translate_path (string_type&&); + + // Translate path type back to its URL-encoded string representation.
+ // + static string_type + translate_path (const path_type&); + + // Check whether a string looks like a non-rootless URL by searching for + // the first ':' (unless its position is specified with the second + // argument) and then making sure it's both followed by '/' (e.g., http:// + // or file:/) and preceded by a valid scheme at least 2 characters long + // (so we don't confuse it with an absolute Windows path, e.g., c:/). + // + // Return the start of the URL substring or string_type::npos. + // + static std::size_t + find (const string_type&, std::size_t pos = string_type::npos); + }; + + template > + class basic_url + { + public: + using traits_type = T; + + using string_type = typename traits_type::string_type; + using char_type = typename string_type::value_type; + using path_type = typename traits_type::path_type; + + using scheme_type = typename traits_type::scheme_type; + using authority_type = typename traits_type::authority_type; + using host_type = typename authority_type::host_type; + + scheme_type scheme; + optional authority; + optional path; + optional query; + optional fragment; + bool rootless = false; + + // Create an empty URL object. + // + basic_url (): scheme (), empty_ (true) {} + + // Create the URL object from its string representation. Verify that the + // string is compliant to the generic URL syntax. URL-decode and validate + // components with common for all schemes syntax (scheme, host, port). + // Throw std::invalid_argument if the passed string is not a valid URL + // representation. + // + // Validation and URL-decoding of the scheme-specific components can be + // provided by a custom url_traits::translate_scheme() implementation. + // + explicit + basic_url (const string_type&); + + // Create the URL object from individual components. Performs no + // components URL-decoding or verification. 
+ // + basic_url (scheme_type, + optional, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + basic_url (scheme_type, + host_type host, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + basic_url (scheme_type, + host_type host, + std::uint16_t port, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + basic_url (scheme_type, + string_type host, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + basic_url (scheme_type, + string_type host, + std::uint16_t port, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + // Create a rootless URL. + // + basic_url (scheme_type, + optional path, + optional query = nullopt, + optional fragment = nullopt); + + bool + empty () const noexcept {return empty_;} + + // Return a string representation of the URL. Note that while this is not + // necessarily syntactically the same string as what was used to + // initialize this instance, it should be semantically equivalent. String + // representation of an empty instance is the empty string. + // + string_type + string () const; + + // Normalize the URL host, if present. + // + void + normalize (); + + // The following predicates can be used to classify URL characters while + // parsing, validating or encoding scheme-specific components. For the + // semantics of character classes see RFC3986. + // + static bool + gen_delim (char_type c) + { + return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || + c == ']' || c == '@'; + } + + static bool + sub_delim (char_type c) + { + return c == '!' || c == '$' || c == '&' || c == '=' || c == '(' || + c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || + c == '\''; + } + + static bool + reserved (char_type c) {return sub_delim (c) || gen_delim (c);} + + static bool + unreserved (char_type c) + { + return alnum (c) || c == '-' || c == '.' 
|| c =='_' || c == '~'; + } + + static bool + path_char (char_type c) + { + return c == '/' || c == ':' || unreserved (c) || c == '@' || + sub_delim (c); + } + + // URL-encode a character sequence. + // + // Note that the set of characters that should be encoded may differ for + // different URL components. The optional callback function must return + // true for characters that should be percent-encoded. The function may + // encode the passed character in its own way with another character (but + // never with '%'), and return false. By default all characters other than + // unreserved are percent-encoded. + // + // Also note that the characters are interpreted as bytes. In other words, + // each character may result in a single encoding triplet. + // + template + static void + encode (I begin, I end, O output, F&& efunc); + + template + static void + encode (I b, I e, O o) + { + encode (b, e, o, [] (char_type& c) {return !unreserved (c);}); + } + + template + static string_type + encode (const string_type& s, F&& f) + { + string_type r; + encode (s.begin (), s.end (), std::back_inserter (r), f); + return r; + } + + static string_type + encode (const string_type& s) + { + return encode (s, [] (char_type& c) {return !unreserved (c);}); + } + + template + static string_type + encode (const char_type* s, F&& f) + { + string_type r; + encode (s, s + string_type::traits_type::length (s), + std::back_inserter (r), + f); + return r; + } + + static string_type + encode (const char_type* s) + { + return encode (s, [] (char_type& c) {return !unreserved (c);}); + } + + // URL-decode a character sequence. Throw std::invalid_argument if an + // invalid encoding sequence is encountered. + // + // If some characters in the sequence are encoded with other characters + // (rather than percent-encoded), then one must provide the callback + // function to decode them.
+ // + template + static void + decode (I begin, I end, O output, F&& dfunc); + + template + static void + decode (I b, I e, O o) + { + decode (b, e, o, [] (char_type&) {}); + } + + template + static string_type + decode (const string_type& s, F&& f) + { + string_type r; + decode (s.begin (), s.end (), std::back_inserter (r), f); + return r; + } + + static string_type + decode (const string_type& s) + { + return decode (s, [] (char_type&) {}); + } + + template + static string_type + decode (const char_type* s, F&& f) + { + string_type r; + decode (s, s + string_type::traits_type::length (s), + std::back_inserter (r), + f); + return r; + } + + static string_type + decode (const char_type* s) + { + return decode (s, [] (char_type&) {}); + } + + private: + bool empty_ = false; + }; + + using url_authority = basic_url_authority; + using url = basic_url ; + + template + inline bool + operator== (const basic_url_host& x, const basic_url_host& y) noexcept + { + return x.value == y.value && x.kind == y.kind; + } + + template + inline bool + operator!= (const basic_url_host& x, const basic_url_host& y) noexcept + { + return !(x == y); + } + + template + inline bool + operator== (const basic_url_authority& x, + const basic_url_authority& y) noexcept + { + return x.user == y.user && x.host == y.host && x.port == y.port; + } + + template + inline bool + operator!= (const basic_url_authority& x, + const basic_url_authority& y) noexcept + { + return !(x == y); + } + + template + inline bool + operator== (const basic_url& x, const basic_url& y) noexcept + { + if (x.empty () || y.empty ()) + return x.empty () == y.empty (); + + return x.scheme == y.scheme && + x.authority == y.authority && + x.path == y.path && + x.query == y.query && + x.fragment == y.fragment && + x.rootless == y.rootless; + } + + template + inline bool + operator!= (const basic_url& x, const basic_url& y) noexcept + { + return !(x == y); + } + + template + inline auto + operator<< (std::basic_ostream& o, + 
const basic_url& u) -> decltype (o) + { + return o << u.string (); + } +} + +#include +#include diff --git a/libbutl/url.ixx b/libbutl/url.ixx index b823ee7..19d54c7 100644 --- a/libbutl/url.ixx +++ b/libbutl/url.ixx @@ -1,7 +1,7 @@ // file : libbutl/url.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +namespace butl { // url_traits // diff --git a/libbutl/url.mxx b/libbutl/url.mxx deleted file mode 100644 index 713bc3e..0000000 --- a/libbutl/url.mxx +++ /dev/null @@ -1,579 +0,0 @@ -// file : libbutl/url.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include // uint*_t -#include // move() -#include -#include // back_inserter - -#include // size_t -#include // invalid_argument -#include // find(), find_if() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.url; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; -import butl.optional; - -import butl.small_vector; -#else -#include -#include -#include - -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // RFC3986 Uniform Resource Locator (URL). - // - // = :[//[]][/][?][#] | - // :[][?][#] - // - // = [@][:] - // - // Some examples of equivalent URLs to meditate upon: - // - // file://localhost/tmp (localhost authority) - // file:///tmp (empty authority) - // file:/tmp (absent authority) - // - // file://localhost/c:/tmp - // file:///c:/tmp - // file:/c:/tmp - // - // We think of the slash between and as a separator but - // with the path always interpreted as starting from the "root" of the - // authority. 
Thus: - // - // file://localhost/tmp -> 'file'://'localhost'/'tmp' -> /tmp - // file://localhost/c:/tmp -> 'file'://'localhost'/'c:/tmp' -> c:/tmp - // - // This means that the component is represented as a relative path - // and, in the general case, we cannot use our path type for its storage - // since it assumes the path is for the host platform. In other words, the - // interpretation of the path has to take into account the platform of the - // authority host. Note, however, that a custom url_traits implementation - // can choose to use the path type if local paths are to be interpreted as - // relative to the host. - // - // For authority-less schemes the component is also represented as a - // relative path. Some examples of such URLs (let's call them rootless - // rather than authority-less not to confuse with a case where authority is - // empty/implied): - // - // pkcs11:token=sign;object=SIGN%20key - // pkcs11:id=%02%38%01?pin-value=12345 - // pkcs11: - // - // Note that a scheme can theoretically allow both rootless and "rootfull" - // representations. - // - // Note also that we currently forbid one character schemes to support - // scheme-less (Windows) paths which can be done by - // url_traits::translate_scheme() (see below). (A Windows path that uses - // forward slashes would be parsed as a valid authority-less URL). - - // URL host component can be an IPv4 address (if matches its dotted-decimal - // notation), an IPv6 address (if enclosed in [square brackets]) or - // otherwise a name. - // - // Note that non-ASCII host names are allowed in URLs. They must be - // UTF8-encoded and URL-encoded afterwards. Curently we store the parsed - // host name UTF8-encoded without regards to the template argument string - // type. Later we may add support for more appropriate encodings for - // multi-byte character types. 
- // - enum class url_host_kind {ipv4, ipv6, name}; - - template - struct basic_url_host - { - using string_type = S; - using kind_type = url_host_kind; - - string_type value; - kind_type kind; - - // Can be treated as const string_type&. - // - operator const string_type& () const noexcept {return value;} - - // Create an empty host. - // - basic_url_host (): kind (kind_type::name) {} - - // Create the host object from its string representation as it appears in - // a URL, throwing std::invalid_argument if invalid. Remove the enclosing - // square brackets for IPv6 addresses, and URL-decode host names. - // - // Note that the 'x:x:x:x:x:x:d.d.d.d' IPv6 address mixed notation is not - // supported. - // - explicit - basic_url_host (string_type); - - basic_url_host (string_type v, kind_type k) - : value (std::move (v)), kind (k) {} - - bool - empty () const - { - assert (kind == kind_type::name || !value.empty ()); - return value.empty (); - } - - // Return string representation of the host as it would appear in a URL. - // - string_type - string () const; - - // Normalize the host value in accordance with its type: - // - // Name - convert to the lower case. Note: only ASCII names are currently - // supported. - // - // IPv4 - strip the leading zeros in its octets. - // - // IPv6 - strip the leading zeros in its groups (hextets), squash the - // longest zero-only hextet sequence, and convert to the lower case - // (as per RFC5952). - // - // Assume that the host value is valid. - // - void - normalize (); - }; - - template - struct basic_url_authority - { - using string_type = S; - using host_type = basic_url_host; - - string_type user; // Empty if not specified. - host_type host; - std::uint16_t port; // Zero if not specified. - - bool - empty () const - { - assert (!host.empty () || (user.empty () && port == 0)); - return host.empty (); - } - - // Return a string representation of the URL authority. 
String - // representation of an empty instance is the empty string. - // - string_type - string () const; - }; - - template - struct url_traits - { - using scheme_type = H; - using string_type = S; - using path_type = P; - - using authority_type = basic_url_authority; - - // Translate the scheme string representation to its type. May throw - // std::invalid_argument. May change the URL components. Should not return - // nullopt if called with a non-empty scheme. - // - // This function is called with an empty scheme if the URL has no scheme, - // the scheme is invalid, or it could not be parsed into components - // according to the URL syntax. In this case all the passed components - // reference empty/absent/false values. If nullopt is returned, the URL is - // considered invalid and the std::invalid_argument exception with an - // appropriate description is thrown by the URL object constructor. This - // can be used to support scheme-less URLs, local paths, etc. - // - static optional - translate_scheme (const string_type& /*url*/, - string_type&& scheme, - optional& /*authority*/, - optional& /*path*/, - optional& /*query*/, - optional& /*fragment*/, - bool& /*rootless*/) - { - return !scheme.empty () - ? optional (std::move (scheme)) - : nullopt; // Leave the URL object constructor to throw. - } - - // Translate scheme type back to its string representation. - // - // Similar to the above the function is called with an empty string - // representation. If on return this value is no longer empty, then it is - // assume the URL has been translated in a custom manner (in which case - // the returned scheme value is ignored). 
- // - static string_type - translate_scheme (string_type&, /*url*/ - const scheme_type& scheme, - const optional& /*authority*/, - const optional& /*path*/, - const optional& /*query*/, - const optional& /*fragment*/, - bool /*rootless*/) - { - return string_type (scheme); - } - - // Translate the URL-encoded path string representation to its type. - // - // Note that encoding for non-ASCII paths is not specified (in contrast - // to the host name), and presumably is local to the referenced authority. - // Furthermore, for some schemes, the path component can contain encoded - // binary data, for example for pkcs11. - // - static path_type - translate_path (string_type&&); - - // Translate path type back to its URL-encoded string representation. - // - static string_type - translate_path (const path_type&); - - // Check whether a string looks like a non-rootless URL by searching for - // the first ':' (unless its position is specified with the second - // argument) and then making sure it's both followed by '/' (e.g., http:// - // or file:/) and preceded by a valid scheme at least 2 characters long - // (so we don't confuse it with an absolute Windows path, e.g., c:/). - // - // Return the start of the URL substring or string_type::npos. - // - static std::size_t - find (const string_type&, std::size_t pos = string_type::npos); - }; - - template > - class basic_url - { - public: - using traits_type = T; - - using string_type = typename traits_type::string_type; - using char_type = typename string_type::value_type; - using path_type = typename traits_type::path_type; - - using scheme_type = typename traits_type::scheme_type; - using authority_type = typename traits_type::authority_type; - using host_type = typename authority_type::host_type; - - scheme_type scheme; - optional authority; - optional path; - optional query; - optional fragment; - bool rootless = false; - - // Create an empty URL object. 
- // - basic_url (): scheme (), empty_ (true) {} - - // Create the URL object from its string representation. Verify that the - // string is compliant to the generic URL syntax. URL-decode and validate - // components with common for all schemes syntax (scheme, host, port). - // Throw std::invalid_argument if the passed string is not a valid URL - // representation. - // - // Validation and URL-decoding of the scheme-specific components can be - // provided by a custom url_traits::translate_scheme() implementation. - // - explicit - basic_url (const string_type&); - - // Create the URL object from individual components. Performs no - // components URL-decoding or verification. - // - basic_url (scheme_type, - optional, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - host_type host, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - host_type host, - std::uint16_t port, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - string_type host, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - string_type host, - std::uint16_t port, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - // Create a rootless URL. - // - basic_url (scheme_type, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - bool - empty () const noexcept {return empty_;} - - // Return a string representation of the URL. Note that while this is not - // necessarily syntactically the same string as what was used to - // initialize this instance, it should be semantically equivalent. String - // representation of an empty instance is the empty string. - // - string_type - string () const; - - // Normalize the URL host, if present. 
- // - void - normalize (); - - // The following predicates can be used to classify URL characters while - // parsing, validating or encoding scheme-specific components. For the - // semantics of character classes see RFC3986. - // - static bool - gen_delim (char_type c) - { - return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || - c == ']' || c == '@'; - } - - static bool - sub_delim (char_type c) - { - return c == '!' || c == '$' || c == '&' || c == '=' || c == '(' || - c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || - c == '\''; - } - - static bool - reserved (char_type c) {return sub_delim (c) || gen_delim (c);} - - static bool - unreserved (char_type c) - { - return alnum (c) || c == '-' || c == '.' || c =='_' || c == '~'; - } - - static bool - path_char (char_type c) - { - return c == '/' || c == ':' || unreserved (c) || c == '@' || - sub_delim (c); - } - - // URL-encode a character sequence. - // - // Note that the set of characters that should be encoded may differ for - // different URL components. The optional callback function must return - // true for characters that should be percent-encoded. The function may - // encode the passed character in it's own way with another character (but - // never with '%'), and return false. By default all characters other than - // unreserved are percent-encoded. - // - // Also note that the characters are interpreted as bytes. In other words, - // each character may result in a single encoding triplet. 
- // - template - static void - encode (I begin, I end, O output, F&& efunc); - - template - static void - encode (I b, I e, O o) - { - encode (b, e, o, [] (char_type& c) {return !unreserved (c);}); - } - - template - static string_type - encode (const string_type& s, F&& f) - { - string_type r; - encode (s.begin (), s.end (), std::back_inserter (r), f); - return r; - } - - static string_type - encode (const string_type& s) - { - return encode (s, [] (char_type& c) {return !unreserved (c);}); - } - - template - static string_type - encode (const char_type* s, F&& f) - { - string_type r; - encode (s, s + string_type::traits_type::length (s), - std::back_inserter (r), - f); - return r; - } - - static string_type - encode (const char_type* s) - { - return encode (s, [] (char_type& c) {return !unreserved (c);}); - } - - // URL-decode a character sequence. Throw std::invalid_argument if an - // invalid encoding sequence is encountered. - // - // If some characters in the sequence are encoded with another characters - // (rather than percent-encoded), then one must provide the callback - // function to decode them. 
- // - template - static void - decode (I begin, I end, O output, F&& dfunc); - - template - static void - decode (I b, I e, O o) - { - decode (b, e, o, [] (char_type&) {}); - } - - template - static string_type - decode (const string_type& s, F&& f) - { - string_type r; - decode (s.begin (), s.end (), std::back_inserter (r), f); - return r; - } - - static string_type - decode (const string_type& s) - { - return decode (s, [] (char_type&) {}); - } - - template - static string_type - decode (const char_type* s, F&& f) - { - string_type r; - decode (s, s + string_type::traits_type::length (s), - std::back_inserter (r), - f); - return r; - } - - static string_type - decode (const char_type* s) - { - return decode (s, [] (char_type&) {}); - } - - private: - bool empty_ = false; - }; - - using url_authority = basic_url_authority; - using url = basic_url ; - - template - inline bool - operator== (const basic_url_host& x, const basic_url_host& y) noexcept - { - return x.value == y.value && x.kind == y.kind; - } - - template - inline bool - operator!= (const basic_url_host& x, const basic_url_host& y) noexcept - { - return !(x == y); - } - - template - inline bool - operator== (const basic_url_authority& x, - const basic_url_authority& y) noexcept - { - return x.user == y.user && x.host == y.host && x.port == y.port; - } - - template - inline bool - operator!= (const basic_url_authority& x, - const basic_url_authority& y) noexcept - { - return !(x == y); - } - - template - inline bool - operator== (const basic_url& x, const basic_url& y) noexcept - { - if (x.empty () || y.empty ()) - return x.empty () == y.empty (); - - return x.scheme == y.scheme && - x.authority == y.authority && - x.path == y.path && - x.query == y.query && - x.fragment == y.fragment && - x.rootless == y.rootless; - } - - template - inline bool - operator!= (const basic_url& x, const basic_url& y) noexcept - { - return !(x == y); - } - - template - inline auto - operator<< (std::basic_ostream& o, - 
const basic_url& u) -> decltype (o) - { - return o << u.string (); - } -} - -#include -#include diff --git a/libbutl/url.txx b/libbutl/url.txx index 0951e80..b2caa37 100644 --- a/libbutl/url.txx +++ b/libbutl/url.txx @@ -1,7 +1,12 @@ // file : libbutl/url.txx -*- C++ -*- // license : MIT; see accompanying LICENSE file -LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. +#include // invalid_argument +#include // find(), find_if() + +#include + +namespace butl { // Convenience functions. // diff --git a/libbutl/utf8.hxx b/libbutl/utf8.hxx new file mode 100644 index 0000000..697f77a --- /dev/null +++ b/libbutl/utf8.hxx @@ -0,0 +1,114 @@ +// file : libbutl/utf8.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // uint8_t +#include // pair + +#include + +#include + +namespace butl +{ + // Here and below we will refer to bytes that encode a single Unicode + // codepoint as "UTF-8 byte sequence" ("UTF-8 sequence" or "byte sequence" + // for short) and a sequence of such sequences as "UTF-8 encoded byte + // string" ("byte string" for short). + // + + // Validate a UTF-8 encoded byte string one byte at a time. Optionally, also + // validate that its decoded codepoints belong to the specified types or + // codepoint whitelist. + // + class utf8_validator + { + public: + // Note: use whitelist via shallow copy. + // + explicit + utf8_validator (codepoint_types = codepoint_types::any, + const char32_t* whitelist = nullptr); + + // Validate the next byte returning true if it is valid (first) and + // whether it is the last byte of a codepoint (second). The {false, true} + // result indicates a byte sequence decoded into a codepoint of undesired + // type rather than an invalid byte that happens to be the last in the + // sequence (and may well be a valid starting byte of the next sequence). + // + // Note that in case the byte is invalid, calling this function again + // without recovery is illegal.
+ // + std::pair + validate (char); + + // As above but in case of an invalid byte also return the description of + // why it is invalid. + // + // Note that the description only contains the reason why the specified + // byte is not part of a valid UTF-8 sequence or the desired codepoint + // type, for example: + // + // "invalid UTF-8 sequence first byte (0xB0)" + // "invalid Unicode codepoint (reserved)" + // + // It can be used to form complete diagnostics along these lines: + // + // cerr << "invalid manifest value " << name << ": " << what << endl; + // + std::pair + validate (char, std::string& what); + + // As above but decide whether the description is needed at runtime (what + // may be NULL). + // + std::pair + validate (char, std::string* what); + + // Recover from an invalid byte. + // + // This function must be called with the first invalid and then subsequent + // bytes until it signals that the specified byte is valid. Note that it + // shall not be called if the sequence is decoded into a codepoint of an + // undesired type. + // + // Note also that a byte being invalid in the middle of a UTF-8 sequence + // may be valid as a first byte of the next sequence. + // + std::pair + recover (char); + + // Return the codepoint of the last byte sequence. + // + // This function can only be legally called after validate() or recover() + // signal that the preceding byte is valid and last. + // + char32_t + codepoint () const; + + private: + codepoint_types types_; + const char32_t* whitelist_; + + // State machine. + // + uint8_t seq_size_; // [1 4]; calculated at the first byte validation. + uint8_t seq_index_ = 0; // [0 3] + + // Last byte sequence decoded codepoint (built incrementally). + // + char32_t codepoint_; + + // The byte range a valid UTF-8 sequence second byte must belong to as + // calculated during the first byte validation. 
+ // + // Note that the subsequent (third and fourth) bytes must belong to the + // [80 BF] range regardless of the previous bytes. + // + std::pair byte2_range_; + }; +} + +#include diff --git a/libbutl/utf8.mxx b/libbutl/utf8.mxx deleted file mode 100644 index 15e8ded..0000000 --- a/libbutl/utf8.mxx +++ /dev/null @@ -1,130 +0,0 @@ -// file : libbutl/utf8.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // uint8_t -#include // pair -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.utf8; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.unicode; -#else -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Here and below we will refer to bytes that encode a singe Unicode - // codepoint as "UTF-8 byte sequence" ("UTF-8 sequence" or "byte sequence" - // for short) and a sequence of such sequences as "UTF-8 encoded byte - // string" ("byte string" for short). - // - - // Validate a UTF-8 encoded byte string one byte at a time. Optionally, also - // validate that its decoded codepoints belong to the specified types or - // codepoint whitelist. - // - class utf8_validator - { - public: - // Note: use whitelist via shallow copy. - // - explicit - utf8_validator (codepoint_types = codepoint_types::any, - const char32_t* whitelist = nullptr); - - // Validate the next byte returning true if it is valid (first) and - // whether it is the last byte of a codepoint (second). The {false, true} - // result indicates a byte sequence decoded into a codepoint of undesired - // type rather than an invalid byte that happens to be the last in the - // sequence (and may well be a valid starting byte of the next sequence). - // - // Note that in case the byte is invalid, calling this function again - // without recovery is illegal.
- // - std::pair - validate (char); - - // As above but in case of an invalid byte also return the description of - // why it is invalid. - // - // Note that the description only contains the reason why the specified - // byte is not part of a valid UTF-8 sequence or the desired codepoint - // type, for example: - // - // "invalid UTF-8 sequence first byte (0xB0)" - // "invalid Unicode codepoint (reserved)" - // - // It can be used to form complete diagnostics along these lines: - // - // cerr << "invalid manifest value " << name << ": " << what << endl; - // - std::pair - validate (char, std::string& what); - - // As above but decide whether the description is needed at runtime (what - // may be NULL). - // - std::pair - validate (char, std::string* what); - - // Recover from an invalid byte. - // - // This function must be called with the first invalid and then subsequent - // bytes until it signals that the specified byte is valid. Note that it - // shall not be called if the sequence is decoded into a codepoint of an - // undesired type. - // - // Note also that a byte being invalid in the middle of a UTF-8 sequence - // may be valid as a first byte of the next sequence. - // - std::pair - recover (char); - - // Return the codepoint of the last byte sequence. - // - // This function can only be legally called after validate() or recover() - // signal that the preceding byte is valid and last. - // - char32_t - codepoint () const; - - private: - codepoint_types types_; - const char32_t* whitelist_; - - // State machine. - // - uint8_t seq_size_; // [1 4]; calculated at the first byte validation. - uint8_t seq_index_ = 0; // [0 3] - - // Last byte sequence decoded codepoint (built incrementally). - // - char32_t codepoint_; - - // The byte range a valid UTF-8 sequence second byte must belong to as - // calculated during the first byte validation. 
- // - // Note that the subsequent (third and forth) bytes must belong to the - // [80 BF] range regardless to the previous bytes. - // - std::pair byte2_range_; - }; -} - -#include diff --git a/libbutl/utility.cxx b/libbutl/utility.cxx index a891fc2..78abbd8 100644 --- a/libbutl/utility.cxx +++ b/libbutl/utility.cxx @@ -1,9 +1,7 @@ // file : libbutl/utility.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include -#endif +#include #ifdef _WIN32 #include @@ -11,35 +9,15 @@ #include // getenv(), setenv(), unsetenv(), _putenv() -#ifndef __cpp_lib_modules_ts -#include -#include -#include - #include // strncmp(), strlen() #include #include // enable_if, is_base_of #include -#endif #include #include -#ifdef __cpp_modules_ts -module butl.utility; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -#endif - -import butl.utf8; -#else -#include -#endif +#include namespace butl { diff --git a/libbutl/utility.hxx b/libbutl/utility.hxx new file mode 100644 index 0000000..49b61b3 --- /dev/null +++ b/libbutl/utility.hxx @@ -0,0 +1,541 @@ +// file : libbutl/utility.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#ifndef _WIN32 +# include // strcasecmp(), strncasecmp() +#else +# include // _stricmp(), _strnicmp() +#endif + +#include +#include // ostream +#include +#include // size_t +#include // move(), forward(), pair +#include // strcmp(), strlen() +#include // exception, uncaught_exception[s]() +//#include // hash + +#include // thread_local +#include // uncaught_exceptions + +#include +#include +#include + +#include + +namespace butl +{ + // Throw std::system_error with generic_category or system_category, + // respectively. + // + // The generic version should be used for portable errno codes (those that + // are mapped to std::errc). 
The system version should be used for platform- + // specific codes, for example, additional errno codes on POSIX systems or + // the result of GetLastError() on Windows. + // + // See also the exception sanitization below. + // + [[noreturn]] LIBBUTL_SYMEXPORT void + throw_generic_error (int errno_code, const char* what = nullptr); + + [[noreturn]] LIBBUTL_SYMEXPORT void + throw_system_error (int system_code, int fallback_errno_code = 0); + + // Throw std::ios::failure with the specified description and, if it is + // derived from std::system_error (as it should), error code. + // + [[noreturn]] LIBBUTL_SYMEXPORT void + throw_generic_ios_failure (int errno_code, const char* what = nullptr); + + [[noreturn]] LIBBUTL_SYMEXPORT void + throw_system_ios_failure (int system_code, const char* what = nullptr); + + // Convert ASCII character/string case. If there is no upper/lower case + // counterpart, leave the character unchanged. The POSIX locale (also known + // as C locale) must be the current application locale. Otherwise the + // behavior is undefined. + // + // Note that the POSIX locale specifies behaviour on data consisting + // entirely of characters from the portable character set (subset of ASCII + // including 103 non-negative characters and English alphabet letters in + // particular) and the control character set (more about them at + // http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap06.html). 
+ // + // Also note that according to the POSIX locale definition the case + // conversion can be applied only to [A-Z] and [a-z] character ranges being + // translated to each other (more about that at + // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02) + // + char ucase (char); + std::string ucase (const char*, std::size_t n = std::string::npos); + std::string ucase (const std::string&, + std::size_t p = 0, + std::size_t n = std::string::npos); + std::string& ucase (std::string&, + std::size_t p = 0, + std::size_t n = std::string::npos); + void ucase (char*, std::size_t); + + char lcase (char); + std::string lcase (const char*, std::size_t n = std::string::npos); + std::string lcase (const std::string&, + std::size_t p = 0, + std::size_t n = std::string::npos); + std::string& lcase (std::string&, + std::size_t p = 0, + std::size_t n = std::string::npos); + void lcase (char*, std::size_t); + + // Compare ASCII characters/strings ignoring case. Behave as if characters + // had been converted to the lower case and then byte-compared. Return a + // negative, zero or positive value if the left hand side is less, equal or + // greater than the right hand side, respectivelly. The POSIX locale (also + // known as C locale) must be the current application locale. Otherwise the + // behavior is undefined. + // + // The optional size argument specifies the maximum number of characters + // to compare. + // + int icasecmp (char, char); + + int icasecmp (const std::string&, const std::string&, + std::size_t = std::string::npos); + + int icasecmp (const std::string&, const char*, + std::size_t = std::string::npos); + + int icasecmp (const char*, const char*, std::size_t = std::string::npos); + + // Case-insensitive key comparators (i.e., to be used in sets, maps, etc). 
+ // + struct icase_compare_string + { + bool operator() (const std::string& x, const std::string& y) const + { + return icasecmp (x, y) < 0; + } + }; + + struct icase_compare_c_string + { + bool operator() (const char* x, const char* y) const + { + return icasecmp (x, y) < 0; + } + }; + + bool alpha (char); + bool digit (char); + bool alnum (char); + bool xdigit (char); + + bool alpha (wchar_t); + bool digit (wchar_t); + bool alnum (wchar_t); + bool xdigit (wchar_t); + + // Basic string utilities. + // + + // Trim leading/trailing whitespaces, including '\n' and '\r'. + // + LIBBUTL_SYMEXPORT std::string& + trim (std::string&); + + inline std::string + trim (std::string&& s) + { + return move (trim (s)); + } + + // Find the beginning and end poistions of the next word. Return the size + // of the word or 0 and set b = e = n if there are no more words. For + // example: + // + // for (size_t b (0), e (0); next_word (s, b, e); ) + // { + // string w (s, b, e - b); + // } + // + // Or: + // + // for (size_t b (0), e (0), n; n = next_word (s, b, e, ' ', ','); ) + // { + // string w (s, b, n); + // } + // + // The second version examines up to the n'th character in the string. + // + std::size_t + next_word (const std::string&, std::size_t& b, std::size_t& e, + char d1 = ' ', char d2 = '\0'); + + std::size_t + next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e, + char d1 = ' ', char d2 = '\0'); + + // Sanitize a string to only contain characters valid in an identifier + // (ASCII alphanumeric plus `_`) replacing all others with `_`. + // + // Note that it doesn't make sure the first character is not a digit. + // + std::string& sanitize_identifier (std::string&); + std::string sanitize_identifier (std::string&&); + std::string sanitize_identifier (const std::string&); + + // Sanitize a string (e.g., a path) to be a valid C string literal by + // escaping backslahes, double-quotes, and newlines. 
+ // + // Note that in the second version the result is appended to out. + // + std::string sanitize_strlit (const std::string&); + void sanitize_strlit (const std::string&, std::string& out); + + // Return true if the string is a valid UTF-8 encoded byte string and, + // optionally, its decoded codepoints belong to the specified types or + // codepoint whitelist. + // + bool + utf8 (const std::string&, + codepoint_types = codepoint_types::any, + const char32_t* whitelist = nullptr); + + // As above but in case of an invalid sequence also return the description + // of why it is invalid. + // + bool + utf8 (const std::string&, + std::string& what, + codepoint_types = codepoint_types::any, + const char32_t* whitelist = nullptr); + + // Return UTF-8 byte string length in codepoints. Throw + // std::invalid_argument if this is not a valid UTF-8. + // + std::size_t + utf8_length (const std::string&, + codepoint_types = codepoint_types::any, + const char32_t* whitelist = nullptr); + + // Fixup the specified string (in place) to be valid UTF-8 replacing invalid + // bytes and codepoints with the specified character, for example, '?'. + // + // Potential future improvements: + // - char32_t replacement (will need UTF-8 encoding) + // - different replacement for bytes and codepoints + // + LIBBUTL_SYMEXPORT void + to_utf8 (std::string&, + char replacement, + codepoint_types = codepoint_types::any, + const char32_t* whitelist = nullptr); + + // If an input stream is in a failed state, then return true if this is + // because of the eof and throw istream::failure otherwise. If the stream + // is not in a failed state, return false. This helper function is normally + // used like this: + // + // is.exceptions (istream::badbit); + // + // for (string l; !eof (getline (is, l)); ) + // { + // ... + // } + // + bool + eof (std::istream&); + + // Environment variables. 
+ // + // Our getenv() wrapper (as well as the relevant process startup functions) + // have a notion of a "thread environment", that is, thread-specific + // environment variables. However, unlike the process environment (in the + // form of the environ array), the thread environment is specified as a set + // of overrides over the process environment (sets and unsets), the same as + // for the process startup. + // + extern +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + const char* const* thread_env_; + + // On Windows one cannot export a thread-local variable so we have to + // use wrapper functions. + // +#ifdef _WIN32 + LIBBUTL_SYMEXPORT const char* const* + thread_env (); + + LIBBUTL_SYMEXPORT void + thread_env (const char* const*); +#else + const char* const* + thread_env (); + + void + thread_env (const char* const*); +#endif + + struct auto_thread_env + { + optional prev_env; + + auto_thread_env () = default; + + explicit + auto_thread_env (const char* const*); + + // Move-to-empty-only type. + // + auto_thread_env (auto_thread_env&&); + auto_thread_env& operator= (auto_thread_env&&); + + auto_thread_env (const auto_thread_env&) = delete; + auto_thread_env& operator= (const auto_thread_env&) = delete; + + ~auto_thread_env (); + }; + + // Get the environment variables taking into account the current thread's + // overrides (thread_env). + // + LIBBUTL_SYMEXPORT optional + getenv (const char*); + + inline optional + getenv (const std::string& n) + { + return getenv (n.c_str ()); + } + + // Set the process environment variable. Best done before starting any + // threads (see thread_env). Throw system_error on failure. + // + // Note that on Windows setting an empty value unsets the variable. + // + LIBBUTL_SYMEXPORT void + setenv (const std::string& name, const std::string& value); + + // Unset the process environment variable. Best done before starting any + // threads (see thread_env). Throw system_error on failure. 
+ // + LIBBUTL_SYMEXPORT void + unsetenv (const std::string&); + + // Key comparators (i.e., to be used in sets, maps, etc). + // + struct compare_c_string + { + bool operator() (const char* x, const char* y) const noexcept + { + return std::strcmp (x, y) < 0; + } + }; + + struct compare_pointer_target + { + template + bool operator() (const P& x, const P& y) const + { + return *x < *y; + } + }; + + //struct hash_pointer_target + //{ + // template + // std::size_t operator() (const P& x) const {return std::hash (*x);} + //}; + + // Compare two std::reference_wrapper's. + // + struct compare_reference_target + { + template + bool operator() (const R& x, const R& y) const + { + return x.get () < y.get (); + } + }; + + // Combine one or more hash values. + // + inline std::size_t + combine_hash (std::size_t s, std::size_t h) + { + // Magic formula from boost::hash_combine(). + // + return s ^ (h + 0x9e3779b9 + (s << 6) + (s >> 2)); + } + + template + inline std::size_t + combine_hash (std::size_t s, std::size_t h, S... hs) + { + return combine_hash (combine_hash (s, h), hs...); + } + + // Support for reverse iteration using range-based for-loop: + // + // for (... : reverse_iterate (x)) ... + // + template + class reverse_range + { + T x_; + + public: + reverse_range (T&& x): x_ (std::forward (x)) {} + + auto begin () const -> decltype (this->x_.rbegin ()) {return x_.rbegin ();} + auto end () const -> decltype (this->x_.rend ()) {return x_.rend ();} + }; + + template + inline reverse_range + reverse_iterate (T&& x) {return reverse_range (std::forward (x));} + + // Cleanly cast between incompatible function types or dlsym() result + // (void*) to a function pointer. + // + template + F + function_cast (P*); + + // Call a function on destruction. 
+ // + template + struct guard_impl; + + template + inline guard_impl + make_guard (F f) + { + return guard_impl (std::move (f)); + } + + template + struct guard_impl + { + guard_impl (F f): function (std::move (f)), active (true) {} + ~guard_impl () {if (active) function ();} + + void + cancel () {active = false;} + + F function; + bool active; + }; + + // Call a function if there is an exception. + // + + template + struct exception_guard_impl; + + template + inline exception_guard_impl + make_exception_guard (F f) + { + return exception_guard_impl (std::move (f)); + } + +#ifdef __cpp_lib_uncaught_exceptions + template + struct exception_guard_impl + { + exception_guard_impl (F f) + : f_ (std::move (f)), + u_ (std::uncaught_exceptions ()) {} + + ~exception_guard_impl () + { + if (u_ != std::uncaught_exceptions ()) + f_ (); + } + + private: + F f_; + int u_; + }; +#else + // Fallback implementation using a TLS flag. + // + // True means we are in the body of a destructor that is being called as + // part of the exception stack unwindining. + // + extern +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + // Work around glibc bug #14898. + // +#if defined(__GLIBC__) && \ + defined(__GLIBC_MINOR__) && \ + (__GLIBC__ < 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ < 17) + int +#else + bool +#endif + exception_unwinding_dtor_; + + // On Windows one cannot export a thread-local variable so we have to + // use wrapper functions. 
+ // +#ifdef _WIN32 + LIBBUTL_SYMEXPORT bool + exception_unwinding_dtor (); + + LIBBUTL_SYMEXPORT void + exception_unwinding_dtor (bool); +#else + inline bool + exception_unwinding_dtor () {return exception_unwinding_dtor_;} + + inline void + exception_unwinding_dtor (bool v) {exception_unwinding_dtor_ = v;} +#endif + + template + struct exception_guard_impl + { + exception_guard_impl (F f): f_ (std::move (f)) {} + ~exception_guard_impl () + { + if (std::uncaught_exception ()) + { + exception_unwinding_dtor (true); + f_ (); + exception_unwinding_dtor (false); + } + } + + private: + F f_; + }; +#endif +} + +namespace std +{ + // Sanitize the exception description before printing. This includes: + // + // - stripping leading colons and spaces (see fdstream.cxx) + // - stripping trailing newlines, periods, and spaces + // - stripping system error redundant suffix (see utility.cxx) + // - lower-case the first letter if the beginning looks like a word + // + LIBBUTL_SYMEXPORT ostream& + operator<< (ostream&, const exception&); +} + +#include diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx index 6501bf7..4180ad7 100644 --- a/libbutl/utility.ixx +++ b/libbutl/utility.ixx @@ -1,12 +1,10 @@ // file : libbutl/utility.ixx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // toupper(), tolower(), is*() #include // isw*() #include // for_each() #include // invalid_argument -#endif namespace butl { diff --git a/libbutl/utility.mxx b/libbutl/utility.mxx deleted file mode 100644 index bd24ffd..0000000 --- a/libbutl/utility.mxx +++ /dev/null @@ -1,556 +0,0 @@ -// file : libbutl/utility.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -#ifndef _WIN32 -# include // strcasecmp(), strncasecmp() -#else -# include // _stricmp(), _strnicmp() -#endif - -#ifndef __cpp_lib_modules_ts -#include -#include // ostream -#include -#include // size_t -#include // move(), 
forward(), pair -#include // strcmp(), strlen() -#include // exception, uncaught_exception[s]() -//#include // hash -#endif - -#include // thread_local -#include // uncaught_exceptions - -#ifdef __cpp_modules_ts -export module butl.utility; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utf8; -import butl.unicode; -import butl.optional; -#else -#include -#include -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // Throw std::system_error with generic_category or system_category, - // respectively. - // - // The generic version should be used for portable errno codes (those that - // are mapped to std::errc). The system version should be used for platform- - // specific codes, for example, additional errno codes on POSIX systems or - // the result of GetLastError() on Windows. - // - // See also the exception sanitization below. - // - [[noreturn]] LIBBUTL_SYMEXPORT void - throw_generic_error (int errno_code, const char* what = nullptr); - - [[noreturn]] LIBBUTL_SYMEXPORT void - throw_system_error (int system_code, int fallback_errno_code = 0); - - // Throw std::ios::failure with the specified description and, if it is - // derived from std::system_error (as it should), error code. - // - [[noreturn]] LIBBUTL_SYMEXPORT void - throw_generic_ios_failure (int errno_code, const char* what = nullptr); - - [[noreturn]] LIBBUTL_SYMEXPORT void - throw_system_ios_failure (int system_code, const char* what = nullptr); - - // Convert ASCII character/string case. If there is no upper/lower case - // counterpart, leave the character unchanged. The POSIX locale (also known - // as C locale) must be the current application locale. Otherwise the - // behavior is undefined. 
- // - // Note that the POSIX locale specifies behaviour on data consisting - // entirely of characters from the portable character set (subset of ASCII - // including 103 non-negative characters and English alphabet letters in - // particular) and the control character set (more about them at - // http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap06.html). - // - // Also note that according to the POSIX locale definition the case - // conversion can be applied only to [A-Z] and [a-z] character ranges being - // translated to each other (more about that at - // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02) - // - char ucase (char); - std::string ucase (const char*, std::size_t n = std::string::npos); - std::string ucase (const std::string&, - std::size_t p = 0, - std::size_t n = std::string::npos); - std::string& ucase (std::string&, - std::size_t p = 0, - std::size_t n = std::string::npos); - void ucase (char*, std::size_t); - - char lcase (char); - std::string lcase (const char*, std::size_t n = std::string::npos); - std::string lcase (const std::string&, - std::size_t p = 0, - std::size_t n = std::string::npos); - std::string& lcase (std::string&, - std::size_t p = 0, - std::size_t n = std::string::npos); - void lcase (char*, std::size_t); - - // Compare ASCII characters/strings ignoring case. Behave as if characters - // had been converted to the lower case and then byte-compared. Return a - // negative, zero or positive value if the left hand side is less, equal or - // greater than the right hand side, respectivelly. The POSIX locale (also - // known as C locale) must be the current application locale. Otherwise the - // behavior is undefined. - // - // The optional size argument specifies the maximum number of characters - // to compare. 
- // - int icasecmp (char, char); - - int icasecmp (const std::string&, const std::string&, - std::size_t = std::string::npos); - - int icasecmp (const std::string&, const char*, - std::size_t = std::string::npos); - - int icasecmp (const char*, const char*, std::size_t = std::string::npos); - - // Case-insensitive key comparators (i.e., to be used in sets, maps, etc). - // - struct icase_compare_string - { - bool operator() (const std::string& x, const std::string& y) const - { - return icasecmp (x, y) < 0; - } - }; - - struct icase_compare_c_string - { - bool operator() (const char* x, const char* y) const - { - return icasecmp (x, y) < 0; - } - }; - - bool alpha (char); - bool digit (char); - bool alnum (char); - bool xdigit (char); - - bool alpha (wchar_t); - bool digit (wchar_t); - bool alnum (wchar_t); - bool xdigit (wchar_t); - - // Basic string utilities. - // - - // Trim leading/trailing whitespaces, including '\n' and '\r'. - // - LIBBUTL_SYMEXPORT std::string& - trim (std::string&); - - inline std::string - trim (std::string&& s) - { - return move (trim (s)); - } - - // Find the beginning and end poistions of the next word. Return the size - // of the word or 0 and set b = e = n if there are no more words. For - // example: - // - // for (size_t b (0), e (0); next_word (s, b, e); ) - // { - // string w (s, b, e - b); - // } - // - // Or: - // - // for (size_t b (0), e (0), n; n = next_word (s, b, e, ' ', ','); ) - // { - // string w (s, b, n); - // } - // - // The second version examines up to the n'th character in the string. - // - std::size_t - next_word (const std::string&, std::size_t& b, std::size_t& e, - char d1 = ' ', char d2 = '\0'); - - std::size_t - next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e, - char d1 = ' ', char d2 = '\0'); - - // Sanitize a string to only contain characters valid in an identifier - // (ASCII alphanumeric plus `_`) replacing all others with `_`. 
- // - // Note that it doesn't make sure the first character is not a digit. - // - std::string& sanitize_identifier (std::string&); - std::string sanitize_identifier (std::string&&); - std::string sanitize_identifier (const std::string&); - - // Sanitize a string (e.g., a path) to be a valid C string literal by - // escaping backslahes, double-quotes, and newlines. - // - // Note that in the second version the result is appended to out. - // - std::string sanitize_strlit (const std::string&); - void sanitize_strlit (const std::string&, std::string& out); - - // Return true if the string is a valid UTF-8 encoded byte string and, - // optionally, its decoded codepoints belong to the specified types or - // codepoint whitelist. - // - bool - utf8 (const std::string&, - codepoint_types = codepoint_types::any, - const char32_t* whitelist = nullptr); - - // As above but in case of an invalid sequence also return the description - // of why it is invalid. - // - bool - utf8 (const std::string&, - std::string& what, - codepoint_types = codepoint_types::any, - const char32_t* whitelist = nullptr); - - // Return UTF-8 byte string length in codepoints. Throw - // std::invalid_argument if this is not a valid UTF-8. - // - std::size_t - utf8_length (const std::string&, - codepoint_types = codepoint_types::any, - const char32_t* whitelist = nullptr); - - // Fixup the specified string (in place) to be valid UTF-8 replacing invalid - // bytes and codepoints with the specified character, for example, '?'. - // - // Potential future improvements: - // - char32_t replacement (will need UTF-8 encoding) - // - different replacement for bytes and codepoints - // - LIBBUTL_SYMEXPORT void - to_utf8 (std::string&, - char replacement, - codepoint_types = codepoint_types::any, - const char32_t* whitelist = nullptr); - - // If an input stream is in a failed state, then return true if this is - // because of the eof and throw istream::failure otherwise. 
If the stream - // is not in a failed state, return false. This helper function is normally - // used like this: - // - // is.exceptions (istream::badbit); - // - // for (string l; !eof (getline (is, l)); ) - // { - // ... - // } - // - bool - eof (std::istream&); - - // Environment variables. - // - // Our getenv() wrapper (as well as the relevant process startup functions) - // have a notion of a "thread environment", that is, thread-specific - // environment variables. However, unlike the process environment (in the - // form of the environ array), the thread environment is specified as a set - // of overrides over the process environment (sets and unsets), the same as - // for the process startup. - // - extern -#ifdef __cpp_thread_local - thread_local -#else - __thread -#endif - const char* const* thread_env_; - - // On Windows one cannot export a thread-local variable so we have to - // use wrapper functions. - // -#ifdef _WIN32 - LIBBUTL_SYMEXPORT const char* const* - thread_env (); - - LIBBUTL_SYMEXPORT void - thread_env (const char* const*); -#else - const char* const* - thread_env (); - - void - thread_env (const char* const*); -#endif - - struct auto_thread_env - { - optional prev_env; - - auto_thread_env () = default; - - explicit - auto_thread_env (const char* const*); - - // Move-to-empty-only type. - // - auto_thread_env (auto_thread_env&&); - auto_thread_env& operator= (auto_thread_env&&); - - auto_thread_env (const auto_thread_env&) = delete; - auto_thread_env& operator= (const auto_thread_env&) = delete; - - ~auto_thread_env (); - }; - - // Get the environment variables taking into account the current thread's - // overrides (thread_env). - // - LIBBUTL_SYMEXPORT optional - getenv (const char*); - - inline optional - getenv (const std::string& n) - { - return getenv (n.c_str ()); - } - - // Set the process environment variable. Best done before starting any - // threads (see thread_env). Throw system_error on failure. 
- // - // Note that on Windows setting an empty value unsets the variable. - // - LIBBUTL_SYMEXPORT void - setenv (const std::string& name, const std::string& value); - - // Unset the process environment variable. Best done before starting any - // threads (see thread_env). Throw system_error on failure. - // - LIBBUTL_SYMEXPORT void - unsetenv (const std::string&); - - // Key comparators (i.e., to be used in sets, maps, etc). - // - struct compare_c_string - { - bool operator() (const char* x, const char* y) const noexcept - { - return std::strcmp (x, y) < 0; - } - }; - - struct compare_pointer_target - { - template - bool operator() (const P& x, const P& y) const - { - return *x < *y; - } - }; - - //struct hash_pointer_target - //{ - // template - // std::size_t operator() (const P& x) const {return std::hash (*x);} - //}; - - // Compare two std::reference_wrapper's. - // - struct compare_reference_target - { - template - bool operator() (const R& x, const R& y) const - { - return x.get () < y.get (); - } - }; - - // Combine one or more hash values. - // - inline std::size_t - combine_hash (std::size_t s, std::size_t h) - { - // Magic formula from boost::hash_combine(). - // - return s ^ (h + 0x9e3779b9 + (s << 6) + (s >> 2)); - } - - template - inline std::size_t - combine_hash (std::size_t s, std::size_t h, S... hs) - { - return combine_hash (combine_hash (s, h), hs...); - } - - // Support for reverse iteration using range-based for-loop: - // - // for (... : reverse_iterate (x)) ... - // - template - class reverse_range - { - T x_; - - public: - reverse_range (T&& x): x_ (std::forward (x)) {} - - auto begin () const -> decltype (this->x_.rbegin ()) {return x_.rbegin ();} - auto end () const -> decltype (this->x_.rend ()) {return x_.rend ();} - }; - - template - inline reverse_range - reverse_iterate (T&& x) {return reverse_range (std::forward (x));} - - // Cleanly cast between incompatible function types or dlsym() result - // (void*) to a function pointer. 
- // - template - F - function_cast (P*); - - // Call a function on destruction. - // - template - struct guard_impl; - - template - inline guard_impl - make_guard (F f) - { - return guard_impl (std::move (f)); - } - - template - struct guard_impl - { - guard_impl (F f): function (std::move (f)), active (true) {} - ~guard_impl () {if (active) function ();} - - void - cancel () {active = false;} - - F function; - bool active; - }; - - // Call a function if there is an exception. - // - - template - struct exception_guard_impl; - - template - inline exception_guard_impl - make_exception_guard (F f) - { - return exception_guard_impl (std::move (f)); - } - -#ifdef __cpp_lib_uncaught_exceptions - template - struct exception_guard_impl - { - exception_guard_impl (F f) - : f_ (std::move (f)), - u_ (std::uncaught_exceptions ()) {} - - ~exception_guard_impl () - { - if (u_ != std::uncaught_exceptions ()) - f_ (); - } - - private: - F f_; - int u_; - }; -#else - // Fallback implementation using a TLS flag. - // - // True means we are in the body of a destructor that is being called as - // part of the exception stack unwindining. - // - extern -#ifdef __cpp_thread_local - thread_local -#else - __thread -#endif - // Work around glibc bug #14898. - // -#if defined(__GLIBC__) && \ - defined(__GLIBC_MINOR__) && \ - (__GLIBC__ < 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ < 17) - int -#else - bool -#endif - exception_unwinding_dtor_; - - // On Windows one cannot export a thread-local variable so we have to - // use wrapper functions. 
- // -#ifdef _WIN32 - LIBBUTL_SYMEXPORT bool - exception_unwinding_dtor (); - - LIBBUTL_SYMEXPORT void - exception_unwinding_dtor (bool); -#else - inline bool - exception_unwinding_dtor () {return exception_unwinding_dtor_;} - - inline void - exception_unwinding_dtor (bool v) {exception_unwinding_dtor_ = v;} -#endif - - template - struct exception_guard_impl - { - exception_guard_impl (F f): f_ (std::move (f)) {} - ~exception_guard_impl () - { - if (std::uncaught_exception ()) - { - exception_unwinding_dtor (true); - f_ (); - exception_unwinding_dtor (false); - } - } - - private: - F f_; - }; -#endif -} - -LIBBUTL_MODEXPORT namespace std -{ - // Sanitize the exception description before printing. This includes: - // - // - stripping leading colons and spaces (see fdstream.cxx) - // - stripping trailing newlines, periods, and spaces - // - stripping system error redundant suffix (see utility.cxx) - // - lower-case the first letter if the beginning looks like a word - // - LIBBUTL_SYMEXPORT ostream& - operator<< (ostream&, const exception&); -} - -#include diff --git a/libbutl/uuid-linux.cxx b/libbutl/uuid-linux.cxx index 7689088..82af2e9 100644 --- a/libbutl/uuid-linux.cxx +++ b/libbutl/uuid-linux.cxx @@ -13,7 +13,7 @@ #include // move() #include -#include // function_cast() +#include // function_cast() using namespace std; diff --git a/libbutl/vector-view.hxx b/libbutl/vector-view.hxx new file mode 100644 index 0000000..16ab08e --- /dev/null +++ b/libbutl/vector-view.hxx @@ -0,0 +1,118 @@ +// file : libbutl/vector-view.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include +#include // size_t, ptrdiff_t +#include // swap() +#include // reverse_iterator +#include // out_of_range + +#include + +namespace butl +{ + // In our version a const view allows the modification of the elements + // unless T is made const (the same semantics as in smart pointers). + // + // @@ If T is const T1, could be useful to have a c-tor from vector. 
+ // + template + class vector_view + { + public: + using value_type = T; + using pointer = T*; + using reference = T&; + using const_pointer = const T*; + using const_reference = const T&; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using iterator = T*; + using const_iterator = const T*; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + // construct/copy/destroy: + // + vector_view (): data_ (nullptr), size_ (0) {} + vector_view (T* d, size_type s): data_ (d), size_ (s) {} + + template + vector_view (std::vector& v) + : data_ (v.data ()), size_ (v.size ()) {} + + template + vector_view (const std::vector& v) + : data_ (v.data ()), size_ (v.size ()) {} + + template + vector_view (const vector_view& v) + : data_ (v.data ()), size_ (v.size ()) {} + + vector_view (vector_view&&) = default; + vector_view (const vector_view&) = default; + vector_view& operator= (vector_view&&) = default; + vector_view& operator= (const vector_view&) = default; + + // iterators: + // + iterator begin() const {return data_;} + iterator end() const {return data_ + size_;} + + const_iterator cbegin() const {return data_;} + const_iterator cend() const {return data_ + size_;} + + reverse_iterator rbegin() const {return reverse_iterator (end ());} + reverse_iterator rend() const {return reverse_iterator (begin ());} + + const_reverse_iterator crbegin() const { + return const_reverse_iterator (cend ());} + const_reverse_iterator crend() const { + return const_reverse_iterator (cbegin ());} + + // capacity: + // + size_type size() const {return size_;} + bool empty() const {return size_ == 0;} + + // element access: + // + reference operator[](size_type n) const {return data_[n];} + reference front() const {return data_[0];} + reference back() const {return data_[size_ - 1];} + + reference at(size_type n) const + { + if (n >= size_) + throw std::out_of_range ("index out of range"); + return data_[n]; 
+ } + + // data access: + // + T* data() const {return data_;} + + // modifiers: + // + void assign (T* d, size_type s) {data_ = d; size_ = s;} + void clear () {data_ = nullptr; size_ = 0;} + void swap (vector_view& v) { + std::swap (data_, v.data_); std::swap (size_, v.size_);} + + private: + T* data_; + size_type size_; + }; + + //@@ TODO. + // + template bool operator== (vector_view l, vector_view r); + template bool operator!= (vector_view l, vector_view r); + template bool operator< (vector_view l, vector_view r); + template bool operator> (vector_view l, vector_view r); + template bool operator<= (vector_view l, vector_view r); + template bool operator>= (vector_view l, vector_view r); +} diff --git a/libbutl/vector-view.mxx b/libbutl/vector-view.mxx deleted file mode 100644 index 7924371..0000000 --- a/libbutl/vector-view.mxx +++ /dev/null @@ -1,133 +0,0 @@ -// file : libbutl/vector-view.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#ifndef __cpp_lib_modules_ts -#include -#include // size_t, ptrdiff_t -#include // swap() -#include // reverse_iterator -#include // out_of_range -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.vector_view; -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // In our version a const view allows the modification of the elements - // unless T is made const (the same semantics as in smart pointers). - // - // @@ If T is const T1, could be useful to have a c-tor from vector. 
- // - template - class vector_view - { - public: - using value_type = T; - using pointer = T*; - using reference = T&; - using const_pointer = const T*; - using const_reference = const T&; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using iterator = T*; - using const_iterator = const T*; - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - // construct/copy/destroy: - // - vector_view (): data_ (nullptr), size_ (0) {} - vector_view (T* d, size_type s): data_ (d), size_ (s) {} - - template - vector_view (std::vector& v) - : data_ (v.data ()), size_ (v.size ()) {} - - template - vector_view (const std::vector& v) - : data_ (v.data ()), size_ (v.size ()) {} - - template - vector_view (const vector_view& v) - : data_ (v.data ()), size_ (v.size ()) {} - - vector_view (vector_view&&) = default; - vector_view (const vector_view&) = default; - vector_view& operator= (vector_view&&) = default; - vector_view& operator= (const vector_view&) = default; - - // iterators: - // - iterator begin() const {return data_;} - iterator end() const {return data_ + size_;} - - const_iterator cbegin() const {return data_;} - const_iterator cend() const {return data_ + size_;} - - reverse_iterator rbegin() const {return reverse_iterator (end ());} - reverse_iterator rend() const {return reverse_iterator (begin ());} - - const_reverse_iterator crbegin() const { - return const_reverse_iterator (cend ());} - const_reverse_iterator crend() const { - return const_reverse_iterator (cbegin ());} - - // capacity: - // - size_type size() const {return size_;} - bool empty() const {return size_ == 0;} - - // element access: - // - reference operator[](size_type n) const {return data_[n];} - reference front() const {return data_[0];} - reference back() const {return data_[size_ - 1];} - - reference at(size_type n) const - { - if (n >= size_) - throw std::out_of_range ("index out of range"); - return data_[n]; 
- } - - // data access: - // - T* data() const {return data_;} - - // modifiers: - // - void assign (T* d, size_type s) {data_ = d; size_ = s;} - void clear () {data_ = nullptr; size_ = 0;} - void swap (vector_view& v) { - std::swap (data_, v.data_); std::swap (size_, v.size_);} - - private: - T* data_; - size_type size_; - }; - - //@@ TODO. - // - template bool operator== (vector_view l, vector_view r); - template bool operator!= (vector_view l, vector_view r); - template bool operator< (vector_view l, vector_view r); - template bool operator> (vector_view l, vector_view r); - template bool operator<= (vector_view l, vector_view r); - template bool operator>= (vector_view l, vector_view r); -} diff --git a/libbutl/win32-utility.cxx b/libbutl/win32-utility.cxx index 3b44d60..c69842b 100644 --- a/libbutl/win32-utility.cxx +++ b/libbutl/win32-utility.cxx @@ -8,16 +8,9 @@ // #ifdef _WIN32 -#ifndef __cpp_lib_modules_ts -#include #include // unique_ptr -#include // throw_system_error() -#else -import std.core; - -import butl.utility; -#endif +#include // throw_system_error() using namespace std; diff --git a/libbutl/win32-utility.hxx b/libbutl/win32-utility.hxx index b71eb1a..9bed647 100644 --- a/libbutl/win32-utility.hxx +++ b/libbutl/win32-utility.hxx @@ -31,11 +31,7 @@ # endif #endif -#ifndef __cpp_lib_modules_ts #include -#else -import std.core; -#endif #include diff --git a/tests/b-info/driver.cxx b/tests/b-info/driver.cxx index 963d4e3..6832aaa 100644 --- a/tests/b-info/driver.cxx +++ b/tests/b-info/driver.cxx @@ -1,26 +1,12 @@ // file : tests/b-info/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.b; -import butl.path; -import butl.utility; // operator<<(ostream,exception) -#else -#include -#include -#include -#endif + +#include +#include +#include // operator<<(ostream,exception) #undef NDEBUG #include diff --git a/tests/backtrace/driver.cxx b/tests/backtrace/driver.cxx index a8ae99a..ecfa58e 100644 --- a/tests/backtrace/driver.cxx +++ b/tests/backtrace/driver.cxx @@ -5,29 +5,14 @@ # include // setrlimit() #endif -#ifndef __cpp_lib_modules_ts #include #include #include // set_terminate(), terminate_handler #include -#else -import std.io; -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.process; -import butl.fdstream; -import butl.backtrace; -#else -#include -#include -#include -#endif +#include +#include +#include #undef NDEBUG #include diff --git a/tests/base64/driver.cxx b/tests/base64/driver.cxx index d6a95b3..a37a238 100644 --- a/tests/base64/driver.cxx +++ b/tests/base64/driver.cxx @@ -1,23 +1,11 @@ // file : tests/base64/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.base64; -#else -#include -#endif +#include #undef NDEBUG #include diff --git a/tests/builtin/driver.cxx b/tests/builtin/driver.cxx index 7a32d8e..7a0193f 100644 --- a/tests/builtin/driver.cxx +++ b/tests/builtin/driver.cxx @@ -5,7 +5,6 @@ # include #endif -#ifndef __cpp_lib_modules_ts #include #include #include @@ -16,27 +15,12 @@ #ifndef _WIN32 # include // this_thread::sleep_for() #endif -#endif - -// Other includes. 
-#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; // eof() -import butl.builtin; -import butl.optional; -import butl.timestamp; // to_stream(duration) -#else -#include -#include -#include -#include -#include -#endif +#include +#include // eof() +#include +#include +#include // to_stream(duration) #undef NDEBUG #include diff --git a/tests/command/driver.cxx b/tests/command/driver.cxx index e2fe41a..9194c13 100644 --- a/tests/command/driver.cxx +++ b/tests/command/driver.cxx @@ -1,36 +1,19 @@ // file : tests/command/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include #include #include // invalid_argument #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.path_io; -import butl.process; // process::print() -import butl.command; -import butl.utility; -import butl.optional; -#else -#include -#include -#include -#include -#include -#include -#endif +#include +#include +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/cpfile/driver.cxx b/tests/cpfile/driver.cxx index 8891d58..fe01bdd 100644 --- a/tests/cpfile/driver.cxx +++ b/tests/cpfile/driver.cxx @@ -1,27 +1,13 @@ // file : tests/cpfile/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include -#endif - -// Other includes. 
-#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.fdstream; -import butl.filesystem; -#else -#include -#include -#include -#endif +#include +#include +#include #undef NDEBUG #include diff --git a/tests/curl/driver.cxx b/tests/curl/driver.cxx index 3b0ea43..72faf52 100644 --- a/tests/curl/driver.cxx +++ b/tests/curl/driver.cxx @@ -1,33 +1,14 @@ // file : tests/curl/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.curl; -import butl.path; -import butl.process; -import butl.utility; // operator<<(ostream, exception) -import butl.fdstream; - -import butl.optional; // @@ MOD Clang should not be necessary. -import butl.small_vector; // @@ MOD Clang should not be necessary. -#else -#include -#include -#include -#include -#include -#endif + +#include +#include +#include +#include // operator<<(ostream, exception) +#include #undef NDEBUG #include diff --git a/tests/default-options/driver.cxx b/tests/default-options/driver.cxx index 94ad189..766dca8 100644 --- a/tests/default-options/driver.cxx +++ b/tests/default-options/driver.cxx @@ -1,35 +1,19 @@ // file : tests/default-options/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include #include #include #include // invalid_argument -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.path_io; -import butl.optional; -import butl.fdstream; -import butl.default_options; -#else -#include -#include -#include // eof() -#include -#include -#include -#endif + +#include +#include +#include // eof() +#include +#include +#include #undef NDEBUG #include diff --git a/tests/dir-iterator/driver.cxx b/tests/dir-iterator/driver.cxx index 0c19037..4a2ff53 100644 --- a/tests/dir-iterator/driver.cxx +++ b/tests/dir-iterator/driver.cxx @@ -1,28 +1,13 @@ // file : tests/dir-iterator/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // size_t #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.path_io; -import butl.utility; -import butl.filesystem; -#else -#include -#include -#include // operator<<(ostream, exception) -#include -#endif + +#include +#include +#include // operator<<(ostream, exception) +#include #undef NDEBUG #include diff --git a/tests/entry-time/driver.cxx b/tests/entry-time/driver.cxx index 0da7fe9..c29837d 100644 --- a/tests/entry-time/driver.cxx +++ b/tests/entry-time/driver.cxx @@ -1,29 +1,14 @@ // file : tests/entry-time/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.filesystem; - -import butl.optional; // @@ MOD Clang should not be necessary. 
-#else -#include -#include -#include -#include -#endif +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/fdstream/driver.cxx b/tests/fdstream/driver.cxx index dc657e7..254a03e 100644 --- a/tests/fdstream/driver.cxx +++ b/tests/fdstream/driver.cxx @@ -5,7 +5,6 @@ # include #endif -#ifndef __cpp_lib_modules_ts #ifndef _WIN32 # include #endif @@ -20,30 +19,12 @@ #include // move() #include #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#ifndef _WIN32 -import std.threading; -#endif -#endif -import butl.path; -import butl.process; -import butl.fdstream; -import butl.timestamp; -import butl.filesystem; -#else -#include -#include -#include -#include -#include -#endif +#include +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/link/driver.cxx b/tests/link/driver.cxx index 6b898f5..db9c195 100644 --- a/tests/link/driver.cxx +++ b/tests/link/driver.cxx @@ -1,32 +1,16 @@ // file : tests/link/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include // pair #include // cerr #include -#endif - -// Other includes. 
-#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.path_io; -import butl.utility; -import butl.fdstream; -import butl.filesystem; -#else -#include -#include -#include -#include -#include -#endif +#include +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/lz4/driver.cxx b/tests/lz4/driver.cxx index 9004b56..8139c34 100644 --- a/tests/lz4/driver.cxx +++ b/tests/lz4/driver.cxx @@ -5,8 +5,8 @@ #include #include -#include -#include // entry_stat, path_entry() +#include +#include // entry_stat, path_entry() #undef NDEBUG #include diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx index 859e275..6924321 100644 --- a/tests/manifest-parser/driver.cxx +++ b/tests/manifest-parser/driver.cxx @@ -1,27 +1,14 @@ // file : tests/manifest-parser/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // pair, move() #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.optional; -import butl.manifest_parser; -#else -#include -#include -#endif + +#include +#include #undef NDEBUG #include diff --git a/tests/manifest-rewriter/driver.cxx b/tests/manifest-rewriter/driver.cxx index d08a6c8..3e8fecb 100644 --- a/tests/manifest-rewriter/driver.cxx +++ b/tests/manifest-rewriter/driver.cxx @@ -1,34 +1,18 @@ // file : tests/manifest-rewriter/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // uint64_t #include // move() #include #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.optional; -import butl.fdstream; -import butl.manifest_parser; -import butl.manifest_rewriter; -#else -#include -#include -#include -#include -#include -#endif + +#include +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx index d0bcab9..4cf35e9 100644 --- a/tests/manifest-roundtrip/driver.cxx +++ b/tests/manifest-roundtrip/driver.cxx @@ -1,28 +1,13 @@ // file : tests/manifest-roundtrip/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // operator<<(ostream, exception) -import butl.fdstream; -import butl.manifest_parser; -import butl.manifest_serializer; -#else -#include -#include -#include -#include -#endif +#include // operator<<(ostream, exception) +#include +#include +#include #undef NDEBUG #include diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx index 59b8ddd..4c09038 100644 --- a/tests/manifest-serializer/driver.cxx +++ b/tests/manifest-serializer/driver.cxx @@ -1,25 +1,13 @@ // file : tests/manifest-serializer/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // pair #include #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.manifest_serializer; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/mventry/driver.cxx b/tests/mventry/driver.cxx index dc2e2f7..e895ad6 100644 --- a/tests/mventry/driver.cxx +++ b/tests/mventry/driver.cxx @@ -1,26 +1,12 @@ // file : tests/mventry/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; // operator<<(ostream, exception) -import butl.filesystem; -#else -#include -#include -#include -#endif +#include +#include // operator<<(ostream, exception) +#include #undef NDEBUG #include diff --git a/tests/openssl/driver.cxx b/tests/openssl/driver.cxx index 450a547..d671c00 100644 --- a/tests/openssl/driver.cxx +++ b/tests/openssl/driver.cxx @@ -1,34 +1,15 @@ // file : tests/openssl/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; // operator<<(ostream, exception) -import butl.openssl; -import butl.process; -import butl.fdstream; // nullfd - -import butl.optional; // @@ MOD Clang should not be necessary. -import butl.small_vector; // @@ MOD Clang should not be necessary. 
-#else -#include -#include -#include -#include -#endif +#include +#include // operator<<(ostream, exception) +#include +#include // nullfd #undef NDEBUG #include diff --git a/tests/optional/driver.cxx b/tests/optional/driver.cxx index e73f03e..da09cf5 100644 --- a/tests/optional/driver.cxx +++ b/tests/optional/driver.cxx @@ -1,21 +1,10 @@ // file : tests/optional/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include // move() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.optional; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/pager/driver.cxx b/tests/pager/driver.cxx index 0d6e1e6..c807ed0 100644 --- a/tests/pager/driver.cxx +++ b/tests/pager/driver.cxx @@ -1,26 +1,14 @@ // file : tests/pager/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // ios_base::failure #include #include #include // move() #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.pager; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/path-entry/driver.cxx b/tests/path-entry/driver.cxx index 13914cd..d9ea2be 100644 --- a/tests/path-entry/driver.cxx +++ b/tests/path-entry/driver.cxx @@ -1,34 +1,17 @@ // file : tests/path-entry/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // invalid_argument #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.path-io; -import butl.utility; // operator<<(ostream, exception) -import butl.optional; -import butl.timestamp; -import butl.filesystem; -#else -#include -#include -#include -#include -#include -#include -#endif + +#include +#include +#include // operator<<(ostream, exception) +#include +#include +#include #undef NDEBUG #include diff --git a/tests/path/driver.cxx b/tests/path/driver.cxx index 9f85d6b..3124c13 100644 --- a/tests/path/driver.cxx +++ b/tests/path/driver.cxx @@ -1,25 +1,12 @@ // file : tests/path/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -//import butl.path_io; -#else -#include -//#include -#endif +#include +//#include #undef NDEBUG #include diff --git a/tests/prefix-map/driver.cxx b/tests/prefix-map/driver.cxx index 0628abf..8ed35ea 100644 --- a/tests/prefix-map/driver.cxx +++ b/tests/prefix-map/driver.cxx @@ -1,22 +1,10 @@ // file : tests/prefix-map/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.prefix_map; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/process-run/driver.cxx b/tests/process-run/driver.cxx index c851666..032f890 100644 --- a/tests/process-run/driver.cxx +++ b/tests/process-run/driver.cxx @@ -1,29 +1,13 @@ // file : tests/process-run/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.process; -import butl.optional; // @@ MOD Clang shouldn't be needed. -import butl.fdstream; -import butl.small_vector; -#else -#include -#include -#include -#include -#endif + +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/process-term/driver.cxx b/tests/process-term/driver.cxx index 90afc96..799757c 100644 --- a/tests/process-term/driver.cxx +++ b/tests/process-term/driver.cxx @@ -10,7 +10,6 @@ # include #endif -#ifndef __cpp_lib_modules_ts #include #include // ERANGE #include // move() @@ -21,23 +20,10 @@ #ifndef _WIN32 # include #endif -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.process; -import butl.optional; -import butl.fdstream; -#else -#include -#include -#include -#endif +#include +#include +#include #undef NDEBUG #include diff --git a/tests/process/driver.cxx b/tests/process/driver.cxx index ab5006a..1ee5710 100644 --- a/tests/process/driver.cxx +++ b/tests/process/driver.cxx @@ -1,7 +1,6 @@ // file : tests/process/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include @@ -10,30 +9,14 @@ #include // istreambuf_iterator, ostream_iterator #include // copy() #include -#endif - -// Other includes. 
-#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; // setenv(), getenv() -import butl.process; -import butl.optional; -import butl.fdstream; -import butl.timestamp; -#else -#include -#include -#include -#include -#include -#include -#include -#endif +#include +#include // setenv(), getenv() +#include +#include +#include +#include +#include #undef NDEBUG #include diff --git a/tests/progress/driver.cxx b/tests/progress/driver.cxx index 68952a7..f1a257c 100644 --- a/tests/progress/driver.cxx +++ b/tests/progress/driver.cxx @@ -8,36 +8,16 @@ # include //_write() #endif -#ifndef __cpp_lib_modules_ts #include #include // size_t #include #ifndef _WIN32 # include // this_thread::sleep_for() #endif -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#ifndef _WIN32 -import std.threading; -#endif -#endif -import butl.process; -import butl.fdstream; -import butl.diagnostics; -import butl.optional; // @@ MOD Clang should not be necessary. -import butl.small_vector; // @@ MOD Clang should not be necessary. -#else -#include -#include // fdopen_null(), stderr_fd() -#include -#endif +#include +#include // fdopen_null(), stderr_fd() +#include #undef NDEBUG #include diff --git a/tests/project-name/driver.cxx b/tests/project-name/driver.cxx index 8c5b206..ac1c898 100644 --- a/tests/project-name/driver.cxx +++ b/tests/project-name/driver.cxx @@ -1,26 +1,14 @@ // file : tests/project-name/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // ios::*bit #include #include #include // invalid_argument -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // operator<<(ostream,exception), eof(), *case() -import butl.project_name; -#else -#include -#include -#endif + +#include // operator<<(ostream,exception), eof(), + // *case() +#include #undef NDEBUG #include diff --git a/tests/regex/driver.cxx b/tests/regex/driver.cxx index ce707b8..f8363e1 100644 --- a/tests/regex/driver.cxx +++ b/tests/regex/driver.cxx @@ -1,29 +1,15 @@ // file : tests/regex/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // pair #include #include // invalid_argument #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.regex; // @@ MOD TODO: shouldn't be necessary (re-export). -#endif -import butl.regex; -import butl.utility; // operator<<(ostream, exception) -#else -#include -#include -#endif +#include +#include // operator<<(ostream, exception) #undef NDEBUG #include diff --git a/tests/semantic-version/driver.cxx b/tests/semantic-version/driver.cxx index ac6bf33..2bdd415 100644 --- a/tests/semantic-version/driver.cxx +++ b/tests/semantic-version/driver.cxx @@ -1,21 +1,9 @@ // file : tests/semantic-version/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include -#endif -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.semantic_version; -#else -#include -#endif +#include #undef NDEBUG #include diff --git a/tests/sendmail/driver.cxx b/tests/sendmail/driver.cxx index a29650c..3b97202 100644 --- a/tests/sendmail/driver.cxx +++ b/tests/sendmail/driver.cxx @@ -1,32 +1,13 @@ // file : tests/sendmail/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.process; -import butl.utility; // operator<<(ostream, exception) -import butl.sendmail; -import butl.fdstream; - -import butl.optional; // @@ MOD Clang should not be necessary. -import butl.small_vector; // @@ MOD Clang should not be necessary. -#else -#include -#include -#include -#include -#endif +#include +#include +#include // operator<<(ostream, exception) +#include #undef NDEBUG #include diff --git a/tests/sha1/driver.cxx b/tests/sha1/driver.cxx index cb648db..1e8e254 100644 --- a/tests/sha1/driver.cxx +++ b/tests/sha1/driver.cxx @@ -1,27 +1,13 @@ // file : tests/sha1/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include // size_t -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.sha1; -import butl.path; -import butl.fdstream; -import butl.filesystem; -#else -#include -#include -#include -#include // auto_rmfile -#endif + +#include +#include +#include +#include // auto_rmfile #undef NDEBUG #include diff --git a/tests/sha256/driver.cxx b/tests/sha256/driver.cxx index 3d2f69e..30dfa49 100644 --- a/tests/sha256/driver.cxx +++ b/tests/sha256/driver.cxx @@ -1,27 +1,13 @@ // file : tests/sha256/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include // size_t -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.path; -import butl.sha256; -import butl.fdstream; -import butl.filesystem; -#else -#include -#include -#include -#include // auto_rmfile -#endif + +#include +#include +#include +#include // auto_rmfile #undef NDEBUG #include diff --git a/tests/small-forward-list/driver.cxx b/tests/small-forward-list/driver.cxx index 1b8a14f..1cfea77 100644 --- a/tests/small-forward-list/driver.cxx +++ b/tests/small-forward-list/driver.cxx @@ -1,22 +1,10 @@ // file : tests/small-forward-list/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.small_forward_list; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/small-list/driver.cxx b/tests/small-list/driver.cxx index cced91d..8e2fb6e 100644 --- a/tests/small-list/driver.cxx +++ b/tests/small-list/driver.cxx @@ -1,22 +1,10 @@ // file : tests/small-list/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. 
-#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.small_list; -#else -#include -#endif +#include #undef NDEBUG #include diff --git a/tests/small-vector/driver.cxx b/tests/small-vector/driver.cxx index be8e6d8..cc012fc 100644 --- a/tests/small-vector/driver.cxx +++ b/tests/small-vector/driver.cxx @@ -1,22 +1,10 @@ // file : tests/small-vector/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.small_vector; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/standard-version/driver.cxx b/tests/standard-version/driver.cxx index dbd24fc..29cad54 100644 --- a/tests/standard-version/driver.cxx +++ b/tests/standard-version/driver.cxx @@ -1,29 +1,15 @@ // file : tests/standard-version/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include // ios::failbit, ios::badbit #include #include // uint*_t #include #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // operator<<(ostream,exception), eof() -import butl.optional; -import butl.standard_version; -#else -#include -#include -#include -#endif + +#include // operator<<(ostream,exception), eof() +#include +#include #undef NDEBUG #include diff --git a/tests/strcase/driver.cxx b/tests/strcase/driver.cxx index 48e77f2..8e964a6 100644 --- a/tests/strcase/driver.cxx +++ b/tests/strcase/driver.cxx @@ -1,20 +1,9 @@ // file : tests/strcase/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include -#endif -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.utility; -#else -#include -#endif +#include #undef NDEBUG #include diff --git a/tests/string-parser/driver.cxx b/tests/string-parser/driver.cxx index 2e47e4a..8cba912 100644 --- a/tests/string-parser/driver.cxx +++ b/tests/string-parser/driver.cxx @@ -1,25 +1,12 @@ // file : tests/string-parser/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // operator<<(ostream,exception) -import butl.string_parser; -#else -#include -#include -#endif + +#include // operator<<(ostream,exception) +#include #undef NDEBUG #include diff --git a/tests/tab-parser/driver.cxx b/tests/tab-parser/driver.cxx index e9cd36f..99c19d9 100644 --- a/tests/tab-parser/driver.cxx +++ b/tests/tab-parser/driver.cxx @@ -1,24 +1,11 @@ // file : tests/tab-parser/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include -#endif -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.utility; // operator<<(ostream,exception) -import butl.tab_parser; -#else -#include -#include -#endif +#include // operator<<(ostream,exception) +#include #undef NDEBUG #include diff --git a/tests/target-triplet/driver.cxx b/tests/target-triplet/driver.cxx index 9cd7c2b..6dcb77e 100644 --- a/tests/target-triplet/driver.cxx +++ b/tests/target-triplet/driver.cxx @@ -1,23 +1,11 @@ // file : tests/target-triplet/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include // invalid_argument -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.target_triplet; -#else -#include -#endif + +#include #undef NDEBUG #include diff --git a/tests/timestamp/driver.cxx b/tests/timestamp/driver.cxx index 0ea2b38..11f328a 100644 --- a/tests/timestamp/driver.cxx +++ b/tests/timestamp/driver.cxx @@ -3,26 +3,14 @@ #include // tzset() (POSIX), _tzset() (Windows) -#ifndef __cpp_lib_modules_ts #include #include #include #include #include #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.timestamp; -#else -#include -#endif +#include #undef NDEBUG #include diff --git a/tests/url/driver.cxx b/tests/url/driver.cxx index 02bb5b8..869eed5 100644 --- a/tests/url/driver.cxx +++ b/tests/url/driver.cxx @@ -1,27 +1,14 @@ // file : tests/url/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include // move() #include #include // back_inserter #include // invalid_argument -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.url; -import butl.utility; // operator<<(ostream, exception) -#else -#include -#include -#endif + +#include +#include // operator<<(ostream, exception) #undef NDEBUG #include diff --git a/tests/utf8/driver.cxx b/tests/utf8/driver.cxx index db98aa6..ccc2870 100644 --- a/tests/utf8/driver.cxx +++ b/tests/utf8/driver.cxx @@ -1,22 +1,10 @@ // file : tests/utf8/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include -#endif - -// Other includes. 
- -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -#endif -import butl.utf8; -import butl.utility; -#else -#include -#include -#endif + +#include +#include #undef NDEBUG #include diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx index bd6a00d..9419a79 100644 --- a/tests/wildcard/driver.cxx +++ b/tests/wildcard/driver.cxx @@ -1,34 +1,18 @@ // file : tests/wildcard/driver.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_lib_modules_ts #include #include #include #include // sort() #include #include -#endif - -// Other includes. -#ifdef __cpp_modules_ts -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; // operator<<(ostream, exception) -import butl.optional; -import butl.filesystem; -import butl.path_pattern; -#else -#include -#include -#include -#include -#include -#endif +#include +#include // operator<<(ostream, exception) +#include +#include +#include #undef NDEBUG #include -- cgit v1.1