diff options
105 files changed, 8050 insertions, 834 deletions
@@ -5,10 +5,16 @@ *.d *.t *.i +*.i.* *.ii +*.ii.* *.o *.obj +*.gcm +*.pcm +*.ifc *.so +*.dylib *.dll *.a *.lib @@ -1 +1 @@ -Copyright (c) 2014-2022 the build2 authors (see the AUTHORS file). +Copyright (c) 2014-2024 the build2 authors (see the AUTHORS file). @@ -8,6 +8,10 @@ libbutl/mingw-*.hxx: 2-clause BSD License; see the file headers for details. +libbutl/json/pdjson.[hc]: + +UNLICENSE (dedicated to the public domain). + The rest: MIT License diff --git a/build/root.build b/build/root.build index 1526ef0..17e42b1 100644 --- a/build/root.build +++ b/build/root.build @@ -22,7 +22,12 @@ elif ($cxx.id == 'gcc') # cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \ -Wno-stringop-overread + + if ($cxx.version.major >= 13) + cxx.coptions += -Wno-dangling-reference } +elif ($cxx.id.type == 'clang' && $cxx.version.major >= 15) + cxx.coptions += -Wno-unqualified-std-cast-call # Load the cli module but only if it's available. This way a distribution # that includes pre-generated files can be built without installing cli. diff --git a/libbutl/b.cxx b/libbutl/b.cxx index 74a430c..0b4472f 100644 --- a/libbutl/b.cxx +++ b/libbutl/b.cxx @@ -35,7 +35,7 @@ namespace butl void b_info (std::vector<b_project_info>& r, const vector<dir_path>& projects, - bool ext_mods, + b_info_flags fl, uint16_t verb, const function<b_callback>& cmd_callback, const path& program, @@ -81,13 +81,22 @@ namespace butl else vops.push_back ("-q"); - vector<string> ps; - ps.reserve (projects.size ()); + string spec ("info("); // Note that quoting is essential here. // - for (const dir_path& p: projects) - ps.push_back ("'" + p.representation () + "'"); + for (size_t i (0); i != projects.size(); ++i) + { + if (i != 0) + spec += ' '; + + spec += '\'' + projects[i].representation () + '\''; + } + + if ((fl & b_info_flags::subprojects) == b_info_flags::none) + spec += ",no_subprojects"; + + spec += ')'; pr = process_start_callback ( cmd_callback ? 
cmd_callback : [] (const char* const*, size_t) {}, @@ -96,10 +105,12 @@ namespace butl 2 /* stderr */, pp, vops, - ext_mods ? nullptr : "--no-external-modules", + ((fl & b_info_flags::ext_mods) == b_info_flags::none + ? "--no-external-modules" + : nullptr), "-s", ops, - "info:", ps); + spec); pipe.out.close (); ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); @@ -297,7 +308,7 @@ namespace butl assert (!pr.wait ()); throw b_error ( - string ("process ") + pp.recall_string () + " " + to_string (*pr.exit), + string ("process ") + pp.recall_string () + ' ' + to_string (*pr.exit), move (pr.exit)); } catch (const process_error& e) diff --git a/libbutl/b.hxx b/libbutl/b.hxx index cc3a309..d3fd2bf 100644 --- a/libbutl/b.hxx +++ b/libbutl/b.hxx @@ -51,11 +51,6 @@ namespace butl // result vector can be used to determine which project information caused // the error. // - // Unless you need information that may come from external modules - // (operations, meta-operations, etc), pass false as the ext_mods argument, - // which results in passing --no-external-modules to the build2 program and - // speeds up its execution. - // // You can also specify the build2 verbosity level, command line callback // (see process_run_callback() for details), build program search details, // and additional options. @@ -92,12 +87,35 @@ namespace butl std::vector<std::string> modules; }; + enum class b_info_flags: std::uint16_t + { + // Retrieve information that may come from external modules (operations, + // meta-operations, etc). Omitting this flag results in passing + // --no-external-modules to the build2 program and speeds up its + // execution. + // + ext_mods = 0x1, + + // Discover subprojects. Omitting this flag results in passing + // no_subprojects info meta-operation parameter to the build2 program and + // speeds up its execution. 
+ // + subprojects = 0x2, + + none = 0 + }; + + inline b_info_flags operator& (b_info_flags, b_info_flags); + inline b_info_flags operator| (b_info_flags, b_info_flags); + inline b_info_flags operator&= (b_info_flags&, b_info_flags); + inline b_info_flags operator|= (b_info_flags&, b_info_flags); + using b_callback = void (const char* const args[], std::size_t n); LIBBUTL_SYMEXPORT void b_info (std::vector<b_project_info>& result, const std::vector<dir_path>& projects, - bool ext_mods, + b_info_flags, std::uint16_t verb = 1, const std::function<b_callback>& cmd_callback = {}, const path& program = path ("b"), @@ -108,7 +126,7 @@ namespace butl // inline b_project_info b_info (const dir_path& project, - bool ext_mods, + b_info_flags fl, std::uint16_t verb = 1, const std::function<b_callback>& cmd_callback = {}, const path& program = path ("b"), @@ -118,7 +136,7 @@ namespace butl std::vector<b_project_info> r; b_info (r, std::vector<dir_path> ({project}), - ext_mods, + fl, verb, cmd_callback, program, @@ -128,3 +146,5 @@ namespace butl return std::move (r[0]); } } + +#include <libbutl/b.ixx> diff --git a/libbutl/b.ixx b/libbutl/b.ixx new file mode 100644 index 0000000..1667101 --- /dev/null +++ b/libbutl/b.ixx @@ -0,0 +1,31 @@ +// file : libbutl/b.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +namespace butl +{ + // b_info_flags + // + inline b_info_flags operator& (b_info_flags x, b_info_flags y) + { + return x &= y; + } + + inline b_info_flags operator| (b_info_flags x, b_info_flags y) + { + return x |= y; + } + + inline b_info_flags operator&= (b_info_flags& x, b_info_flags y) + { + return x = static_cast<b_info_flags> ( + static_cast<std::uint16_t> (x) & + static_cast<std::uint16_t> (y)); + } + + inline b_info_flags operator|= (b_info_flags& x, b_info_flags y) + { + return x = static_cast<b_info_flags> ( + static_cast<std::uint16_t> (x) | + static_cast<std::uint16_t> (y)); + } +} diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx index 
4466f24..282f7c2 100644 --- a/libbutl/base64.cxx +++ b/libbutl/base64.cxx @@ -16,19 +16,20 @@ namespace butl static const char codes[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + static const char codes_url[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + // base64-encode the data in the iterator range [i, e). Write the encoded - // data starting at the iterator position o. + // data starting at the iterator position o. If url is true, encode using + // base64url. // template <typename I, typename O> static void - base64_encode (I& i, const I& e, O& o) + base64_encode (I& i, const I& e, O& o, bool url = false) { const size_t un (65); // Non-existing index of the codes string. for (size_t n (0); i != e; ++n) { - if (n && n % 19 == 0) - *o++ = '\n'; // Split into lines, like the base64 utility does. - auto next = [&i] () {return static_cast<unsigned char> (*i++);}; unsigned char c (next ()); @@ -51,10 +52,26 @@ namespace butl i4 = c & 0x3F; } - *o++ = codes[i1]; - *o++ = codes[i2]; - *o++ = i3 == un ? '=' : codes[i3]; - *o++ = i4 == un ? '=' : codes[i4]; + if (!url) + { + if (n && n % 19 == 0) + *o++ = '\n'; // Split into lines, like the base64 utility does. + + *o++ = codes[i1]; + *o++ = codes[i2]; + *o++ = i3 == un ? '=' : codes[i3]; + *o++ = i4 == un ? '=' : codes[i4]; + } + // base64url: different 63rd and 64th characters and no padding or + // newlines. 
+ // + else + { + *o++ = codes_url[i1]; + *o++ = codes_url[i2]; + if (i3 != un) *o++ = codes_url[i3]; + if (i4 != un) *o++ = codes_url[i4]; + } } } @@ -170,6 +187,47 @@ namespace butl return r; } + string + base64url_encode (istream& is) + { + if (!is.good ()) + throw invalid_argument ("bad stream"); + + string r; + istreambuf_iterator<char> i (is); + back_insert_iterator<string> o (r); + + base64_encode (i, istreambuf_iterator<char> (), o, true /* url */); + is.setstate (istream::eofbit); + return r; + } + + void + base64url_encode (ostream& os, istream& is) + { + if (!os.good () || !is.good ()) + throw invalid_argument ("bad stream"); + + istreambuf_iterator<char> i (is); + ostreambuf_iterator<char> o (os); + base64_encode (i, istreambuf_iterator<char> (), o, true /* url */); + + if (o.failed ()) + os.setstate (istream::badbit); + + is.setstate (istream::eofbit); + } + + string + base64url_encode (const std::vector<char>& v) + { + string r; + back_insert_iterator<string> o (r); + auto i (v.begin ()); + base64_encode (i, v.end (), o, true /* url */); + return r; + } + void base64_decode (ostream& os, istream& is) { diff --git a/libbutl/base64.hxx b/libbutl/base64.hxx index f38e62f..a0d1450 100644 --- a/libbutl/base64.hxx +++ b/libbutl/base64.hxx @@ -27,6 +27,25 @@ namespace butl LIBBUTL_SYMEXPORT std::string base64_encode (const std::vector<char>&); + // Encode a stream or a buffer using base64url (RFC4648), a base64 variant + // with different 62nd and 63rd alphabet characters (- and _ instead of + // + and /; to make it filesystem safe) and optional padding because the + // padding character `=` would have to be percent-encoded to be safe in + // URLs. This implementation does not output any padding, newlines or any + // other whitespace (which is required, for example, by RFC7519: JSON Web + // Token (JWT) and RFC7515: JSON Web Signature (JWS)). + // + // Note that base64url decoding has not yet been implemented.
+ // + LIBBUTL_SYMEXPORT void + base64url_encode (std::ostream&, std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64url_encode (std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64url_encode (const std::vector<char>&); + // Base64-decode a stream or a string. Throw invalid_argument if the input // is not a valid base64 representation. If reading from a stream, check if // it has badbit, failbit, or eofbit set and throw invalid_argument if diff --git a/libbutl/buildfile b/libbutl/buildfile index 6c490af..ba4ad96 100644 --- a/libbutl/buildfile +++ b/libbutl/buildfile @@ -29,9 +29,9 @@ lib{butl}: {hxx ixx cxx}{win32-utility}: include = $windows lib{butl}: hxx{mingw-*}: include = $mingw_stdthread # Our C-files are always included into C++-files that wrap the corresponding -# API so treat them as files exclude from the compilation. +# API so treat them as files to exclude from the compilation. # -lib{butl}: file{*.c *.h} +lib{butl}: file{**.c **.h} # Platform-specific UUID implementations. 
# diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx index 5a243e5..98a47cf 100644 --- a/libbutl/builtin-options.cxx +++ b/libbutl/builtin-options.cxx @@ -18,6 +18,7 @@ #include <utility> #include <ostream> #include <sstream> +#include <cstring> namespace butl { @@ -26,7 +27,7 @@ namespace butl // unknown_option // unknown_option:: - ~unknown_option () throw () + ~unknown_option () noexcept { } @@ -37,7 +38,7 @@ namespace butl } const char* unknown_option:: - what () const throw () + what () const noexcept { return "unknown option"; } @@ -45,7 +46,7 @@ namespace butl // unknown_argument // unknown_argument:: - ~unknown_argument () throw () + ~unknown_argument () noexcept { } @@ -56,7 +57,7 @@ namespace butl } const char* unknown_argument:: - what () const throw () + what () const noexcept { return "unknown argument"; } @@ -64,7 +65,7 @@ namespace butl // missing_value // missing_value:: - ~missing_value () throw () + ~missing_value () noexcept { } @@ -75,7 +76,7 @@ namespace butl } const char* missing_value:: - what () const throw () + what () const noexcept { return "missing option value"; } @@ -83,7 +84,7 @@ namespace butl // invalid_value // invalid_value:: - ~invalid_value () throw () + ~invalid_value () noexcept { } @@ -98,7 +99,7 @@ namespace butl } const char* invalid_value:: - what () const throw () + what () const noexcept { return "invalid option value"; } @@ -112,7 +113,7 @@ namespace butl } const char* eos_reached:: - what () const throw () + what () const noexcept { return "end of argument stream reached"; } @@ -252,10 +253,31 @@ namespace butl struct parser<bool> { static void - parse (bool& x, scanner& s) + parse (bool& x, bool& xs, scanner& s) { - s.next (); - x = true; + const char* o (s.next ()); + + if (s.more ()) + { + const char* v (s.next ()); + + if (std::strcmp (v, "1") == 0 || + std::strcmp (v, "true") == 0 || + std::strcmp (v, "TRUE") == 0 || + std::strcmp (v, "True") == 0) + x = true; + else if (std::strcmp (v, "0") == 
0 || + std::strcmp (v, "false") == 0 || + std::strcmp (v, "FALSE") == 0 || + std::strcmp (v, "False") == 0) + x = false; + else + throw invalid_value (o, v); + } + else + throw missing_value (o); + + xs = true; } }; @@ -365,6 +387,56 @@ namespace butl } }; + template <typename K, typename V, typename C> + struct parser<std::multimap<K, V, C> > + { + static void + parse (std::multimap<K, V, C>& m, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + { + std::size_t pos (s.position ()); + std::string ov (s.next ()); + std::string::size_type p = ov.find ('='); + + K k = K (); + V v = V (); + std::string kstr (ov, 0, p); + std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); + + int ac (2); + char* av[] = + { + const_cast<char*> (o), + 0 + }; + + bool dummy; + if (!kstr.empty ()) + { + av[1] = const_cast<char*> (kstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser<K>::parse (k, dummy, s); + } + + if (!vstr.empty ()) + { + av[1] = const_cast<char*> (vstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser<V>::parse (v, dummy, s); + } + + m.insert (typename std::multimap<K, V, C>::value_type (k, v)); + } + else + throw missing_value (o); + + xs = true; + } + }; + template <typename X, typename T, T X::*M> void thunk (X& x, scanner& s) @@ -372,6 +444,14 @@ namespace butl parser<T>::parse (x.*M, s); } + template <typename X, bool X::*M> + void + thunk (X& x, scanner& s) + { + s.next (); + x.*M = true; + } + template <typename X, typename T, T X::*M, bool X::*S> void thunk (X& x, scanner& s) @@ -382,7 +462,6 @@ namespace butl } #include <map> -#include <cstring> namespace butl { @@ -733,15 +812,15 @@ namespace butl _cli_cp_options_map_init () { _cli_cp_options_map_["--recursive"] = - &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >; + &::butl::cli::thunk< cp_options, &cp_options::recursive_ >; _cli_cp_options_map_["-R"] = - &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >; + 
&::butl::cli::thunk< cp_options, &cp_options::recursive_ >; _cli_cp_options_map_["-r"] = - &::butl::cli::thunk< cp_options, bool, &cp_options::recursive_ >; + &::butl::cli::thunk< cp_options, &cp_options::recursive_ >; _cli_cp_options_map_["--preserve"] = - &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >; + &::butl::cli::thunk< cp_options, &cp_options::preserve_ >; _cli_cp_options_map_["-p"] = - &::butl::cli::thunk< cp_options, bool, &cp_options::preserve_ >; + &::butl::cli::thunk< cp_options, &cp_options::preserve_ >; } }; @@ -1007,9 +1086,9 @@ namespace butl _cli_date_options_map_init () { _cli_date_options_map_["--utc"] = - &::butl::cli::thunk< date_options, bool, &date_options::utc_ >; + &::butl::cli::thunk< date_options, &date_options::utc_ >; _cli_date_options_map_["-u"] = - &::butl::cli::thunk< date_options, bool, &date_options::utc_ >; + &::butl::cli::thunk< date_options, &date_options::utc_ >; } }; @@ -1192,6 +1271,269 @@ namespace butl return r; } + // find_options + // + + find_options:: + find_options () + { + } + + bool find_options:: + parse (int& argc, + char** argv, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (argc, argv, erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool find_options:: + parse (int start, + int& argc, + char** argv, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool find_options:: + parse (int& argc, + char** argv, + int& end, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool find_options:: + parse (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) 
+ { + ::butl::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool find_options:: + parse (::butl::cli::scanner& s, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + bool r = _parse (s, opt, arg); + return r; + } + + typedef + std::map<std::string, void (*) (find_options&, ::butl::cli::scanner&)> + _cli_find_options_map; + + static _cli_find_options_map _cli_find_options_map_; + + struct _cli_find_options_map_init + { + _cli_find_options_map_init () + { + } + }; + + static _cli_find_options_map_init _cli_find_options_map_init_; + + bool find_options:: + _parse (const char* o, ::butl::cli::scanner& s) + { + _cli_find_options_map::const_iterator i (_cli_find_options_map_.find (o)); + + if (i != _cli_find_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool find_options:: + _parse (::butl::cli::scanner& s, + ::butl::cli::unknown_mode opt_mode, + ::butl::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::butl::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast<char*> (co.c_str ()), + const_cast<char*> (v) + }; + + ::butl::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::butl::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. 
+ // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::butl::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::butl::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::butl::cli::unknown_mode::stop: + { + break; + } + case ::butl::cli::unknown_mode::fail: + { + throw ::butl::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::butl::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::butl::cli::unknown_mode::stop: + { + break; + } + case ::butl::cli::unknown_mode::fail: + { + throw ::butl::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } + // ln_options // @@ -1275,9 +1617,9 @@ namespace butl _cli_ln_options_map_init () { _cli_ln_options_map_["--symbolic"] = - &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >; + &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >; _cli_ln_options_map_["-s"] = - &::butl::cli::thunk< ln_options, bool, &ln_options::symbolic_ >; + &::butl::cli::thunk< ln_options, &ln_options::symbolic_ >; } }; @@ -1543,9 +1885,9 @@ namespace butl _cli_mkdir_options_map_init () { _cli_mkdir_options_map_["--parents"] = - &::butl::cli::thunk< mkdir_options, bool, &mkdir_options::parents_ >; + &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >; _cli_mkdir_options_map_["-p"] = - &::butl::cli::thunk< mkdir_options, bool, 
&mkdir_options::parents_ >; + &::butl::cli::thunk< mkdir_options, &mkdir_options::parents_ >; } }; @@ -1811,9 +2153,9 @@ namespace butl _cli_mv_options_map_init () { _cli_mv_options_map_["--force"] = - &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >; + &::butl::cli::thunk< mv_options, &mv_options::force_ >; _cli_mv_options_map_["-f"] = - &::butl::cli::thunk< mv_options, bool, &mv_options::force_ >; + &::butl::cli::thunk< mv_options, &mv_options::force_ >; } }; @@ -2080,13 +2422,13 @@ namespace butl _cli_rm_options_map_init () { _cli_rm_options_map_["--recursive"] = - &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >; + &::butl::cli::thunk< rm_options, &rm_options::recursive_ >; _cli_rm_options_map_["-r"] = - &::butl::cli::thunk< rm_options, bool, &rm_options::recursive_ >; + &::butl::cli::thunk< rm_options, &rm_options::recursive_ >; _cli_rm_options_map_["--force"] = - &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >; + &::butl::cli::thunk< rm_options, &rm_options::force_ >; _cli_rm_options_map_["-f"] = - &::butl::cli::thunk< rm_options, bool, &rm_options::force_ >; + &::butl::cli::thunk< rm_options, &rm_options::force_ >; } }; @@ -2352,9 +2694,9 @@ namespace butl _cli_rmdir_options_map_init () { _cli_rmdir_options_map_["--force"] = - &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >; + &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >; _cli_rmdir_options_map_["-f"] = - &::butl::cli::thunk< rmdir_options, bool, &rmdir_options::force_ >; + &::butl::cli::thunk< rmdir_options, &rmdir_options::force_ >; } }; @@ -2623,13 +2965,13 @@ namespace butl _cli_sed_options_map_init () { _cli_sed_options_map_["--quiet"] = - &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >; + &::butl::cli::thunk< sed_options, &sed_options::quiet_ >; _cli_sed_options_map_["-n"] = - &::butl::cli::thunk< sed_options, bool, &sed_options::quiet_ >; + &::butl::cli::thunk< sed_options, &sed_options::quiet_ >; 
_cli_sed_options_map_["--in-place"] = - &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >; + &::butl::cli::thunk< sed_options, &sed_options::in_place_ >; _cli_sed_options_map_["-i"] = - &::butl::cli::thunk< sed_options, bool, &sed_options::in_place_ >; + &::butl::cli::thunk< sed_options, &sed_options::in_place_ >; _cli_sed_options_map_["--expression"] = &::butl::cli::thunk< sed_options, std::vector<std::string>, &sed_options::expression_, &sed_options::expression_specified_ >; @@ -3165,13 +3507,13 @@ namespace butl _cli_test_options_map_init () { _cli_test_options_map_["--file"] = - &::butl::cli::thunk< test_options, bool, &test_options::file_ >; + &::butl::cli::thunk< test_options, &test_options::file_ >; _cli_test_options_map_["-f"] = - &::butl::cli::thunk< test_options, bool, &test_options::file_ >; + &::butl::cli::thunk< test_options, &test_options::file_ >; _cli_test_options_map_["--directory"] = - &::butl::cli::thunk< test_options, bool, &test_options::directory_ >; + &::butl::cli::thunk< test_options, &test_options::directory_ >; _cli_test_options_map_["-d"] = - &::butl::cli::thunk< test_options, bool, &test_options::directory_ >; + &::butl::cli::thunk< test_options, &test_options::directory_ >; } }; diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx index 6288e54..70179dd 100644 --- a/libbutl/builtin-options.hxx +++ b/libbutl/builtin-options.hxx @@ -68,7 +68,7 @@ namespace butl { public: virtual - ~unknown_option () throw (); + ~unknown_option () noexcept; unknown_option (const std::string& option); @@ -79,7 +79,7 @@ namespace butl print (::std::ostream&) const; virtual const char* - what () const throw (); + what () const noexcept; private: std::string option_; @@ -89,7 +89,7 @@ namespace butl { public: virtual - ~unknown_argument () throw (); + ~unknown_argument () noexcept; unknown_argument (const std::string& argument); @@ -100,7 +100,7 @@ namespace butl print (::std::ostream&) const; virtual const char* - what () 
const throw (); + what () const noexcept; private: std::string argument_; @@ -110,7 +110,7 @@ namespace butl { public: virtual - ~missing_value () throw (); + ~missing_value () noexcept; missing_value (const std::string& option); @@ -121,7 +121,7 @@ namespace butl print (::std::ostream&) const; virtual const char* - what () const throw (); + what () const noexcept; private: std::string option_; @@ -131,7 +131,7 @@ namespace butl { public: virtual - ~invalid_value () throw (); + ~invalid_value () noexcept; invalid_value (const std::string& option, const std::string& value, @@ -150,7 +150,7 @@ namespace butl print (::std::ostream&) const; virtual const char* - what () const throw (); + what () const noexcept; private: std::string option_; @@ -165,7 +165,7 @@ namespace butl print (::std::ostream&) const; virtual const char* - what () const throw (); + what () const noexcept; }; // Command line argument scanner interface. @@ -484,6 +484,67 @@ namespace butl bool utc_; }; + class find_options + { + public: + find_options (); + + // Return true if anything has been parsed. 
+ // + bool + parse (int& argc, + char** argv, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int& argc, + char** argv, + int& end, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (::butl::cli::scanner&, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + // Option accessors. + // + // Implementation details. + // + protected: + bool + _parse (const char*, ::butl::cli::scanner&); + + private: + bool + _parse (::butl::cli::scanner&, + ::butl::cli::unknown_mode option, + ::butl::cli::unknown_mode argument); + + public: + }; + class ln_options { public: diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx index b977f16..e118156 100644 --- a/libbutl/builtin-options.ixx +++ b/libbutl/builtin-options.ixx @@ -193,6 +193,9 @@ namespace butl return this->utc_; } + // find_options + // + // ln_options // diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli index adc47fa..23a5708 100644 --- a/libbutl/builtin.cli +++ b/libbutl/builtin.cli @@ -34,6 +34,11 @@ namespace butl bool --utc|-u; }; + class find_options + { + // No options so far (expression/primaries handled as arguments). 
+ }; + class ln_options { bool --symbolic|-s; diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index b13a59a..2755bf1 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -470,7 +470,7 @@ namespace butl if (cbs.create) call (fail, cbs.create, to, false /* pre */); - for (const auto& de: dir_iterator (from, false /* ignore_dangling */)) + for (const auto& de: dir_iterator (from, dir_iterator::no_follow)) { path f (from / de.path ()); path t (to / de.path ()); @@ -816,6 +816,314 @@ namespace butl return builtin (r = 0); } + // find <start-path>... [-name <pattern>] + // [-type <type>] + // [-mindepth <depth>] + // [-maxdepth <depth>] + // + // Note: must be executed asynchronously. + // + static uint8_t + find (const strings& args, + auto_fd in, auto_fd out, auto_fd err, + const dir_path& cwd, + const builtin_callbacks& cbs) noexcept + try + { + uint8_t r (1); + ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ())); + + // Note that on some errors we will issue diagnostics but continue the + // search and return with non-zero code at the end. This is consistent + // with how major implementations behave (see below). + // + bool error_occured (false); + auto error = [&cerr, &error_occured] (bool fail = false) + { + error_occured = true; + return error_record (cerr, fail, "find"); + }; + + auto fail = [&error] () {return error (true /* fail */);}; + + try + { + in.close (); + ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + + // Parse arguments. + // + cli::vector_scanner scan (args); + + // Currently, we don't expect any options. + // + parse<find_options> (scan, args, cbs.parse_option, fail); + + // Parse path arguments until the first primary (starts with '-') is + // encountered. 
+ // + small_vector<path, 1> paths; + + while (scan.more ()) + { + if (*scan.peek () == '-') + break; + + try + { + paths.emplace_back (scan.next ()); + } + catch (const invalid_path& e) + { + fail () << "invalid path '" << e.path << "'"; + } + } + + // Note that POSIX doesn't explicitly describe the behavior if no paths + // are specified on the command line. On Linux the current directory is + // assumed in this case. We, however, will follow the FreeBSD behavior + // and fail since this seems to be less error-prone. + // + if (paths.empty ()) + fail () << "missing start path"; + + // Parse primaries. + // + optional<string> name; + optional<entry_type> type; + optional<uint64_t> min_depth; + optional<uint64_t> max_depth; + + while (scan.more ()) + { + const char* p (scan.next ()); + + // Return the string value of the current primary. Fail if absent or + // empty, unless empty value is allowed. + // + auto str = [p, &scan, &fail] (bool allow_empty = false) + { + if (!scan.more ()) + { + fail () << "missing value for primary '" << p << "'"; + } + + string n (p); // Save for diagnostics. + string r (scan.next ()); + + if (r.empty () && !allow_empty) + fail () << "empty value for primary '" << n << "'"; + + return r; + }; + + // Return the unsigned numeric value of the current primary. Fail if + // absent or is not a valid number. + // + auto num = [p, &str, &fail] () + { + string n (p); // Save for diagnostics. + string s (str ()); + + const char* b (s.c_str ()); + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + uint64_t r (strtoull (b, &e, 10)); // Can't throw. + + if (errno == ERANGE || e != b + s.size ()) + fail () << "invalid value '" << s << "' for primary '" << n << "'"; + + return r; + }; + + if (strcmp (p, "-name") == 0) + { + // Note that the empty never-matching pattern is allowed. + // + name = str (true /* allow_empty */); + } + else if (strcmp (p, "-type") == 0) + { + string s (str ()); + char t (s.size () == 1 ? 
s[0] : '\0'); + + switch (t) + { + case 'f': type = entry_type::regular; break; + case 'd': type = entry_type::directory; break; + case 'l': type = entry_type::symlink; break; + default: fail () << "invalid value '" << s << "' for primary '-type'"; + } + } + else if (strcmp (p, "-mindepth") == 0) + { + min_depth = num (); + } + else if (strcmp (p, "-maxdepth") == 0) + { + max_depth = num (); + } + else + fail () << "unknown primary '" << p << "'"; + } + + // Print the path if the expression evaluates to true for it. Traverse + // further down if the path refers to a directory and the maximum depth + // is not specified or is not reached. + // + // Note that paths for evaluating/printing (pp) and for + // stating/traversing (ap) are passed separately. The former is + // potentially relative and the latter is absolute. Also note that + // for optimization we separately pass the base name simple path. + // + auto find = [&cout, + &name, + &type, + &min_depth, + &max_depth, + &fail] (const path& pp, + const path& ap, + const path& bp, + entry_type t, + uint64_t level, + const auto& find) -> void + { + // Print the path if no primary evaluates to false. + // + if ((!type || *type == t) && + (!min_depth || level >= *min_depth) && + (!name || path_match (bp.string (), *name))) + { + // Print the trailing directory separator, if present. + // + if (pp.to_directory ()) + { + // The trailing directory separator can only be present for + // paths specified on the command line. + // + assert (level == 0); + + cout << pp.representation () << '\n'; + } + else + cout << pp << '\n'; + } + + // Traverse the directory, unless the max depth is specified and + // reached. 
+ // + if (t == entry_type::directory && (!max_depth || level < *max_depth)) + try + { + for (const auto& de: dir_iterator (path_cast<dir_path> (ap), + dir_iterator::no_follow)) + { + find (pp / de.path (), + ap / de.path (), + de.path (), + de.ltype (), + level + 1, + find); + } + } + catch (const system_error& e) + { + fail () << "unable to scan directory '" << pp << "': " << e; + } + }; + + dir_path wd; + + for (const path& p: paths) + { + // Complete the path if it is relative, so that we can properly stat + // it and, potentially, traverse. Note that we don't normalize it + // since POSIX requires that the paths should be evaluated (by + // primaries) and printed unaltered. + // + path ap; + + if (p.relative ()) + { + if (wd.empty () && cwd.relative ()) + wd = current_directory (cwd, fail); + + ap = (!wd.empty () ? wd : cwd) / p; + } + + // Issue an error if the path is empty, doesn't exist, or has the + // trailing directory separator but refers to a non-directory. + // + // Note that POSIX doesn't explicitly describe the behavior if any of + // the above happens. We will follow the behavior which is common for + // both Linux and FreeBSD by issuing the diagnostics, proceeding to + // the subsequent paths, and returning with non-zero code at the end. + // + if (p.empty ()) + { + error () << "empty path"; + continue; + } + + const path& fp (!ap.empty () ? ap : p); + pair<bool, entry_stat> pe; + + try + { + pe = path_entry (fp); + } + catch (const system_error& e) + { + fail () << "unable to stat '" << p << "': " << e; + } + + if (!pe.first) + { + error () << "'" << p << "' doesn't exists"; + continue; + } + + entry_type t (pe.second.type); + + if (p.to_directory () && t != entry_type::directory) + { + error () << "'" << p << "' is not a directory"; + continue; + } + + find (p, fp, p.leaf (), t, 0 /* level */, find); + } + + cout.close (); + r = !error_occured ? 
0 : 1; + } + // Can be thrown while closing cin or creating, writing to, or closing + // cout or writing to cerr. + // + catch (const io_error& e) + { + error () << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + catch (const cli::exception& e) + { + error () << e; + } + + cerr.close (); + return r; + } + // In particular, handles io_error exception potentially thrown while + // creating, writing to, or closing cerr. + // + catch (const std::exception&) + { + return 1; + } + // Create a symlink to a file or directory at the specified path and calling // the hook for the created filesystem entries. The paths must be absolute // and normalized. Fall back to creating a hardlink, if symlink creation is @@ -2177,17 +2485,17 @@ namespace butl { unique_ptr<builtin::async_state> s ( new builtin::async_state ( + r, [fn, - &r, &args, in = move (in), out = move (out), err = move (err), &cwd, - &cbs] () mutable noexcept + &cbs] () mutable noexcept -> uint8_t { - r = fn (args, - move (in), move (out), move (err), - cwd, - cbs); + return fn (args, + move (in), move (out), move (err), + cwd, + cbs); })); return builtin (r, move (s)); @@ -2227,6 +2535,7 @@ namespace butl {"diff", {nullptr, 2}}, {"echo", {&async_impl<&echo>, 2}}, {"false", {&false_, 0}}, + {"find", {&async_impl<&find>, 2}}, {"ln", {&sync_impl<&ln>, 2}}, {"mkdir", {&sync_impl<&mkdir>, 2}}, {"mv", {&sync_impl<&mv>, 2}}, diff --git a/libbutl/builtin.hxx b/libbutl/builtin.hxx index b8546be..b301f8a 100644 --- a/libbutl/builtin.hxx +++ b/libbutl/builtin.hxx @@ -90,8 +90,7 @@ namespace butl // be able to capture auto_fd by value in a lambda, etc). 
// template <typename F> - explicit - async_state (F); + async_state (uint8_t&, F); }; builtin (std::uint8_t& r, std::unique_ptr<async_state>&& s = nullptr) diff --git a/libbutl/builtin.ixx b/libbutl/builtin.ixx index 24fbae3..d77590b 100644 --- a/libbutl/builtin.ixx +++ b/libbutl/builtin.ixx @@ -47,13 +47,14 @@ namespace butl // template <typename F> inline builtin::async_state:: - async_state (F f) - : thread ([f = std::move (f), this] () mutable noexcept + async_state (uint8_t& r, F f) + : thread ([this, &r, f = std::move (f)] () mutable noexcept { - f (); + uint8_t t (f ()); { unique_lock l (this->mutex); + r = t; finished = true; } @@ -68,9 +69,10 @@ namespace butl { std::unique_ptr<builtin::async_state> s ( new builtin::async_state ( - [f = std::move (f), &r] () mutable noexcept + r, + [f = std::move (f)] () mutable noexcept -> uint8_t { - r = f (); + return f (); })); return builtin (r, move (s)); diff --git a/libbutl/command.cxx b/libbutl/command.cxx index 6f8c0f1..2df52dd 100644 --- a/libbutl/command.cxx +++ b/libbutl/command.cxx @@ -51,7 +51,7 @@ namespace butl // if (p == string::npos) throw invalid_argument (string ("unmatched substitution character '") + - open + "'"); + open + '\''); if (p == sp) throw invalid_argument ("empty substitution variable"); @@ -60,12 +60,12 @@ namespace butl if (vn.find_first_of (" \t") != string::npos) throw invalid_argument ("whitespace in substitution variable '" + - vn + "'"); + vn + '\''); // Find the variable and append its value or fail if it's unknown. // if (!sc (vn, r)) - throw invalid_argument ("unknown substitution variable '" + vn + "'"); + throw invalid_argument ("unknown substitution variable '" + vn + '\''); } // Append the source string tail following the last substitution. 
@@ -168,7 +168,7 @@ namespace butl catch (const invalid_path& e) { throw invalid_argument ("invalid stdout redirect file path '" + - e.path + "'"); + e.path + '\''); } if (redir->empty ()) diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx index addba81..5649965 100644 --- a/libbutl/curl.cxx +++ b/libbutl/curl.cxx @@ -5,6 +5,7 @@ #include <cassert> #include <utility> // move() +#include <cstdlib> // strtoul(), size_t #include <exception> // invalid_argument #include <libbutl/utility.hxx> @@ -21,7 +22,17 @@ namespace butl case ftp_put: throw invalid_argument ("no input specified for PUT method"); case http_post: - throw invalid_argument ("no input specified for POST method"); + { + // Post the empty data. + // + // Note that while it's tempting to specify the --request POST option + // instead, that can potentially overwrite the request methods for the + // HTTP 30X response code redirects. + // + d.options.push_back ("--data-raw"); + d.options.push_back (""); + } + // Fall through. case ftp_get: case http_get: { @@ -142,7 +153,7 @@ namespace butl } curl::method_proto curl:: - translate (method_type m, const string& u, method_proto_options& o) + translate (method_type m, const string& u, method_proto_options& o, flags fs) { size_t n (u.find ("://")); @@ -161,8 +172,11 @@ namespace butl } else if (icasecmp (u, "http", n) == 0 || icasecmp (u, "https", n) == 0) { - o.push_back ("--fail"); // Fail on HTTP errors (e.g., 404). - o.push_back ("--location"); // Follow redirects. + if ((fs & flags::no_fail) == flags::none) + o.push_back ("--fail"); // Fail on HTTP errors (e.g., 404). + + if ((fs & flags::no_location) == flags::none) + o.push_back ("--location"); // Follow redirects. switch (m) { @@ -175,4 +189,123 @@ namespace butl throw invalid_argument ("unsupported protocol"); } + + uint16_t curl:: + parse_http_status_code (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. 
+ assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast<uint16_t> (c) + : 0; + } + + string curl:: + read_http_response_line (ifdstream& is) + { + string r; + getline (is, r); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into LF, so we + // need to strip CR (0xD) manually. + // + if (!r.empty () && r.back () == '\r') + r.pop_back (); + + return r; + } + + curl::http_status curl:: + read_http_status (ifdstream& is, bool skip_headers) + { + // After getting the status line, if requested, we will read until the + // empty line (containing just CRLF). Not being able to reach such a line + // is an error, which is the reason for the exception mask choice. When + // done, we will restore the original exception mask. + // + ifdstream::iostate es (is.exceptions ()); + is.exceptions (ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + auto read_status = [&is, es] () + { + string l (read_http_response_line (is)); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // The protocol end. + if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // The code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // The code end. + if (e == string::npos) + break; + + uint16_t c (parse_http_status_code (string (l, p, e - p))); + if (c == 0) + break; + + string r; + p = l.find_first_not_of (' ', e + 1); // The reason start. + if (p != string::npos) + { + e = l.find_last_not_of (' '); // The reason end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return http_status {c, move (r)}; + } + + is.exceptions (es); // Restore the exception mask. 
+ + throw invalid_argument ("invalid status line '" + l + "'"); + }; + + // The curl output for a successful request looks like this: + // + // HTTP/1.1 100 Continue + // + // HTTP/1.1 200 OK + // Content-Length: 83 + // Content-Type: text/manifest;charset=utf-8 + // + // <response-body> + // + // curl normally sends the 'Expect: 100-continue' header for uploads, so + // we need to handle the interim HTTP server response with the continue + // (100) status code. + // + // Interestingly, Apache can respond with the continue (100) code and with + // the not found (404) code afterwards. + // + http_status rs (read_status ()); + + if (rs.code == 100) + { + // Skips the interim response. + // + while (!read_http_response_line (is).empty ()) ; + + rs = read_status (); // Reads the final status code. + } + + if (skip_headers) + { + while (!read_http_response_line (is).empty ()) ; // Skips headers. + } + + is.exceptions (es); + + return rs; + } } diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx index cd4ebd0..ea91807 100644 --- a/libbutl/curl.hxx +++ b/libbutl/curl.hxx @@ -4,6 +4,7 @@ #pragma once #include <string> +#include <cstdint> // uint16_t #include <type_traits> #include <libbutl/path.hxx> @@ -90,6 +91,19 @@ namespace butl public: enum method_type {get, put, post}; + // By default the -sS and, for the HTTP protocol, --fail and --location + // options are passed to curl on the command line. Optionally, these + // options can be suppressed. + // + enum class flags: std::uint16_t + { + no_fail = 0x01, // Don't pass --fail. + no_location = 0x02, // Don't pass --location + no_sS = 0x04, // Don't pass -sS + + none = 0 // Default options set. + }; + ifdstream in; ofdstream out; @@ -120,12 +134,77 @@ namespace butl const std::string& url, A&&... options); + // Similar to the above, but allows adjusting curl's default command + // line. + // + template <typename I, + typename O, + typename E, + typename... 
A> + curl (I&& in, + O&& out, + E&& err, + method_type, + flags, + const std::string& url, + A&&... options); + + template <typename C, + typename I, + typename O, + typename E, + typename... A> + curl (const C&, + I&& in, + O&& out, + E&& err, + method_type, + flags, + const std::string& url, + A&&... options); + + // Read the HTTP response status from an input stream. + // + // Specifically, read and parse the HTTP status line, by default skip over + // the remaining headers (leaving the stream at the beginning of the + // response body), and return the status code and the reason phrase. Throw + // std::invalid_argument if the status line could not be parsed. Pass + // through the ios::failure exception on the stream error. + // + // Note that if ios::failure is thrown the stream's exception mask may not + // be preserved. + // + struct http_status + { + std::uint16_t code; + std::string reason; + }; + + static http_status + read_http_status (ifdstream&, bool skip_headers = true); + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + static std::uint16_t + parse_http_status_code (const std::string&); + + // Read the CRLF-terminated line from an input stream, stripping the + // trailing CRLF. Pass through the ios::failure exception on the stream + // error. 
+ // + static std::string + read_http_response_line (ifdstream&); + private: enum method_proto {ftp_get, ftp_put, http_get, http_post}; using method_proto_options = small_vector<const char*, 2>; method_proto - translate (method_type, const std::string& url, method_proto_options&); + translate (method_type, + const std::string& url, + method_proto_options&, + flags); private: template <typename T> @@ -165,6 +244,11 @@ namespace butl typename std::enable_if<is_other<O>::value, O>::type map_out (O&&, method_proto, io_data&); }; + + curl::flags operator& (curl::flags, curl::flags); + curl::flags operator| (curl::flags, curl::flags); + curl::flags operator&= (curl::flags&, curl::flags); + curl::flags operator|= (curl::flags&, curl::flags); } #include <libbutl/curl.ixx> diff --git a/libbutl/curl.ixx b/libbutl/curl.ixx index b7f6496..6dcfe13 100644 --- a/libbutl/curl.ixx +++ b/libbutl/curl.ixx @@ -16,6 +16,7 @@ namespace butl O&& out, E&& err, method_type m, + flags fs, const std::string& url, A&&... options) : curl ([] (const char* [], std::size_t) {}, @@ -23,8 +24,80 @@ namespace butl std::forward<O> (out), std::forward<E> (err), m, + fs, url, std::forward<A> (options)...) { } + + template <typename C, + typename I, + typename O, + typename E, + typename... A> + inline curl:: + curl (const C& cmdc, + I&& in, + O&& out, + E&& err, + method_type m, + const std::string& url, + A&&... options) + : curl (cmdc, + std::forward<I> (in), + std::forward<O> (out), + std::forward<E> (err), + m, + flags::none, + url, + std::forward<A> (options)...) + { + } + + template <typename I, + typename O, + typename E, + typename... A> + inline curl:: + curl (I&& in, + O&& out, + E&& err, + method_type m, + const std::string& url, + A&&... options) + : curl (std::forward<I> (in), + std::forward<O> (out), + std::forward<E> (err), + m, + flags::none, + url, + std::forward<A> (options)...) 
+ { + } + + inline curl::flags + operator&= (curl::flags& x, curl::flags y) + { + return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) & + static_cast<std::uint16_t> (y)); + } + + inline curl::flags + operator|= (curl::flags& x, curl::flags y) + { + return x = static_cast<curl::flags> (static_cast<std::uint16_t> (x) | + static_cast<std::uint16_t> (y)); + } + + inline curl::flags + operator& (curl::flags x, curl::flags y) + { + return x &= y; + } + + inline curl::flags + operator| (curl::flags x, curl::flags y) + { + return x |= y; + } } diff --git a/libbutl/curl.txx b/libbutl/curl.txx index ee08145..fc74470 100644 --- a/libbutl/curl.txx +++ b/libbutl/curl.txx @@ -65,11 +65,12 @@ namespace butl O&& out, E&& err, method_type m, + flags fs, const std::string& url, A&&... options) { method_proto_options mpo; - method_proto mp (translate (m, url, mpo)); + method_proto mp (translate (m, url, mpo, fs)); io_data in_data; io_data out_data; @@ -81,8 +82,9 @@ namespace butl map_out (std::forward<O> (out), mp, out_data), std::forward<E> (err), "curl", - "-s", // Silent. - "-S", // But do show diagnostics. + ((fs & flags::no_sS) == flags::none + ? "-sS" // Silent but do show diagnostics. + : nullptr), mpo, in_data.options, out_data.options, diff --git a/libbutl/diagnostics.cxx b/libbutl/diagnostics.cxx index f574fd6..6ac8192 100644 --- a/libbutl/diagnostics.cxx +++ b/libbutl/diagnostics.cxx @@ -154,17 +154,17 @@ namespace butl diag_stream->flush (); } - void (*diag_record::writer) (const diag_record&) = &default_writer; + diag_writer* diag_record::writer = &default_writer; void diag_record:: - flush () const + flush (void (*w) (const diag_record&)) const { if (!empty_) { if (epilogue_ == nullptr) { - if (writer != nullptr) - writer (*this); + if (w != nullptr || (w = writer) != nullptr) + w (*this); empty_ = true; } @@ -174,8 +174,8 @@ namespace butl // auto e (epilogue_); epilogue_ = nullptr; - e (*this); // Can throw. 
- flush (); // Call ourselves to write the data in case it returns. + e (*this, w); // Can throw. + flush (w); // Call ourselves to write the data in case it returns. } } } diff --git a/libbutl/diagnostics.hxx b/libbutl/diagnostics.hxx index 23aa14f..c6db34b 100644 --- a/libbutl/diagnostics.hxx +++ b/libbutl/diagnostics.hxx @@ -27,8 +27,11 @@ namespace butl LIBBUTL_SYMEXPORT extern std::ostream* diag_stream; // Acquire the diagnostics exclusive access mutex in ctor, release in dtor. - // An object of the type must be created prior to writing to diag_stream (see - // above). + // An object of the type must be created prior to writing to diag_stream + // (see above). + // + // Note that this class also manages the interaction with the progress + // printing (see below). // struct LIBBUTL_SYMEXPORT diag_stream_lock { @@ -92,7 +95,8 @@ namespace butl template <typename> struct diag_prologue; template <typename> struct diag_mark; - using diag_epilogue = void (const diag_record&); + using diag_writer = void (const diag_record&); + using diag_epilogue = void (const diag_record&, diag_writer*); struct LIBBUTL_SYMEXPORT diag_record { @@ -129,7 +133,7 @@ namespace butl full () const {return !empty_;} void - flush () const; + flush (diag_writer* = nullptr) const; void append (const char* indent, diag_epilogue* e) const @@ -162,7 +166,7 @@ namespace butl #endif empty_ (r.empty_), epilogue_ (r.epilogue_), - os (std::move (r.os)) + os (std::move (r.os)) // Note: can throw. { if (!empty_) { @@ -180,7 +184,7 @@ namespace butl // Diagnostics writer. The default implementation writes the record text // to diag_stream. If it is NULL, then the record text is ignored. // - static void (*writer) (const diag_record&); + static diag_writer* writer; protected: #ifdef __cpp_lib_uncaught_exceptions @@ -315,10 +319,10 @@ namespace butl stack (prev_); } - // Normally passed as an epilogue. + // Normally passed as an epilogue. Writer is not used. 
// static void - apply (const diag_record& r) + apply (const diag_record& r, diag_writer* = nullptr) { for (const diag_frame* f (stack ()); f != nullptr; f = f->prev_) f->func_ (*f, r); diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx index fb2c8d1..07cb9f2 100644 --- a/libbutl/fdstream.cxx +++ b/libbutl/fdstream.cxx @@ -17,6 +17,10 @@ #else # include <libbutl/win32-utility.hxx> +# ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING +# define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x04 +# endif + # include <io.h> // _close(), _read(), _write(), _setmode(), _sopen(), // _lseek(), _dup(), _pipe(), _chsize_s, // _get_osfhandle() @@ -34,6 +38,7 @@ # include <wchar.h> // wcsncmp(), wcsstr() +# include <thread> // this_thread::yield() # include <algorithm> // count() #endif @@ -41,7 +46,8 @@ #include <new> // bad_alloc #include <limits> // numeric_limits #include <cassert> -#include <cstring> // memcpy(), memmove() +#include <cstring> // memcpy(), memmove(), memchr(), strcmp() +#include <cstdlib> // getenv() #include <iostream> // cin, cout #include <exception> // uncaught_exception[s]() #include <stdexcept> // invalid_argument @@ -353,14 +359,6 @@ namespace butl return save () ? 0 : -1; } -#ifdef _WIN32 - static inline int - write (int fd, const void* buf, size_t n) - { - return _write (fd, buf, static_cast<unsigned int> (n)); - } -#endif - bool fdstreambuf:: save () { @@ -372,7 +370,7 @@ namespace butl // descriptor opened for read-only access (while -1 with errno EBADF is // expected). This is in contrast with VC's _write() and POSIX's write(). // - auto m (write (fd_.get (), buf_, n)); + auto m (fdwrite (fd_.get (), buf_, n)); if (m == -1) throw_generic_ios_failure (errno); @@ -487,7 +485,7 @@ namespace butl // Flush the buffer. // size_t wn (bn + an); - int r (wn > 0 ? write (fd_.get (), buf_, wn) : 0); + streamsize r (wn > 0 ? 
fdwrite (fd_.get (), buf_, wn) : 0); if (r == -1) throw_generic_ios_failure (errno); @@ -530,7 +528,7 @@ namespace butl // The data tail doesn't fit the buffer so write it to the file. // - r = write (fd_.get (), s, n); + r = fdwrite (fd_.get (), s, n); if (r == -1) throw_generic_ios_failure (errno); @@ -846,7 +844,7 @@ namespace butl } ifdstream& - getline (ifdstream& is, string& s, char delim) + getline (ifdstream& is, string& l, char delim) { ifdstream::iostate eb (is.exceptions ()); assert (eb & ifdstream::badbit); @@ -863,7 +861,7 @@ namespace butl if (eb != ifdstream::badbit) is.exceptions (ifdstream::badbit); - std::getline (is, s, delim); + std::getline (is, l, delim); // Throw if any of the newly set bits are present in the exception mask. // @@ -876,6 +874,58 @@ namespace butl return is; } + bool + getline_non_blocking (ifdstream& is, string& l, char delim) + { + assert (!is.blocking () && (is.exceptions () & ifdstream::badbit) != 0); + + fdstreambuf& sb (*static_cast<fdstreambuf*> (is.rdbuf ())); + + // Read until blocked (0), EOF (-1) or encounter the delimiter. + // + // Note that here we reasonably assume that any failure in in_avail() + // will lead to badbit and thus an exception (see showmanyc()). + // + streamsize s; + while ((s = sb.in_avail ()) > 0) + { + const char* p (sb.gptr ()); + size_t n (sb.egptr () - p); + + const char* e (static_cast<const char*> (memchr (p, delim, n))); + if (e != nullptr) + n = e - p; + + l.append (p, n); + + // Note: consume the delimiter if found. + // + sb.gbump (static_cast<int> (n + (e != nullptr ? 1 : 0))); + + if (e != nullptr) + break; + } + + // Here s can be: + // + // -1 -- EOF. + // 0 -- blocked before encountering delimiter/EOF. + // >0 -- encountered the delimiter. + // + if (s == -1) + { + is.setstate (ifdstream::eofbit); + + // If we couldn't extract anything, not even the delimiter, then this is + // a failure per the getline() interface. 
+ // + if (l.empty ()) + is.setstate (ifdstream::failbit); + } + + return s != 0; + } + // ofdstream // ofdstream:: @@ -1025,10 +1075,11 @@ namespace butl #endif // Unlike other platforms, *BSD allows opening a directory as a file which - // will cause all kinds of problems upstream (e.g., cpfile()). So we detect - // and diagnose this. + // will cause all kinds of problems upstream (e.g., cpfile()). So we + // detect and diagnose this. Note: not certain this is the case for NetBSD + // and OpenBSD. // -#if defined(__FreeBSD__) || defined(__NetBSD__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) { struct stat s; if (stat (f, &s) == 0 && S_ISDIR (s.st_mode)) @@ -1119,12 +1170,12 @@ namespace butl // open it results in EINVAL POSIX error, ERROR_USER_MAPPED_FILE system // error. So we retry those as well. // - for (size_t i (0); i < 21; ++i) + for (size_t i (0); i < 41; ++i) { - // Sleep 100 milliseconds before the open retry. + // Sleep 50 milliseconds before the open retry. // if (i != 0) - Sleep (100); + Sleep (50); fd = pass_perm ? _sopen (f, of, _SH_DENYNO, pf) @@ -1393,6 +1444,16 @@ namespace butl throw_generic_ios_failure (errno); } + bool + fdterm_color (int, bool) + { + const char* t (std::getenv ("TERM")); + + // This test was lifted from GCC (Emacs shell sets TERM=dumb). 
+ // + return t != nullptr && strcmp (t, "dumb") != 0; + } + static pair<size_t, size_t> fdselect (fdselect_set& read, fdselect_set& write, @@ -1411,6 +1472,8 @@ namespace butl for (fdselect_state& s: from) { + s.ready = false; + if (s.fd == nullfd) continue; @@ -1418,7 +1481,6 @@ namespace butl throw invalid_argument ("invalid file descriptor"); FD_SET (s.fd, &to); - s.ready = false; if (max_fd < s.fd) max_fd = s.fd; @@ -1525,6 +1587,12 @@ namespace butl return read (fd, buf, n); } + streamsize + fdwrite (int fd, const void* buf, size_t n) + { + return write (fd, buf, n); + } + #else auto_fd @@ -1805,6 +1873,9 @@ namespace butl bool fdterm (int fd) { + // @@ Both GCC and Clang simply call GetConsoleMode() for this check. I + // wonder why we don't do the same? See also fdterm_color() below. + // We don't need to close it (see fd_to_handle()). // HANDLE h (fd_to_handle (fd)); @@ -1890,6 +1961,42 @@ namespace butl return false; } + bool + fdterm_color (int fd, bool enable) + { + // We don't need to close it (see fd_to_handle()). + // + HANDLE h (fd_to_handle (fd)); + + // See GH issue #312 for background on this logic. + // + DWORD m; + if (!GetConsoleMode (h, &m)) + throw_system_ios_failure (GetLastError ()); + + // Some terminals (e.g. Windows Terminal) enable VT processing by default. + // + if ((m & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0) + return true; + + if (enable) + { + // If SetConsoleMode() fails, assume VT processing is unsupported (it + // is only supported from a certain build of Windows 10). + // + // Note that Wine pretends to support this but doesn't handle the escape + // sequences. See https://bugs.winehq.org/show_bug.cgi?id=49780. 
+ // + if (SetConsoleMode (h, + (m | + ENABLE_PROCESSED_OUTPUT | + ENABLE_VIRTUAL_TERMINAL_PROCESSING))) + return true; + } + + return false; + } + static pair<size_t, size_t> fdselect (fdselect_set& read, fdselect_set& write, @@ -1906,13 +2013,14 @@ namespace butl for (fdselect_state& s: read) { + s.ready = false; + if (s.fd == nullfd) continue; if (s.fd < 0) throw invalid_argument ("invalid file descriptor"); - s.ready = false; ++n; } @@ -1933,7 +2041,7 @@ namespace butl // size_t r (0); - while (true) + for (size_t i (0);; ++i) { for (fdselect_state& s: read) { @@ -2006,7 +2114,11 @@ namespace butl if (r != 0) break; - DWORD t (50); + // Use exponential backoff but not too aggressive and with 25ms max. + // + DWORD t ( + static_cast<DWORD> (i <= 1000 ? 0 : + i >= 1000 + 100 ? 25 : 1 + ((i - 1000) / 4))); if (timeout) { @@ -2023,7 +2135,10 @@ namespace butl break; } - Sleep (t); + if (t == 0) + this_thread::yield (); + else + Sleep (t); } return make_pair (r, 0); @@ -2066,6 +2181,12 @@ namespace butl return r; } + streamsize + fdwrite (int fd, const void* buf, size_t n) + { + return _write (fd, buf, static_cast<unsigned int> (n)); + } + #endif pair<size_t, size_t> diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx index 4dc89a3..9c8f786 100644 --- a/libbutl/fdstream.hxx +++ b/libbutl/fdstream.hxx @@ -137,6 +137,11 @@ namespace butl class LIBBUTL_SYMEXPORT fdstreambuf: public bufstreambuf { public: + // Reasonable (for stack allocation) buffer size that provides decent + // performance. 
+ // + static const std::size_t buffer_size = 8192; + fdstreambuf () = default; // Unless specified, the current read/write position is assumed to @@ -174,6 +179,9 @@ namespace butl bool blocking (bool); + bool + blocking () const {return !non_blocking_;} + public: using base = bufstreambuf; @@ -238,7 +246,7 @@ namespace butl private: auto_fd fd_; - char buf_[8192]; + char buf_[buffer_size]; bool non_blocking_ = false; }; @@ -271,7 +279,9 @@ namespace butl binary = 0x02, skip = 0x04, blocking = 0x08, - non_blocking = 0x10 + non_blocking = 0x10, + + none = 0 }; inline fdstream_mode operator& (fdstream_mode, fdstream_mode); @@ -311,6 +321,9 @@ namespace butl int fd () const {return buf_.fd ();} + bool + blocking () const {return buf_.blocking ();} + protected: fdstreambuf buf_; }; @@ -641,6 +654,54 @@ namespace butl LIBBUTL_SYMEXPORT ifdstream& getline (ifdstream&, std::string&, char delim = '\n'); + // The non-blocking getline() version that reads the line in potentially + // multiple calls. Key differences compared to getline(): + // + // - Stream must be in the non-blocking mode and exception mask must have + // at least badbit. + // + // - Return type is bool instead of stream. Return true if the line has been + // read or false if it should be called again once the stream has more + // data to read. Also return true on failure. + // + // - The string must be empty on the first call. + // + // - There could still be data to read in the stream's buffer (as opposed to + // file descriptor) after this function returns true and you should be + // careful not to block on fdselect() in this case. In fact, the + // recommended pattern is to call this function first and only call + // fdselect() if it returns false. 
+ // + // The typical usage in combination with the eof() helper: + // + // fdselect_set fds {is.fd (), ...}; + // fdselect_state& ist (fds[0]); + // fdselect_state& ...; + // + // for (string l; ist.fd != nullfd || ...; ) + // { + // if (ist.fd != nullfd && getline_non_blocking (is, l)) + // { + // if (eof (is)) + // ist.fd = nullfd; + // else + // { + // // Consume line. + // + // l.clear (); + // } + // + // continue; + // } + // + // ifdselect (fds); + // + // // Handle other ready fds. + // } + // + LIBBUTL_SYMEXPORT bool + getline_non_blocking (ifdstream&, std::string&, char delim = '\n'); + // Open a file returning an auto_fd that holds its file descriptor on // success and throwing ios::failure otherwise. // @@ -840,6 +901,14 @@ namespace butl LIBBUTL_SYMEXPORT bool fdterm (int); + // Test whether a terminal file descriptor supports ANSI color output. If + // the enable argument is true, then also try to enable color output (only + // applicable on some platforms, such as Windows). Throw ios::failure on the + // underlying OS error. + // + LIBBUTL_SYMEXPORT bool + fdterm_color (int, bool enable); + // Wait until one or more file descriptors becomes ready for input (reading) // or output (writing). Return the pair of numbers of descriptors that are // ready. Throw std::invalid_argument if anything is wrong with arguments @@ -847,7 +916,7 @@ namespace butl // underlying OS error. // // Note that the function clears all the previously-ready entries on each - // call. Entries with nullfd are ignored. + // call. Entries with nullfd are ignored (but cleared). // // On Windows only pipes and only their input (read) ends are supported. // @@ -855,11 +924,13 @@ namespace butl { int fd; bool ready; + void* data; // Arbitrary data which can be associated with the descriptor. // Note: intentionally non-explicit to allow implicit initialization when // pushing to fdselect_set. 
// - fdselect_state (int fd): fd (fd), ready (false) {} + fdselect_state (int fd, void* d = nullptr) + : fd (fd), ready (false), data (d) {} }; using fdselect_set = small_vector<fdselect_state, 4>; @@ -912,6 +983,11 @@ namespace butl // LIBBUTL_SYMEXPORT std::streamsize fdread (int, void*, std::size_t); + + // POSIX write() function wrapper, for uniformity. + // + LIBBUTL_SYMEXPORT std::streamsize + fdwrite (int, const void*, std::size_t); } #include <libbutl/fdstream.ixx> diff --git a/libbutl/fdstream.ixx b/libbutl/fdstream.ixx index 08e317c..e024af9 100644 --- a/libbutl/fdstream.ixx +++ b/libbutl/fdstream.ixx @@ -167,6 +167,8 @@ namespace butl inline std::vector<char> ifdstream:: read_binary () { + // @@ TODO: surely there is a more efficient way! See sha256! + std::vector<char> v (std::istreambuf_iterator<char> (*this), std::istreambuf_iterator<char> ()); return v; diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 0a3d260..28a0de8 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -16,7 +16,7 @@ #else # include <libbutl/win32-utility.hxx> -# include <io.h> // _find*(), _unlink(), _chmod() +# include <io.h> // _unlink(), _chmod() # include <direct.h> // _mkdir(), _rmdir() # include <winioctl.h> // FSCTL_SET_REPARSE_POINT # include <sys/types.h> // _stat @@ -28,8 +28,9 @@ # define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) # endif -# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t -# include <cstring> // strncmp() +# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t +# include <cstring> // strncmp() +# include <type_traits> // is_same #endif #include <chrono> @@ -183,6 +184,19 @@ namespace butl // static inline constexpr int // ansec (...) 
{return 0;} + static inline entry_time + entry_tm (const struct stat& s) noexcept + { + auto tm = [] (time_t sec, auto nsec) -> timestamp + { + return system_clock::from_time_t (sec) + + chrono::duration_cast<duration> (chrono::nanoseconds (nsec)); + }; + + return {tm (s.st_mtime, mnsec<struct stat> (&s, true)), + tm (s.st_atime, ansec<struct stat> (&s, true))}; + } + // Return the modification and access times of a regular file or directory. // static entry_time @@ -200,14 +214,7 @@ namespace butl if (dir ? !S_ISDIR (s.st_mode) : !S_ISREG (s.st_mode)) return {timestamp_nonexistent, timestamp_nonexistent}; - auto tm = [] (time_t sec, auto nsec) -> timestamp - { - return system_clock::from_time_t (sec) + - chrono::duration_cast<duration> (chrono::nanoseconds (nsec)); - }; - - return {tm (s.st_mtime, mnsec<struct stat> (&s, true)), - tm (s.st_atime, ansec<struct stat> (&s, true))}; + return entry_tm (s); } // Set the modification and access times for a regular file or directory. @@ -309,16 +316,15 @@ namespace butl // Open a filesystem entry for reading and optionally writing its // meta-information and return the entry handle and meta-information if the - // path refers to an existing entry and nullhandle otherwise. Follow reparse - // points by default. Underlying OS errors are reported by throwing - // std::system_error, unless ignore_error is true in which case nullhandle - // is returned. In the latter case the error code can be obtained by calling - // GetLastError(). + // path refers to an existing entry and nullhandle otherwise. Underlying OS + // errors are reported by throwing std::system_error, unless ignore_error is + // true in which case nullhandle is returned. In the latter case the error + // code can be obtained by calling GetLastError(). 
// static inline pair<win32::auto_handle, BY_HANDLE_FILE_INFORMATION> entry_info_handle (const char* p, bool write, - bool fr = true, + bool follow_reparse_points, bool ie = false) { // Open the entry for reading/writing its meta-information. Follow reparse @@ -333,7 +339,7 @@ namespace butl nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS | // Required for a directory. - (fr ? 0 : FILE_FLAG_OPEN_REPARSE_POINT), + (follow_reparse_points ? 0 : FILE_FLAG_OPEN_REPARSE_POINT), nullptr)); if (h == nullhandle) @@ -358,13 +364,15 @@ namespace butl } // Return a flag indicating whether the path is to an existing filesystem - // entry and its meta-information if so. Follow reparse points by default. + // entry and its meta-information if so. // static inline pair<bool, BY_HANDLE_FILE_INFORMATION> - path_entry_info (const char* p, bool fr = true, bool ie = false) + path_entry_handle_info (const char* p, + bool follow_reparse_points, + bool ie = false) { pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p, false /* write */, fr, ie)); + entry_info_handle (p, false /* write */, follow_reparse_points, ie)); if (hi.first == nullhandle) return make_pair (false, BY_HANDLE_FILE_INFORMATION ()); @@ -376,9 +384,34 @@ namespace butl } static inline pair<bool, BY_HANDLE_FILE_INFORMATION> - path_entry_info (const path& p, bool fr = true, bool ie = false) + path_entry_handle_info (const path& p, bool fr, bool ie = false) { - return path_entry_info (p.string ().c_str (), fr, ie); + return path_entry_handle_info (p.string ().c_str (), fr, ie); + } + + // Return a flag indicating whether the path is to an existing filesystem + // entry and its extended attributes if so. Don't follow reparse points. 
+ // + static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA> + path_entry_info (const char* p, bool ie = false) + { + WIN32_FILE_ATTRIBUTE_DATA r; + if (!GetFileAttributesExA (p, GetFileExInfoStandard, &r)) + { + DWORD ec; + if (ie || error_file_not_found (ec = GetLastError ())) + return make_pair (false, WIN32_FILE_ATTRIBUTE_DATA ()); + + throw_system_error (ec); + } + + return make_pair (true, r); + } + + static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA> + path_entry_info (const path& p, bool ie = false) + { + return path_entry_info (p.string ().c_str (), ie); } // Reparse point data. @@ -614,8 +647,48 @@ namespace butl return reparse_point_entry (p.string ().c_str (), ie); } - pair<bool, entry_stat> - path_entry (const char* p, bool fl, bool ie) + static inline timestamp + to_timestamp (const FILETIME& t) + { + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" + // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // + uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) | + t.dwLowDateTime); + + nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch. + nsec *= 100; // Now in nanoseconds. + + return timestamp ( + chrono::duration_cast<duration> (chrono::nanoseconds (nsec))); + } + + static inline FILETIME + to_filetime (timestamp t) + { + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert "UNIX epoch" (1970-01-01T00:00:00Z) + // to it we need to add 11644473600 seconds. + // + uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> ( + t.time_since_epoch ()).count ()); + + ticks /= 100; // Now in 100 nanosecond "ticks". + ticks += 11644473600ULL * 10000000; // Now in "Windows epoch". 
+ + FILETIME r; + r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF; + r.dwLowDateTime = ticks & 0xFFFFFFFF; + return r; + } + + // If the being returned entry type is regular or directory and et is not + // NULL, then also save the entry modification and access times into the + // referenced variable. + // + static inline pair<bool, entry_stat> + path_entry (const char* p, bool fl, bool ie, entry_time* et) { // A path like 'C:', while being a root path in our terminology, is not as // such for Windows, that maintains current directory for each drive, and @@ -626,73 +699,105 @@ namespace butl string d; if (path::traits_type::root (p)) { - d = p; + d = string (p); // GCC bug #105329. d += path::traits_type::directory_separator; p = d.c_str (); } // Stat the entry not following reparse points. // - pair<bool, BY_HANDLE_FILE_INFORMATION> pi ( - path_entry_info (p, false /* follow_reparse_points */, ie)); + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p, ie)); if (!pi.first) return make_pair (false, entry_stat {entry_type::unknown, 0}); - if (reparse_point (pi.second.dwFileAttributes)) + auto entry_info = [et] (const auto& ei) { - pair<entry_type, path> rp (reparse_point_entry (p, ie)); + if (et != nullptr) + { + et->modification = to_timestamp (ei.ftLastWriteTime); + et->access = to_timestamp (ei.ftLastAccessTime); + } + + if (directory (ei.dwFileAttributes)) + return make_pair (true, entry_stat {entry_type::directory, 0}); + else + return make_pair ( + true, + entry_stat {entry_type::regular, + ((uint64_t (ei.nFileSizeHigh) << 32) | ei.nFileSizeLow)}); + }; - if (rp.first == entry_type::symlink) + if (!reparse_point (pi.second.dwFileAttributes)) + return entry_info (pi.second); + + pair<entry_type, path> rp (reparse_point_entry (p, ie)); + + if (rp.first == entry_type::symlink) + { + // If following symlinks is requested, then follow the reparse point and + // return its target information. Otherwise, return the symlink entry + // type. 
+ // + if (fl) { - // If following symlinks is requested, then follow the reparse point, - // overwrite its own information with the resolved target information, - // and fall through. Otherwise, return the symlink entry type. - // - if (fl) - { - pi = path_entry_info (p, true /* follow_reparse_points */, ie); + pair<bool, BY_HANDLE_FILE_INFORMATION> pi ( + path_entry_handle_info (p, true /* follow_reparse_points */, ie)); - if (!pi.first) - return make_pair (false, entry_stat {entry_type::unknown, 0}); - } - else - return make_pair (true, entry_stat {entry_type::symlink, 0}); + return pi.first + ? entry_info (pi.second) + : make_pair (false, entry_stat {entry_type::unknown, 0}); } - else if (rp.first == entry_type::unknown) - return make_pair (false, entry_stat {entry_type::unknown, 0}); - else // entry_type::other - return make_pair (true, entry_stat {entry_type::other, 0}); + else + return make_pair (true, entry_stat {entry_type::symlink, 0}); } + else if (rp.first == entry_type::unknown) + return make_pair (false, entry_stat {entry_type::unknown, 0}); + else // entry_type::other + return make_pair (true, entry_stat {entry_type::other, 0}); + } - if (directory (pi.second.dwFileAttributes)) - return make_pair (true, entry_stat {entry_type::directory, 0}); - else - return make_pair ( - true, - entry_stat {entry_type::regular, - ((uint64_t (pi.second.nFileSizeHigh) << 32) | - pi.second.nFileSizeLow)}); + static inline pair<bool, entry_stat> + path_entry (const path& p, bool fl, bool ie, entry_time* et) + { + return path_entry (p.string ().c_str (), fl, ie, et); + } + + pair<bool, entry_stat> + path_entry (const char* p, bool fl, bool ie) + { + return path_entry (p, fl, ie, nullptr /* entry_time */); } permissions path_permissions (const path& p) { - pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p)); + // Let's optimize for the common case when the entry is not a reparse + // point. 
+ // + auto attr_to_perm = [] (const auto& pi) -> permissions + { + if (!pi.first) + throw_generic_error (ENOENT); - if (!pi.first) - throw_generic_error (ENOENT); + // On Windows a filesystem entry is always readable. Also there is no + // notion of group/other permissions at OS level, so we extrapolate user + // permissions to group/other permissions (as the _stat() function + // does). + // + permissions r (permissions::ru | permissions::rg | permissions::ro); - // On Windows a filesystem entry is always readable. Also there is no - // notion of group/other permissions at OS level, so we extrapolate user - // permissions to group/other permissions (as the _stat() function does). - // - permissions r (permissions::ru | permissions::rg | permissions::ro); + if (!readonly (pi.second.dwFileAttributes)) + r |= permissions::wu | permissions::wg | permissions::wo; - if (!readonly (pi.second.dwFileAttributes)) - r |= permissions::wu | permissions::wg | permissions::wo; + return r; + }; - return r; + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p)); + return !pi.first || !reparse_point (pi.second.dwFileAttributes) + ? attr_to_perm (pi) + : attr_to_perm ( + path_entry_handle_info (p, true /* follow_reparse_points */)); } void @@ -718,50 +823,26 @@ namespace butl static entry_time entry_tm (const char* p, bool dir) { - pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p)); - - // If the entry is of the wrong type, then let's pretend that it doesn't - // exists. + // Let's optimize for the common case when the entry is not a reparse + // point. // - if (!pi.first || directory (pi.second.dwFileAttributes) != dir) - return {timestamp_nonexistent, timestamp_nonexistent}; - - auto tm = [] (const FILETIME& t) -> timestamp + auto attr_to_time = [dir] (const auto& pi) -> entry_time { - // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" - // (1601-01-01T00:00:00Z). 
To convert it to "UNIX epoch" - // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // If the entry is of the wrong type, then let's pretend that it doesn't + // exists. // - uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) | - t.dwLowDateTime); + if (!pi.first || directory (pi.second.dwFileAttributes) != dir) + return entry_time {timestamp_nonexistent, timestamp_nonexistent}; - nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch. - nsec *= 100; // Now in nanoseconds. - - return timestamp ( - chrono::duration_cast<duration> (chrono::nanoseconds (nsec))); + return entry_time {to_timestamp (pi.second.ftLastWriteTime), + to_timestamp (pi.second.ftLastAccessTime)}; }; - return {tm (pi.second.ftLastWriteTime), tm (pi.second.ftLastAccessTime)}; - } - - static inline FILETIME - to_filetime (timestamp t) - { - // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" - // (1601-01-01T00:00:00Z). To convert "UNIX epoch" - // (1970-01-01T00:00:00Z) to it we need to add 11644473600 seconds. - // - uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> ( - t.time_since_epoch ()).count ()); - - ticks /= 100; // Now in 100 nanosecond "ticks". - ticks += 11644473600ULL * 10000000; // Now in "Windows epoch". - - FILETIME r; - r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF; - r.dwLowDateTime = ticks & 0xFFFFFFFF; - return r; + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p)); + return !pi.first || !reparse_point (pi.second.dwFileAttributes) + ? attr_to_time (pi) + : attr_to_time ( + path_entry_handle_info (p, true /* follow_reparse_points */)); } // Set the modification and access times for a regular file or directory. @@ -772,7 +853,9 @@ namespace butl // See also touch_file() below. 
// pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p, true /* write */)); + entry_info_handle (p, + true /* write */, + true /* follow_reparse_points */)); // If the entry is of the wrong type, then let's pretend that it doesn't // exist. @@ -857,7 +940,9 @@ namespace butl // implicitly. // pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p.string ().c_str (), true /* write */)); + entry_info_handle (p.string ().c_str (), + true /* write */, + true /* follow_reparse_points */)); if (hi.first != nullhandle) { @@ -1006,7 +1091,7 @@ namespace butl // try { - for (const dir_entry& de: dir_iterator (p, false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow)) { path ep (p / de.path ()); //@@ Would be good to reuse the buffer. @@ -1057,12 +1142,12 @@ namespace butl // failure (see mventry() for details). If that's the case, we will keep // trying to move the file for two seconds. // - for (size_t i (0); i < 21; ++i) + for (size_t i (0); i < 41; ++i) { - // Sleep 100 milliseconds before the removal retry. + // Sleep 50 milliseconds before the removal retry. // if (i != 0) - Sleep (100); + Sleep (50); ur = _unlink (f); @@ -1610,9 +1695,12 @@ namespace butl } void - cpfile (const path& from, const path& to, cpflags fl) + cpfile (const path& from, + const path& to, + cpflags fl, + optional<permissions> cperm) { - permissions perm (path_permissions (from)); + permissions perm (cperm ? *cperm : path_permissions (from)); auto_rmfile rm; cpfile<is_base_of<system_error, ios_base::failure>::value> ( @@ -1704,12 +1792,12 @@ namespace butl // fdopen(). // DWORD ec; - for (size_t i (0); i < 21; ++i) + for (size_t i (0); i < 41; ++i) { // Sleep 100 milliseconds before the move retry. 
// if (i != 0) - Sleep (100); + Sleep (50); if (MoveFileExA (f, t, mfl)) return; @@ -1811,7 +1899,7 @@ namespace butl h_ = x.h_; x.h_ = nullptr; - ignore_dangling_ = x.ignore_dangling_; + mode_ = x.mode_; } return *this; } @@ -1832,6 +1920,11 @@ namespace butl entry_type dir_entry:: type (bool follow_symlinks) const { + // Note that this function can only be used for resolving an entry type + // lazily and thus can't be used with the detect_dangling dir_iterator + // mode (see dir_iterator::next () implementation for details). Thus, we + // always throw on the stat()/lstat() failure. + // path_type p (b_ / p_); struct stat s; if ((follow_symlinks @@ -1839,7 +1932,18 @@ namespace butl : lstat (p.string ().c_str (), &s)) != 0) throw_generic_error (errno); - return butl::type (s); + entry_type r (butl::type (s)); + + // While at it, also save the entry modification and access times. + // + if (r != entry_type::symlink) + { + entry_time t (entry_tm (s)); + mtime_ = t.modification; + atime_ = t.access; + } + + return r; } // dir_iterator @@ -1850,8 +1954,8 @@ namespace butl }; dir_iterator:: - dir_iterator (const dir_path& d, bool ignore_dangling) - : ignore_dangling_ (ignore_dangling) + dir_iterator (const dir_path& d, mode m) + : mode_ (m) { unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ())); h_ = h.get (); @@ -1867,7 +1971,7 @@ namespace butl } template <typename D> - static inline /*constexpr*/ entry_type + static inline /*constexpr*/ optional<entry_type> d_type (const D* d, decltype(d->d_type)*) { switch (d->d_type) @@ -1895,13 +1999,13 @@ namespace butl #endif return entry_type::other; - default: return entry_type::unknown; + default: return nullopt; } } template <typename D> - static inline constexpr entry_type - d_type (...) {return entry_type::unknown;} + static inline constexpr optional<entry_type> + d_type (...) 
{return nullopt;} void dir_iterator:: next () @@ -1923,25 +2027,43 @@ namespace butl e_.p_ = move (p); e_.t_ = d_type<struct dirent> (de, nullptr); - e_.lt_ = entry_type::unknown; + e_.lt_ = nullopt; + + e_.mtime_ = timestamp_unknown; + e_.atime_ = timestamp_unknown; // If requested, we ignore dangling symlinks, skipping ones with - // non-existing or inaccessible targets. + // non-existing or inaccessible targets (ignore_dangling mode), or set + // the entry_type::unknown type for them (detect_dangling mode). // - if (ignore_dangling_) + if (mode_ != no_follow) { - // Note that ltype () can potentially lstat() (see d_type() for + bool dd (mode_ == detect_dangling); + + // Note that ltype () can potentially lstat() (see type() for // details) and so throw. We, however, need to skip the entry if it // is already removed (due to a race) and throw on any other error. // path fp (e_.base () / e_.path ()); const char* p (fp.string ().c_str ()); - if (e_.t_ == entry_type::unknown) + if (!e_.t_) { struct stat s; if (lstat (p, &s) != 0) { + // Given that we have already enumerated the filesystem entry, + // these error codes can only mean that the entry doesn't exist + // anymore and so we always skip it. + // + // If errno is EACCES, then the permission to search a directory + // we currently iterate over has been revoked. Throwing in this + // case sounds like the best choice. + // + // Note that according to POSIX the filesystem entry we call + // lstat() on doesn't require any specific permissions to be + // granted. + // if (errno == ENOENT || errno == ENOTDIR) continue; @@ -1949,21 +2071,53 @@ namespace butl } e_.t_ = type (s); + + if (*e_.t_ != entry_type::symlink) + { + entry_time t (entry_tm (s)); + e_.mtime_ = t.modification; + e_.atime_ = t.access; + } } - if (e_.t_ == entry_type::symlink) + // The entry type should be present and may not be + // entry_type::unknown. 
+ // + //assert (e_.t_ && *e_.t_ != entry_type::unknown); + + // Check if the symlink target exists and is accessible and set the + // target type. + // + if (*e_.t_ == entry_type::symlink) { struct stat s; if (stat (p, &s) != 0) { if (errno == ENOENT || errno == ENOTDIR || errno == EACCES) - continue; - - throw_generic_error (errno); + { + if (dd) + e_.lt_ = entry_type::unknown; + else + continue; + } + else + throw_generic_error (errno); } + else + { + e_.lt_ = type (s); - e_.lt_ = type (s); // While at it, set the target type. + entry_time t (entry_tm (s)); + e_.mtime_ = t.modification; + e_.atime_ = t.access; + } } + + // The symlink target type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (*e_.t_ != entry_type::symlink || + // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown))); } } else if (errno == 0) @@ -1984,11 +2138,49 @@ namespace butl // dir_entry // + entry_type dir_entry:: + type (bool follow_symlinks) const + { + // Note that this function can only be used for resolving an entry type + // lazily and thus can't be used with the detect_dangling dir_iterator + // mode (see dir_iterator::next () implementation for details). Thus, we + // always throw if the entry info can't be retrieved. + // + // While at it, also save the entry modification and access times. 
+ // + path_type p (base () / path ()); + entry_time et; + pair<bool, entry_stat> e ( + path_entry (p, follow_symlinks, false /* ignore_error */, &et)); + + if (!e.first) + throw_generic_error (ENOENT); + + if (e.second.type == entry_type::regular || + e.second.type == entry_type::directory) + { + mtime_ = et.modification; + atime_ = et.access; + } + + return e.second.type; + } + + // dir_iterator + // + static_assert(is_same<HANDLE, void*>::value, "HANDLE is not void*"); + + static inline HANDLE + to_handle (intptr_t h) + { + return reinterpret_cast<HANDLE> (h); + } + dir_iterator:: ~dir_iterator () { if (h_ != -1) - _findclose (h_); // Ignore any errors. + FindClose (to_handle (h_)); // Ignore any errors. } dir_iterator& dir_iterator:: @@ -1998,56 +2190,32 @@ namespace butl { e_ = move (x.e_); - if (h_ != -1 && _findclose (h_) == -1) - throw_generic_error (errno); + if (h_ != -1 && !FindClose (to_handle (h_))) + throw_system_error (GetLastError ()); h_ = x.h_; x.h_ = -1; - ignore_dangling_ = x.ignore_dangling_; + mode_ = x.mode_; } return *this; } - entry_type dir_entry:: - type (bool follow_symlinks) const - { - path_type p (base () / path ()); - pair<bool, entry_stat> e (path_entry (p, follow_symlinks)); - - if (!e.first) - throw_generic_error (ENOENT); - - return e.second.type; - } - - // dir_iterator - // - struct auto_dir + dir_iterator:: + dir_iterator (const dir_path& d, mode m) + : mode_ (m) { - explicit - auto_dir (intptr_t& h): h_ (&h) {} - - auto_dir (const auto_dir&) = delete; - auto_dir& operator= (const auto_dir&) = delete; - - ~auto_dir () + struct deleter { - if (h_ != nullptr && *h_ != -1) - _findclose (*h_); - } - - void release () {h_ = nullptr;} + void operator() (intptr_t* p) const + { + if (p != nullptr && *p != -1) + FindClose (to_handle (*p)); + } + }; - private: - intptr_t* h_; - }; + unique_ptr<intptr_t, deleter> h (&h_); - dir_iterator:: - dir_iterator (const dir_path& d, bool ignore_dangling) - : ignore_dangling_ (ignore_dangling) - { 
- auto_dir h (h_); e_.b_ = d; // Used by next(). next (); @@ -2060,31 +2228,37 @@ namespace butl for (;;) { bool r; - _finddata_t fi; + WIN32_FIND_DATA fi; if (h_ == -1) { // The call is made from the constructor. Any other call with h_ == -1 // is illegal. // - - // Check to distinguish non-existent vs empty directories. + // Note that we used to check for the directory existence before + // iterating over it. However, let's not pessimize things and only + // check for the directory existence if FindFirstFileExA() fails. // - if (!dir_exists (e_.base ())) - throw_generic_error (ENOENT); - h_ = _findfirst ((e_.base () / path ("*")).string ().c_str (), &fi); - r = h_ != -1; + h_ = reinterpret_cast<intptr_t> ( + FindFirstFileExA ((e_.base () / path ("*")).string ().c_str (), + FindExInfoBasic, + &fi, + FindExSearchNameMatch, + NULL, + 0)); + + r = (h_ != -1); } else - r = _findnext (h_, &fi) == 0; + r = FindNextFileA (to_handle (h_), &fi); if (r) { // We can accept some overhead for '.' and '..' (relying on short // string optimization) in favor of a more compact code. // - path p (fi.name); + path p (fi.cFileName); // Skip '.' and '..'. // @@ -2093,26 +2267,47 @@ namespace butl e_.p_ = move (p); - // Note that the entry type detection always requires to additionally - // query the entry information. Thus, we evaluate its type lazily. + DWORD a (fi.dwFileAttributes); + bool rp (reparse_point (a)); + + // Evaluate the entry type lazily if this is a reparse point since it + // requires to additionally query the entry information (see + // reparse_point_entry() for details). // - e_.t_ = entry_type::unknown; + e_.t_ = rp ? nullopt : + directory (a) ? optional<entry_type> (entry_type::directory) : + optional<entry_type> (entry_type::regular) ; - e_.lt_ = entry_type::unknown; + e_.lt_ = nullopt; + + e_.mtime_ = rp ? 
timestamp_unknown : to_timestamp (fi.ftLastWriteTime); + + // Note that according to MSDN for the FindFirstFile[Ex]() function + // "the NTFS file system delays updates to the last access time for a + // file by up to 1 hour after the last access" and "on the FAT file + // system access time has a resolution of 1 day". + // + e_.atime_ = timestamp_unknown; // If requested, we ignore dangling symlinks and junctions, skipping - // ones with non-existing or inaccessible targets. + // ones with non-existing or inaccessible targets (ignore_dangling + // mode), or set the entry_type::unknown type for them + // (detect_dangling mode). // - if (ignore_dangling_) + if (rp && mode_ != no_follow) { + bool dd (mode_ == detect_dangling); + // Check the last error code throwing for codes other than "path not - // found" and "access denied". + // found" and "access denied" and returning this error code + // otherwise. // auto verify_error = [] () { DWORD ec (GetLastError ()); if (!error_file_not_found (ec) && ec != ERROR_ACCESS_DENIED) throw_system_error (ec); + return ec; }; // Note that ltype() queries the entry information due to the type @@ -2123,48 +2318,50 @@ namespace butl path fp (e_.base () / e_.path ()); const char* p (fp.string ().c_str ()); - DWORD a (GetFileAttributesA (p)); - if (a == INVALID_FILE_ATTRIBUTES) - { - // Note that sometimes trying to obtain attributes for a - // filesystem entry that was potentially removed ends up with - // ERROR_ACCESS_DENIED. One can argue that there can be another - // reason for this error (antivirus, indexer, etc). However, given - // that the entry is seen by a _find*() function and normally you - // can retrieve attributes for a read-only entry and for an entry - // opened in the non-shared mode (see the CreateFile() function - // documentation for details) the only meaningful explanation for - // ERROR_ACCESS_DENIED is that the entry is being removed. Also - // the DeleteFile() documentation mentions such a possibility. 
- // - verify_error (); - continue; - } + pair<entry_type, path> rpe ( + reparse_point_entry (p, true /* ignore_error */)); - if (reparse_point (a)) + if (rpe.first == entry_type::unknown) { - pair<entry_type, path> rp ( - reparse_point_entry (p, true /* ignore_error */)); + DWORD ec (verify_error ()); - if (rp.first == entry_type::unknown) - { - verify_error (); + // Silently skip the entry if it is not found (being already + // deleted) or we are in the ignore dangling mode. Otherwise, set + // the entry type to unknown. + // + // Note that sometimes trying to obtain information for a being + // removed filesystem entry ends up with ERROR_ACCESS_DENIED (see + // DeleteFile() and CreateFile() for details). Probably getting + // this error code while trying to obtain the reparse point + // information (involves calling CreateFile(FILE_READ_EA) and + // DeviceIoControl()) can also be interpreted differently. We, + // however, always treat it as "access denied" in the detect + // dangling mode for good measure. Let's see if that won't be too + // noisy. + // + if (ec != ERROR_ACCESS_DENIED || !dd) continue; - } - e_.t_ = rp.first; + // Fall through. } - else - e_.t_ = directory (a) - ? entry_type::directory - : entry_type::regular; - if (e_.t_ == entry_type::symlink) + e_.t_ = rpe.first; + + // In this mode the entry type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (e_.t_ && (dd || *e_.t_ != entry_type::unknown)); + + // Check if the symlink target exists and is accessible and set the + // target type. + // + if (*e_.t_ == entry_type::symlink) { // Query the target info. // // Note that we use entry_info_handle() rather than - // path_entry_info() to be able to verify an error on failure. + // path_entry_handle_info() to be able to verify an error on + // failure. 
// pair<auto_handle, BY_HANDLE_FILE_INFORMATION> ti ( entry_info_handle (p, @@ -2175,31 +2372,59 @@ namespace butl if (ti.first == nullhandle) { verify_error (); - continue; + + if (dd) + e_.lt_ = entry_type::unknown; + else + continue; } + else + { + ti.first.close (); // Checks for error. - ti.first.close (); // Checks for error. + e_.lt_ = directory (ti.second.dwFileAttributes) + ? entry_type::directory + : entry_type::regular; - // While at it, set the target type. - // - e_.lt_ = directory (ti.second.dwFileAttributes) - ? entry_type::directory - : entry_type::regular; + e_.mtime_ = to_timestamp (ti.second.ftLastWriteTime); + e_.atime_ = to_timestamp (ti.second.ftLastAccessTime); + } } + + // In this mode the symlink target type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (*e_.t_ != entry_type::symlink || + // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown))); } } - else if (errno == ENOENT) + else { - // End of stream. + DWORD ec (GetLastError ()); + bool first (h_ == -1); + + // Check to distinguish non-existent vs empty directories. // - if (h_ != -1) + // Note that dir_exists() handles not only the "filesystem entry does + // not exist" case but also the case when the entry exists but is not + // a directory. + // + if (first && !dir_exists (e_.base ())) + throw_generic_error (ENOENT); + + if (ec == (first ? ERROR_FILE_NOT_FOUND : ERROR_NO_MORE_FILES)) { - _findclose (h_); - h_ = -1; + // End of stream. + // + if (h_ != -1) + { + FindClose (to_handle (h_)); + h_ = -1; + } } + else + throw_system_error (ec); } - else - throw_generic_error (errno); break; } @@ -2207,14 +2432,27 @@ namespace butl #endif // Search for paths matching the pattern and call the specified function for - // each matching path. Return false if the underlying func() call returns - // false. Otherwise the function conforms to the path_search() description. + // each matching path. 
Return false if the underlying func() or + // dangling_func() call returns false. Otherwise the function conforms to + // the path_search() description. // // Note that the access to the traversed directory tree (real or virtual) is // performed through the provided filesystem object. // static const string any_dir ("*/"); + // Filesystem traversal callbacks. + // + // Called before entering a directory for the recursive traversal. If + // returns false, then the directory is not entered. + // + using preopen = function<bool (const dir_path&)>; + + // Called before skipping a dangling link. If returns false, then the + // traversal is stopped. + // + using preskip = function<bool (const dir_entry&)>; + template <typename FS> static bool search ( @@ -2222,11 +2460,14 @@ namespace butl dir_path pattern_dir, path_match_flags fl, const function<bool (path&&, const string& pattern, bool interm)>& func, + const function<bool (const dir_entry&)>& dangling_func, FS& filesystem) { bool follow_symlinks ((fl & path_match_flags::follow_symlinks) != path_match_flags::none); + assert (follow_symlinks || dangling_func == nullptr); + // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. // @@ -2273,17 +2514,47 @@ namespace butl // bool simple (pattern.simple ()); - // Note that we rely on "small function object" optimization here. + // If symlinks need to be followed, then pass the preskip callback for the + // filesystem iterator. + // + bool fs (follow_symlinks || !simple); + preskip ps; + bool dangling_stop (false); + + if (fs) + { + if (dangling_func != nullptr) + { + // Note that we rely on the "small function object" optimization here. 
+ // + ps = [&dangling_func, &dangling_stop] (const dir_entry& de) -> bool + { + dangling_stop = !dangling_func (de); + return !dangling_stop; + }; + } + else + { + ps = [] (const dir_entry& de) -> bool + { + throw_generic_error ( + de.ltype () == entry_type::symlink ? ENOENT : EACCES); + }; + } + } + + // Note that we rely on the "small function object" optimization here. // typename FS::iterator_type i (filesystem.iterator ( pattern_dir, path_pattern_recursive (pcr), path_pattern_self_matching (pcr), - follow_symlinks || !simple, + fs, [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. { return func (pattern_dir / p, any_dir, true); - })); + }, + move (ps))); // Canonicalize the pattern component collapsing consecutive stars (used to // express that it is recursive) into a single one. @@ -2329,7 +2600,7 @@ namespace butl // represented by the iterator as an empty path, and so we need to // compute it (the leaf would actually be enough) for matching. This // leaf can be acquired from the pattern_dir (if not empty) or - // start_dir. We don't expect the start_dir to be empty, as the + // start_dir. We don't expect the start_dir to be empty, as the // filesystem object must replace an empty start directory with the // current one. This is the case when we search in the current directory // (start_dir is empty) with a pattern that starts with a *** wildcard @@ -2368,10 +2639,14 @@ namespace butl pattern_dir / path_cast<dir_path> (move (p)), fl, func, + dangling_func, filesystem)) return false; } + if (dangling_stop) + return false; + // If requested, also search with the absent-matching pattern path // component omitted, unless this is the only pattern component. 
// @@ -2379,8 +2654,15 @@ namespace butl pc.to_directory () && (!pattern_dir.empty () || !simple) && pc.string ().find_first_not_of ('*') == string::npos && - !search (pattern.leaf (pc), pattern_dir, fl, func, filesystem)) + !search (pattern.leaf (pc), + pattern_dir, + fl, + func, + dangling_func, + filesystem)) + { return false; + } return true; } @@ -2389,8 +2671,6 @@ namespace butl // static const dir_path empty_dir; - using preopen = function<bool (const dir_path&)>; - // Base for filesystem (see above) implementations. // // Don't copy start directory. It is expected to exist till the end of the @@ -2440,13 +2720,17 @@ namespace butl bool recursive, bool self, bool fs, - preopen po) + preopen po, + preskip ps) : start_ (move (p)), recursive_ (recursive), self_ (self), follow_symlinks_ (fs), - preopen_ (move (po)) + preopen_ (move (po)), + preskip_ (move (ps)) { + assert (fs || ps == nullptr); + open (dir_path (), self_); } @@ -2456,12 +2740,16 @@ namespace butl recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; recursive_dir_iterator (recursive_dir_iterator&&) = default; - // Return false if no more entries left. Otherwise save the next entry path - // and return true. The path is relative to the directory being + // Return false if no more entries left. Otherwise save the next entry + // path and return true. The path is relative to the directory being // traversed and contains a trailing separator for sub-directories. Throw // std::system_error in case of a failure (insufficient permissions, // dangling symlink encountered, etc). // + // If symlinks need to be followed, then skip inaccessible/dangling + // entries or, if the preskip callback is specified and returns false for + // such an entry, stop the entire traversal. + // bool next (path& p) { @@ -2470,44 +2758,64 @@ namespace butl auto& i (iters_.back ()); - // If we got to the end of directory sub-entries, then go one level up - // and return this directory path. 
- // - if (i.first == dir_iterator ()) + for (;;) // Skip inaccessible/dangling entries. { - path d (move (i.second)); - iters_.pop_back (); + // If we got to the end of directory sub-entries, then go one level up + // and return this directory path. + // + if (i.first == dir_iterator ()) + { + path d (move (i.second)); + iters_.pop_back (); - // Return the path unless it is the last one (the directory we started - // to iterate from) and the self flag is not set. + // Return the path unless it is the last one (the directory we + // started to iterate from) and the self flag is not set. + // + if (iters_.empty () && !self_) + return false; + + p = move (d); + return true; + } + + const dir_entry& de (*i.first); + + // Append separator if a directory. Note that dir_entry::type() can + // throw. // - if (iters_.empty () && !self_) - return false; + entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); - p = move (d); - return true; - } + // If the entry turned out to be inaccessible/dangling, then skip it + // if the preskip function is not specified or returns true and stop + // the entire traversal otherwise. + // + if (et == entry_type::unknown) + { + if (preskip_ != nullptr && !preskip_ (de)) + { + iters_.clear (); + return false; + } - const dir_entry& de (*i.first); + ++i.first; + continue; + } - // Append separator if a directory. Note that dir_entry::type() can - // throw. - // - entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); - path pe (et == entry_type::directory - ? path_cast<dir_path> (i.second / de.path ()) - : i.second / de.path ()); + path pe (et == entry_type::directory + ? 
path_cast<dir_path> (i.second / de.path ()) + : i.second / de.path ()); - ++i.first; + ++i.first; - if (recursive_ && pe.to_directory ()) - { - open (path_cast<dir_path> (move (pe)), true); - return next (p); - } + if (recursive_ && pe.to_directory ()) + { + open (path_cast<dir_path> (move (pe)), true); + return next (p); + } - p = move (pe); - return true; + p = move (pe); + return true; + } } private: @@ -2529,10 +2837,15 @@ namespace butl { dir_path d (start_ / p); - // If we follow symlinks, then we ignore the dangling ones. + // If we follow symlinks, then we may need to skip the dangling + // ones. Note, however, that we will be skipping them not at the + // dir_iterator level but ourselves, after calling the preskip + // callback function (see next() for details). // i = dir_iterator (!d.empty () ? d : dir_path ("."), - follow_symlinks_); + follow_symlinks_ + ? dir_iterator::detect_dangling + : dir_iterator::no_follow); } iters_.emplace_back (move (i), move (p)); @@ -2562,6 +2875,7 @@ namespace butl bool self_; bool follow_symlinks_; preopen preopen_; + preskip preskip_; small_vector<pair<dir_iterator, dir_path>, 1> iters_; }; @@ -2585,13 +2899,15 @@ namespace butl bool recursive, bool self, bool follow_symlinks, - preopen po) const + preopen po, + preskip ps) const { return iterator_type (start_ / p, recursive, self, follow_symlinks, - move (po)); + move (po), + move (ps)); } }; @@ -2600,10 +2916,11 @@ namespace butl const path& pattern, const function<bool (path&&, const string& pattern, bool interm)>& func, const dir_path& start, - path_match_flags flags) + path_match_flags flags, + const function<bool (const dir_entry&)>& dangling_func) { real_filesystem fs (pattern.relative () ? start : empty_dir); - search (pattern, dir_path (), flags, func, fs); + search (pattern, dir_path (), flags, func, dangling_func, fs); } // Search path in the directory tree represented by a path. 
@@ -2761,7 +3078,8 @@ namespace butl bool recursive, bool self, bool /*follow_symlinks*/, - preopen po) + preopen po, + preskip) { // If path and sub-path are non-empty, and both are absolute or relative, // then no extra effort is required (prior to checking if one is a @@ -2820,6 +3138,6 @@ namespace butl path_match_flags flags) { path_filesystem fs (start, entry); - search (pattern, dir_path (), flags, func, fs); + search (pattern, dir_path (), flags, func, nullptr /* dangle_func */, fs); } } diff --git a/libbutl/filesystem.hxx b/libbutl/filesystem.hxx index 8804b04..0f5fb0b 100644 --- a/libbutl/filesystem.hxx +++ b/libbutl/filesystem.hxx @@ -36,6 +36,32 @@ namespace butl { + // Path permissions. + // + enum class permissions: std::uint16_t + { + // Note: matching POSIX values. + // + xo = 0001, + wo = 0002, + ro = 0004, + + xg = 0010, + wg = 0020, + rg = 0040, + + xu = 0100, + wu = 0200, + ru = 0400, + + none = 0 + }; + + inline permissions operator& (permissions, permissions); + inline permissions operator| (permissions, permissions); + inline permissions operator&= (permissions&, permissions); + inline permissions operator|= (permissions&, permissions); + // Return true if the path is to an existing regular file. Note that by // default this function follows symlinks. Underlying OS errors are reported // by throwing std::system_error, unless ignore_error is true (in which case @@ -215,8 +241,8 @@ namespace butl // Movable-only type. Move-assignment cancels the lhs object. // - auto_rm (auto_rm&&); - auto_rm& operator= (auto_rm&&); + auto_rm (auto_rm&&) noexcept; + auto_rm& operator= (auto_rm&&) noexcept; auto_rm (const auto_rm&) = delete; auto_rm& operator= (const auto_rm&) = delete; @@ -381,11 +407,13 @@ namespace butl inline cpflags operator&= (cpflags&, cpflags); inline cpflags operator|= (cpflags&, cpflags); - // Copy a regular file, including its permissions, and optionally timestamps. - // Throw std::system_error on failure. 
Fail if the destination file exists - // and the overwrite_content flag is not set. Leave permissions of an - // existing destination file intact unless the overwrite_permissions flag is - // set. Delete incomplete copies before throwing. + // Copy a regular file, including its permissions (unless custom permissions + // are specified), and optionally timestamps. Throw std::system_error on + // failure. Fail if the destination file exists and the overwrite_content + // flag is not set. Leave permissions of an existing destination file intact + // (including if custom permissions are specified) unless the + // overwrite_permissions flag is set. Delete incomplete copies before + // throwing. // // Note that in case of overwriting, the existing destination file gets // truncated (not deleted) prior to being overwritten. As a side-effect, @@ -397,7 +425,10 @@ namespace butl // fail. // LIBBUTL_SYMEXPORT void - cpfile (const path& from, const path& to, cpflags = cpflags::none); + cpfile (const path& from, + const path& to, + cpflags = cpflags::none, + optional<permissions> perm = nullopt); // Copy a regular file into (inside) an existing directory. // @@ -605,32 +636,6 @@ namespace butl return dir_atime (p.string ().c_str (), t); } - // Path permissions. - // - enum class permissions: std::uint16_t - { - // Note: matching POSIX values. - // - xo = 0001, - wo = 0002, - ro = 0004, - - xg = 0010, - wg = 0020, - rg = 0040, - - xu = 0100, - wu = 0200, - ru = 0400, - - none = 0 - }; - - inline permissions operator& (permissions, permissions); - inline permissions operator| (permissions, permissions); - inline permissions operator&= (permissions&, permissions); - inline permissions operator|= (permissions&, permissions); - // Get path permissions. Throw std::system_error on failure. Note that this // function resolves symlinks. // @@ -652,12 +657,45 @@ namespace butl // Symlink target type in case of the symlink, ltype() otherwise. 
// + // If type() returns entry_type::unknown then this entry is inaccessible + // (ltype() also returns entry_type::unknown) or is a dangling symlink + // (ltype() returns entry_type::symlink). Used with the detect_dangling + // dir_iterator mode. Note that on POSIX ltype() can never return unknown + // (because it is part of the directory iteration result). + // entry_type type () const; entry_type ltype () const; + // Modification and access times of the filesystem entry if it is not a + // symlink and of the symlink target otherwise. + // + // These are provided as an optimization if they can be obtained as a + // byproduct of work that is already being done anyway (iteration itself, + // calls to [l]type(), etc). If (not yet) available, timestamp_unknown is + // returned. + // + // Specifically: + // + // - On Windows mtime is always set by dir_iterator for entries other than + // reparse points. + // + // - On all platforms mtime and atime are always set for symlink targets + // by dir_iterator in the {detect,ignore}_dangling modes. + // + // - On all platforms mtime and atime can potentially be set by [l]type() + // if the stat() call is required to retrieve the type information (the + // native directory entry iterating API doesn't provide it, the type of + // the symlink target is queried, etc). + // + timestamp + mtime () const {return mtime_;} + + timestamp + atime () const {return atime_;} + // Entry path (excluding the base). To get the full path, do // base () / path (). 
// @@ -668,8 +706,17 @@ namespace butl base () const {return b_;} dir_entry () = default; - dir_entry (entry_type t, path_type p, dir_path b) - : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {} + + dir_entry (entry_type t, + path_type p, + dir_path b, + timestamp mt = timestamp_unknown, + timestamp at = timestamp_unknown) + : t_ (t), + mtime_ (mt), + atime_ (at), + p_ (std::move (p)), + b_ (std::move (b)) {} private: entry_type @@ -678,8 +725,14 @@ namespace butl private: friend class dir_iterator; - mutable entry_type t_ = entry_type::unknown; // Lazy evaluation. - mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation. + // Note: lazy evaluation. + // + mutable optional<entry_type> t_; // Entry type. + mutable optional<entry_type> lt_; // Symlink target type. + + mutable timestamp mtime_ = timestamp_unknown; + mutable timestamp atime_ = timestamp_unknown; + path_type p_; dir_path b_; }; @@ -696,12 +749,15 @@ namespace butl ~dir_iterator (); dir_iterator () = default; - // If it is requested to ignore dangling symlinks, then the increment - // operator will skip symlinks that refer to non-existing or inaccessible - // targets. That implies that it will always try to stat() symlinks. + // If the mode is either ignore_dangling or detect_dangling, then stat() + // the entry and either ignore inaccessible/dangling entry or return it + // with the corresponding dir_entry type set to unknown (see dir_entry + // type()/ltype() for details). // + enum mode {no_follow, detect_dangling, ignore_dangling}; + explicit - dir_iterator (const dir_path&, bool ignore_dangling); + dir_iterator (const dir_path&, mode); dir_iterator (const dir_iterator&) = delete; dir_iterator& operator= (const dir_iterator&) = delete; @@ -727,10 +783,10 @@ namespace butl #ifndef _WIN32 DIR* h_ = nullptr; #else - intptr_t h_ = -1; + intptr_t h_ = -1; // INVALID_HANDLE_VALUE #endif - bool ignore_dangling_ = false; + mode mode_ = no_follow; }; // Range-based for loop support. 
@@ -821,9 +877,20 @@ namespace butl // (a/b/, b*/, true) // (a/b/c/, c*/, false) // - // Note that recursive iterating through directories currently goes - // depth-first which make sense for the cleanup use cases. In future we may - // want to make it controllable. + // Note that recursive iterating through directories currently goes depth- + // first which make sense for the cleanup use cases. In the future we may + // want to make this controllable. + // + // If the match flags contain follow_symlinks, then call the dangling + // callback function for inaccessible/dangling entries if specified, and + // throw appropriate std::system_error otherwise. If the callback function + // returns true, then inaccessible/dangling entry is ignored. Otherwise, + // the entire search is stopped. + // + // Note also that if pattern is not simple (that is, contains directory + // components), then some symlinks (those that are matched against the + // directory components) may still be followed and thus the dangling + // function called. // LIBBUTL_SYMEXPORT void path_search (const path& pattern, @@ -831,7 +898,8 @@ namespace butl const std::string& pattern, bool interm)>&, const dir_path& start = dir_path (), - path_match_flags = path_match_flags::follow_symlinks); + path_match_flags = path_match_flags::follow_symlinks, + const std::function<bool (const dir_entry&)>& dangling = nullptr); // Same as above, but behaves as if the directory tree being searched // through contains only the specified entry. The start directory is used if diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx index 763d311..b3f9224 100644 --- a/libbutl/filesystem.ixx +++ b/libbutl/filesystem.ixx @@ -11,7 +11,7 @@ namespace butl { // @@ Could 0 size be a valid and faster way? 
// - return dir_iterator (d, false /* ignore_dangling */) == dir_iterator (); + return dir_iterator (d, dir_iterator::no_follow) == dir_iterator (); } inline bool @@ -73,7 +73,7 @@ namespace butl // template <typename P> inline auto_rm<P>:: - auto_rm (auto_rm&& x) + auto_rm (auto_rm&& x) noexcept : path (std::move (x.path)), active (x.active) { x.active = false; @@ -81,7 +81,7 @@ namespace butl template <typename P> inline auto_rm<P>& auto_rm<P>:: - operator= (auto_rm&& x) + operator= (auto_rm&& x) noexcept { if (this != &x) { @@ -137,54 +137,28 @@ namespace butl static_cast<std::uint16_t> (y)); } - // path_match_flags - // - inline path_match_flags operator& (path_match_flags x, path_match_flags y) - { - return x &= y; - } - - inline path_match_flags operator| (path_match_flags x, path_match_flags y) - { - return x |= y; - } - - inline path_match_flags operator&= (path_match_flags& x, path_match_flags y) - { - return x = static_cast<path_match_flags> ( - static_cast<std::uint16_t> (x) & - static_cast<std::uint16_t> (y)); - } - - inline path_match_flags operator|= (path_match_flags& x, path_match_flags y) - { - return x = static_cast<path_match_flags> ( - static_cast<std::uint16_t> (x) | - static_cast<std::uint16_t> (y)); - } - // dir_entry // inline entry_type dir_entry:: ltype () const { - return t_ != entry_type::unknown ? t_ : (t_ = type (false)); + return t_ ? *t_ : *(t_ = type (false /* follow_symlinks */)); } inline entry_type dir_entry:: type () const { entry_type t (ltype ()); - return t != entry_type::symlink - ? t - : lt_ != entry_type::unknown ? lt_ : (lt_ = type (true)); + return t != entry_type::symlink ? t : + lt_ ? 
*lt_ : + *(lt_ = type (true /* follow_symlinks */)); } // dir_iterator // inline dir_iterator:: dir_iterator (dir_iterator&& x) noexcept - : e_ (std::move (x.e_)), h_ (x.h_), ignore_dangling_ (x.ignore_dangling_) + : e_ (std::move (x.e_)), h_ (x.h_), mode_ (x.mode_) { #ifndef _WIN32 x.h_ = nullptr; diff --git a/libbutl/git.cxx b/libbutl/git.cxx index cc10c91..f37e16a 100644 --- a/libbutl/git.cxx +++ b/libbutl/git.cxx @@ -36,7 +36,9 @@ namespace butl // MinGit: git version 2.16.1.windows.1 // if (s.compare (0, 12, "git version ") == 0) - return parse_semantic_version (s, 12, "" /* build_separators */); + return parse_semantic_version (s, 12, + semantic_version::allow_build, + "" /* build_separators */); return nullopt; } diff --git a/libbutl/host-os-release.cxx b/libbutl/host-os-release.cxx new file mode 100644 index 0000000..f13f62c --- /dev/null +++ b/libbutl/host-os-release.cxx @@ -0,0 +1,323 @@ +// file : libbutl/host-os-release.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbutl/host-os-release.hxx> + +#include <sstream> +#include <stdexcept> // runtime_error + +#include <libbutl/path.hxx> +#include <libbutl/path-io.hxx> +#include <libbutl/utility.hxx> +#include <libbutl/process.hxx> +#include <libbutl/fdstream.hxx> +#include <libbutl/filesystem.hxx> // file_exists() +#include <libbutl/string-parser.hxx> // parse_quoted() + +#ifdef _WIN32 +# include <libbutl/win32-utility.hxx> +#endif + +using namespace std; + +namespace butl +{ + // Note: exported for access from the test. + // + LIBBUTL_SYMEXPORT os_release + host_os_release_linux (path f = {}) + { + os_release r; + + // According to os-release(5), we should use /etc/os-release and fallback + // to /usr/lib/os-release if the former does not exist. It also lists the + // fallback values for individual variables, in case some are not present. 
+ // + auto exists = [] (const path& f) + { + try + { + return file_exists (f); + } + catch (const system_error& e) + { + ostringstream os; + os << "unable to stat path " << f << ": " << e; + throw runtime_error (os.str ()); + } + }; + + if (!f.empty () + ? exists (f) + : (exists (f = path ("/etc/os-release")) || + exists (f = path ("/usr/lib/os-release")))) + { + try + { + ifdstream ifs (f, ifdstream::badbit); + + string l; + for (uint64_t ln (1); !eof (getline (ifs, l)); ++ln) + { + trim (l); + + // Skip blank lines and comments. + // + if (l.empty () || l[0] == '#') + continue; + + // The variable assignments are in the "shell style" and so can be + // quoted/escaped. For now we only handle quoting, which is what all + // the instances seen in the wild seem to use. + // + size_t p (l.find ('=')); + if (p == string::npos) + continue; + + string n (l, 0, p); + l.erase (0, p + 1); + + using string_parser::parse_quoted; + using string_parser::invalid_string; + + try + { + if (n == "ID_LIKE") + { + r.like_ids.clear (); + + vector<string> vs (parse_quoted (l, true /* unquote */)); + for (const string& v: vs) + { + for (size_t b (0), e (0); next_word (v, b, e); ) + { + r.like_ids.push_back (string (v, b, e - b)); + } + } + } + else if (string* p = (n == "ID" ? &r.name_id : + n == "VERSION_ID" ? &r.version_id : + n == "VARIANT_ID" ? &r.variant_id : + n == "NAME" ? &r.name : + n == "VERSION_CODENAME" ? &r.version_codename : + n == "VARIANT" ? 
&r.variant : + nullptr)) + { + vector<string> vs (parse_quoted (l, true /* unquote */)); + switch (vs.size ()) + { + case 0: *p = ""; break; + case 1: *p = move (vs.front ()); break; + default: throw invalid_string (0, "multiple values"); + } + } + } + catch (const invalid_string& e) + { + ostringstream os; + os << "invalid " << n << " value in " << f << ':' << ln << ": " + << e; + throw runtime_error (os.str ()); + } + } + + ifs.close (); + } + catch (const ios::failure& e) + { + ostringstream os; + os << "unable to read from " << f << ": " << e; + throw runtime_error (os.str ()); + } + } + + // Assign fallback values. + // + if (r.name_id.empty ()) r.name_id = "linux"; + if (r.name.empty ()) r.name = "Linux"; + + return r; + } + + static os_release + host_os_release_macos () + { + // Run sw_vers -productVersion to get Mac OS version. + // + try + { + process pr; + try + { + fdpipe pipe (fdopen_pipe ()); + + pr = process_start (0, pipe, 2, "sw_vers", "-productVersion"); + + pipe.out.close (); + ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); + + // The output should be one line containing the version. + // + optional<string> v; + for (string l; !eof (getline (is, l)); ) + { + if (l.empty () || v) + { + v = nullopt; + break; + } + + v = move (l); + } + + is.close (); // Detect errors. + + if (pr.wait ()) + { + if (!v) + throw runtime_error ("unexpected sw_vers -productVersion output"); + + return os_release {"macos", {}, move (*v), "", "Mac OS", "", ""}; + } + + } + catch (const ios::failure& e) + { + if (pr.wait ()) + { + ostringstream os; + os << "error reading sw_vers output: " << e; + throw runtime_error (os.str ()); + } + + // Fall through. + } + + // We should only get here if the child exited with an error status. 
+ // + assert (!pr.wait ()); + throw runtime_error ("process sw_vers exited with non-zero code"); + } + catch (const process_error& e) + { + ostringstream os; + os << "unable to execute sw_vers: " << e; + throw runtime_error (os.str ()); + } + } + + static os_release + host_os_release_windows () + { +#ifdef _WIN32 + // The straightforward way to get the version would be the GetVersionEx() + // Win32 function. However, if the application is built with a certain + // assembly manifest, this function will return the version the + // application was built for rather than what's actually running. + // + // The other plausible options are to call the `ver` program and parse its + // output (of questionable regularity) or to call RtlGetVersion(). The + // latter combined with GetProcAddress() seems to be a widely-used + // approach, so we are going with that (seeing that we employ a similar + // technique in quite a few places). + // + HMODULE nh (GetModuleHandle ("ntdll.dll")); + if (nh == nullptr) + throw runtime_error ("unable to get handle to ntdll.dll"); + + using RtlGetVersion = LONG /*NTSTATUS*/ (WINAPI*)(PRTL_OSVERSIONINFOW); + + RtlGetVersion gv ( + function_cast<RtlGetVersion> ( + GetProcAddress (nh, "RtlGetVersion"))); + + // RtlGetVersion() is available from Windows 2000 which is way before + // anything we might possibly care about (e.g., XP or 7). + // + if (gv == nullptr) + throw runtime_error ("unable to get address of RtlGetVersion()"); + + RTL_OSVERSIONINFOW vi; + vi.dwOSVersionInfoSize = sizeof (vi); + gv (&vi); // Always succeeds, according to documentation. + + // Ok, the real mess starts here. 
Here is how the commonly known Windows + // versions correspond to the major/minor/build numbers and how we will + // map them (note that there are also Server versions in the mix; see the + // OSVERSIONINFOEXW struct documentation for the complete picture): + // + // major minor build mapped + // Windows 11 10 0 >=22000 11 + // Windows 10 10 0 <22000 10 + // Windows 8.1 6 3 8.1 + // Windows 8 6 2 8 + // Windows 7 6 1 7 + // Windows Vista 6 0 6 + // Windows XP Pro/64-bit 5 2 5.2 + // Windows XP 5 1 5.1 + // Windows 2000 5 0 5 + // + // Based on this it's probably not wise to try to map any future versions + // automatically. + // + string v; + if (vi.dwMajorVersion == 10 && vi.dwMinorVersion == 0) + { + v = vi.dwBuildNumber >= 22000 ? "11" : "10"; + } + else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 3) v = "8.1"; + else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 2) v = "8"; + else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 1) v = "7"; + else if (vi.dwMajorVersion == 6 && vi.dwMinorVersion == 0) v = "6"; + else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 2) v = "5.2"; + else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 1) v = "5.1"; + else if (vi.dwMajorVersion == 5 && vi.dwMinorVersion == 0) v = "5"; + else throw runtime_error ("unknown windows version " + + std::to_string (vi.dwMajorVersion) + '.' + + std::to_string (vi.dwMinorVersion) + '.' 
+ + std::to_string (vi.dwBuildNumber)); + + return os_release {"windows", {}, move (v), "", "Windows", "", ""}; +#else + throw runtime_error ("unexpected host operating system"); +#endif + } + + optional<os_release> + host_os_release (const target_triplet& h) + { + const string& c (h.class_); + const string& s (h.system); + + if (c == "linux") + return host_os_release_linux (); + + if (c == "macos") + return host_os_release_macos (); + + if (c == "windows") + return host_os_release_windows (); + + if (c == "bsd") + { + // @@ TODO: ideally we would want to run uname and obtain the actual + // version we are running on rather than what we've been built for. + // (Think also how this will affect tests). + // + if (s == "freebsd") + return os_release {"freebsd", {}, h.version, "", "FreeBSD", "", ""}; + + if (s == "netbsd") + return os_release {"netbsd", {}, h.version, "", "NetBSD", "", ""}; + + if (s == "openbsd") + return os_release {"openbsd", {}, h.version, "", "OpenBSD", "", ""}; + + // Assume some other BSD. + // + return os_release {s, {}, h.version, "", s, "", ""}; + } + + return nullopt; + } +} diff --git a/libbutl/host-os-release.hxx b/libbutl/host-os-release.hxx new file mode 100644 index 0000000..058afdc --- /dev/null +++ b/libbutl/host-os-release.hxx @@ -0,0 +1,86 @@ +// file : libbutl/host-os-release.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#pragma once + +#include <string> +#include <vector> + +#include <libbutl/optional.hxx> +#include <libbutl/target-triplet.hxx> + +#include <libbutl/export.hxx> + +namespace butl +{ + // Information extracted from /etc/os-release on Linux. See os-release(5) + // for background. For other platforms we derive the equivalent information + // from other sources. 
Some examples: + // + // {"debian", {}, "10", "", + // "Debian GNU/Linux", "buster", ""} + // + // {"fedora", {}, "35", "workstation", + // "Fedora Linux", "", "Workstation Edition"} + // + // {"ubuntu", {"debian"}, "20.04", "", + // "Ubuntu", "focal", ""} + // + // {"macos", {}, "12.5", "", + // "Mac OS", "", ""} + // + // {"freebsd", {}, "13.1", "", + // "FreeBSD", "", ""} + // + // {"windows", {}, "10", "", + // "Windows", "", ""} + // + // Note that for Mac OS, the version is the Mac OS version (as printed by + // sw_vers) rather than Darwin version (as printed by uname). + // + // For Windows we currently do not distinguish the Server edition and the + // version mapping is as follows: + // + // Windows 11 11 + // Windows 10 10 + // Windows 8.1 8.1 + // Windows 8 8 + // Windows 7 7 + // Windows Vista 6 + // Windows XP Pro/64-bit 5.2 + // Windows XP 5.1 + // Windows 2000 5 + // + // Note that version_id may be empty, for example, on Debian testing: + // + // {"debian", {}, "", "", + // "Debian GNU/Linux", "", ""} + // + // Note also that we don't extract PRETTY_NAME because its content is + // unpredictable. For example, it may include variant, as in "Fedora Linux + // 35 (Workstation Edition)". Instead, construct it from the individual + // components as appropriate, normally "$name $version ($version_codename)". + // + struct os_release + { + std::string name_id; // ID + std::vector<std::string> like_ids; // ID_LIKE + std::string version_id; // VERSION_ID + std::string variant_id; // VARIANT_ID + + std::string name; // NAME + std::string version_codename; // VERSION_CODENAME + std::string variant; // VARIANT + }; + + // Return the release information for the specified host or nullopt if the + // specific host is unknown/unsupported. Throw std::runtime_error if + // anything goes wrong. + // + // Note that "host" here implies that we may be running programs, reading + // files, examining environment variables, etc., of the machine we are + // running on. 
+ // + LIBBUTL_SYMEXPORT optional<os_release> + host_os_release (const target_triplet& host); +} diff --git a/libbutl/json/parser.cxx b/libbutl/json/parser.cxx new file mode 100644 index 0000000..8ef7422 --- /dev/null +++ b/libbutl/json/parser.cxx @@ -0,0 +1,645 @@ +#define PDJSON_SYMEXPORT static // See below. + +#include <libbutl/json/parser.hxx> + +#include <istream> + +// There is an issue (segfault) with using std::current_exception() and +// std::rethrow_exception() with older versions of libc++ on Linux. While the +// exact root cause hasn't been determined, the suspicion is that something +// gets messed up if we "smuggle" std::exception_ptr through extern "C" call +// frames (we cannot even destroy such an exception without a segfault). We +// also could not determine in which version exactly this has been fixed but +// we know that libc++ 6.0.0 doesn't appear to have this issue (though we are +// not entirely sure the issue is (only) in libc++; libgcc_s could also be +// involved). +// +// The workaround is to just catch (and note) the exception and then throw a +// new instance of generic std::istream::failure. In order not to drag the +// below test into the header, we wrap exception_ptr with optional<> and use +// NULL to indicate the presence of the exception when the workaround is +// required. +// +// Note that if/when we drop this workaround, we should also get rid of +// optional<> in stream::exception member. 
+// +#undef LIBBUTL_JSON_NO_EXCEPTION_PTR + +#if defined (__linux__) && defined(__clang__) +# if __has_include(<__config>) +# include <__config> // _LIBCPP_VERSION +# if _LIBCPP_VERSION < 6000 +# define LIBBUTL_JSON_NO_EXCEPTION_PTR 1 +# endif +# endif +#endif + +namespace butl +{ + namespace json + { + using namespace std; + + parser:: + ~parser () + { + json_close (impl_); + } + + static int + stream_get (void* x) + { + auto& s (*static_cast<parser::stream*> (x)); + + // In the multi-value mode reading of whitespaces/separators is split + // between our code and pdjson's. As a result, these functions may end + // up being called more than once after EOF is reached. Which is + // something iostream does not handle gracefully. + // + if (!s.is->eof ()) + { + try + { + // We first peek not to trip failbit on EOF. + // + if (s.is->peek () != istream::traits_type::eof ()) + return static_cast<char> (s.is->get ()); + } + catch (...) + { +#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR + s.exception = current_exception (); +#else + s.exception = nullptr; +#endif + } + } + + return EOF; + } + + static int + stream_peek (void* x) + { + auto& s (*static_cast<parser::stream*> (x)); + + if (!s.is->eof ()) + { + try + { + auto c (s.is->peek ()); + if (c != istream::traits_type::eof ()) + return static_cast<char> (c); + } + catch (...) + { +#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR + s.exception = current_exception (); +#else + s.exception = nullptr; +#endif + } + } + + return EOF; + } + + // NOTE: watch out for exception safety (specifically, doing anything that + // might throw after opening the stream). 
+ // + parser:: + parser (istream& is, const char* n, bool mv, const char* sep) noexcept + : input_name (n), + stream_ {&is, nullopt}, + multi_value_ (mv), + separators_ (sep), + raw_s_ (nullptr), + raw_n_ (0) + { + json_open_user (impl_, &stream_get, &stream_peek, &stream_); + json_set_streaming (impl_, multi_value_); + } + + parser:: + parser (const void* t, + size_t s, + const char* n, + bool mv, + const char* sep) noexcept + : input_name (n), + stream_ {nullptr, nullopt}, + multi_value_ (mv), + separators_ (sep), + raw_s_ (nullptr), + raw_n_ (0) + { + json_open_buffer (impl_, t, s); + json_set_streaming (impl_, multi_value_); + } + + optional<event> parser:: + next () + { + name_p_ = value_p_ = location_p_ = false; + + // Note that for now we don't worry about the state of the parser if + // next_impl() throws assuming it is not going to be reused. + // + if (peeked_) + { + parsed_ = peeked_; + peeked_ = nullopt; + } + else + parsed_ = next_impl (); + + return translate (*parsed_); + } + + optional<event> parser:: + peek () + { + if (!peeked_) + { + if (parsed_) + { + cache_parsed_data (); + cache_parsed_location (); + } + peeked_ = next_impl (); + } + return translate (*peeked_); + } + + static inline const char* + event_name (event e) + { + switch (e) + { + case event::begin_object: return "beginning of object"; + case event::end_object: return "end of object"; + case event::begin_array: return "beginning of array"; + case event::end_array: return "end of array"; + case event::name: return "member name"; + case event::string: return "string value"; + case event::number: return "numeric value"; + case event::boolean: return "boolean value"; + case event::null: return "null value"; + } + + return ""; + } + + bool parser:: + next_expect (event p, optional<event> s) + { + optional<event> e (next ()); + bool r; + if (e && ((r = *e == p) || (s && *e == *s))) + return r; + + string d ("expected "); + d += event_name (p); + + if (s) + { + d += " or "; + d += 
event_name (*s); + } + + if (e) + { + d += " instead of "; + d += event_name (*e); + } + + throw invalid_json_input (input_name != nullptr ? input_name : "", + line (), + column (), + position (), + move (d)); + } + + void parser:: + next_expect_name (const char* n, bool su) + { + for (;;) + { + next_expect (event::name); + + if (name () == n) + return; + + if (!su) + break; + + next_expect_value_skip (); + } + + string d ("expected object member name '"); + d += n; + d += "' instead of '"; + d += name (); + d += '\''; + + throw invalid_json_input (input_name != nullptr ? input_name : "", + line (), + column (), + position (), + move (d)); + } + + void parser:: + next_expect_value_skip () + { + optional<event> e (next ()); + + if (e) + { + switch (*e) + { + case event::begin_object: + case event::begin_array: + { + // Skip until matching end_object/array keeping track of nesting. + // We are going to rely on the fact that we should either get such + // an event or next() should throw. + // + event be (*e); + event ee (be == event::begin_object + ? event::end_object + : event::end_array); + + for (size_t n (0);; ) + { + event e (*next ()); + + if (e == ee) + { + if (n == 0) + break; + + --n; + } + else if (e == be) + ++n; + } + + return; + } + case event::string: + case event::number: + case event::boolean: + case event::null: + return; + case event::name: + case event::end_object: + case event::end_array: + break; + } + } + + string d ("expected value"); + + if (e) + { + d += " instead of "; + d += event_name (*e); + } + + throw invalid_json_input (input_name != nullptr ? 
input_name : "", + line (), + column (), + position (), + move (d)); + } + + std::uint64_t parser:: + line () const noexcept + { + if (!location_p_) + { + if (!parsed_) + return 0; + + assert (!peeked_); + + return static_cast<uint64_t> ( + json_get_lineno (const_cast<json_stream*> (impl_))); + } + + return line_; + } + + std::uint64_t parser:: + column () const noexcept + { + if (!location_p_) + { + if (!parsed_) + return 0; + + assert (!peeked_); + + return static_cast<uint64_t> ( + json_get_column (const_cast<json_stream*> (impl_))); + } + + return column_; + } + + std::uint64_t parser:: + position () const noexcept + { + if (!location_p_) + { + if (!parsed_) + return 0; + + assert (!peeked_); + + return static_cast<uint64_t> ( + json_get_position (const_cast<json_stream*> (impl_))); + } + + return position_; + } + + json_type parser:: + next_impl () + { + raw_s_ = nullptr; + raw_n_ = 0; + json_type e; + + // Read characters between values skipping required separators and JSON + // whitespaces. Return whether a required separator was encountered as + // well as the first non-separator/whitespace character (which, if EOF, + // should trigger a check for input/output errors). + // + // Note that the returned non-separator will not have been extracted + // from the input (so position, column, etc. will still refer to its + // predecessor). + // + auto skip_separators = [this] () -> pair<bool, int> + { + bool r (separators_ == nullptr); + + int c; + for (; (c = json_source_peek (impl_)) != EOF; json_source_get (impl_)) + { + // User separator. + // + if (separators_ != nullptr && *separators_ != '\0') + { + if (strchr (separators_, c) != nullptr) + { + r = true; + continue; + } + } + + // JSON separator. 
+ // + if (json_isspace (c)) + { + if (separators_ != nullptr && *separators_ == '\0') + r = true; + + continue; + } + + break; + } + + return make_pair (r, c); + }; + + // In the multi-value mode skip any instances of required separators + // (and any other JSON whitespace) preceding the first JSON value. + // + if (multi_value_ && !parsed_ && !peeked_) + { + if (skip_separators ().second == EOF && stream_.is != nullptr) + { + if (stream_.exception) goto fail_rethrow; + if (stream_.is->fail ()) goto fail_stream; + } + } + + e = json_next (impl_); + + // First check for a pending input/output error. + // + if (stream_.is != nullptr) + { + if (stream_.exception) goto fail_rethrow; + if (stream_.is->fail ()) goto fail_stream; + } + + // There are two ways to view separation between two values: as following + // the first value or as preceding the second value. And one aspect that + // is determined by this is whether a separation violation is a problem + // with the first value or with the second, which becomes important if + // the user bails out before parsing the second value. + // + // Consider these two unseparated values (yes, in JSON they are two + // values, leading zeros are not allowed in JSON numbers): + // + // 01 + // + // If the user bails out after parsing 0 in a stream that should have + // been newline-delimited, they most likely would want to get an error + // since this is most definitely an invalid value rather than two + // values that are not properly separated. So in this light we handle + // separators at the end of the first value. + // + switch (e) + { + case JSON_DONE: + { + // Deal with the following value separators. + // + // Note that we must not do this for the second JSON_DONE (or the + // first one in case there are no values) that signals the end of + // input. + // + if (multi_value_ && + (parsed_ || peeked_) && + (peeked_ ?
*peeked_ : *parsed_) != JSON_DONE) + { + auto p (skip_separators ()); + + if (p.second == EOF && stream_.is != nullptr) + { + if (stream_.exception) goto fail_rethrow; + if (stream_.is->fail ()) goto fail_stream; + } + + // Note that we don't require separators after the last value. + // + if (!p.first && p.second != EOF) + { + json_source_get (impl_); // Consume to update column number. + goto fail_separation; + } + + json_reset (impl_); + } + break; + } + case JSON_ERROR: goto fail_json; + case JSON_STRING: + case JSON_NUMBER: + raw_s_ = json_get_string (impl_, &raw_n_); + raw_n_--; // Includes terminating `\0`. + break; + case JSON_TRUE: raw_s_ = "true"; raw_n_ = 4; break; + case JSON_FALSE: raw_s_ = "false"; raw_n_ = 5; break; + case JSON_NULL: raw_s_ = "null"; raw_n_ = 4; break; + default: break; + } + + return e; + + fail_json: + throw invalid_json_input ( + input_name != nullptr ? input_name : "", + static_cast<uint64_t> (json_get_lineno (impl_)), + static_cast<uint64_t> (json_get_column (impl_)), + static_cast<uint64_t> (json_get_position (impl_)), + json_get_error (impl_)); + + fail_separation: + throw invalid_json_input ( + input_name != nullptr ? input_name : "", + static_cast<uint64_t> (json_get_lineno (impl_)), + static_cast<uint64_t> (json_get_column (impl_)), + static_cast<uint64_t> (json_get_position (impl_)), + "missing separator between JSON values"); + + fail_stream: + throw invalid_json_input ( + input_name != nullptr ? 
input_name : "", + static_cast<uint64_t> (json_get_lineno (impl_)), + static_cast<uint64_t> (json_get_column (impl_)), + static_cast<uint64_t> (json_get_position (impl_)), + "unable to read JSON input text"); + + fail_rethrow: +#ifndef LIBBUTL_JSON_NO_EXCEPTION_PTR + rethrow_exception (move (*stream_.exception)); +#else + throw istream::failure ("unable to read"); +#endif + } + + optional<event> parser:: + translate (json_type e) const noexcept + { + switch (e) + { + case JSON_DONE: return nullopt; + case JSON_OBJECT: return event::begin_object; + case JSON_OBJECT_END: return event::end_object; + case JSON_ARRAY: return event::begin_array; + case JSON_ARRAY_END: return event::end_array; + case JSON_STRING: + { + // This can be a value or, inside an object, a name from the + // name/value pair. + // + size_t n; + return json_get_context (const_cast<json_stream*> (impl_), &n) == + JSON_OBJECT && + n % 2 == 1 + ? event::name + : event::string; + } + case JSON_NUMBER: return event::number; + case JSON_TRUE: return event::boolean; + case JSON_FALSE: return event::boolean; + case JSON_NULL: return event::null; + case JSON_ERROR: assert (false); // Should've been handled by caller. + } + + return nullopt; // Should never reach. 
+ } + + void parser:: + cache_parsed_data () + { + name_p_ = value_p_ = false; + if (const optional<event> e = translate (*parsed_)) + { + if (e == event::name) + { + name_.assign (raw_s_, raw_n_); + name_p_ = true; + } + else if (value_event (e)) + { + value_.assign (raw_s_, raw_n_); + value_p_ = true; + } + } + } + + void parser:: + cache_parsed_location () noexcept + { + line_ = static_cast<uint64_t> (json_get_lineno (impl_)); + column_ = static_cast<uint64_t> (json_get_column (impl_)); + position_ = static_cast<uint64_t> (json_get_position (impl_)); + location_p_ = true; + } + + bool parser:: + value_event (optional<event> e) noexcept + { + if (!e) + return false; + + switch (*e) + { + case event::string: + case event::number: + case event::boolean: + case event::null: + return true; + default: + return false; + } + } + + [[noreturn]] void parser:: + throw_invalid_value (const char* type, const char* v, size_t n) const + { + string d (string ("invalid ") + type + " value: '"); + d.append (v, n); + d += '\''; + + throw invalid_json_input (input_name != nullptr ? input_name : "", + line (), + column (), + position (), + move (d)); + } + } // namespace json +} // namespace butl + +// Include the implementation into our translation unit (instead of compiling +// it separately) to (hopefully) get function inlining without LTO. +// +// Let's keep it last since the implementation defines a couple of macros. 
+// +#if defined(__clang__) || defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +extern "C" +{ +#define PDJSON_STACK_INC 16 +#define PDJSON_STACK_MAX 2048 +#include "pdjson.c" +} diff --git a/libbutl/json/parser.hxx b/libbutl/json/parser.hxx new file mode 100644 index 0000000..95d9c4e --- /dev/null +++ b/libbutl/json/parser.hxx @@ -0,0 +1,705 @@ +#pragma once + +#ifdef BUILD2_BOOTSTRAP +# error JSON parser not available during bootstrap +#endif + +#include <iosfwd> +#include <string> +#include <cstddef> // size_t +#include <cstdint> // uint64_t +#include <utility> // pair +#include <exception> // exception_ptr +#include <stdexcept> // invalid_argument + +#include <libbutl/optional.hxx> // butl::optional is std::optional or similar. + +#include <libbutl/json/event.hxx> + +#include <libbutl/json/pdjson.h> // Implementation details. + +#include <libbutl/export.hxx> + +namespace butl +{ + // Using the RFC8259 terminology: JSON (input) text, JSON value, object + // member. + // + namespace json + { + class invalid_json_input: public std::invalid_argument + { + public: + std::string name; + std::uint64_t line; + std::uint64_t column; + std::uint64_t position; + + invalid_json_input (std::string name, + std::uint64_t line, + std::uint64_t column, + std::uint64_t position, + const std::string& description); + + invalid_json_input (std::string name, + std::uint64_t line, + std::uint64_t column, + std::uint64_t position, + const char* description); + }; + + class LIBBUTL_SYMEXPORT parser + { + public: + const char* input_name; + + // Construction. + // + + // Parse JSON input text from std::istream. + // + // The name argument is used to identify the input being parsed. Note + // that the stream, name, and separators are kept as references so they + // must outlive the parser instance. + // + // If stream exceptions are enabled then the std::ios_base::failure + // exception is used to report input/output errors (badbit and failbit). 
+ // Otherwise, those are reported as the invalid_json_input exception. + // + // If multi_value is true, enable the multi-value mode in which case the + // input stream may contain multiple JSON values (more precisely, zero + // or more). If false (the default), parsing will fail unless there is + // exactly one JSON value in the input stream. + // + // If multi_value is true, the separators argument specifies the + // required separator characters between JSON values. At least one of + // them must be present between every pair of JSON values (in addition + // to any number of JSON whitespaces). No separators are required after + // the last JSON value (but any found will be skipped). + // + // Specifically, if it is NULL, then no separation is required (that is, + // both `{...}{...}` and `{...} {...}` would be valid). If it is empty, + // then at least one JSON whitespace is required. And if it is non- + // empty, then at least one of its characters must be present (for + // example, "\n\t" would require at least one newline or TAB character + // between JSON values). + // + // Note that a separator need not be valid JSON whitespace: any + // character is acceptable (though it probably shouldn't be an object, + // array, or string delimiter and should not occur within a non-self- + // delimited top-level value, such as `true`, `false`, `null`, or a + // number). All instances of required separators before and after a + // value are skipped. Therefore JSON Text Sequences (RFC 7464; AKA + // Record Separator-delimited JSON), which requires the RS (0x1E) + // character before each value, can be handled as well. 
+ // + parser (std::istream&, + const std::string& name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (std::istream&, + const char* name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (std::istream&, + std::string&&, + bool = false, + const char* = nullptr) = delete; + + // Parse a memory buffer that contains the entire JSON input text. + // + // The name argument is used to identify the input being parsed. Note + // that the buffer, name, and separators are kept as references so they + // must outlive the parser instance. + // + parser (const void* text, + std::size_t size, + const std::string& name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const void* text, + std::size_t size, + const char* name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const void*, + std::size_t, + std::string&&, + bool = false, + const char* = nullptr) = delete; + + // Similar to the above but parse a string. + // + parser (const std::string& text, + const std::string& name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const std::string& text, + const char* name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const std::string&, + std::string&&, + bool = false, + const char* = nullptr) = delete; + + // Similar to the above but parse a C-string. 
+ // + parser (const char* text, + const std::string& name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const char* text, + const char* name, + bool multi_value = false, + const char* separators = nullptr) noexcept; + + parser (const char*, + std::string&&, + bool = false, + const char* = nullptr) = delete; + + parser (parser&&) = delete; + parser (const parser&) = delete; + + parser& operator= (parser&&) = delete; + parser& operator= (const parser&) = delete; + + // Event iteration. + // + + // Return the next event or nullopt if end of input is reached. + // + // In the single-value parsing mode (default) the parsing code could + // look like this: + // + // while (optional<event> e = p.next ()) + // { + // switch (*e) + // { + // // ... + // } + // } + // + // In the multi-value mode the parser additionally returns nullopt after + // every JSON value parsed (so there will be two nullopt's after the + // last JSON value, the second indicating the end of input). + // + // One way to perform multi-value parsing is with the help of the peek() + // function (see below): + // + // while (p.peek ()) + // { + // while (optional<event> e = p.next ()) + // { + // switch (*e) + // { + // //... + // } + // } + // } + // + // Note that while the single-value mode will always parse exactly one + // value, the multi-value mode will accept zero values in which case a + // single nullopt is returned. + // + optional<event> + next (); + + // The range-based for loop support. + // + // In the single-value parsing mode (default) the parsing code could + // look like this: + // + // for (event e: p) + // { + // switch (e) + // { + // //... + // } + // } + // + // And in the multi-value mode (see next() for more information) like + // this: + // + // while (p.peek ()) + // { + // for (event e: p) + // { + // switch (e) + // { + // //... 
+ // } + // } + // } + // + // Note that generally, the iterator interface doesn't make much sense + // for the parser so for now we have an implementation that is just + // enough for the range-based for. + // + struct iterator; + + iterator begin () {return iterator (this, next ());} + iterator end () {return iterator (nullptr, nullopt);} + + // Return the next event without considering it parsed. In other words, + // after this call, any subsequent calls to peek() and the next call to + // next() (if any) will all return the same event. + // + // Note that the name, value, and line corresponding to the peeked event + // are not accessible with name(), value() and line(); these functions + // will still return values corresponding to the most recent call to + // next(). The peeked values, however, can be accessed in the raw form + // using data(). + // + optional<event> + peek (); + + + // Event data access. + // + + // Return the object member name. + // + const std::string& + name (); + + // Any value (string, number, boolean, and null) can be retrieved as a + // string. Calling this function after any non-value events is illegal. + // + // Note that the value is returned as a non-const string reference and + // you are allowed to move the value out of it. However, this should not + // be done unnecessarily or in cases where the small string optimization + // is likely since the string's buffer is reused to store subsequent + // values. + // + std::string& + value (); + + // Convert the value to an integer, floating point, or bool. Throw + // invalid_json_input if the conversion is impossible without a loss. + // + template <typename T> + T + value () const; + + // Return the value or object member name in the raw form. + // + // Calling this function on non-value/name events is legal in which case + // NULL is returned. Note also that the returned data corresponds to the + // most recent event, whether peeked or parsed. 
+ // + std::pair<const char*, std::size_t> + data () const {return std::make_pair (raw_s_, raw_n_);} + + + // Higher-level API suitable for parsing specific JSON vocabularies. + // + // The API summary: + // + // void next_expect (event); + // bool next_expect (event primary, event secondary); + // + // void next_expect_name (string name, bool skip_unknown = false); + // + // std::string& next_expect_string (); + // T next_expect_string<T> (); + // std::string& next_expect_number (); + // T next_expect_number<T> (); + // std::string& next_expect_boolean (); + // T next_expect_boolean<T>(); + // + // std::string* next_expect_string_null (); + // optional<T> next_expect_string_null<T> (); + // std::string* next_expect_number_null (); + // optional<T> next_expect_number_null<T> (); + // std::string* next_expect_boolean_null (); + // optional<T> next_expect_boolean_null<T>(); + // + // std::string& next_expect_member_string (string name, bool = false); + // T next_expect_member_string<T> (string name, bool = false); + // std::string& next_expect_member_number (string name, bool = false); + // T next_expect_member_number<T> (string name, bool = false); + // std::string& next_expect_member_boolean (string name, bool = false); + // T next_expect_member_boolean<T>(string name, bool = false); + // + // std::string* next_expect_member_string_null (string, bool = false); + // optional<T> next_expect_member_string_null<T> (string, bool = false); + // std::string* next_expect_member_number_null (string, bool = false); + // optional<T> next_expect_member_number_null<T> (string, bool = false); + // std::string* next_expect_member_boolean_null (string, bool = false); + // optional<T> next_expect_member_boolean_null<T>(string, bool = false); + // + // void next_expect_member_object (string name, bool = false); + // bool next_expect_member_object_null(string name, bool = false); + // + // void next_expect_member_array (string name, bool = false); + // bool 
next_expect_member_array_null(string name, bool = false); + // + // void next_expect_value_skip(); + + // Get the next event and make sure that it's what's expected: primary + // or, if specified, secondary event. If it is not either, then throw + // invalid_json_input with appropriate description. Return true if it is + // primary. + // + // The secondary expected event is primarily useful for handling + // optional members. For example: + // + // while (p.next_expect (event::name, event::end_object)) + // { + // // Handle object member. + // } + // + // Or homogeneous arrays: + // + // while (p.next_expect (event::string, event::end_array)) + // { + // // Handle array element. + // } + // + // Or values that can be null: + // + // if (p.next_expect (event::begin_object, event::null)) + // { + // // Parse object. + // } + // + bool + next_expect (event primary, optional<event> secondary = nullopt); + + // Get the next event and make sure it is event::name and the object + // member matches the specified name. If either is not, then throw + // invalid_json_input with appropriate description. If skip_unknown is + // true, then skip over unknown member names until a match is found. + // + void + next_expect_name (const char* name, bool skip_unknown = false); + + void + next_expect_name (const std::string&, bool = false); + + // Get the next event and make sure it is event::<type> returning its + // value similar to the value() functions. If it is not, then throw + // invalid_json_input with appropriate description. + // + std::string& + next_expect_string (); + + template <typename T> + T + next_expect_string (); + + std::string& + next_expect_number (); + + template <typename T> + T + next_expect_number (); + + std::string& + next_expect_boolean (); + + template <typename T> + T + next_expect_boolean (); + + // Similar to next_expect_<type>() but in addition to event::<type> also + // allow event::null, in which case returning no value. 
+ // + std::string* + next_expect_string_null (); + + template <typename T> + optional<T> + next_expect_string_null (); + + std::string* + next_expect_number_null (); + + template <typename T> + optional<T> + next_expect_number_null (); + + std::string* + next_expect_boolean_null (); + + template <typename T> + optional<T> + next_expect_boolean_null (); + + // Call next_expect_name() followed by next_expect_<type>[_null]() + // returning its result. In other words, parse the entire object member + // with the specified name and of type <type>, returning its value. + + // next_expect_member_string() + // + std::string& + next_expect_member_string (const char* name, bool skip_unknown = false); + + std::string& + next_expect_member_string (const std::string&, bool = false); + + template <typename T> + T + next_expect_member_string (const char*, bool = false); + + template <typename T> + T + next_expect_member_string (const std::string&, bool = false); + + // next_expect_member_number() + // + std::string& + next_expect_member_number (const char* name, bool skip_unknown = false); + + std::string& + next_expect_member_number (const std::string&, bool = false); + + template <typename T> + T + next_expect_member_number (const char*, bool = false); + + template <typename T> + T + next_expect_member_number (const std::string&, bool = false); + + // next_expect_member_boolean() + // + std::string& + next_expect_member_boolean (const char* name, bool skip_unknown = false); + + std::string& + next_expect_member_boolean (const std::string&, bool = false); + + template <typename T> + T + next_expect_member_boolean (const char*, bool = false); + + template <typename T> + T + next_expect_member_boolean (const std::string&, bool = false); + + // next_expect_member_string_null() + // + std::string* + next_expect_member_string_null (const char*, bool = false); + + std::string* + next_expect_member_string_null (const std::string&, bool = false); + + template <typename T> + optional<T>
+ next_expect_member_string_null (const char*, bool = false); + + template <typename T> + optional<T> + next_expect_member_string_null (const std::string&, bool = false); + + // next_expect_member_number_null() + // + std::string* + next_expect_member_number_null (const char*, bool = false); + + std::string* + next_expect_member_number_null (const std::string&, bool = false); + + template <typename T> + optional<T> + next_expect_member_number_null (const char*, bool = false); + + template <typename T> + optional<T> + next_expect_member_number_null (const std::string&, bool = false); + + // next_expect_member_boolean_null() + // + std::string* + next_expect_member_boolean_null (const char*, bool = false); + + std::string* + next_expect_member_boolean_null (const std::string&, bool = false); + + template <typename T> + optional<T> + next_expect_member_boolean_null (const char*, bool = false); + + template <typename T> + optional<T> + next_expect_member_boolean_null (const std::string&, bool = false); + + // Call next_expect_name() followed by next_expect(event::begin_object). + // In the _null version also allow event::null, in which case return + // false. + // + void + next_expect_member_object (const char* name, bool skip_unknown = false); + + void + next_expect_member_object (const std::string&, bool = false); + + bool + next_expect_member_object_null (const char*, bool = false); + + bool + next_expect_member_object_null (const std::string&, bool = false); + + // Call next_expect_name() followed by next_expect(event::begin_array). + // In the _null version also allow event::null, in which case return + // false. 
+ // + void + next_expect_member_array (const char* name, bool skip_unknown = false); + + void + next_expect_member_array (const std::string&, bool = false); + + bool + next_expect_member_array_null (const char*, bool = false); + + bool + next_expect_member_array_null (const std::string&, bool = false); + + // Get the next event and make sure it is the beginning of a value + // (begin_object, begin_array, string, number, boolean, null). If it is + // not, then throw invalid_json_input with appropriate description. + // Otherwise, skip until the end of the value, recursively in case of + // object and array. + // + // This function is primarily useful for skipping unknown object + // members, for example: + // + // while (p.next_expect (event::name, event::end_object)) + // { + // if (p.name () == "known") + // { + // // Handle known member. + // } + // else + // p.next_expect_value_skip (); + // } + // + void + next_expect_value_skip (); + + // Parsing location. + // + + // Return the line number (1-based) corresponding to the most recently + // parsed event or 0 if nothing has been parsed yet. + // + std::uint64_t + line () const noexcept; + + // Return the column number (1-based) corresponding to the beginning of + // the most recently parsed event or 0 if nothing has been parsed yet. + // + std::uint64_t + column () const noexcept; + + // Return the position (byte offset) pointing immediately after the most + // recently parsed event or 0 if nothing has been parsed yet. + // + std::uint64_t + position () const noexcept; + + // Implementation details. + // + public: + struct iterator + { + using value_type = event; + + explicit + iterator (parser* p = nullptr, optional<event> e = nullopt) + : p_ (p), e_ (e) {} + + event operator* () const {return *e_;} + iterator& operator++ () {e_ = p_->next (); return *this;} + + // Comparison only makes sense when comparing to end (eof). 
+ // + bool operator== (iterator y) const {return !e_ && !y.e_;} + bool operator!= (iterator y) const {return !(*this == y);} + + private: + parser* p_; + optional<event> e_; + }; + + struct stream + { + std::istream* is; + optional<std::exception_ptr> exception; + }; + + [[noreturn]] void + throw_invalid_value (const char* type, const char*, std::size_t) const; + + ~parser (); + + private: + // Functionality shared by next() and peek(). + // + json_type + next_impl (); + + // Translate the event produced by the most recent call to next_impl(). + // + // Note that the underlying parser state determines whether name or + // value is returned when translating JSON_STRING. + // + optional<event> + translate (json_type) const noexcept; + + // Cache state (name/value) produced by the most recent call to + // next_impl(). + // + void + cache_parsed_data (); + + // Cache the location numbers as determined by the most recent call to + // next_impl(). + // + void + cache_parsed_location () noexcept; + + // Return true if this is a value event (string, number, boolean, or + // null). + // + static bool + value_event (optional<event>) noexcept; + + stream stream_; + + bool multi_value_; + const char* separators_; + + // The *_p_ members indicate whether the value is present (cached). + // Note: not using optional not to reallocate the string's buffer. + // + std::string name_; bool name_p_ = false; + std::string value_; bool value_p_ = false; + std::uint64_t line_, column_, position_; bool location_p_ = false; + + optional<json_type> parsed_; // Current parsed event if any. + optional<json_type> peeked_; // Current peeked event if any. + + ::json_stream impl_[1]; + + // Cached raw value. 
+ // + const char* raw_s_; + std::size_t raw_n_; + }; + } +} + +#include <libbutl/json/parser.ixx> diff --git a/libbutl/json/parser.ixx b/libbutl/json/parser.ixx new file mode 100644 index 0000000..cf6dca3 --- /dev/null +++ b/libbutl/json/parser.ixx @@ -0,0 +1,552 @@ +#include <cerrno> +#include <limits> // numeric_limits +#include <utility> // move() +#include <cassert> +#include <cstdlib> // strto*() +#include <type_traits> // enable_if, is_* +#include <cstring> // strlen() + +namespace butl +{ + namespace json + { + inline invalid_json_input:: + invalid_json_input (std::string n, + std::uint64_t l, + std::uint64_t c, + std::uint64_t p, + const std::string& d) + : invalid_json_input (move (n), l, c, p, d.c_str ()) + { + } + + inline invalid_json_input:: + invalid_json_input (std::string n, + std::uint64_t l, + std::uint64_t c, + std::uint64_t p, + const char* d) + : invalid_argument (d), + name (std::move (n)), + line (l), column (c), position (p) + { + } + + inline parser:: + parser (std::istream& is, + const std::string& n, + bool mv, + const char* sep) noexcept + : parser (is, n.c_str (), mv, sep) + { + } + + inline parser:: + parser (const void* t, + std::size_t s, + const std::string& n, + bool mv, + const char* sep) noexcept + : parser (t, s, n.c_str (), mv, sep) + { + } + + inline parser:: + parser (const std::string& t, + const std::string& n, + bool mv, + const char* sep) noexcept + : parser (t.data (), t.size (), n.c_str (), mv, sep) + { + } + + inline parser:: + parser (const std::string& t, + const char* n, + bool mv, + const char* sep) noexcept + : parser (t.data (), t.size (), n, mv, sep) + { + } + + inline parser:: + parser (const char* t, + const std::string& n, + bool mv, + const char* sep) noexcept + : parser (t, std::strlen (t), n.c_str (), mv, sep) + { + } + + inline parser:: + parser (const char* t, + const char* n, + bool mv, + const char* sep) noexcept + : parser (t, std::strlen (t), n, mv, sep) + { + } + + inline const std::string& 
parser:: + name () + { + if (!name_p_) + { + assert (parsed_ && !peeked_ && !value_p_); + cache_parsed_data (); + assert (name_p_); + } + return name_; + } + + inline std::string& parser:: + value () + { + if (!value_p_) + { + assert (parsed_ && !peeked_ && !name_p_); + cache_parsed_data (); + assert (value_p_); + } + return value_; + } + + // Note: one day we will be able to use C++17 from_chars() which was made + // exactly for this. + // + template <typename T> + inline typename std::enable_if<std::is_same<T, bool>::value, T>::type + parse_value (const char* b, size_t, const parser&) + { + return *b == 't'; + } + + template <typename T> + inline typename std::enable_if< + std::is_integral<T>::value && + std::is_signed<T>::value && + !std::is_same<T, bool>::value, T>::type + parse_value (const char* b, size_t n, const parser& p) + { + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + std::int64_t v (strtoll (b, &e, 10)); // Can't throw. + + if (e == b || e != b + n || errno == ERANGE || + v < std::numeric_limits<T>::min () || + v > std::numeric_limits<T>::max ()) + p.throw_invalid_value ("signed integer", b, n); + + return static_cast<T> (v); + } + + template <typename T> + inline typename std::enable_if< + std::is_integral<T>::value && + std::is_unsigned<T>::value && + !std::is_same<T, bool>::value, T>::type + parse_value (const char* b, size_t n, const parser& p) + { + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + std::uint64_t v (strtoull (b, &e, 10)); // Can't throw. + + if (e == b || e != b + n || errno == ERANGE || + v > std::numeric_limits<T>::max ()) + p.throw_invalid_value ("unsigned integer", b, n); + + return static_cast<T> (v); + } + + template <typename T> + inline typename std::enable_if<std::is_same<T, float>::value, T>::type + parse_value (const char* b, size_t n, const parser& p) + { + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. 
+ T r (std::strtof (b, &e)); + + if (e == b || e != b + n || errno == ERANGE) + p.throw_invalid_value ("float", b, n); + + return r; + } + + template <typename T> + inline typename std::enable_if<std::is_same<T, double>::value, T>::type + parse_value (const char* b, size_t n, const parser& p) + { + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + T r (std::strtod (b, &e)); + + if (e == b || e != b + n || errno == ERANGE) + p.throw_invalid_value ("double", b, n); + + return r; + } + + template <typename T> + inline typename std::enable_if<std::is_same<T, long double>::value, T>::type + parse_value (const char* b, size_t n, const parser& p) + { + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + T r (std::strtold (b, &e)); + + if (e == b || e != b + n || errno == ERANGE) + p.throw_invalid_value ("long double", b, n); + + return r; + } + + template <typename T> + inline T parser:: + value () const + { + if (!value_p_) + { + assert (parsed_ && !peeked_ && value_event (translate (*parsed_))); + return parse_value<T> (raw_s_, raw_n_, *this); + } + + return parse_value<T> (value_.data (), value_.size (), *this); + } + + inline void parser:: + next_expect_name (const std::string& n, bool su) + { + next_expect_name (n.c_str (), su); + } + + // next_expect_<type>() + // + inline std::string& parser:: + next_expect_string () + { + next_expect (event::string); + return value (); + } + + template <typename T> + inline T parser:: + next_expect_string () + { + next_expect (event::string); + return value<T> (); + } + + inline std::string& parser:: + next_expect_number () + { + next_expect (event::number); + return value (); + } + + template <typename T> + inline T parser:: + next_expect_number () + { + next_expect (event::number); + return value<T> (); + } + + inline std::string& parser:: + next_expect_boolean () + { + next_expect (event::boolean); + return value (); + } + + template <typename T> + inline T parser:: + 
next_expect_boolean () + { + next_expect (event::boolean); + return value<T> (); + } + + // next_expect_<type>_null() + // + inline std::string* parser:: + next_expect_string_null () + { + return next_expect (event::string, event::null) ? &value () : nullptr; + } + + template <typename T> + inline optional<T> parser:: + next_expect_string_null () + { + return next_expect (event::string, event::null) + ? optional<T> (value<T> ()) + : nullopt; + } + + inline std::string* parser:: + next_expect_number_null () + { + return next_expect (event::number, event::null) ? &value () : nullptr; + } + + template <typename T> + inline optional<T> parser:: + next_expect_number_null () + { + return next_expect (event::number, event::null) + ? optional<T> (value<T> ()) + : nullopt; + } + + inline std::string* parser:: + next_expect_boolean_null () + { + return next_expect (event::boolean, event::null) ? &value () : nullptr; + } + + template <typename T> + inline optional<T> parser:: + next_expect_boolean_null () + { + return next_expect (event::boolean, event::null) + ? 
optional<T> (value<T> ()) + : nullopt; + } + + // next_expect_member_string() + // + inline std::string& parser:: + next_expect_member_string (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_string (); + } + + inline std::string& parser:: + next_expect_member_string (const std::string& n, bool su) + { + return next_expect_member_string (n.c_str (), su); + } + + template <typename T> + inline T parser:: + next_expect_member_string (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_string<T> (); + } + + template <typename T> + inline T parser:: + next_expect_member_string (const std::string& n, bool su) + { + return next_expect_member_string<T> (n.c_str (), su); + } + + // next_expect_member_number() + // + inline std::string& parser:: + next_expect_member_number (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_number (); + } + + inline std::string& parser:: + next_expect_member_number (const std::string& n, bool su) + { + return next_expect_member_number (n.c_str (), su); + } + + template <typename T> + inline T parser:: + next_expect_member_number (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_number<T> (); + } + + template <typename T> + inline T parser:: + next_expect_member_number (const std::string& n, bool su) + { + return next_expect_member_number<T> (n.c_str (), su); + } + + // next_expect_member_boolean() + // + inline std::string& parser:: + next_expect_member_boolean (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_boolean (); + } + + inline std::string& parser:: + next_expect_member_boolean (const std::string& n, bool su) + { + return next_expect_member_boolean (n.c_str (), su); + } + + template <typename T> + inline T parser:: + next_expect_member_boolean (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_boolean<T> (); + } + + template <typename T> + inline T parser:: + 
next_expect_member_boolean (const std::string& n, bool su) + { + return next_expect_member_boolean<T> (n.c_str (), su); + } + + // next_expect_member_string_null() + // + inline std::string* parser:: + next_expect_member_string_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_string_null (); + } + + inline std::string* parser:: + next_expect_member_string_null (const std::string& n, bool su) + { + return next_expect_member_string_null (n.c_str (), su); + } + + template <typename T> + inline optional<T> parser:: + next_expect_member_string_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_string_null<T> (); + } + + template <typename T> + inline optional<T> parser:: + next_expect_member_string_null (const std::string& n, bool su) + { + return next_expect_member_string_null<T> (n.c_str (), su); + } + + // next_expect_member_number_null() + // + inline std::string* parser:: + next_expect_member_number_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_number_null (); + } + + inline std::string* parser:: + next_expect_member_number_null (const std::string& n, bool su) + { + return next_expect_member_number_null (n.c_str (), su); + } + + template <typename T> + inline optional<T> parser:: + next_expect_member_number_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_number_null<T> (); + } + + template <typename T> + inline optional<T> parser:: + next_expect_member_number_null (const std::string& n, bool su) + { + return next_expect_member_number_null<T> (n.c_str (), su); + } + + // next_expect_member_boolean_null() + // + inline std::string* parser:: + next_expect_member_boolean_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_boolean_null (); + } + + inline std::string* parser:: + next_expect_member_boolean_null (const std::string& n, bool su) + { + return next_expect_member_boolean_null (n.c_str (), su); + 
} + + template <typename T> + inline optional<T> parser:: + next_expect_member_boolean_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect_boolean_null<T> (); + } + + template <typename T> + inline optional<T> parser:: + next_expect_member_boolean_null (const std::string& n, bool su) + { + return next_expect_member_boolean_null<T> (n.c_str (), su); + } + + // next_expect_member_object[_null]() + // + inline void parser:: + next_expect_member_object (const char* n, bool su) + { + next_expect_name (n, su); + next_expect (event::begin_object); + } + + inline void parser:: + next_expect_member_object (const std::string& n, bool su) + { + next_expect_member_object (n.c_str (), su); + } + + inline bool parser:: + next_expect_member_object_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect (event::begin_object, event::null); + } + + inline bool parser:: + next_expect_member_object_null (const std::string& n, bool su) + { + return next_expect_member_object_null (n.c_str (), su); + } + + // next_expect_member_array[_null]() + // + inline void parser:: + next_expect_member_array (const char* n, bool su) + { + next_expect_name (n, su); + next_expect (event::begin_array); + } + + inline void parser:: + next_expect_member_array (const std::string& n, bool su) + { + next_expect_member_array (n.c_str (), su); + } + + inline bool parser:: + next_expect_member_array_null (const char* n, bool su) + { + next_expect_name (n, su); + return next_expect (event::begin_array, event::null); + } + + inline bool parser:: + next_expect_member_array_null (const std::string& n, bool su) + { + return next_expect_member_array_null (n.c_str (), su); + } + } +} diff --git a/libbutl/json/pdjson.c b/libbutl/json/pdjson.c new file mode 100644 index 0000000..ae10c95 --- /dev/null +++ b/libbutl/json/pdjson.c @@ -0,0 +1,1044 @@ +#ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200112L +#elif _POSIX_C_SOURCE < 200112L +# error incompatible 
_POSIX_C_SOURCE level +#endif + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#ifndef PDJSON_H +# include "pdjson.h" +#endif + +#define JSON_FLAG_ERROR (1u << 0) +#define JSON_FLAG_STREAMING (1u << 1) + +#if defined(_MSC_VER) && (_MSC_VER < 1900) + +#define json_error(json, format, ...) \ + if (!(json->flags & JSON_FLAG_ERROR)) { \ + json->flags |= JSON_FLAG_ERROR; \ + _snprintf_s(json->errmsg, sizeof(json->errmsg), \ + _TRUNCATE, \ + format, \ + __VA_ARGS__); \ + } \ + +#else + +#define json_error(json, format, ...) \ + if (!(json->flags & JSON_FLAG_ERROR)) { \ + json->flags |= JSON_FLAG_ERROR; \ + snprintf(json->errmsg, sizeof(json->errmsg), \ + format, \ + __VA_ARGS__); \ + } \ + +#endif /* _MSC_VER */ + +/* See also PDJSON_STACK_MAX below. */ +#ifndef PDJSON_STACK_INC +# define PDJSON_STACK_INC 4 +#endif + +struct json_stack { + enum json_type type; + long count; +}; + +static enum json_type +push(json_stream *json, enum json_type type) +{ + json->stack_top++; + +#ifdef PDJSON_STACK_MAX + if (json->stack_top > PDJSON_STACK_MAX) { + json_error(json, "%s", "maximum depth of nesting reached"); + return JSON_ERROR; + } +#endif + + if (json->stack_top >= json->stack_size) { + struct json_stack *stack; + size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack); + stack = (struct json_stack *)json->alloc.realloc(json->stack, size); + if (stack == NULL) { + json_error(json, "%s", "out of memory"); + return JSON_ERROR; + } + + json->stack_size += PDJSON_STACK_INC; + json->stack = stack; + } + + json->stack[json->stack_top].type = type; + json->stack[json->stack_top].count = 0; + + return type; +} + +/* Note: c is assumed not to be EOF. */ +static enum json_type +pop(json_stream *json, int c, enum json_type expected) +{ + if (json->stack == NULL || json->stack[json->stack_top].type != expected) { + json_error(json, "unexpected byte '%c'", c); + return JSON_ERROR; + } + json->stack_top--; + return expected == JSON_ARRAY ? 
JSON_ARRAY_END : JSON_OBJECT_END; +} + +static int buffer_peek(struct json_source *source) +{ + if (source->position < source->source.buffer.length) + return source->source.buffer.buffer[source->position]; + else + return EOF; +} + +static int buffer_get(struct json_source *source) +{ + int c = source->peek(source); + if (c != EOF) + source->position++; + return c; +} + +static int stream_get(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + if (c != EOF) + source->position++; + return c; +} + +static int stream_peek(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + ungetc(c, source->source.stream.stream); + return c; +} + +static void init(json_stream *json) +{ + json->lineno = 1; + json->linepos = 0; + json->lineadj = 0; + json->linecon = 0; + json->colno = 0; + json->flags = JSON_FLAG_STREAMING; + json->errmsg[0] = '\0'; + json->ntokens = 0; + json->next = (enum json_type)0; + + json->stack = NULL; + json->stack_top = -1; + json->stack_size = 0; + + json->data.string = NULL; + json->data.string_size = 0; + json->data.string_fill = 0; + json->source.position = 0; + + json->alloc.malloc = malloc; + json->alloc.realloc = realloc; + json->alloc.free = free; +} + +static enum json_type +is_match(json_stream *json, const char *pattern, enum json_type type) +{ + int c; + for (const char *p = pattern; *p; p++) { + if (*p != (c = json->source.get(&json->source))) { + if (c != EOF) { + json_error(json, "expected '%c' instead of byte '%c'", *p, c); + } else { + json_error(json, "expected '%c' instead of end of text", *p); + } + return JSON_ERROR; + } + } + return type; +} + +static int pushchar(json_stream *json, int c) +{ + if (json->data.string_fill == json->data.string_size) { + size_t size = json->data.string_size * 2; + char *buffer = (char *)json->alloc.realloc(json->data.string, size); + if (buffer == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } else { + json->data.string_size = size; 
+ json->data.string = buffer; + } + } + json->data.string[json->data.string_fill++] = c; + return 0; +} + +static int init_string(json_stream *json) +{ + json->data.string_fill = 0; + if (json->data.string == NULL) { + json->data.string_size = 1024; + json->data.string = (char *)json->alloc.malloc(json->data.string_size); + if (json->data.string == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } + } + json->data.string[0] = '\0'; + return 0; +} + +static int encode_utf8(json_stream *json, unsigned long c) +{ + if (c < 0x80UL) { + return pushchar(json, c); + } else if (c < 0x0800UL) { + return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x010000UL) { + if (c >= 0xd800 && c <= 0xdfff) { + json_error(json, "invalid codepoint %06lx", c); + return -1; + } + return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x110000UL) { + return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) && + (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else { + json_error(json, "unable to encode %06lx as UTF-8", c); + return -1; + } +} + +static int hexchar(int c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 10; + case 'b': + case 'B': return 11; + case 'c': + case 'C': return 12; + case 'd': + case 'D': return 13; + case 'e': + case 'E': return 14; + case 'f': + case 'F': return 15; + default: + return -1; + } +} + +static long +read_unicode_cp(json_stream *json) +{ + long cp = 0; + int shift = 12; + + for (size_t i = 0; i < 4; i++) { + int c = 
json->source.get(&json->source); + int hc; + + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if ((hc = hexchar(c)) == -1) { + json_error(json, "invalid escape Unicode byte '%c'", c); + return -1; + } + + cp += hc * (1 << shift); + shift -= 4; + } + + + return cp; +} + +static int read_unicode(json_stream *json) +{ + long cp, h, l; + + if ((cp = read_unicode_cp(json)) == -1) { + return -1; + } + + if (cp >= 0xd800 && cp <= 0xdbff) { + /* This is the high portion of a surrogate pair; we need to read the + * lower portion to get the codepoint + */ + h = cp; + + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != '\\') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected '\\'", c); + return -1; + } + + c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != 'u') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected 'u'", c); + return -1; + } + + if ((l = read_unicode_cp(json)) == -1) { + return -1; + } + + if (l < 0xdc00 || l > 0xdfff) { + json_error(json, "surrogate pair continuation \\u%04lx out " + "of range (dc00-dfff)", l); + return -1; + } + + cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + json_error(json, "dangling surrogate \\u%04lx", cp); + return -1; + } + + return encode_utf8(json, cp); +} + +static int +read_escaped(json_stream *json) +{ + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in escape"); + return -1; + } else if (c == 'u') { + if (read_unicode(json) != 0) + return -1; + } else { + switch (c) { + case '\\': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case '/': + case '"': + { + const char *codes 
= "\\bfnrt/\""; + const char *p = strchr(codes, c); + if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0) + return -1; + } + break; + default: + json_error(json, "invalid escaped byte '%c'", c); + return -1; + } + } + return 0; +} + +static int +char_needs_escaping(int c) +{ + if ((c >= 0) && (c < 0x20 || c == 0x22 || c == 0x5c)) { + return 1; + } + + return 0; +} + +static int +utf8_seq_length(char byte) +{ + unsigned char u = (unsigned char) byte; + if (u < 0x80) return 1; + + if (0x80 <= u && u <= 0xBF) + { + // second, third or fourth byte of a multi-byte + // sequence, i.e. a "continuation byte" + return 0; + } + else if (u == 0xC0 || u == 0xC1) + { + // overlong encoding of an ASCII byte + return 0; + } + else if (0xC2 <= u && u <= 0xDF) + { + // 2-byte sequence + return 2; + } + else if (0xE0 <= u && u <= 0xEF) + { + // 3-byte sequence + return 3; + } + else if (0xF0 <= u && u <= 0xF4) + { + // 4-byte sequence + return 4; + } + else + { + // u >= 0xF5 + // Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 + return 0; + } +} + +static int +is_legal_utf8(const unsigned char *bytes, int length) +{ + if (0 == bytes || 0 == length) return 0; + + unsigned char a; + const unsigned char* srcptr = bytes + length; + switch (length) + { + default: + return 0; + // Everything else falls through when true. 
+ case 4: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 3: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 2: + a = (*--srcptr); + switch (*bytes) + { + case 0xE0: + if (a < 0xA0 || a > 0xBF) return 0; + break; + case 0xED: + if (a < 0x80 || a > 0x9F) return 0; + break; + case 0xF0: + if (a < 0x90 || a > 0xBF) return 0; + break; + case 0xF4: + if (a < 0x80 || a > 0x8F) return 0; + break; + default: + if (a < 0x80 || a > 0xBF) return 0; + break; + } + /* FALLTHRU */ + case 1: + if (*bytes >= 0x80 && *bytes < 0xC2) return 0; + } + return *bytes <= 0xF4; +} + +static int +read_utf8(json_stream* json, int next_char) +{ + int count = utf8_seq_length(next_char); + if (!count) + { + json_error(json, "%s", "invalid UTF-8 character"); + return -1; + } + + char buffer[4]; + buffer[0] = next_char; + int i; + for (i = 1; i < count; ++i) + { + if ((next_char = json->source.get(&json->source)) == EOF) + break; + + buffer[i] = next_char; + json->lineadj++; + } + + if (i != count || !is_legal_utf8((unsigned char*) buffer, count)) + { + json_error(json, "%s", "invalid UTF-8 text"); + return -1; + } + + for (i = 0; i < count; ++i) + { + if (pushchar(json, buffer[i]) != 0) + return -1; + } + return 0; +} + +static enum json_type +read_string(json_stream *json) +{ + if (init_string(json) != 0) + return JSON_ERROR; + while (1) { + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal"); + return JSON_ERROR; + } else if (c == '"') { + if (pushchar(json, '\0') == 0) + return JSON_STRING; + else + return JSON_ERROR; + } else if (c == '\\') { + if (read_escaped(json) != 0) + return JSON_ERROR; + } else if ((unsigned) c >= 0x80) { + if (read_utf8(json, c) != 0) + return JSON_ERROR; + } else { + if (char_needs_escaping(c)) { + json_error(json, "%s", "unescaped control character in string"); + return JSON_ERROR; + } + + if (pushchar(json, c) != 0) + return JSON_ERROR; + } + 
} + return JSON_ERROR; +} + +static int +is_digit(int c) +{ + return c >= 48 /*0*/ && c <= 57 /*9*/; +} + +static int +read_digits(json_stream *json) +{ + int c; + unsigned nread = 0; + while (is_digit(c = json->source.peek(&json->source))) { + if (pushchar(json, json->source.get(&json->source)) != 0) + return -1; + + nread++; + } + + if (nread == 0) { + if (c != EOF) { + json_error(json, "expected digit instead of byte '%c'", c); + } else { + json_error(json, "%s", "expected digit instead of end of text"); + } + return -1; + } + + return 0; +} + +static enum json_type +read_number(json_stream *json, int c) +{ + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (c == '-') { + c = json->source.get(&json->source); + if (is_digit(c)) { + return read_number(json, c); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } else if (strchr("123456789", c) != NULL) { + c = json->source.peek(&json->source); + if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } + } + /* Up to decimal or exponent has been read. */ + c = json->source.peek(&json->source); + if (strchr(".eE", c) == NULL) { + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; + } + if (c == '.') { + json->source.get(&json->source); // consume . + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } + /* Check for exponent. 
*/ + c = json->source.peek(&json->source); + if (c == 'e' || c == 'E') { + json->source.get(&json->source); // consume e/E + if (pushchar(json, c) != 0) + return JSON_ERROR; + c = json->source.peek(&json->source); + if (c == '+' || c == '-') { + json->source.get(&json->source); // consume + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } else if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } else { + json->source.get(&json->source); // consume (for column) + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; +} + +bool +json_isspace(int c) +{ + switch (c) { + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: + return true; + } + + return false; +} + +static void newline(json_stream *json) +{ + json->lineno++; + json->linepos = json->source.position; + json->lineadj = 0; + json->linecon = 0; +} + +/* Returns the next non-whitespace character in the stream. + * + * Note that this is the only function (besides user-facing json_source_get()) + * that needs to worry about newline housekeeping. 
+ */ +static int next(json_stream *json) +{ + int c; + while (json_isspace(c = json->source.get(&json->source))) + if (c == '\n') + newline(json); + return c; +} + +static enum json_type +read_value(json_stream *json, int c) +{ + enum json_type type; + size_t colno = json_get_column(json); + + json->ntokens++; + + switch (c) { + case EOF: + json_error(json, "%s", "unexpected end of text"); + type = JSON_ERROR; + break; + case '{': + type = push(json, JSON_OBJECT); + break; + case '[': + type = push(json, JSON_ARRAY); + break; + case '"': + type = read_string(json); + break; + case 'n': + type = is_match(json, "ull", JSON_NULL); + break; + case 'f': + type = is_match(json, "alse", JSON_FALSE); + break; + case 't': + type = is_match(json, "rue", JSON_TRUE); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + type = init_string(json) == 0 ? read_number(json, c) : JSON_ERROR; + break; + default: + type = JSON_ERROR; + json_error(json, "unexpected byte '%c' in value", c); + break; + } + + if (type != JSON_ERROR) + json->colno = colno; + + return type; +} + +enum json_type json_peek(json_stream *json) +{ + enum json_type next; + if (json->next) + next = json->next; + else + next = json->next = json_next(json); + return next; +} + +enum json_type json_next(json_stream *json) +{ + if (json->flags & JSON_FLAG_ERROR) + return JSON_ERROR; + if (json->next != 0) { + enum json_type next = json->next; + json->next = (enum json_type)0; + return next; + } + + json->colno = 0; + + if (json->ntokens > 0 && json->stack_top == (size_t)-1) { + + /* In the streaming mode leave any trailing whitespaces in the stream. + * This allows the user to validate any desired separation between + * values (such as newlines) using json_source_get/peek() with any + * remaining whitespaces ignored as leading when we parse the next + * value. 
*/ + if (!(json->flags & JSON_FLAG_STREAMING)) { + int c = next(json); + if (c != EOF) { + json_error(json, "expected end of text instead of byte '%c'", c); + return JSON_ERROR; + } + } + + return JSON_DONE; + } + int c = next(json); + if (json->stack_top == (size_t)-1) { + if (c == EOF && (json->flags & JSON_FLAG_STREAMING)) + return JSON_DONE; + + return read_value(json, c); + } + if (json->stack[json->stack_top].type == JSON_ARRAY) { + if (json->stack[json->stack_top].count == 0) { + if (c == ']') { + return pop(json, c, JSON_ARRAY); + } + json->stack[json->stack_top].count++; + return read_value(json, c); + } else if (c == ',') { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } else if (c == ']') { + return pop(json, c, JSON_ARRAY); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c'", c); + } else { + json_error(json, "%s", "unexpected end of text"); + } + return JSON_ERROR; + } + } else if (json->stack[json->stack_top].type == JSON_OBJECT) { + if (json->stack[json->stack_top].count == 0) { + if (c == '}') { + return pop(json, c, JSON_OBJECT); + } + + /* No member name/value pairs yet. */ + enum json_type value = read_value(json, c); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name or '}'"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } else if ((json->stack[json->stack_top].count % 2) == 0) { + /* Expecting comma followed by member name. 
*/ + if (c != ',' && c != '}') { + json_error(json, "%s", "expected ',' or '}' after member value"); + return JSON_ERROR; + } else if (c == '}') { + return pop(json, c, JSON_OBJECT); + } else { + enum json_type value = read_value(json, next(json)); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } + } else if ((json->stack[json->stack_top].count % 2) == 1) { + /* Expecting colon followed by value. */ + if (c != ':') { + json_error(json, "%s", "expected ':' after member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } + } + } + json_error(json, "%s", "invalid parser state"); + return JSON_ERROR; +} + +void json_reset(json_stream *json) +{ + json->stack_top = -1; + json->ntokens = 0; + json->flags &= ~JSON_FLAG_ERROR; + json->errmsg[0] = '\0'; +} + +enum json_type json_skip(json_stream *json) +{ + enum json_type type = json_next(json); + size_t cnt_arr = 0; + size_t cnt_obj = 0; + + for (enum json_type skip = type; ; skip = json_next(json)) { + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == JSON_ARRAY) { + ++cnt_arr; + } else if (skip == JSON_ARRAY_END && cnt_arr > 0) { + --cnt_arr; + } else if (skip == JSON_OBJECT) { + ++cnt_obj; + } else if (skip == JSON_OBJECT_END && cnt_obj > 0) { + --cnt_obj; + } + + if (!cnt_arr && !cnt_obj) + break; + } + + return type; +} + +enum json_type json_skip_until(json_stream *json, enum json_type type) +{ + while (1) { + enum json_type skip = json_skip(json); + + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == type) + break; + } + + return type; +} + +const char *json_get_string(json_stream *json, size_t *length) +{ + if (length != NULL) + *length = json->data.string_fill; + if (json->data.string == NULL) + return ""; + else + return json->data.string; +} 
+ +double json_get_number(json_stream *json) +{ + char *p = json->data.string; + return p == NULL ? 0 : strtod(p, NULL); +} + +const char *json_get_error(json_stream *json) +{ + return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL; +} + +size_t json_get_lineno(json_stream *json) +{ + return json->lineno; +} + +size_t json_get_position(json_stream *json) +{ + return json->source.position; +} + +size_t json_get_column(json_stream *json) +{ + return json->colno == 0 + ? json->source.position == 0 ? 1 : json->source.position - json->linepos - json->lineadj + : json->colno; +} + +size_t json_get_depth(json_stream *json) +{ + return json->stack_top + 1; +} + +/* Return the current parsing context, that is, JSON_OBJECT if we are inside + an object, JSON_ARRAY if we are inside an array, and JSON_DONE if we are + not yet/anymore in either. + + Additionally, for the first two cases, also return the number of parsing + events that have already been observed at this level with json_next/peek(). + In particular, inside an object, an odd number would indicate that the just + observed JSON_STRING event is a member name. +*/ +enum json_type json_get_context(json_stream *json, size_t *count) +{ + if (json->stack_top == (size_t)-1) + return JSON_DONE; + + if (count != NULL) + *count = json->stack[json->stack_top].count; + + return json->stack[json->stack_top].type; +} + +int json_source_get(json_stream *json) +{ + /* If the caller reads a multi-byte UTF-8 sequence, we expect them to read + * it in its entirety. We also assume that any invalid bytes within such a + * sequence belong to the same column (as opposed to starting a new column + * or some such). */ + + int c = json->source.get(&json->source); + if (json->linecon > 0) { + /* Expecting a continuation byte within a multi-byte UTF-8 sequence. */ + json->linecon--; + if (c != EOF) + json->lineadj++; + } else if (c == '\n') + newline(json); + else if (c >= 0xC2 && c <= 0xF4) /* First in multi-byte UTF-8 sequence. 
*/ + json->linecon = utf8_seq_length(c) - 1; + + return c; +} + +int json_source_peek(json_stream *json) +{ + return json->source.peek(&json->source); +} + +void json_open_buffer(json_stream *json, const void *buffer, size_t size) +{ + init(json); + json->source.get = buffer_get; + json->source.peek = buffer_peek; + json->source.source.buffer.buffer = (const char *)buffer; + json->source.source.buffer.length = size; +} + +void json_open_string(json_stream *json, const char *string) +{ + json_open_buffer(json, string, strlen(string)); +} + +void json_open_stream(json_stream *json, FILE * stream) +{ + init(json); + json->source.get = stream_get; + json->source.peek = stream_peek; + json->source.source.stream.stream = stream; +} + +static int user_get(struct json_source *json) +{ + int c = json->source.user.get(json->source.user.ptr); + if (c != EOF) + json->position++; + return c; +} + +static int user_peek(struct json_source *json) +{ + return json->source.user.peek(json->source.user.ptr); +} + +void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user) +{ + init(json); + json->source.get = user_get; + json->source.peek = user_peek; + json->source.source.user.ptr = user; + json->source.source.user.get = get; + json->source.source.user.peek = peek; +} + +void json_set_allocator(json_stream *json, json_allocator *a) +{ + json->alloc = *a; +} + +void json_set_streaming(json_stream *json, bool streaming) +{ + if (streaming) + json->flags |= JSON_FLAG_STREAMING; + else + json->flags &= ~JSON_FLAG_STREAMING; +} + +void json_close(json_stream *json) +{ + json->alloc.free(json->stack); + json->alloc.free(json->data.string); +} diff --git a/libbutl/json/pdjson.h b/libbutl/json/pdjson.h new file mode 100644 index 0000000..ac698e4 --- /dev/null +++ b/libbutl/json/pdjson.h @@ -0,0 +1,147 @@ +#ifndef PDJSON_H +#define PDJSON_H + +#ifndef PDJSON_SYMEXPORT +# define PDJSON_SYMEXPORT +#endif + +#ifdef __cplusplus +extern "C" { +#else +#if 
defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #include <stdbool.h> +#else + #ifndef bool + #define bool int + #define true 1 + #define false 0 + #endif /* bool */ +#endif /* __STDC_VERSION__ */ +#endif /* __cplusplus */ + +#include <stdio.h> + +enum json_type { + JSON_ERROR = 1, JSON_DONE, + JSON_OBJECT, JSON_OBJECT_END, JSON_ARRAY, JSON_ARRAY_END, + JSON_STRING, JSON_NUMBER, JSON_TRUE, JSON_FALSE, JSON_NULL +}; + +struct json_allocator { + void *(*malloc)(size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); +}; + +typedef int (*json_user_io)(void *user); + +typedef struct json_stream json_stream; +typedef struct json_allocator json_allocator; + +PDJSON_SYMEXPORT void json_open_buffer(json_stream *json, const void *buffer, size_t size); +PDJSON_SYMEXPORT void json_open_string(json_stream *json, const char *string); +PDJSON_SYMEXPORT void json_open_stream(json_stream *json, FILE *stream); +PDJSON_SYMEXPORT void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user); +PDJSON_SYMEXPORT void json_close(json_stream *json); + +PDJSON_SYMEXPORT void json_set_allocator(json_stream *json, json_allocator *a); +PDJSON_SYMEXPORT void json_set_streaming(json_stream *json, bool mode); + +PDJSON_SYMEXPORT enum json_type json_next(json_stream *json); +PDJSON_SYMEXPORT enum json_type json_peek(json_stream *json); +PDJSON_SYMEXPORT void json_reset(json_stream *json); +PDJSON_SYMEXPORT const char *json_get_string(json_stream *json, size_t *length); +PDJSON_SYMEXPORT double json_get_number(json_stream *json); + +PDJSON_SYMEXPORT enum json_type json_skip(json_stream *json); +PDJSON_SYMEXPORT enum json_type json_skip_until(json_stream *json, enum json_type type); + +PDJSON_SYMEXPORT size_t json_get_lineno(json_stream *json); +PDJSON_SYMEXPORT size_t json_get_position(json_stream *json); +PDJSON_SYMEXPORT size_t json_get_column(json_stream *json); +PDJSON_SYMEXPORT size_t json_get_depth(json_stream *json); +PDJSON_SYMEXPORT enum 
json_type json_get_context(json_stream *json, size_t *count); +PDJSON_SYMEXPORT const char *json_get_error(json_stream *json); + +PDJSON_SYMEXPORT int json_source_get(json_stream *json); +PDJSON_SYMEXPORT int json_source_peek(json_stream *json); +PDJSON_SYMEXPORT bool json_isspace(int c); + +/* internal */ + +struct json_source { + int (*get)(struct json_source *); + int (*peek)(struct json_source *); + size_t position; + union { + struct { + FILE *stream; + } stream; + struct { + const char *buffer; + size_t length; + } buffer; + struct { + void *ptr; + json_user_io get; + json_user_io peek; + } user; + } source; +}; + +struct json_stream { + size_t lineno; + + /* While counting lines is straightforward, columns are tricky because we + * have to count codepoints, not bytes. We could have peppered the code + * with increments in all the relevant places but that seems inelegant. + * So instead we calculate the column dynamically, based on the current + * position. + * + * Specifically, we will remember the position at the beginning of each + * line (linepos) and, assuming only the ASCII characters on the line, the + * column will be the difference between the current position and linepos. + * Of course there could also be multi-byte UTF-8 sequences which we will + * handle by keeping an adjustment (lineadj) -- the number of continuation + * bytes encountered on this line so far. Finally, for json_source_get() + * we also have to keep the number of remaining continuation bytes in the + * current multi-byte UTF-8 sequence (linecon). 
+ * + * This is not the end of the story, however: with only the just described + * approach we will always end up with the column of the latest character + * read which is not what we want when returning potentially multi- + * character value events (string, number, etc); in these cases we want to + * return the column of the first character (note that if the value itself + * is invalid and we are returning JSON_ERROR, we still want the current + * column). So to handle this we will cache the start column (colno) for + * such events. + */ + size_t linepos; /* Position at the beginning of the current line. */ + size_t lineadj; /* Adjustment for multi-byte UTF-8 sequences. */ + size_t linecon; /* Number of remaining UTF-8 continuation bytes. */ + size_t colno; /* Start column for value events or 0. */ + + struct json_stack *stack; + size_t stack_top; + size_t stack_size; + enum json_type next; + unsigned flags; + + struct { + char *string; + size_t string_fill; + size_t string_size; + } data; + + size_t ntokens; + + struct json_source source; + struct json_allocator alloc; + char errmsg[128]; +}; + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif diff --git a/libbutl/json/serializer.cxx b/libbutl/json/serializer.cxx index 0e8b566..fbd569a 100644 --- a/libbutl/json/serializer.cxx +++ b/libbutl/json/serializer.cxx @@ -1,6 +1,6 @@ -#include <cstdio> // snprintf -#include <cstdarg> // va_list -#include <cstring> // memcpy +#include <cstdio> // snprintf +#include <cstdarg> // va_list +#include <cstring> // memcpy #include <ostream> #include <libbutl/json/serializer.hxx> diff --git a/libbutl/json/serializer.hxx b/libbutl/json/serializer.hxx index b52bf65..5192cb4 100644 --- a/libbutl/json/serializer.hxx +++ b/libbutl/json/serializer.hxx @@ -167,10 +167,21 @@ namespace butl // Begin/end an object. 
// + // The member_begin_object() version is a shortcut for: + // + // member_name (name, check); + // begin_object (); + // void begin_object (); void + member_begin_object (const char*, bool check = true); + + void + member_begin_object (const std::string&, bool check = true); + + void end_object (); // Serialize an object member (name and value). @@ -199,10 +210,21 @@ namespace butl // Begin/end an array. // + // The member_begin_array() version is a shortcut for: + // + // member_name (name, check); + // begin_array (); + // void begin_array (); void + member_begin_array (const char*, bool check = true); + + void + member_begin_array (const std::string&, bool check = true); + + void end_array (); // Serialize a string. diff --git a/libbutl/json/serializer.ixx b/libbutl/json/serializer.ixx index 50fe397..a719ef6 100644 --- a/libbutl/json/serializer.ixx +++ b/libbutl/json/serializer.ixx @@ -1,3 +1,5 @@ +#include <cstring> // strlen() + namespace butl { namespace json @@ -72,7 +74,7 @@ namespace butl inline void buffer_serializer:: member_name (const char* n, bool c) { - next (event::name, {n, n != nullptr ? strlen (n) : 0}, c); + next (event::name, {n, n != nullptr ? 
std::strlen (n) : 0}, c); } inline void buffer_serializer:: @@ -81,6 +83,20 @@ namespace butl next (event::name, {n.c_str (), n.size ()}, c); } + inline void buffer_serializer:: + member_begin_object (const char* n, bool c) + { + member_name (n, c); + begin_object (); + } + + inline void buffer_serializer:: + member_begin_object (const std::string& n, bool c) + { + member_name (n, c); + begin_object (); + } + template <typename T> inline void buffer_serializer:: member (const char* n, const T& v, bool c) @@ -104,6 +120,20 @@ namespace butl } inline void buffer_serializer:: + member_begin_array (const char* n, bool c) + { + member_name (n, c); + begin_array (); + } + + inline void buffer_serializer:: + member_begin_array (const std::string& n, bool c) + { + member_name (n, c); + begin_array (); + } + + inline void buffer_serializer:: end_array () { next (event::end_array); @@ -113,7 +143,7 @@ namespace butl value (const char* v, bool c) { if (v != nullptr) - next (event::string, {v, strlen (v)}, c); + next (event::string, {v, std::strlen (v)}, c); else next (event::null); } @@ -157,7 +187,7 @@ namespace butl // Use event::number (which doesn't involve any quoting) with a disabled // check. 
// - next (event::number, {v, strlen (v)}, false /* check */); + next (event::number, {v, std::strlen (v)}, false /* check */); } inline void buffer_serializer:: diff --git a/libbutl/lz4frame.c b/libbutl/lz4frame.c index ec02c92..0db8c1e 100644 --- a/libbutl/lz4frame.c +++ b/libbutl/lz4frame.c @@ -904,8 +904,8 @@ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr, } /* keep tmpIn within limits */ - if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize) /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */ - && !(cctxPtr->prefs.autoFlush)) + if (!(cctxPtr->prefs.autoFlush) && + (cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */ { int const realDictSize = LZ4F_localSaveDict(cctxPtr); cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx index 258a536..904910a 100644 --- a/libbutl/manifest-parser.cxx +++ b/libbutl/manifest-parser.cxx @@ -148,41 +148,136 @@ namespace butl { using iterator = string::const_iterator; - auto space = [] (char c) -> bool {return c == ' ' || c == '\t';}; + // Parse the value differently depending on whether it is multi-line or + // not. + // + if (v.find ('\n') == string::npos) // Single-line. + { + auto space = [] (char c) {return c == ' ' || c == '\t';}; - iterator i (v.begin ()); - iterator e (v.end ()); + iterator i (v.begin ()); + iterator e (v.end ()); - string r; - size_t n (0); - for (char c; i != e && (c = *i) != ';'; ++i) - { - // Unescape ';' character. + string r; + size_t n (0); + for (char c; i != e && (c = *i) != ';'; ++i) + { + // Unescape ';' and '\' characters. + // + if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\')) + c = *++i; + + r += c; + + if (!space (c)) + n = r.size (); + } + + // Strip the value trailing spaces. 
// - if (c == '\\' && i + 1 != e && *(i + 1) == ';') - c = *++i; + if (r.size () != n) + r.resize (n); - r += c; + // Find beginning of a comment (i). + // + if (i != e) + { + // Skip spaces. + // + for (++i; i != e && space (*i); ++i); + } - if (!space (c)) - n = r.size (); + return make_pair (move (r), string (i, e)); } + else // Multi-line. + { + string r; + string c; - // Strip the value trailing spaces. - // - if (r.size () != n) - r.resize (n); + // Parse the value lines until the comment separator is encountered or + // the end of the value is reached. Add these lines to the resulting + // value, unescaping them if required. + // + // Note that we only need to unescape lines which have the '\+;' form. + // + auto i (v.begin ()); + auto e (v.end ()); - // Find beginning of a comment (i). - // - if (i != e) - { - // Skip spaces. + while (i != e) + { + // Find the end of the line and while at it the first non-backslash + // character. + // + auto le (i); + auto nb (e); + for (; le != e && *le != '\n'; ++le) + { + if (nb == e && *le != '\\') + nb = le; + } + + // If the value end is not reached then position to the beginning of + // the next line and to the end of the value otherwise. + // + auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);}; + + // If the first non-backslash character is ';' and it is the last + // character on the line, then this is either the comment separator or + // an escape sequence. + // + if (nb != e && *nb == ';' && nb + 1 == le) + { + // If ';' is the first (and thus the only) character on the line, + // then this is the comment separator and we bail out from this + // loop. Note that in this case we need to trim the trailing newline + // (but only one) from the resulting value since it is considered as + // a part of the separator. + // + if (nb == i) + { + if (!r.empty ()) + { + assert (r.back () == '\n'); + r.pop_back (); + } + + next (); + break; + } + // + // Otherwise, this is an escape sequence, so unescape it. 
For that + // just take the rightmost half of the string: + // + // \; -> ; + // \\; -> \; + // \\\; -> \; + // \\\\; -> \\; + // \\\\\; -> \\; + // + else + i += (le - i) / 2; + } + + // Add the line to the resulting value together with the trailing + // newline, if present. + // + r.append (i, le); + + if (le != e) + r += '\n'; + + next (); + } + + // If we haven't reached the end of the value then it means we've + // encountered the comment separator. In this case save the remaining + // value part as a comment. // - for (++i; i != e && space (*i); ++i); - } + if (i != e) + c = string (i, e); - return make_pair (move (r), string (i, e)); + return make_pair (move (r), move (c)); + } } void manifest_parser:: diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx index b0d0324..26699e0 100644 --- a/libbutl/manifest-serializer.cxx +++ b/libbutl/manifest-serializer.cxx @@ -101,22 +101,89 @@ namespace butl merge_comment (const string& value, const string& comment) { string r; - for (char c: value) + + // Merge the value and comment differently depending on whether any of + // them is multi-line or not. + // + if (value.find ('\n') == string::npos && // Single-line. + comment.find ('\n') == string::npos) { - // Escape ';' character. - // - if (c == ';') - r += '\\'; + for (char c: value) + { + // Escape ';' and '\' characters. + // + if (c == ';' || c == '\\') + r += '\\'; - r += c; - } + r += c; + } - // Add the comment. - // - if (!comment.empty ()) + // Add the comment. + // + if (!comment.empty ()) + { + r += "; "; + r += comment; + } + } + else // Multi-line. { - r += "; "; - r += comment; + // Parse the value lines and add them to the resulting value, escaping + // them if required. + // + // Note that we only need to escape lines which have the '\*;' form. + // + for (auto i (value.begin ()), e (value.end ()); i != e; ) + { + // Find the end of the line and while at it the first non-backslash + // character. 
+ // + auto le (i); + auto nb (e); + for (; le != e && *le != '\n'; ++le) + { + if (nb == e && *le != '\\') + nb = le; + } + + // If the first non-backslash character is ';' and it is the last + // character on the line, then we need to escape the line characters. + // Note that we only escape ';' if it is the only character on the + // line. Otherwise, we only escape backslashes doubling the number of + // them from the left: + // + // ; -> \; + // \; -> \\; + // \\; -> \\\\; + // \\\; -> \\\\\\; + // + if (nb != e && *nb == ';' && nb + 1 == le) + r.append (nb == i ? 1 : nb - i, '\\'); + + // Add the line to the resulting value together with the trailing + // newline, if present. + // + r.append (i, le); + + if (le != e) + r += '\n'; + + // If the value end is not reached then position to the beginning of + // the next line and to the end of the value otherwise. + // + i = (le != e ? le + 1 : e); + } + + // Append the comment, if present. + // + if (!comment.empty ()) + { + if (!r.empty ()) + r += '\n'; + + r += ";\n"; + r += comment; + } } return r; diff --git a/libbutl/mingw-thread.hxx b/libbutl/mingw-thread.hxx index b308dde..66f98aa 100644 --- a/libbutl/mingw-thread.hxx +++ b/libbutl/mingw-thread.hxx @@ -154,7 +154,7 @@ namespace mingw_stdthread native_handle_type native_handle() const {return mHandle;} thread(): mHandle(kInvalidHandle), mThreadId(){} - thread(thread&& other) + thread(thread&& other) noexcept :mHandle(other.mHandle), mThreadId(other.mThreadId) { other.mHandle = kInvalidHandle; diff --git a/libbutl/move-only-function.hxx b/libbutl/move-only-function.hxx index 846ef25..e5cfe51 100644 --- a/libbutl/move-only-function.hxx +++ b/libbutl/move-only-function.hxx @@ -124,7 +124,10 @@ namespace butl return f (std::forward<A> (args)...); } - wrapper (wrapper&& w): f (std::move (w.f)) {} + wrapper (wrapper&& w) + noexcept (std::is_nothrow_move_constructible<F>::value) + : f (std::move (w.f)) {} + wrapper& operator= (wrapper&&) = delete; // Shouldn't be 
needed. ~wrapper () {f.~F ();} diff --git a/libbutl/openssl.txx b/libbutl/openssl.txx index 01e854c..f55432d 100644 --- a/libbutl/openssl.txx +++ b/libbutl/openssl.txx @@ -105,6 +105,7 @@ namespace butl optional<semantic_version> ver ( parse_semantic_version (string (s, b, e != string::npos ? e - b : e), + semantic_version::allow_build, "" /* build_separators */)); if (!ver) diff --git a/libbutl/optional.hxx b/libbutl/optional.hxx index 7d66ac5..f22189b 100644 --- a/libbutl/optional.hxx +++ b/libbutl/optional.hxx @@ -108,10 +108,16 @@ namespace butl #if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} + + constexpr optional_data (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value) + : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} #else optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} + + optional_data (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value) + : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} #endif optional_data& operator= (nullopt_t); @@ -119,7 +125,11 @@ namespace butl optional_data& operator= (T&&); optional_data& operator= (const optional_data&); - optional_data& operator= (optional_data&&); + + optional_data& operator= (optional_data&&) + noexcept (std::is_nothrow_move_constructible<T>::value && + std::is_nothrow_move_assignable<T>::value && + std::is_nothrow_destructible<T>::value); ~optional_data (); }; @@ -151,10 +161,16 @@ namespace butl #if (!defined(_MSC_VER) || _MSC_VER > 1900) && \ (!defined(__GNUC__) || __GNUC__ > 4 || defined(__clang__)) constexpr optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T 
(o.d_);} - constexpr optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} + + constexpr optional_data (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value) + : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} #else optional_data (const optional_data& o): v_ (o.v_) {if (v_) new (&d_) T (o.d_);} - optional_data (optional_data&& o): v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} + + optional_data (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value) + : v_ (o.v_) {if (v_) new (&d_) T (std::move (o.d_));} #endif optional_data& operator= (nullopt_t); @@ -162,7 +178,12 @@ namespace butl optional_data& operator= (T&&); optional_data& operator= (const optional_data&); - optional_data& operator= (optional_data&&); + + // Note: it is trivially destructible and thus is no-throw destructible. + // + optional_data& operator= (optional_data&&) + noexcept (std::is_nothrow_move_constructible<T>::value && + std::is_nothrow_move_assignable<T>::value); }; template <typename T, diff --git a/libbutl/optional.ixx b/libbutl/optional.ixx index e2b552f..fdd0ac5 100644 --- a/libbutl/optional.ixx +++ b/libbutl/optional.ixx @@ -77,6 +77,9 @@ namespace butl template <typename T> inline optional_data<T, false>& optional_data<T, false>:: operator= (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value && + std::is_nothrow_move_assignable<T>::value && + std::is_nothrow_destructible<T>::value) { if (o.v_) { @@ -171,6 +174,8 @@ namespace butl template <typename T> inline optional_data<T, true>& optional_data<T, true>:: operator= (optional_data&& o) + noexcept (std::is_nothrow_move_constructible<T>::value && + std::is_nothrow_move_assignable<T>::value) { if (o.v_) { diff --git a/libbutl/path-pattern.ixx b/libbutl/path-pattern.ixx index 71f125c..6fee31e 100644 --- a/libbutl/path-pattern.ixx +++ b/libbutl/path-pattern.ixx @@ -3,6 +3,32 @@ namespace butl { + // path_match_flags + // + inline 
path_match_flags operator& (path_match_flags x, path_match_flags y) + { + return x &= y; + } + + inline path_match_flags operator| (path_match_flags x, path_match_flags y) + { + return x |= y; + } + + inline path_match_flags operator&= (path_match_flags& x, path_match_flags y) + { + return x = static_cast<path_match_flags> ( + static_cast<std::uint16_t> (x) & + static_cast<std::uint16_t> (y)); + } + + inline path_match_flags operator|= (path_match_flags& x, path_match_flags y) + { + return x = static_cast<path_match_flags> ( + static_cast<std::uint16_t> (x) | + static_cast<std::uint16_t> (y)); + } + // path_pattern_iterator // inline path_pattern_iterator:: diff --git a/libbutl/path.cxx b/libbutl/path.cxx index e4f373e..bd66f13 100644 --- a/libbutl/path.cxx +++ b/libbutl/path.cxx @@ -184,8 +184,8 @@ namespace butl using std::to_string; return prefix - + "-" + to_string (process::current_id ()) - + "-" + to_string (temp_name_count++); + + '-' + to_string (process::current_id ()) + + '-' + to_string (temp_name_count++); } template <> diff --git a/libbutl/path.hxx b/libbutl/path.hxx index b3be75a..b10022a 100644 --- a/libbutl/path.hxx +++ b/libbutl/path.hxx @@ -612,18 +612,18 @@ namespace butl // Constructors. // - path_data () + path_data () noexcept : tsep_ (0) {} - path_data (string_type&& p, difference_type ts) + path_data (string_type&& p, difference_type ts) noexcept : path_ (std::move (p)), tsep_ (path_.empty () ? 
0 : ts) {} explicit - path_data (string_type&& p) + path_data (string_type&& p) noexcept : path_ (std::move (p)) { _init (); } void - _init () + _init () noexcept { size_type n (path_.size ()), i; @@ -651,7 +651,8 @@ namespace butl using path_data<C>::path_data; base_type () = default; - base_type (path_data<C>&& d): path_data<C> (std::move (d)) {} + base_type (path_data<C>&& d) noexcept + : path_data<C> (std::move (d)) {} }; using dir_type = basic_path<C, dir_path_kind<C>>; @@ -1278,7 +1279,8 @@ namespace butl // Direct initialization without init()/cast(). // explicit - basic_path (data_type&& d): base_type (std::move (d)) {} + basic_path (data_type&& d) noexcept + : base_type (std::move (d)) {} using base_type::_size; using base_type::_init; @@ -1477,9 +1479,9 @@ namespace butl basic_path_name (): // Create empty/NULL path name. base (nullptr, &name) {} - basic_path_name (basic_path_name&&); + basic_path_name (basic_path_name&&) noexcept; basic_path_name (const basic_path_name&); - basic_path_name& operator= (basic_path_name&&); + basic_path_name& operator= (basic_path_name&&) noexcept; basic_path_name& operator= (const basic_path_name&); }; @@ -1506,9 +1508,9 @@ namespace butl basic_path_name_value (): base (&path) {} // Create empty/NULL path name. 
- basic_path_name_value (basic_path_name_value&&); + basic_path_name_value (basic_path_name_value&&) noexcept; basic_path_name_value (const basic_path_name_value&); - basic_path_name_value& operator= (basic_path_name_value&&); + basic_path_name_value& operator= (basic_path_name_value&&) noexcept; basic_path_name_value& operator= (const basic_path_name_value&); }; } diff --git a/libbutl/path.ixx b/libbutl/path.ixx index d2084f0..b2fdb6f 100644 --- a/libbutl/path.ixx +++ b/libbutl/path.ixx @@ -782,7 +782,7 @@ namespace butl // template <typename P> inline basic_path_name<P>:: - basic_path_name (basic_path_name&& p) + basic_path_name (basic_path_name&& p) noexcept : basic_path_name (p.path, std::move (p.name)) { } @@ -796,7 +796,7 @@ namespace butl template <typename P> inline basic_path_name<P>& basic_path_name<P>:: - operator= (basic_path_name&& p) + operator= (basic_path_name&& p) noexcept { if (this != &p) { @@ -824,7 +824,7 @@ namespace butl // template <typename P> inline basic_path_name_value<P>:: - basic_path_name_value (basic_path_name_value&& p) + basic_path_name_value (basic_path_name_value&& p) noexcept : basic_path_name_value (std::move (p.path), std::move (p.name)) { } @@ -838,7 +838,7 @@ namespace butl template <typename P> inline basic_path_name_value<P>& basic_path_name_value<P>:: - operator= (basic_path_name_value&& p) + operator= (basic_path_name_value&& p) noexcept { if (this != &p) { diff --git a/libbutl/process-run.cxx b/libbutl/process-run.cxx index a5014f6..b044ea1 100644 --- a/libbutl/process-run.cxx +++ b/libbutl/process-run.cxx @@ -24,7 +24,7 @@ namespace butl try { return process (pp, cmd, - in, out, err, + move (in), move (out), move (err), cwd != nullptr ? cwd->string ().c_str () : nullptr, envvars); } diff --git a/libbutl/process-run.txx b/libbutl/process-run.txx index 8e6ca57..6c903a8 100644 --- a/libbutl/process-run.txx +++ b/libbutl/process-run.txx @@ -87,21 +87,21 @@ namespace butl // valid file descriptor. 
// inline process::pipe - process_stdin (const process::pipe& v) + process_stdin (process::pipe v) { assert (v.in >= 0); return v; } inline process::pipe - process_stdout (const process::pipe& v) + process_stdout (process::pipe v) { assert (v.out >= 0); return v; } inline process::pipe - process_stderr (const process::pipe& v) + process_stderr (process::pipe v) { assert (v.out >= 0); return v; @@ -131,13 +131,13 @@ namespace butl typename... A, typename std::size_t... index> process - process_start (std::index_sequence<index...>, - const C& cmdc, - I&& in, - O&& out, - E&& err, - const process_env& env, - A&&... args) + process_start_impl (std::index_sequence<index...>, + const C& cmdc, + I&& in, + O&& out, + E&& err, + const process_env& env, + A&&... args) { // Map stdin/stdout/stderr arguments to their integer values, as expected // by the process constructor. @@ -170,7 +170,9 @@ namespace butl return process_start (env.cwd, *env.path, cmd.data (), env.vars, - in_i, out_i, err_i); + std::move (in_i), + std::move (out_i), + std::move (err_i)); } template <typename C, @@ -186,13 +188,13 @@ namespace butl const process_env& env, A&&... args) { - return process_start (std::index_sequence_for<A...> (), - cmdc, - std::forward<I> (in), - std::forward<O> (out), - std::forward<E> (err), - env, - std::forward<A> (args)...); + return process_start_impl (std::index_sequence_for<A...> (), + cmdc, + std::forward<I> (in), + std::forward<O> (out), + std::forward<E> (err), + env, + std::forward<A> (args)...); } template <typename I, @@ -257,4 +259,45 @@ namespace butl env, std::forward<A> (args)...); } + + template <typename C, + typename... A, + typename std::size_t... index> + void + process_print_impl (std::index_sequence<index...>, + const C& cmdc, + const process_env& env, + A&&... args) + { + // Construct the command line array. + // + const std::size_t args_size (sizeof... 
(args)); + + small_vector<const char*, args_size + 2> cmd; + + assert (env.path != nullptr); + cmd.push_back (env.path->recall_string ()); + + std::string storage[args_size != 0 ? args_size : 1]; + + const char* dummy[] = { + nullptr, process_args_as_wrapper (cmd, args, storage[index])... }; + + cmd.push_back (dummy[0]); // NULL (and get rid of unused warning). + + cmdc (cmd.data (), cmd.size ()); + } + + template <typename C, + typename... A> + inline void + process_print_callback (const C& cmdc, + const process_env& env, + A&&... args) + { + process_print_impl (std::index_sequence_for<A...> (), + cmdc, + env, + std::forward<A> (args)...); + } } diff --git a/libbutl/process.cxx b/libbutl/process.cxx index a19719f..e416807 100644 --- a/libbutl/process.cxx +++ b/libbutl/process.cxx @@ -190,7 +190,7 @@ namespace butl } void process:: - print (ostream& o, const char* const args[], size_t n) + print (ostream& o, const char* const* args, size_t n) { size_t m (0); const char* const* p (args); @@ -383,7 +383,7 @@ namespace butl } process:: - process (const process_path& pp, const char* args[], + process (const process_path& pp, const char* const* args, pipe pin, pipe pout, pipe perr, const char* cwd, const char* const* evars) @@ -758,6 +758,13 @@ namespace butl { if (handle != 0) { + // First close any open pipe ends for good measure but ignore any + // errors. + // + out_fd.reset (); + in_ofd.reset (); + in_efd.reset (); + int es; int r (waitpid (handle, &es, 0)); handle = 0; // We have tried. 
@@ -839,6 +846,12 @@ namespace butl return getpid (); } + process::handle_type process:: + current_handle () + { + return getpid (); + } + // process_exit // process_exit:: @@ -1365,7 +1378,7 @@ namespace butl static map<string, bool> detect_msys_cache_; process:: - process (const process_path& pp, const char* args[], + process (const process_path& pp, const char* const* args, pipe pin, pipe pout, pipe perr, const char* cwd, const char* const* evars) @@ -1794,7 +1807,6 @@ namespace butl using namespace chrono; - // Retry for about 1 hour. // system_clock::duration timeout (1h); @@ -1961,6 +1973,10 @@ namespace butl { if (handle != 0) { + out_fd.reset (); + in_ofd.reset (); + in_efd.reset (); + DWORD es; DWORD e (NO_ERROR); if (WaitForSingleObject (handle, INFINITE) != WAIT_OBJECT_0 || @@ -2068,6 +2084,15 @@ namespace butl return GetCurrentProcessId (); } + process::handle_type process:: + current_handle () + { + // Note that the returned handle is a pseudo handle (-1) that does not + // need to be closed. + // + return GetCurrentProcess (); + } + // process_exit // process_exit:: diff --git a/libbutl/process.hxx b/libbutl/process.hxx index 47cc507..bbb7c89 100644 --- a/libbutl/process.hxx +++ b/libbutl/process.hxx @@ -117,8 +117,8 @@ namespace butl // Moveable-only type. // - process_path (process_path&&); - process_path& operator= (process_path&&); + process_path (process_path&&) noexcept; + process_path& operator= (process_path&&) noexcept; process_path (const process_path&) = delete; process_path& operator= (const process_path&) = delete; @@ -269,7 +269,30 @@ namespace butl // the parent. So you should do this yourself, if required. 
For example, // to redirect the child process stdout to stderr, you can do: // - // process p (..., 0, 2); + // process pr (..., 0, 2); + // + // Note also that the somewhat roundabout setup with -1 as a redirect + // "instruction" and out_fd/in_ofd/in_efd data members for the result + // helps to make sure the stream instances are destroyed before the + // process instance. For example: + // + // process pr (..., 0, -1, 2); + // ifdstream is (move (pr.in_ofd)); + // + // This is important in case an exception is thrown where we want to make + // sure all our pipe ends are closed before we wait for the process exit + // (which happens in the process destructor). + // + // And speaking of the destruction order, another thing to keep in mind is + // that only one stream can use the skip mode (fdstream_mode::skip; + // because skipping is performed in the blocking mode) and the stream that + // skips should come first so that all other streams are destroyed/closed + // before it (failed that, we may end up in a deadlock). For example: + // + // process pr (..., -1, -1, -1); + // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); // Must be first. + // ifdstream es (move (pr.in_efd)); + // ofdstream os (move (pr.out_fd)); // // The cwd argument allows to change the current working directory of the // child process. NULL and empty arguments are ignored. @@ -287,39 +310,104 @@ namespace butl // Note that the versions without the the process_path argument may // temporarily change args[0] (see path_search() for details). 
// - process (const char* [], + process (const char**, + int in = 0, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* const*, int in = 0, int out = 1, int err = 2, const char* cwd = nullptr, const char* const* envvars = nullptr); - process (const process_path&, const char* [], + process (std::vector<const char*>&, + int in = 0, int out = 1, int err = 2, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const std::vector<const char*>&, int in = 0, int out = 1, int err = 2, const char* cwd = nullptr, const char* const* envvars = nullptr); // If the descriptors are pipes that you have created, then you should use - // this constructor instead to communicate this information. + // this constructor instead to communicate this information (the parent + // end may need to be "probed" on Windows). // // For generality, if the "other" end of the pipe is -1, then assume this // is not a pipe. // struct pipe { - int in = -1; - int out = -1; - pipe () = default; pipe (int i, int o): in (i), out (o) {} explicit pipe (const fdpipe& p): in (p.in.get ()), out (p.out.get ()) {} + + // Transfer ownership to one end of the pipe. + // + pipe (auto_fd i, int o): in (i.release ()), out (o), own_in (true) {} + pipe (int i, auto_fd o): in (i), out (o.release ()), own_out (true) {} + + // Moveable-only type. 
+ // + pipe (pipe&&) noexcept; + pipe& operator= (pipe&&) noexcept; + + pipe (const pipe&) = delete; + pipe& operator= (const pipe&) = delete; + + ~pipe (); + + public: + int in = -1; + int out = -1; + + bool own_in = false; + bool own_out = false; }; - process (const process_path&, const char* [], + process (const char**, pipe in, pipe out, pipe err, const char* cwd = nullptr, const char* const* envvars = nullptr); + process (const char**, + int in, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* const*, + pipe in, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* const*, + int in, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (std::vector<const char*>&, + pipe in, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (std::vector<const char*>&, + int in, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const std::vector<const char*>&, + pipe in, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const std::vector<const char*>&, + int in, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + // The "piping" constructor, for example: // // process lhs (..., 0, -1); // Redirect stdout to a pipe. @@ -328,16 +416,36 @@ namespace butl // rhs.wait (); // Wait for last first. 
// lhs.wait (); // - process (const char* [], + process (const char**, process&, int out = 1, int err = 2, const char* cwd = nullptr, const char* const* envvars = nullptr); - process (const process_path&, const char* [], + process (const process_path&, const char* const*, process&, int out = 1, int err = 2, const char* cwd = nullptr, const char* const* envvars = nullptr); + process (const char**, + process&, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const char**, + process&, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* const*, + process&, pipe out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + + process (const process_path&, const char* const*, + process&, int out, pipe err, + const char* cwd = nullptr, + const char* const* envvars = nullptr); + // Wait for the process to terminate. Return true if the process // terminated normally and with the zero exit code. Unless ignore_error // is true, throw process_error if anything goes wrong. This function can @@ -364,7 +472,7 @@ namespace butl // Note that the destructor will wait for the process but will ignore // any errors and the exit status. // - ~process () {if (handle != 0) wait (true);} + ~process () { if (handle != 0) wait (true); } // Process termination. // @@ -391,8 +499,8 @@ namespace butl // Moveable-only type. // - process (process&&); - process& operator= (process&&); + process (process&&) noexcept; + process& operator= (process&&) noexcept (false); // Note: calls wait(). process (const process&) = delete; process& operator= (const process&) = delete; @@ -414,7 +522,7 @@ namespace butl // // ... // E.g., print args[0]. // - // process p (pp, args); + // process pr (pp, args); // // You can also specify the fallback directory which will be tried last. 
// This, for example, can be used to implement the Windows "search in the @@ -498,7 +606,7 @@ namespace butl // nameN arg arg ... nullptr nullptr // static void - print (std::ostream&, const char* const args[], size_t n = 0); + print (std::ostream&, const char* const* args, size_t n = 0); // Quote and escape the specified command line argument. If batch is true // then also quote the equal (`=`), comma (`,`) and semicolon (`;`) @@ -521,13 +629,16 @@ namespace butl public: handle_type handle; + static handle_type + current_handle (); + // Absence means that the exit information is not (yet) known. This can be // because you haven't called wait() yet or because wait() failed. // optional<process_exit> exit; - // Use the following file descriptors to communicate with the new process's - // standard streams. + // Use the following file descriptors to communicate with the new + // process's standard streams (if redirected to pipes; see above). // auto_fd out_fd; // Write to it to send to stdin. auto_fd in_ofd; // Read from it to receive from stdout. @@ -641,8 +752,8 @@ namespace butl // Moveable-only type. // - process_env (process_env&&); - process_env& operator= (process_env&&); + process_env (process_env&&) noexcept; + process_env& operator= (process_env&&) noexcept; process_env (const process_env&) = delete; process_env& operator= (const process_env&) = delete; @@ -678,7 +789,7 @@ namespace butl // command line or similar. It should be callable with the following // signature: // - // void (const char*[], std::size_t) + // void (const char* const*, std::size_t) // template <typename C, typename I, @@ -719,6 +830,15 @@ namespace butl const process_env&, A&&... args); + // Call the callback without actually running/starting anything. + // + template <typename C, + typename... A> + void + process_print_callback (const C&, + const process_env&, + A&&... args); + // Conversion of types to their C string representations. 
Can be overloaded // (including via ADL) for custom types. The default implementation calls // to_string() which covers all the numeric values via std::to_string () and diff --git a/libbutl/process.ixx b/libbutl/process.ixx index 256454b..e4db474 100644 --- a/libbutl/process.ixx +++ b/libbutl/process.ixx @@ -35,7 +35,7 @@ namespace butl args0_ (nullptr) {} inline process_path:: - process_path (process_path&& p) + process_path (process_path&& p) noexcept : effect (std::move (p.effect)), args0_ (p.args0_) { @@ -48,7 +48,7 @@ namespace butl } inline process_path& process_path:: - operator= (process_path&& p) + operator= (process_path&& p) noexcept { if (this != &p) { @@ -124,6 +124,42 @@ namespace butl } #endif + // process::pipe + // + inline process::pipe:: + pipe (pipe&& p) noexcept + : in (p.in), out (p.out), own_in (p.own_in), own_out (p.own_out) + { + p.in = p.out = -1; + } + + inline process::pipe& process::pipe:: + operator= (pipe&& p) noexcept + { + if (this != &p) + { + int d (own_in ? in : own_out ? out : -1); + if (d != -1) + fdclose (d); + + in = p.in; + out = p.out; + own_in = p.own_in; + own_out = p.own_out; + + p.in = p.out = -1; + } + return *this; + } + + inline process::pipe:: + ~pipe () + { + int d (own_in ? in : own_out ? 
out : -1); + if (d != -1) + fdclose (d); + } + // process // #ifndef _WIN32 @@ -178,21 +214,37 @@ namespace butl inline process:: process (optional<process_exit> e) - : handle (0), - exit (std::move (e)), - out_fd (-1), - in_ofd (-1), - in_efd (-1) + : handle (0), exit (std::move (e)) + { + } + + inline process:: + process (const process_path& pp, const char* const* args, + int in, int out, int err, + const char* cwd, + const char* const* envvars) + : process (pp, args, + pipe (in, -1), pipe (-1, out), pipe (-1, err), + cwd, + envvars) + { + } + + inline process:: + process (const char** args, + int in, int out, int err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args, in, out, err, cwd, envvars) { } inline process:: - process (const process_path& pp, const char* args[], + process (const process_path& pp, const std::vector<const char*>& args, int in, int out, int err, const char* cwd, const char* const* envvars) - : process (pp, - args, + : process (pp, args.data (), pipe (in, -1), pipe (-1, out), pipe (-1, err), cwd, envvars) @@ -200,32 +252,166 @@ namespace butl } inline process:: - process (const char* args[], + process (std::vector<const char*>& args, int in, int out, int err, const char* cwd, const char* const* envvars) - : process (path_search (args[0]), args, in, out, err, cwd, envvars) {} + : process (path_search (args[0]), args.data (), + in, out, err, + cwd, + envvars) + { + } inline process:: - process (const process_path& pp, const char* args[], + process (const char** args, + pipe in, pipe out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args, + std::move (in), std::move (out), std::move (err), + cwd, envvars) + { + } + + inline process:: + process (const char** args, + int in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args, + pipe (in, -1), pipe (-1, out), std::move (err), + cwd, envvars) + { + } + 
+ inline process:: + process (const process_path& pp, const char* const* args, + int in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (pp, args, + pipe (in, -1), pipe (-1, out), std::move (err), + cwd, + envvars) + { + } + + inline process:: + process (std::vector<const char*>& args, + pipe in, pipe out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args.data (), + std::move (in), std::move (out), std::move (err), + cwd, + envvars) + { + } + + inline process:: + process (std::vector<const char*>& args, + int in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args.data (), + pipe (in, -1), pipe (-1, out), std::move (err), + cwd, + envvars) + { + } + + inline process:: + process (const process_path& pp, const std::vector<const char*>& args, + pipe in, pipe out, pipe err, + const char* cwd, + const char* const* envvars) + : process (pp, args.data (), + std::move (in), std::move (out), std::move (err), + cwd, + envvars) + { + } + + inline process:: + process (const process_path& pp, const std::vector<const char*>& args, + int in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (pp, args.data (), + pipe (in, -1), pipe (-1, out), std::move (err), + cwd, + envvars) + { + } + + inline process:: + process (const process_path& pp, const char* const* args, + process& in, pipe out, pipe err, + const char* cwd, + const char* const* envvars) + : process (pp, args, + [&in] () + { + assert (in.in_ofd != nullfd); // Should be a pipe. 
+ return process::pipe (std::move (in.in_ofd), -1); + } (), + std::move (out), std::move (err), + cwd, envvars) + { + } + + inline process:: + process (const process_path& pp, const char* const* args, process& in, int out, int err, const char* cwd, const char* const* envvars) - : process (pp, args, in.in_ofd.get (), out, err, cwd, envvars) + : process (pp, args, in, pipe (-1, out), pipe (-1, err), cwd, envvars) { - assert (in.in_ofd.get () != -1); // Should be a pipe. - in.in_ofd.reset (); // Close it on our side. } inline process:: - process (const char* args[], + process (const char** args, process& in, int out, int err, const char* cwd, const char* const* envvars) - : process (path_search (args[0]), args, in, out, err, cwd, envvars) {} + : process (path_search (args[0]), args, in, out, err, cwd, envvars) + { + } + + inline process:: + process (const char** args, + process& in, pipe out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args, + in, std::move (out), std::move (err), + cwd, envvars) + { + } + + inline process:: + process (const char** args, + process& in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (path_search (args[0]), args, + in, pipe (-1, out), std::move (err), + cwd, envvars) + { + } + + inline process:: + process (const process_path& pp, const char* const* args, + process& in, int out, pipe err, + const char* cwd, + const char* const* envvars) + : process (pp, args, in, pipe (-1, out), std::move (err), cwd, envvars) + { + } inline process:: - process (process&& p) + process (process&& p) noexcept : handle (p.handle), exit (std::move (p.exit)), out_fd (std::move (p.out_fd)), @@ -236,7 +422,7 @@ namespace butl } inline process& process:: - operator= (process&& p) + operator= (process&& p) noexcept (false) { if (this != &p) { @@ -273,13 +459,13 @@ namespace butl // process_env // inline process_env:: - process_env (process_env&& e) + process_env (process_env&& e) 
noexcept { *this = std::move (e); } inline process_env& process_env:: - operator= (process_env&& e) + operator= (process_env&& e) noexcept { if (this != &e) { diff --git a/libbutl/prompt.cxx b/libbutl/prompt.cxx index 2e42dd5..154522c 100644 --- a/libbutl/prompt.cxx +++ b/libbutl/prompt.cxx @@ -44,8 +44,8 @@ namespace butl if (!e) a = def; } - } while (a != "y" && a != "n"); + } while (a != "y" && a != "Y" && a != "n" && a != "N"); - return a == "y"; + return a == "y" || a == "Y"; } } diff --git a/libbutl/prompt.hxx b/libbutl/prompt.hxx index 90b8dbf..2a07708 100644 --- a/libbutl/prompt.hxx +++ b/libbutl/prompt.hxx @@ -15,6 +15,10 @@ namespace butl // Write the prompt to diag_stream. Throw ios_base::failure if no answer // could be extracted from stdin (for example, because it was closed). // + // Note that the implementation accepts both lower and upper case y/n as + // valid answers (apparently the capitalized default answer confuses some + // users into answering with capital letters). 
+ // LIBBUTL_SYMEXPORT bool yn_prompt (const std::string&, char def = '\0'); } diff --git a/libbutl/semantic-version.cxx b/libbutl/semantic-version.cxx index 3be382f..9e0a1ef 100644 --- a/libbutl/semantic-version.cxx +++ b/libbutl/semantic-version.cxx @@ -3,6 +3,7 @@ #include <libbutl/semantic-version.hxx> +#include <cassert> #include <cstring> // strchr() #include <utility> // move() #include <stdexcept> // invalid_argument @@ -52,9 +53,9 @@ namespace butl } semantic_version:: - semantic_version (const std::string& s, size_t p, const char* bs) + semantic_version (const std::string& s, size_t p, flags fs, const char* bs) { - semantic_version_result r (parse_semantic_version_impl (s, p, bs)); + semantic_version_result r (parse_semantic_version_impl (s, p, fs, bs)); if (r.version) *this = move (*r.version); @@ -70,8 +71,27 @@ namespace butl uint64_t min = 0, uint64_t max = uint64_t (~0)); semantic_version_result - parse_semantic_version_impl (const string& s, size_t p, const char* bs) + parse_semantic_version_impl (const string& s, size_t p, + semantic_version::flags fs, + const char* bs) { + bool allow_build ((fs & semantic_version::allow_build) != 0); + + // If build separators are specified, then the allow_build flag must be + // specified explicitly. + // + assert (bs == nullptr || allow_build); + + if (allow_build && bs == nullptr) + bs = "-+"; + + bool require_minor ((fs & semantic_version::allow_omit_minor) == 0); + + if (!require_minor) + fs |= semantic_version::allow_omit_patch; + + bool require_patch ((fs & semantic_version::allow_omit_patch) == 0); + auto bail = [] (string m) { return semantic_version_result {nullopt, move (m)}; @@ -82,31 +102,47 @@ namespace butl if (!parse_uint64 (s, p, r.major)) return bail ("invalid major version"); - if (s[p] != '.') - return bail ("'.' expected after major version"); - - if (!parse_uint64 (s, ++p, r.minor)) - return bail ("invalid minor version"); - - if (s[p] == '.') + if (s[p] == '.') // Is there a minor version? 
{ - // Treat it as build if failed to parse as patch (e.g., 1.2.alpha). + // Try to parse the minor version and treat it as build on failure + // (e.g., 1.alpha). // - if (!parse_uint64 (s, ++p, r.patch)) + if (parse_uint64 (s, ++p, r.minor)) { - //if (require_patch) - // return bail ("invalid patch version"); + if (s[p] == '.') // Is there a patch version? + { + // Try to parse the patch version and treat it as build on failure + // (e.g., 1.2.alpha). + // + if (parse_uint64 (s, ++p, r.patch)) + ; + else + { + if (require_patch) + return bail ("invalid patch version"); + + --p; + // Fall through. + } + } + else if (require_patch) + return bail ("'.' expected after minor version"); + } + else + { + if (require_minor) + return bail ("invalid minor version"); --p; // Fall through. } } - //else if (require_patch) - // return bail ("'.' expected after minor version"); + else if (require_minor) + return bail ("'.' expected after major version"); if (char c = s[p]) { - if (bs == nullptr || (*bs != '\0' && strchr (bs, c) == nullptr)) + if (!allow_build || (*bs != '\0' && strchr (bs, c) == nullptr)) return bail ("junk after version"); r.build.assign (s, p, string::npos); diff --git a/libbutl/semantic-version.hxx b/libbutl/semantic-version.hxx index 16f3d56..4eba38a 100644 --- a/libbutl/semantic-version.hxx +++ b/libbutl/semantic-version.hxx @@ -27,15 +27,9 @@ namespace butl { // Semantic or semantic-like version. // - // <major>.<minor>[.<patch>][<build>] + // <major>[.<minor>[.<patch>]][<build>] // - // If the patch component is absent, then it defaults to 0. - // - // @@ Currently there is no way to enforce the three-component version. - // Supporting this will require changing allow_build to a bit-wise - // flag. See parse_semantic_version_impl() for some sketched code. - // We may also want to pass these flags to string() to not print - // 0 patch. + // If the minor and patch components are absent, then they default to 0. 
// // By default, a version containing the <build> component is considered // valid only if separated from <patch> with '-' (semver pre-release) or '+' @@ -63,23 +57,36 @@ namespace butl std::uint64_t patch, std::string build = ""); - // The build_separators argument can be NULL (no build component allowed), - // empty (any build component allowed), or a string of characters to allow - // as separators. When allow_build is true build_separators defaults to - // "-+". + // If the allow_build flag is specified, then build_separators argument + // can be a string of characters to allow as separators, empty (any build + // component allowed), or NULL (defaults to "-+"). // - explicit - semantic_version (const std::string&, bool allow_build = true); + // Note: allow_omit_minor implies allow_omit_patch. + // + enum flags + { + none = 0, // Exact <major>.<minor>.<patch> form. + allow_omit_minor = 0x01, // Allow <major> form. + allow_omit_patch = 0x02, // Allow <major>.<minor> form. + allow_build = 0x04, // Allow <major>.<minor>.<patch>-<build> form. + }; - semantic_version (const std::string&, const char* build_separators); + explicit + semantic_version (const std::string&, + flags = none, + const char* build_separators = nullptr); // As above but parse from the specified position until the end of the // string. // - semantic_version (const std::string&, std::size_t pos, bool = true); - - semantic_version (const std::string&, std::size_t pos, const char*); + semantic_version (const std::string&, + std::size_t pos, + flags = none, + const char* = nullptr); + // @@ We may also want to pass allow_* flags not to print 0 minor/patch or + // maybe invent ignore_* flags. + // std::string string (bool ignore_build = false) const; @@ -116,16 +123,15 @@ namespace butl // Try to parse a string as a semantic version returning nullopt if invalid. 
// optional<semantic_version> - parse_semantic_version (const std::string&, bool allow_build = true); + parse_semantic_version (const std::string&, + semantic_version::flags = semantic_version::none, + const char* build_separators = nullptr); optional<semantic_version> - parse_semantic_version (const std::string&, const char* build_separators); - - optional<semantic_version> - parse_semantic_version (const std::string&, std::size_t pos, bool = true); - - optional<semantic_version> - parse_semantic_version (const std::string&, std::size_t pos, const char*); + parse_semantic_version (const std::string&, + std::size_t pos, + semantic_version::flags = semantic_version::none, + const char* = nullptr); // NOTE: comparison operators take the build component into account. // @@ -170,6 +176,18 @@ namespace butl { return o << x.string (); } + + semantic_version::flags + operator& (semantic_version::flags, semantic_version::flags); + + semantic_version::flags + operator| (semantic_version::flags, semantic_version::flags); + + semantic_version::flags + operator&= (semantic_version::flags&, semantic_version::flags); + + semantic_version::flags + operator|= (semantic_version::flags&, semantic_version::flags); } #include <libbutl/semantic-version.ixx> diff --git a/libbutl/semantic-version.ixx b/libbutl/semantic-version.ixx index 6bf7584..8de1554 100644 --- a/libbutl/semantic-version.ixx +++ b/libbutl/semantic-version.ixx @@ -15,23 +15,9 @@ namespace butl { } - // Note: the order is important to MinGW GCC (DLL linkage). - // inline semantic_version:: - semantic_version (const std::string& s, std::size_t p, bool ab) - : semantic_version (s, p, ab ? "-+" : nullptr) - { - } - - inline semantic_version:: - semantic_version (const std::string& s, const char* bs) - : semantic_version (s, 0, bs) - { - } - - inline semantic_version:: - semantic_version (const std::string& s, bool ab) - : semantic_version (s, ab ? 
"-+" : nullptr) + semantic_version (const std::string& s, flags fs, const char* bs) + : semantic_version (s, 0, fs, bs) { } @@ -42,29 +28,53 @@ namespace butl }; LIBBUTL_SYMEXPORT semantic_version_result - parse_semantic_version_impl (const std::string&, std::size_t, const char*); + parse_semantic_version_impl (const std::string&, + std::size_t, + semantic_version::flags, + const char*); inline optional<semantic_version> - parse_semantic_version (const std::string& s, bool ab) + parse_semantic_version (const std::string& s, + semantic_version::flags fs, + const char* bs) { - return parse_semantic_version (s, ab ? "-+" : nullptr); + return parse_semantic_version_impl (s, 0, fs, bs).version; } inline optional<semantic_version> - parse_semantic_version (const std::string& s, const char* bs) + parse_semantic_version (const std::string& s, + std::size_t p, + semantic_version::flags fs, + const char* bs) { - return parse_semantic_version_impl (s, 0, bs).version; + return parse_semantic_version_impl (s, p, fs, bs).version; } - inline optional<semantic_version> - parse_semantic_version (const std::string& s, std::size_t p, bool ab) + inline semantic_version::flags + operator&= (semantic_version::flags& x, semantic_version::flags y) { - return parse_semantic_version (s, p, ab ? 
"-+" : nullptr); + return x = static_cast<semantic_version::flags> ( + static_cast<std::uint16_t> (x) & + static_cast<std::uint16_t> (y)); } - inline optional<semantic_version> - parse_semantic_version (const std::string& s, std::size_t p, const char* bs) + inline semantic_version::flags + operator|= (semantic_version::flags& x, semantic_version::flags y) + { + return x = static_cast<semantic_version::flags> ( + static_cast<std::uint16_t> (x) | + static_cast<std::uint16_t> (y)); + } + + inline semantic_version::flags + operator& (semantic_version::flags x, semantic_version::flags y) + { + return x &= y; + } + + inline semantic_version::flags + operator| (semantic_version::flags x, semantic_version::flags y) { - return parse_semantic_version_impl (s, p, bs).version; + return x |= y; } } diff --git a/libbutl/sha1.c b/libbutl/sha1.c index 37e862e..98fce5e 100644 --- a/libbutl/sha1.c +++ b/libbutl/sha1.c @@ -121,11 +121,17 @@ main () #include <string.h> +/* Assume if bzero/bcopy are defined as macros, then they do what we need. 
*/ + /* void bzero(void *s, size_t n); */ -#define bzero(s, n) memset((s), 0, (n)) +#ifndef bzero +# define bzero(s, n) memset((s), 0, (n)) +#endif /* void bcopy(const void *s1, void *s2, size_t n); */ -#define bcopy(s1, s2, n) memmove((s2), (s1), (n)) +#ifndef bcopy +# define bcopy(s1, s2, n) memmove((s2), (s1), (n)) +#endif /* The rest is the unmodified (except for adjusting function declarations and adding a few explicit casts to make compilable in C++ without warnings) diff --git a/libbutl/small-forward-list.hxx b/libbutl/small-forward-list.hxx index 1278dc2..8d1cf68 100644 --- a/libbutl/small-forward-list.hxx +++ b/libbutl/small-forward-list.hxx @@ -5,6 +5,7 @@ #include <cstddef> // size_t #include <utility> // move() +#include <type_traits> // is_nothrow_move_constructible #include <forward_list> #include <libbutl/small-allocator.hxx> @@ -101,14 +102,20 @@ namespace butl return *this; } + // See small_vector for the move-constructor/assignment noexcept + // expressions reasoning. + // small_forward_list (small_forward_list&& v) +#if !defined(_MSC_VER) || _MSC_VER > 1900 + noexcept (std::is_nothrow_move_constructible<T>::value) +#endif : base_type (allocator_type (this)) { *this = std::move (v); // Delegate to operator=(&&). } small_forward_list& - operator= (small_forward_list&& v) + operator= (small_forward_list&& v) noexcept (false) { // VC14's implementation of operator=(&&) swaps pointers without regard // for allocator (fixed in 15).
diff --git a/libbutl/small-list.hxx b/libbutl/small-list.hxx index aaeef22..7cb51fd 100644 --- a/libbutl/small-list.hxx +++ b/libbutl/small-list.hxx @@ -4,8 +4,9 @@ #pragma once #include <list> -#include <cstddef> // size_t -#include <utility> // move() +#include <cstddef> // size_t +#include <utility> // move() +#include <type_traits> // is_nothrow_move_constructible #include <libbutl/small-allocator.hxx> @@ -103,14 +104,20 @@ namespace butl return *this; } + // See small_vector for the move-constructor/assignment noexcept + // expressions reasoning. + // small_list (small_list&& v) +#if !defined(__GLIBCXX__) && (!defined(_MSC_VER) || _MSC_VER > 1900) + noexcept (std::is_nothrow_move_constructible<T>::value) +#endif : base_type (allocator_type (this)) { *this = std::move (v); // Delegate to operator=(&&). } small_list& - operator= (small_list&& v) + operator= (small_list&& v) noexcept (false) { // libstdc++'s implementation prior to GCC 6 is broken (calls swap()). // Since there is no easy way to determine this library's version, for @@ -122,7 +129,7 @@ namespace butl #if defined(__GLIBCXX__) || (defined(_MSC_VER) && _MSC_VER <= 1900) this->clear (); for (T& x: v) - this->push_back (std::move (x)); + this->push_back (std::move (x)); // Note: can throw bad_alloc. v.clear (); #else // Note: propagate_on_container_move_assignment = false diff --git a/libbutl/small-vector.hxx b/libbutl/small-vector.hxx index f0594b1..44a3ef5 100644 --- a/libbutl/small-vector.hxx +++ b/libbutl/small-vector.hxx @@ -4,8 +4,9 @@ #pragma once #include <vector> -#include <cstddef> // size_t -#include <utility> // move() +#include <cstddef> // size_t +#include <utility> // move() +#include <type_traits> // is_nothrow_move_constructible #include <libbutl/small-allocator.hxx> @@ -24,6 +25,9 @@ namespace butl // // - swap() is deleted (see notes below). // + // - In contrast to std::vector, the references, pointers, and iterators + // referring to elements are invalidated after moving from it.
+ // template <typename T, std::size_t N> class small_vector: private small_allocator_buffer<T, N>, public std::vector<T, small_allocator<T, N>> @@ -104,7 +108,25 @@ namespace butl return *this; } + // Note that while the move constructor is implemented via the move + // assignment it may not throw if the value type is no-throw move + // constructible. + // + // Specifically, if v.size() > N then allocators evaluate as equal and the + // buffer ownership is transferred. Otherwise, the allocators do not + // evaluate as equal and the individual elements are move-constructed in + // the preallocated buffer. + // + // Also note that this constructor ends up calling + // base_type::operator=(base_type&&) whose noexcept expression evaluates + // to false (propagate_on_container_move_assignment and is_always_equal + // are false for small_allocator; see std::vector documentation for + // details). We, however, assume that the noexcept expression we use here + // is strict enough for all "sane" std::vector implementations since + // small_allocator never throws directly. + // small_vector (small_vector&& v) + noexcept (std::is_nothrow_move_constructible<T>::value) : base_type (allocator_type (this)) { if (v.size () <= N) @@ -118,8 +140,14 @@ namespace butl v.clear (); } + // Note that when size() <= N and v.size() > N, then allocators of this + // and other containers do not evaluate as equal. Thus, the memory for the + // new elements is allocated on the heap and so std::bad_alloc can be + // thrown. @@ TODO: maybe we could re-implement this case in terms of + // swap()? + // small_vector& - operator= (small_vector&& v) + operator= (small_vector&& v) noexcept (false) { // VC's implementation of operator=(&&) (both 14 and 15) frees the // memory and then reallocated with capacity equal to v.size(). 
This is diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx index aea1338..af5c1b3 100644 --- a/libbutl/string-parser.cxx +++ b/libbutl/string-parser.cxx @@ -14,7 +14,7 @@ namespace butl inline static bool space (char c) noexcept { - return c == ' ' || c == '\t'; + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } vector<pair<string, size_t>> diff --git a/libbutl/target-triplet.cxx b/libbutl/target-triplet.cxx index 209f75e..e28f119 100644 --- a/libbutl/target-triplet.cxx +++ b/libbutl/target-triplet.cxx @@ -88,6 +88,13 @@ namespace butl if (system.front () == '-' || system.back () == '-') bad ("invalid os/kernel/abi"); + // Canonicalize SYSTEM. + // + if (system == "linux") + system = "linux-gnu"; // Per config.sub. + else if (system == "windows-gnu" && vendor == "w64") // Clang's innovation. + system = "mingw32"; + // Extract VERSION for some recognized systems. // string::size_type v (0); diff --git a/libbutl/target-triplet.hxx b/libbutl/target-triplet.hxx index 45db457..bfb2c00 100644 --- a/libbutl/target-triplet.hxx +++ b/libbutl/target-triplet.hxx @@ -75,8 +75,10 @@ namespace butl // arm-softfloat-linux-gnu arm softfloat linux-gnu // i686-pc-mingw32 i686 mingw32 // i686-w64-mingw32 i686 w64 mingw32 + // i686-w64-windows-gnu i686 w64 mingw32 // i686-lfs-linux-gnu i686 lfs linux-gnu // x86_64-unknown-linux-gnu x86_64 linux-gnu + // x86_64-redhat-linux x86_64 redhat linux-gnu // x86_64-linux-gnux32 x86_64 linux-gnux32 // x86_64-microsoft-win32-msvc14.0 x86_64 microsoft win32-msvc 14.0 // x86_64-pc-windows-msvc x86_64 windows-msvc @@ -99,6 +101,8 @@ namespace butl // windows *-*-win32-* | *-*-windows-* | *-*-mingw32 // ios *-apple-ios* // + // NOTE: see also os_release if adding anything new here. + // // References: // // 1. 
The libtool repository contains the PLATFORM file that lists many known diff --git a/libbutl/utility.cxx b/libbutl/utility.cxx index 78abbd8..b03a8f8 100644 --- a/libbutl/utility.cxx +++ b/libbutl/utility.cxx @@ -171,13 +171,42 @@ namespace butl for (; i != n && ws (l[i]); ++i) ; for (; n != i && ws (l[n - 1]); --n) ; - if (i != 0) + if (n != l.size ()) l.resize (n); + if (i != 0) l.erase (0, i); + + return l; + } + + string& + trim_left (string& l) + { + auto ws = [] (char c ) { - string s (l, i, n - i); - l.swap (s); - } - else if (n != l.size ()) - l.resize (n); + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + }; + + size_t i (0), n (l.size ()); + + for (; i != n && ws (l[i]); ++i) ; + + if (i != 0) l.erase (0, i); + + return l; + } + + string& + trim_right (string& l) + { + auto ws = [] (char c ) + { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + }; + + size_t i (0), n (l.size ()); + + for (; n != i && ws (l[n - 1]); --n) ; + + if (n != l.size ()) l.resize (n); return l; } diff --git a/libbutl/utility.hxx b/libbutl/utility.hxx index 49b61b3..779a0aa 100644 --- a/libbutl/utility.hxx +++ b/libbutl/utility.hxx @@ -132,11 +132,13 @@ namespace butl bool digit (char); bool alnum (char); bool xdigit (char); + bool wspace (char); bool alpha (wchar_t); bool digit (wchar_t); bool alnum (wchar_t); bool xdigit (wchar_t); + bool wspace (wchar_t); // Basic string utilities. // @@ -146,13 +148,31 @@ namespace butl LIBBUTL_SYMEXPORT std::string& trim (std::string&); + LIBBUTL_SYMEXPORT std::string& + trim_left (std::string&); + + LIBBUTL_SYMEXPORT std::string& + trim_right (std::string&); + inline std::string trim (std::string&& s) { return move (trim (s)); } - // Find the beginning and end poistions of the next word. 
Return the size + inline std::string + trim_left (std::string&& s) + { + return move (trim_left (s)); + } + + inline std::string + trim_right (std::string&& s) + { + return move (trim_right (s)); + } + + // Find the beginning and end positions of the next word. Return the size // of the word or 0 and set b = e = n if there are no more words. For // example: // @@ -170,6 +190,24 @@ namespace butl // // The second version examines up to the n'th character in the string. // + // The third version, instead of skipping consecutive delimiters, treats + // them as separating empty words. The additional m variable contains an + // unspecified internal state and should be initialized to 0. Note that in + // this case you should use the (b == n) condition to detect the end. Note + // also that a leading delimiter is considered as separating an empty word + // from the rest and the trailing delimiter is considered as separating the + // rest from an empty word. For example, this is how to parse lines while + // observing blanks: + // + // for (size_t b (0), e (0), m (0), n (s.size ()); + // next_word (s, n, b, e, m, '\n', '\r'), b != n; ) + // { + // string l (s, b, e - b); + // } + // + // For string "\na\n" this code will observe the {"", "a", ""} words. And + // for just "\n" it will observe the {"", ""} words. + // std::size_t next_word (const std::string&, std::size_t& b, std::size_t& e, char d1 = ' ', char d2 = '\0'); @@ -178,6 +216,11 @@ namespace butl next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e, char d1 = ' ', char d2 = '\0'); + std::size_t + next_word (const std::string&, std::size_t n, + std::size_t& b, std::size_t& e, std::size_t& m, + char d1 = ' ', char d2 = '\0'); + // Sanitize a string to only contain characters valid in an identifier // (ASCII alphanumeric plus `_`) replacing all others with `_`. // @@ -294,8 +337,8 @@ namespace butl // Move-to-empty-only type. 
// - auto_thread_env (auto_thread_env&&); - auto_thread_env& operator= (auto_thread_env&&); + auto_thread_env (auto_thread_env&&) noexcept; + auto_thread_env& operator= (auto_thread_env&&) noexcept; auto_thread_env (const auto_thread_env&) = delete; auto_thread_env& operator= (const auto_thread_env&) = delete; diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx index 4180ad7..fda1ce5 100644 --- a/libbutl/utility.ixx +++ b/libbutl/utility.ixx @@ -143,6 +143,12 @@ namespace butl } inline bool + wspace (char c) + { + return std::isspace (c); + } + + inline bool alpha (wchar_t c) { return std::iswalpha (c); @@ -166,6 +172,12 @@ namespace butl return std::iswxdigit (c); } + inline bool + wspace (wchar_t c) + { + return std::iswspace (c); + } + inline std::size_t next_word (const std::string& s, std::size_t& b, std::size_t& e, char d1, char d2) @@ -198,6 +210,66 @@ namespace butl return e - b; } + inline std::size_t + next_word (const std::string& s, + std::size_t n, std::size_t& b, std::size_t& e, std::size_t& m, + char d1, char d2) + { + // An empty word will necessarily be represented as b and e being the + // position of a delimiter. Consider these corner cases (in all three we + // should produce two words): + // + // \n + // a\n + // \na + // + // It feels sensible to represent an empty word as the position of the + // trailing delimiter except if it is the last character (the first two + // cases). Thus the additional m state, which, if 0 or 1 indicates the + // number of delimiters to skip before parsing the next word and 2 if + // this is a trailing delimiter for which we need to fake an empty word + // with the leading delimiter. + + if (b != e) + b = e; + + if (m > 1) + { + --m; + return 0; + } + + // Skip the leading delimiter, if any. + // + b += m; + + if (b == n) + { + e = n; + return 0; + } + + // Find first trailing delimiter. 
+ // + m = 0; + for (e = b; e != n; ++e) + { + if (s[e] == d1 || s[e] == d2) + { + m = 1; + + // Handle the special delimiter as the last character case. + // + if (e + 1 == n) + ++m; + + break; + } + } + + return e - b; + } + inline std::string& sanitize_identifier (std::string& s) { @@ -359,14 +431,14 @@ namespace butl } inline auto_thread_env:: - auto_thread_env (auto_thread_env&& x) + auto_thread_env (auto_thread_env&& x) noexcept : prev_env (std::move (x.prev_env)) { x.prev_env = nullopt; } inline auto_thread_env& auto_thread_env:: - operator= (auto_thread_env&& x) + operator= (auto_thread_env&& x) noexcept { if (this != &x) { diff --git a/libbutl/uuid.cxx b/libbutl/uuid.cxx index 377afb7..2132808 100644 --- a/libbutl/uuid.cxx +++ b/libbutl/uuid.cxx @@ -5,7 +5,7 @@ #include <errno.h> // ENOTSUP -#include <cstdio> // sprintf() scanf() +#include <cstdio> // snprintf() sscanf() #include <cstring> // strlen() #include <stdexcept> #include <system_error> @@ -19,16 +19,17 @@ namespace butl { array<char, 37> r; - sprintf (r.data (), - (upper - ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X" - : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"), - time_low, - time_mid, - time_hiv, - clock_seq_hir, - clock_seq_low, - node[0], node[1], node[2], node[3], node[4], node[5]); + snprintf (r.data (), + 37, + (upper + ? "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X" + : "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"), + time_low, + time_mid, + time_hiv, + clock_seq_hir, + clock_seq_low, + node[0], node[1], node[2], node[3], node[4], node[5]); return r; } diff --git a/libbutl/uuid.hxx b/libbutl/uuid.hxx index 2361640..862f02d 100644 --- a/libbutl/uuid.hxx +++ b/libbutl/uuid.hxx @@ -48,12 +48,12 @@ namespace butl { // Normally not accessed directly (see RFC4122 Section 4.1.2). 
// - std::uint32_t time_low = 0; - std::uint16_t time_mid = 0; - std::uint16_t time_hiv = 0; // hi_and_version - std::uint8_t clock_seq_hir = 0; // hi_and_reserved - std::uint8_t clock_seq_low = 0; - std::uint8_t node[6] = {0, 0, 0, 0, 0, 0}; + std::uint32_t time_low = 0; + std::uint16_t time_mid = 0; + std::uint16_t time_hiv = 0; // hi_and_version + std::uint8_t clock_seq_hir = 0; // hi_and_reserved + std::uint8_t clock_seq_low = 0; + std::uint8_t node[6] = {0, 0, 0, 0, 0, 0}; // System UUID generator. See the uuid_generator interface for details. // @@ -158,10 +158,10 @@ namespace butl void swap (uuid&); - uuid (uuid&&); + uuid (uuid&&) noexcept; uuid (const uuid&) = default; - uuid& operator= (uuid&&); + uuid& operator= (uuid&&) noexcept; uuid& operator= (const uuid&) = default; }; @@ -183,7 +183,7 @@ namespace butl ~uuid_generator () = default; // Generate a UUID. If strong is true (default), generate a strongly- - // unique UUID. Throw std::runtime_error to report errors, including if + // unique UUID. Throw std::system_error to report errors, including if // strong uniqueness cannot be guaranteed. // // A weak UUID is not guaranteed to be unique, neither universally nor @@ -207,7 +207,7 @@ namespace butl // Optional explicit initialization and termination. Note that it is not // thread-safe and must only be performed once (normally from main()) // before/after any calls to generate(), respectively. Both functions may - // throw std::runtime_error to report errors. + // throw std::system_error to report errors.
// static void initialize (); diff --git a/libbutl/uuid.ixx b/libbutl/uuid.ixx index 6744af7..6115be1 100644 --- a/libbutl/uuid.ixx +++ b/libbutl/uuid.ixx @@ -39,14 +39,14 @@ namespace butl } inline uuid:: - uuid (uuid&& u) + uuid (uuid&& u) noexcept : uuid () // nil { swap (u); } inline uuid& uuid:: - operator= (uuid&& u) + operator= (uuid&& u) noexcept { if (this != &u) { @@ -1,6 +1,6 @@ : 1 name: libbutl -version: 0.15.0-a.0.z +version: 0.17.0-a.0.z project: build2 summary: build2 utility library license: MIT AND BSD-3-Clause AND BSD-2-Clause ; MIT except for files from the FreeBSD, LZ4, and mingw-std-threads projects. @@ -12,7 +12,7 @@ doc-url: https://build2.org/doc.xhtml src-url: https://git.build2.org/cgit/libbutl/tree/ email: users@build2.org build-warning-email: builds@build2.org -builds: host +builds: all : &host requires: c++14 -depends: * build2 >= 0.14.0- -depends: * bpkg >= 0.14.0- +depends: * build2 >= 0.16.0- +depends: * bpkg >= 0.16.0- diff --git a/tests/b-info/driver.cxx b/tests/b-info/driver.cxx index 6832aaa..5691221 100644 --- a/tests/b-info/driver.cxx +++ b/tests/b-info/driver.cxx @@ -49,13 +49,14 @@ try cout.exceptions (ios::failbit | ios::badbit); - b_project_info pi (b_info (project, - true /* ext_mods */, - 1 /* verb */, - {} /* cmd_callback */, - b, - {} /* search_fallback */, - {"--no-default-options"})); + b_project_info pi ( + b_info (project, + b_info_flags::ext_mods | b_info_flags::subprojects, + 1 /* verb */, + {} /* cmd_callback */, + b, + {} /* search_fallback */, + {"--no-default-options"})); cout << "project: " << pi.project << endl << "version: " << pi.version << endl diff --git a/tests/base64/driver.cxx b/tests/base64/driver.cxx index a37a238..32d5236 100644 --- a/tests/base64/driver.cxx +++ b/tests/base64/driver.cxx @@ -13,6 +13,8 @@ using namespace std; using namespace butl; +// Test base64 encoding and decoding. 
+// static bool encode (const string& i, const string& o) { @@ -68,9 +70,44 @@ encode (const string& i, const string& o) return r; } +// Test base64url encoding only (decoding not yet implemented). +// +static bool +encode_url (const string& i, const string& o) +{ + istringstream is (i); + string s (base64url_encode (is)); + bool r (s == o && is.eof ()); + + if (r) + { + is.seekg (0); + + // VC15 seekg() doesn't clear eofbit. + // +#if defined(_MSC_VER) && _MSC_VER < 1920 + is.clear (); +#endif + + assert (!is.eof ()); + + ostringstream os; + base64url_encode (os, is); + r = os.str () == o && is.eof (); + } + + if (r) + r = base64url_encode (vector<char> (i.begin (), i.end ())) == o; + + return r; +} + + int main () { + // base64 + // assert (encode ("", "")); assert (encode ("B", "Qg==")); assert (encode ("BX", "Qlg=")); @@ -80,6 +117,19 @@ main () assert (encode ("BXzS@#", "Qlh6U0Aj")); assert (encode ("BXzS@#/", "Qlh6U0AjLw==")); + // base64url: no padding in output. + // + assert (encode_url ("", "")); + assert (encode_url ("B", "Qg")); + assert (encode_url ("BX", "Qlg")); + assert (encode_url ("BXz", "Qlh6")); + assert (encode_url ("BXzS", "Qlh6Uw")); + assert (encode_url ("BXzS@", "Qlh6U0A")); + assert (encode_url ("BXzS@#", "Qlh6U0Aj")); + assert (encode_url ("BXzS@#/", "Qlh6U0AjLw")); + + // Multi-line input. + // const char* s ( "class fdstream_base\n" "{\n" @@ -91,10 +141,29 @@ main () " fdbuf buf_;\n" "};\n"); + // base64 + // const char* r ( "Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm\n" "YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK\n" "ICBmZGJ1ZiBidWZfOwp9Owo="); assert (encode (s, r)); + + // base64url: no newlines or padding in output. 
+ // + r = +"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm" +"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK" +"ICBmZGJ1ZiBidWZfOwp9Owo"; + + assert (encode_url (s, r)); + + // Test 63rd and 64th characters: `>` maps to `+` or `-`; `?` maps to `/` or + // `_`. + // + assert (encode (">>>>>>", "Pj4+Pj4+")); + assert (encode_url (">>>>>>", "Pj4-Pj4-")); + assert (encode ("??????", "Pz8/Pz8/")); + assert (encode_url ("??????", "Pz8_Pz8_")); } diff --git a/tests/build/root.build b/tests/build/root.build index a1e935c..515e1c9 100644 --- a/tests/build/root.build +++ b/tests/build/root.build @@ -14,9 +14,16 @@ if ($cxx.target.system == 'win32-msvc') if ($cxx.class == 'msvc') cxx.coptions += /wd4251 /wd4275 /wd4800 elif ($cxx.id == 'gcc') +{ cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \ -Wno-stringop-overread + if ($cxx.version.major >= 13) + cxx.coptions += -Wno-dangling-reference +} +elif ($cxx.id.type == 'clang' && $cxx.version.major >= 15) + cxx.coptions += -Wno-unqualified-std-cast-call + # Every exe{} in this subproject is by default a test. # exe{*}: test = true diff --git a/tests/builtin/driver.cxx b/tests/builtin/driver.cxx index 7a0193f..bdf3fa9 100644 --- a/tests/builtin/driver.cxx +++ b/tests/builtin/driver.cxx @@ -28,6 +28,13 @@ using namespace std; using namespace butl; +// Disable arguments globbing that may be enabled by default for MinGW runtime +// (see tests/wildcard/driver.cxx for details). 
+// +#ifdef __MINGW32__ +int _CRT_glob = 0; +#endif + inline ostream& operator<< (ostream& os, const path& p) { diff --git a/tests/builtin/find.testscript b/tests/builtin/find.testscript new file mode 100644 index 0000000..b09822c --- /dev/null +++ b/tests/builtin/find.testscript @@ -0,0 +1,276 @@ +# file : tests/builtin/find.testscript +# license : MIT; see accompanying LICENSE file + +posix = ($cxx.target.class != 'windows') + +test.arguments = "find" + +: no-paths +: +$* 2>"find: missing start path" == 1 + +: no-paths-primary +: +$* -name foo 2>"find: unknown option '-name'" == 1 + +: unknown-primary +: +$* . -foo 2>"find: unknown primary '-foo'" == 1 + + +: no-primary-value +: +$* . -name 2>"find: missing value for primary '-name'" == 1 + +: empty-primary-value +: +$* . -type '' 2>"find: empty value for primary '-type'" == 1 + +: invalid-type-primary +: +$* . -type foo 2>"find: invalid value 'foo' for primary '-type'" == 1 + +: invalid-mindepth-primary +: +$* . -mindepth 12a 2>"find: invalid value '12a' for primary '-mindepth'" == 1 + +: path-not-exists +: +{ + mkdir d; + $* x d >'d' 2>"find: 'x' doesn't exists" != 0 +} + +: path +: +{ + : relative + : + { + : no-cwd + : + { + mkdir a; + touch a/b; + + $* a >>/EOO + a + a/b + EOO + } + + : absolute-cwd + : + : When cross-testing we cannot guarantee that host absolute paths are + : recognized by the target process. + : + if ($test.target == $build.host) + { + test.options += -d $~/a; + mkdir a; + touch a/b; + + $* b >'b' + } + + : relative-cwd + : + if ($test.target == $build.host) + { + test.options += -d a; + mkdir a; + touch a/b; + + $* b >'b' + } + } + + : non-normalized + : + { + mkdir a; + touch a/b; + + # Note that the path specified on the command line is used unaltered. + # + s = ($posix ? 
'/' : '\'); + + $* ./a >>"EOO" + ./a + ./a$(s)b + EOO + } + + : absolute + : + { + mkdir a; + touch a/b; + + $* $~/a >>/"EOO" + $~/a + $~/a/b + EOO + } + + : non-existent + : + { + touch a b; + + $* a x b >>EOO 2>"find: 'x' doesn't exists" != 0 + a + b + EOO + } + + : non-directory + : + { + touch a b c; + + $* a b/ c >>EOO 2>"find: 'b' is not a directory" != 0 + a + c + EOO + } + + : trailing-slash + : + { + mkdir -p a/b; + + $* a >>/"EOO"; + a + a/b + EOO + + $* a/ >>"EOO" + a/ + a/b + EOO + } +} + +: name-primary +: +{ + : basic + : + { + mkdir a; + touch a/ab a/ba; + + $* . -name 'a*' >>/EOO; + ./a + ./a/ab + EOO + + $* . -name 'b*' >>/EOO; + ./a/ba + EOO + + $* a -name 'a*' >>/EOO + a + a/ab + EOO + } + + : empty + : + { + touch a; + + $* . -name '' + } +} + +: type-primary +: +{ + : regular + : + { + mkdir -p a/b; + touch a/b/c; + + $* a -type f >>/EOO + a/b/c + EOO + } + + : directory + : + { + mkdir -p a/b; + touch a/b/c; + + $* a -type d >>/EOO + a + a/b + EOO + } + + : symlink + : + if $posix + { + mkdir -p a/b; + touch a/b/c; + ln -s c a/b/d; + + $* a -type l >>/EOO + a/b/d + EOO + } +} + +: mindepth-primary +: +{ + mkdir -p a/b/c; + + $* a -mindepth 0 >>/EOO; + a + a/b + a/b/c + EOO + + $* a -mindepth 1 >>/EOO; + a/b + a/b/c + EOO + + $* a -mindepth 2 >>/EOO; + a/b/c + EOO + + $* a -mindepth 3 +} + +: maxdepth-primary +: +{ + mkdir -p a/b/c; + + $* a -maxdepth 0 >>/EOO; + a + EOO + + $* a -maxdepth 1 >>/EOO; + a + a/b + EOO + + $* a -maxdepth 2 >>/EOO; + a + a/b + a/b/c + EOO + + $* a -maxdepth 3 >>/EOO + a + a/b + a/b/c + EOO +} diff --git a/tests/curl/driver.cxx b/tests/curl/driver.cxx index 72faf52..856fde3 100644 --- a/tests/curl/driver.cxx +++ b/tests/curl/driver.cxx @@ -104,6 +104,26 @@ http () c.out.close (); assert (!c.wait ()); } + + // POST from stream without --fail. 
+ // + { + curl c (p, path ("-"), nullfd, 2, + curl::post, + curl::flags::no_fail, + u + "/bogus"); + + c.out << "bogus" << endl; + c.out.close (); + assert (c.wait ()); + } + + // POST empty data. + // + { + curl c (p, nullfd, 1, 2, curl::post, u + "/bogus", "--verbose"); + assert (!c.wait ()); + } } int diff --git a/tests/curl/testscript b/tests/curl/testscript index 3da2306..d2056cd 100644 --- a/tests/curl/testscript +++ b/tests/curl/testscript @@ -43,14 +43,22 @@ sudo /usr/sbin/in.tftpd \ : http : { - $* 'http' 2>>EOE + $* 'http' 2>>~%EOE% - curl -s -S --fail --location https://build2.org/bogus - curl: (22) The requested URL returned error: 404 Not Found + curl -sS --fail --location https://build2.org/bogus + %curl: \(22\) The requested URL returned error: 404( Not Found)?% - curl -s -S --fail --location https://build2.org + curl -sS --fail --location https://build2.org - curl -s -S --fail --location --data-binary @- https://build2.org/bogus - curl: (22) The requested URL returned error: 404 Not Found + curl -sS --fail --location --data-binary @- https://build2.org/bogus + %curl: \(22\) The requested URL returned error: 404( Not Found)?% + + curl -sS --location --data-binary @- https://build2.org/bogus + + curl -sS --fail --location --data-raw "" --verbose https://build2.org/bogus + %.* + %> POST /bogus HTTP.+% + %.* + %curl: \(22\) The requested URL returned error: 404( Not Found)?% EOE } diff --git a/tests/dir-iterator/driver.cxx b/tests/dir-iterator/driver.cxx index 4a2ff53..c9f7218 100644 --- a/tests/dir-iterator/driver.cxx +++ b/tests/dir-iterator/driver.cxx @@ -7,6 +7,7 @@ #include <libbutl/path.hxx> #include <libbutl/path-io.hxx> #include <libbutl/utility.hxx> // operator<<(ostream, exception) +#include <libbutl/timestamp.hxx> #include <libbutl/filesystem.hxx> #undef NDEBUG @@ -26,7 +27,7 @@ operator<< (ostream& os, entry_type e) return os << entry_type_string[static_cast<size_t> (e)]; } -// Usage: argv[0] [-v] [-i] <dir> +// Usage: argv[0] [-v] [-i|-d] 
<dir> // // Iterates over a directory filesystem sub-entries, obtains their types and // target types for symlinks. @@ -38,6 +39,10 @@ operator<< (ostream& os, entry_type e) // Ignore dangling symlinks, rather than fail trying to obtain the target // type. // +// -d +// Detect dangling symlinks, rather than fail trying to obtain the target +// type. +// int main (int argc, const char* argv[]) { @@ -45,6 +50,7 @@ main (int argc, const char* argv[]) bool verbose (false); bool ignore_dangling (false); + bool detect_dangling (false); int i (1); for (; i != argc; ++i) @@ -55,6 +61,8 @@ main (int argc, const char* argv[]) verbose = true; else if (v == "-i") ignore_dangling = true; + else if (v == "-d") + detect_dangling = true; else break; } @@ -65,15 +73,42 @@ main (int argc, const char* argv[]) return 1; } + assert (!ignore_dangling || !detect_dangling); + const char* d (argv[i]); try { - for (const dir_entry& de: dir_iterator (dir_path (d), ignore_dangling)) + for (const dir_entry& de: + dir_iterator (dir_path (d), + (ignore_dangling ? dir_iterator::ignore_dangling : + detect_dangling ? dir_iterator::detect_dangling : + dir_iterator::no_follow))) { + timestamp mt (de.mtime ()); + timestamp at (de.atime ()); + entry_type lt (de.ltype ()); entry_type t (lt == entry_type::symlink ? de.type () : lt); + const path& p (de.path ()); + path fp (de.base () / p); + + entry_time et (t == entry_type::directory + ? 
dir_time (path_cast<dir_path> (fp)) + : file_time (fp)); + + if (mt != timestamp_unknown) + assert (mt == et.modification); + + if (at != timestamp_unknown) + assert (at == et.access); + + if (de.mtime () != timestamp_unknown) + assert (de.mtime () == et.modification); + + if (de.atime () != timestamp_unknown) + assert (de.atime () == et.access); if (verbose) { diff --git a/tests/dir-iterator/testscript b/tests/dir-iterator/testscript index 03ed164..9bc5513 100644 --- a/tests/dir-iterator/testscript +++ b/tests/dir-iterator/testscript @@ -7,6 +7,8 @@ test.options = -v : mkdir a; touch a/b; +sleep 1; +echo "a" >=a/b; # Change modification time. $* a >"reg b" : dir @@ -24,16 +26,16 @@ $* a >"dir b" if ($test.target == $build.host) { +if ($cxx.target.class != 'windows') - lnf = ^ln -s t wd/l &wd/l - lnd = $lnf + lnf = [cmdline] ^ln -s t wd/l &wd/l + lnd = [cmdline] $lnf else echo 'yes' >=t if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes' - lnf = cmd /C 'mklink wd\l t' &wd/l >- - lnd = cmd /C 'mklink /D wd\l t' &wd/l >- + lnf = [cmdline] cmd /C 'mklink wd\l t' &wd/l >- + lnd = [cmdline] cmd /C 'mklink /D wd\l t' &wd/l >- end - jnc = cmd /C 'mklink /J wd\l wd\t' &wd/l >- + jnc = [cmdline] cmd /C 'mklink /J wd\l wd\t' &wd/l >- end : symlink @@ -54,6 +56,12 @@ if ($test.target == $build.host) $* ../wd >- 2>! != 0 : keep $* -i ../wd >'reg f': skip + + : detect + : + $* -d ../wd >>~%EOO% + %(reg f|sym unk l)%{2} + EOO } : dir @@ -71,6 +79,12 @@ if ($test.target == $build.host) $* ../wd >- 2>! != 0 : keep $* -i ../wd >'dir d': skip + + : detect + : + $* -d ../wd >>~%EOO% + %(dir d|sym unk l)%{2} + EOO } } @@ -89,5 +103,11 @@ if ($test.target == $build.host) $* ../wd >- 2>!
!= 0 : keep $* -i ../wd >'dir d': skip + + : detect + : + $* -d ../wd >>~%EOO% + %(dir d|sym unk l)%{2} + EOO } } diff --git a/tests/fdstream/driver.cxx b/tests/fdstream/driver.cxx index 0b66574..ec0c54e 100644 --- a/tests/fdstream/driver.cxx +++ b/tests/fdstream/driver.cxx @@ -42,7 +42,9 @@ static const string text2 ("12"); // Keep shorter than text1. // Windows text mode write-translated form of text1. // +#ifdef _WIN32 static const string text3 ("ABCDEF\r\nXYZ"); +#endif static string from_stream (ifdstream& is) @@ -568,6 +570,83 @@ main (int argc, const char* argv[]) t.join (); } + // Test (non-blocking) reading with getline_non_blocking(). + // + { + const string ln ( + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + + string s; + for (size_t i (0); i < 300; ++i) + { + s += ln; + s += '\n'; + } + + const char* args[] = {argv[0], "-c", nullptr}; + + auto test_read = [&args, &s, &ln] () + { + try + { + process pr (args, -1, -1); + ofdstream os (move (pr.out_fd)); + + ifdstream is (move (pr.in_ofd), + fdstream_mode::non_blocking, + ios_base::badbit); + + os << s; + os.close (); + + fdselect_set fds {is.fd ()}; + fdselect_state& ist (fds[0]); + + string r; + for (string l; ist.fd != nullfd; ) + { + if (ist.fd != nullfd && getline_non_blocking (is, l)) + { + if (eof (is)) + ist.fd = nullfd; + else + { + assert (l == ln); + + r += l; + r += '\n'; + + l.clear (); + } + + continue; + } + + ifdselect (fds); + } + + is.close (); + + assert (r == s); + } + catch (const ios::failure&) + { + assert (false); + } + catch (const process_error&) + { + assert (false); + } + }; + + vector<thread> threads; + for (size_t i (0); i < 20; ++i) + threads.emplace_back (test_read); + + for (thread& t: threads) + t.join (); + } + // Test setting and getting position via the non-standard fdstreambuf // interface. 
// diff --git a/tests/host-os-release/buildfile b/tests/host-os-release/buildfile new file mode 100644 index 0000000..cd277ff --- /dev/null +++ b/tests/host-os-release/buildfile @@ -0,0 +1,6 @@ +# file : tests/host-os-release/buildfile +# license : MIT; see accompanying LICENSE file + +import libs = libbutl%lib{butl} + +exe{driver}: {hxx cxx}{*} $libs testscript diff --git a/tests/host-os-release/driver.cxx b/tests/host-os-release/driver.cxx new file mode 100644 index 0000000..249cbff --- /dev/null +++ b/tests/host-os-release/driver.cxx @@ -0,0 +1,58 @@ +// file : tests/host-os-release/driver.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <libbutl/host-os-release.hxx> + +#include <libbutl/path.hxx> + +namespace butl +{ + LIBBUTL_SYMEXPORT os_release + host_os_release_linux (path f = {}); +} + +#include <iostream> + +#undef NDEBUG +#include <cassert> + +using namespace std; +using namespace butl; + +int +main (int argc, char* argv[]) +{ + assert (argc >= 2); // <host-target-triplet> + + target_triplet host (argv[1]); + + os_release r; + if (host.class_ == "linux") + { + assert (argc == 3); // <host-target-triplet> <file-path> + r = host_os_release_linux (path (argv[2])); + } + else + { + assert (argc == 2); + if (optional<os_release> o = host_os_release (host)) + r = move (*o); + else + { + cerr << "unrecognized host os " << host.string () << endl; + return 1; + } + } + + cout << r.name_id << '\n'; + for (auto b (r.like_ids.begin ()), i (b); i != r.like_ids.end (); ++i) + cout << (i != b ? 
"|" : "") << *i; + cout << '\n' + << r.version_id << '\n' + << r.variant_id << '\n' + << r.name << '\n' + << r.version_codename << '\n' + << r.variant << '\n'; + + return 0; +} diff --git a/tests/host-os-release/testscript b/tests/host-os-release/testscript new file mode 100644 index 0000000..a18aa74 --- /dev/null +++ b/tests/host-os-release/testscript @@ -0,0 +1,223 @@ +# file : tests/host-os-release/testscript +# license : MIT; see accompanying LICENSE file + +: linux +: +$* x86_64-linux-gnu os-release >>EOO + linux + + + + Linux + + + EOO + +: debian-10 +: +cat <<EOI >=os-release; + PRETTY_NAME="Debian GNU/Linux 10 (buster)" + NAME="Debian GNU/Linux" + VERSION_ID="10" + VERSION="10 (buster)" + VERSION_CODENAME=buster + ID=debian + HOME_URL="https://www.debian.org/" + SUPPORT_URL="https://www.debian.org/support" + BUG_REPORT_URL="https://bugs.debian.org/" + EOI +$* x86_64-linux-gnu os-release >>EOO + debian + + 10 + + Debian GNU/Linux + buster + + EOO + +: debian-testing +: +cat <<EOI >=os-release; + PRETTY_NAME="Debian GNU/Linux bookworm/sid" + NAME="Debian GNU/Linux" + ID=debian + HOME_URL="https://www.debian.org/" + SUPPORT_URL="https://www.debian.org/support" + BUG_REPORT_URL="https://bugs.debian.org/" + EOI +$* x86_64-linux-gnu os-release >>EOO + debian + + + + Debian GNU/Linux + + + EOO + +: ubuntu-20.04 +: +cat <<EOI >=os-release; + NAME="Ubuntu" + VERSION="20.04.1 LTS (Focal Fossa)" + ID=ubuntu + ID_LIKE=debian + PRETTY_NAME="Ubuntu 20.04.1 LTS" + VERSION_ID="20.04" + HOME_URL="https://www.ubuntu.com/" + SUPPORT_URL="https://help.ubuntu.com/" + BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" + PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" + VERSION_CODENAME=focal + UBUNTU_CODENAME=focal + EOI +$* x86_64-linux-gnu os-release >>EOO + ubuntu + debian + 20.04 + + Ubuntu + focal + + EOO + +: fedora-35 +: +cat <<EOI >=os-release; + NAME="Fedora Linux" + VERSION="35 (Workstation Edition)" + ID=fedora + VERSION_ID=35 + 
VERSION_CODENAME="" + PLATFORM_ID="platform:f35" + PRETTY_NAME="Fedora Linux 35 (Workstation Edition)" + ANSI_COLOR="0;38;2;60;110;180" + LOGO=fedora-logo-icon + CPE_NAME="cpe:/o:fedoraproject:fedora:35" + HOME_URL="https://fedoraproject.org/" + DOCUMENTATION_URL="https://docs.fedoraproject.org/en-US/fedora/f35/system-administrators-guide/" + SUPPORT_URL="https://ask.fedoraproject.org/" + BUG_REPORT_URL="https://bugzilla.redhat.com/" + REDHAT_BUGZILLA_PRODUCT="Fedora" + REDHAT_BUGZILLA_PRODUCT_VERSION=35 + REDHAT_SUPPORT_PRODUCT="Fedora" + REDHAT_SUPPORT_PRODUCT_VERSION=35 + PRIVACY_POLICY_URL="https://fedoraproject.org/wiki/Legal:PrivacyPolicy" + VARIANT="Workstation Edition" + VARIANT_ID=workstation + EOI +$* x86_64-linux-gnu os-release >>EOO + fedora + + 35 + workstation + Fedora Linux + + Workstation Edition + EOO + +: rhel-8.2 +: +cat <<EOI >=os-release; + NAME="Red Hat Enterprise Linux" + VERSION="8.2 (Ootpa)" + ID="rhel" + ID_LIKE="fedora" + VERSION_ID="8.2" + PLATFORM_ID="platform:el8" + PRETTY_NAME="Red Hat Enterprise Linux 8.2 (Ootpa)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:redhat:enterprise_linux:8.2:GA" + HOME_URL="https://www.redhat.com/" + BUG_REPORT_URL="https://bugzilla.redhat.com/" + + REDHAT_BUGZILLA_PRODUCT="Red Hat Enterprise Linux 8" + REDHAT_BUGZILLA_PRODUCT_VERSION=8.2 + REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux" + REDHAT_SUPPORT_PRODUCT_VERSION="8.2" + EOI +$* x86_64-linux-gnu os-release >>EOO + rhel + fedora + 8.2 + + Red Hat Enterprise Linux + + + EOO + +: centos-8 +: +cat <<EOI >=os-release; + NAME="CentOS Linux" + VERSION="8 (Core)" + ID="centos" + ID_LIKE="rhel fedora" + VERSION_ID="8" + PLATFORM_ID="platform:el8" + PRETTY_NAME="CentOS Linux 8 (Core)" + ANSI_COLOR="0;31" + CPE_NAME="cpe:/o:centos:centos:8" + HOME_URL="https://www.centos.org/" + BUG_REPORT_URL="https://bugs.centos.org/" + + CENTOS_MANTISBT_PROJECT="CentOS-8" + CENTOS_MANTISBT_PROJECT_VERSION="8" + REDHAT_SUPPORT_PRODUCT="centos" + 
REDHAT_SUPPORT_PRODUCT_VERSION="8" + EOI +$* x86_64-linux-gnu os-release >>EOO + centos + rhel|fedora + 8 + + CentOS Linux + + + EOO + +: macos +: +if ($build.host.class == 'macos') +{ + $* $build.host >>~/EOO/ + macos + + /[0-9]+(\.[0-9]+(\.[0-9]+)?)?/ + + Mac OS + + + EOO +} + +: freebsd +: +if ($build.host.system == 'freebsd') +{ + $* $build.host >>~/EOO/ + freebsd + + /[0-9]+\.[0-9]+/ + + FreeBSD + + + EOO +} + +: windows +: +if ($build.host.system == 'windows') +{ + $* $build.host >>~/EOO/ + windows + + /[0-9]+(\.[0-9]+)?/ + + Windows + + + EOO +} diff --git a/tests/link/driver.cxx b/tests/link/driver.cxx index db9c195..b659838 100644 --- a/tests/link/driver.cxx +++ b/tests/link/driver.cxx @@ -107,11 +107,11 @@ link_dir (const dir_path& target, dir_path tp (target.absolute () ? target : link.directory () / target); set<pair<entry_type, path>> te; - for (const dir_entry& de: dir_iterator (tp, false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (tp, dir_iterator::no_follow)) te.emplace (de.ltype (), de.path ()); set<pair<entry_type, path>> le; - for (const dir_entry& de: dir_iterator (link, false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (link, dir_iterator::no_follow)) le.emplace (de.ltype (), de.path ()); return te == le; @@ -306,7 +306,7 @@ main (int argc, const char* argv[]) assert (pe.first && pe.second.type == entry_type::directory); } - for (const dir_entry& de: dir_iterator (td, false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (td, dir_iterator::no_follow)) { assert (de.path () != path ("dslink") || (de.type () == entry_type::directory && @@ -368,7 +368,9 @@ main (int argc, const char* argv[]) { mksymlink (dp / "non-existing", dp / "lnk"); assert (!dir_empty (dp)); - assert (dir_iterator (dp, true /* ignore_dangling */) == dir_iterator ()); + + assert (dir_iterator (dp, dir_iterator::ignore_dangling) == + dir_iterator ()); } catch (const system_error& e) { @@ -393,10 +395,10 @@ main (int 
argc, const char* argv[]) mksymlink (dp / "non-existing", dp / "lnk1", true /* dir */); assert (!dir_empty (dp)); - assert (dir_iterator (dp, true /* ignore_dangling */) == dir_iterator ()); + assert (dir_iterator (dp, dir_iterator::ignore_dangling) == dir_iterator ()); mksymlink (tgd, dp / "lnk2", true /* dir */); - assert (dir_iterator (dp, true /* ignore_dangling */) != dir_iterator ()); + assert (dir_iterator (dp, dir_iterator::ignore_dangling) != dir_iterator ()); rmdir_r (dp); assert (dir_exists (tgd)); diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx index 6924321..56c614a 100644 --- a/tests/manifest-parser/driver.cxx +++ b/tests/manifest-parser/driver.cxx @@ -164,14 +164,18 @@ namespace butl // Manifest value splitting (into the value/comment pair). // + // Single-line. + // { - auto p (manifest_parser::split_comment ("value\\; text ; comment text")); - assert (p.first == "value; text" && p.second == "comment text"); + auto p (manifest_parser::split_comment ( + "\\value\\\\\\; text ; comment text")); + + assert (p.first == "\\value\\; text" && p.second == "comment text"); } { - auto p (manifest_parser::split_comment ("value")); - assert (p.first == "value" && p.second == ""); + auto p (manifest_parser::split_comment ("value\\")); + assert (p.first == "value\\" && p.second == ""); } { @@ -179,6 +183,59 @@ namespace butl assert (p.first == "" && p.second == "comment"); } + // Multi-line. 
+ // + { + auto p (manifest_parser::split_comment ("value\n;")); + assert (p.first == "value" && p.second == ""); + } + + { + auto p (manifest_parser::split_comment ("value\ntext\n")); + assert (p.first == "value\ntext\n" && p.second == ""); + } + + { + auto p (manifest_parser::split_comment ("value\ntext\n;")); + assert (p.first == "value\ntext" && p.second == ""); + } + + { + auto p (manifest_parser::split_comment ("value\ntext\n;\n")); + assert (p.first == "value\ntext" && p.second == ""); + } + + { + auto p (manifest_parser::split_comment ("\n\\\nvalue\ntext\n" + ";\n" + "\n\n comment\ntext")); + + assert (p.first == "\n\\\nvalue\ntext" && p.second == + "\n\n comment\ntext"); + } + + { + auto p (manifest_parser::split_comment ("\n;\ncomment")); + assert (p.first == "" && p.second == "comment"); + } + + { + auto p (manifest_parser::split_comment (";\ncomment")); + assert (p.first == "" && p.second == "comment"); + } + + { + auto p (manifest_parser::split_comment (";\n")); + assert (p.first == "" && p.second == ""); + } + + { + auto p (manifest_parser::split_comment ( + "\\;\n\\\\;\n\\\\\\;\n\\\\\\\\;\n\\\\\\\\\\;")); + + assert (p.first == ";\n\\;\n\\;\n\\\\;\n\\\\;" && p.second == ""); + } + // UTF-8. // assert (test (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0", diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx index 5dc5862..c63a729 100644 --- a/tests/manifest-roundtrip/driver.cxx +++ b/tests/manifest-roundtrip/driver.cxx @@ -22,11 +22,16 @@ using namespace butl; // -m // Serialize multi-line manifest values using the v2 form. // +// -s +// Split values into the value/comment pairs and merge them back before +// printing. +// int main (int argc, const char* argv[]) try { bool multiline_v2 (false); + bool split (false); for (int i (1); i != argc; ++i) { @@ -34,6 +39,8 @@ try if (v == "-m") multiline_v2 = true; + else if (v == "-s") + split = true; } // Read/write in binary mode. 
@@ -61,6 +68,12 @@ try else eom = false; + if (split) + { + const auto& vc (manifest_parser::split_comment (nv.value)); + nv.value = manifest_serializer::merge_comment (vc.first, vc.second); + } + s.next (nv.name, nv.value); } } diff --git a/tests/manifest-roundtrip/testscript b/tests/manifest-roundtrip/testscript index e0a15cc..a228b0f 100644 --- a/tests/manifest-roundtrip/testscript +++ b/tests/manifest-roundtrip/testscript @@ -76,3 +76,43 @@ $* -m <<EOF >>EOF c:\windows\\ \ EOF + +: split-merge-comment +: +$* -s <<EOF >>EOF + : 1 + info:\ + value + text + \ + info:\ + value + text + ; + comment + \ + info:\ + ; + comment + text + \ + info:\ + value + \; + \\ + ; + comment + \ + info:\ + value + \\; + ; + comment + \ + info:\ + value + \\\\; + ; + comment + \ + EOF diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx index be3ae25..a003fa4 100644 --- a/tests/manifest-serializer/driver.cxx +++ b/tests/manifest-serializer/driver.cxx @@ -251,12 +251,37 @@ main () // Manifest value/comment merging. // - assert (manifest_serializer::merge_comment ("value; text", "comment") == - "value\\; text; comment"); + // Single-line. + // + assert (manifest_serializer::merge_comment ("value\\; text", "comment") == + "value\\\\\\; text; comment"); assert (manifest_serializer::merge_comment ("value text", "") == "value text"); + // Multi-line. 
+ // + assert (manifest_serializer::merge_comment ("value\n;\ntext", "comment") == + "value\n\\;\ntext\n;\ncomment"); + + assert (manifest_serializer::merge_comment ("value\n\\;\ntext\n", + "comment") == + "value\n\\\\;\ntext\n\n;\ncomment"); + + assert (manifest_serializer::merge_comment ("value\n\\\\;\ntext\n", + "comment") == + "value\n\\\\\\\\;\ntext\n\n;\ncomment"); + + + assert (manifest_serializer::merge_comment ("value\n\\\ntext", "comment") == + "value\n\\\ntext\n;\ncomment"); + + assert (manifest_serializer::merge_comment ("\\", "comment\n") == + "\\\n;\ncomment\n"); + + assert (manifest_serializer::merge_comment ("", "comment\ntext") == + ";\ncomment\ntext"); + // Filtering. // assert (test ({{"","1"},{"a","abc"},{"b","bca"},{"c","cab"},{"",""},{"",""}}, diff --git a/tests/mventry/testscript b/tests/mventry/testscript index 61ef871..f52be79 100644 --- a/tests/mventry/testscript +++ b/tests/mventry/testscript @@ -98,16 +98,16 @@ if ($test.target == $build.host) { +if ($cxx.target.class != 'windows') - lnf = ^ln -s t l &l - lnd = $lnf + lnf = [cmdline] ^ln -s t l &l + lnd = [cmdline] $lnf else echo 'yes' >=t if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes' - lnf = cmd /C 'mklink l t' &l >- - lnd = cmd /C 'mklink /D l t' &l >- + lnf = [cmdline] cmd /C 'mklink l t' &l >- + lnd = [cmdline] cmd /C 'mklink /D l t' &l >- end - jnc = cmd /C 'mklink /J l t' &l >- + jnc = [cmdline] cmd /C 'mklink /J l t' &l >- end : symlink diff --git a/tests/next-word/buildfile b/tests/next-word/buildfile new file mode 100644 index 0000000..e06cd88 --- /dev/null +++ b/tests/next-word/buildfile @@ -0,0 +1,6 @@ +# file : tests/next-word/buildfile +# license : MIT; see accompanying LICENSE file + +import libs = libbutl%lib{butl} + +exe{driver}: {hxx cxx}{*} $libs diff --git a/tests/next-word/driver.cxx b/tests/next-word/driver.cxx new file mode 100644 index 0000000..4ebe1a5 --- /dev/null +++ b/tests/next-word/driver.cxx @@ -0,0 +1,46 @@ +// file : tests/next-word/driver.cxx -*- C++ 
-*- +// license : MIT; see accompanying LICENSE file + +#include <vector> +#include <string> +//#include <iostream> + +#include <libbutl/utility.hxx> + +#undef NDEBUG +#include <cassert> + +using namespace std; +using namespace butl; + +using strings = vector<string>; + +static strings +parse_lines (const string& s) +{ + strings r; + for (size_t b (0), e (0), m (0), n (s.size ()); + next_word (s, n, b, e, m, '\n', '\r'), b != n; ) + { + //cerr << "'" << string (s, b, e - b) << "'" << endl; + r.push_back (string (s, b, e - b)); + } + return r; +} + +int +main () +{ + assert ((parse_lines("") == strings {})); + assert ((parse_lines("a") == strings {"a"})); + assert ((parse_lines("\n") == strings {"", ""})); + assert ((parse_lines("\n\n") == strings {"", "", ""})); + assert ((parse_lines("\n\n\n") == strings {"", "", "", ""})); + assert ((parse_lines("\na") == strings {"", "a"})); + assert ((parse_lines("\n\na") == strings {"", "", "a"})); + assert ((parse_lines("a\n") == strings {"a", ""})); + assert ((parse_lines("a\n\n") == strings {"a", "", ""})); + assert ((parse_lines("a\nb") == strings {"a", "b"})); + assert ((parse_lines("a\n\nb") == strings {"a", "", "b"})); + assert ((parse_lines("\na\nb\n") == strings {"", "a", "b", ""})); +} diff --git a/tests/path-entry/testscript b/tests/path-entry/testscript index 16039fa..3ac363b 100644 --- a/tests/path-entry/testscript +++ b/tests/path-entry/testscript @@ -57,16 +57,16 @@ if ($test.target == $build.host) { +if ($cxx.target.class != 'windows') - lnf = ^ln -s t l &l - lnd = $lnf + lnf = [cmdline] ^ln -s t l &l + lnd = [cmdline] $lnf else echo 'yes' >=t if cmd /C 'mklink l t' >- 2>- &?l && cat l >'yes' - lnf = cmd /C 'mklink l t' &l >- - lnd = cmd /C 'mklink /D l t' &l >- + lnf = [cmdline] cmd /C 'mklink l t' &l >- + lnd = [cmdline] cmd /C 'mklink /D l t' &l >- end - jnc = cmd /C 'mklink /J l t' &l >- + jnc = [cmdline] cmd /C 'mklink /J l t' &l >- end : symlink diff --git a/tests/semantic-version/driver.cxx 
b/tests/semantic-version/driver.cxx index 2bdd415..3c20a6c 100644 --- a/tests/semantic-version/driver.cxx +++ b/tests/semantic-version/driver.cxx @@ -23,7 +23,6 @@ main () semver v; assert (v.major == 0 && v.minor == 0 && v.patch == 0 && v.build.empty ()); } - { semver v (1, 2, 3); assert (v.major == 1 && v.minor == 2 && v.patch == 3 && v.build.empty ()); @@ -46,17 +45,27 @@ main () // String representation. // - assert (semver ("1.2") == semver (1, 2, 0)); - assert (semver ("1.2-3") == semver (1, 2, 0, "-3")); - assert (semver ("1.2.a1", "+-.") == semver (1, 2, 0, ".a1")); - assert (semver ("1.2.3") == semver (1, 2, 3)); - assert (semver ("1.2.3-4") == semver (1, 2, 3, "-4")); - assert (semver ("1.2.3+4") == semver (1, 2, 3, "+4")); - assert (semver ("1.2.3.4", "+-.") == semver (1, 2, 3, ".4")); - assert (semver ("1.2.3a", "") == semver (1, 2, 3, "a")); - try {semver v ("1.2.3-4", false); assert (false);} catch (failed) {} - try {semver v ("1.2.3.4"); assert (false);} catch (failed) {} - try {semver v ("1.2.3a"); assert (false);} catch (failed) {} + assert (semver ("1", semver::allow_omit_minor) == semver (1, 0, 0)); + assert (semver ("1-2", semver::allow_omit_minor | semver::allow_build) == semver (1, 0, 0, "-2")); + assert (semver ("1.2", semver::allow_omit_minor) == semver (1, 2, 0)); + assert (semver ("1.2+a", semver::allow_omit_minor | semver::allow_build) == semver (1, 2, 0, "+a")); + assert (semver ("1.2", semver::allow_omit_patch) == semver (1, 2, 0)); + assert (semver ("1.2-3", semver::allow_omit_patch | semver::allow_build) == semver (1, 2, 0, "-3")); + assert (semver ("1.2.a1", semver::allow_omit_patch | semver::allow_build, ".+-") == semver (1, 2, 0, ".a1")); + assert (semver ("1.2.3") == semver (1, 2, 3)); + assert (semver ("1.2.3-4", semver::allow_build) == semver (1, 2, 3, "-4")); + assert (semver ("1.2.3+4", semver::allow_build) == semver (1, 2, 3, "+4")); + assert (semver ("1.2.3.4", semver::allow_build, "+-.") == semver (1, 2, 3, ".4")); + assert 
(semver ("1.2.3a", semver::allow_build, "") == semver (1, 2, 3, "a")); + + try {semver v ("1"); assert (false);} catch (failed) {} + try {semver v ("1.x.2"); assert (false);} catch (failed) {} + try {semver v ("1.2"); assert (false);} catch (failed) {} + try {semver v ("1.2.x"); assert (false);} catch (failed) {} + try {semver v ("1.2.3-4"); assert (false);} catch (failed) {} + try {semver v ("1.2.3.4"); assert (false);} catch (failed) {} + try {semver v ("1.2.3a"); assert (false);} catch (failed) {} + assert (!parse_semantic_version ("1.2.3.4")); // Numeric representation. diff --git a/tests/standard-version/driver.cxx b/tests/standard-version/driver.cxx index 29cad54..4bddf08 100644 --- a/tests/standard-version/driver.cxx +++ b/tests/standard-version/driver.cxx @@ -83,7 +83,7 @@ version (const string& s, if (v.minor () != 99999) { - standard_version_constraint c1 ("~" + s); + standard_version_constraint c1 ('~' + s); standard_version_constraint c2 ('[' + s + ' ' + max_ver ('~') + ')'); assert (c1 == c2); } @@ -91,7 +91,7 @@ version (const string& s, if ((v.major () == 0 && v.minor () != 99999) || (v.major () != 0 && v.major () != 99999)) { - standard_version_constraint c1 ("^" + s); + standard_version_constraint c1 ('^' + s); standard_version_constraint c2 ('[' + s + ' ' + max_ver ('^') + ')'); assert (c1 == c2); } diff --git a/tests/target-triplet/driver.cxx b/tests/target-triplet/driver.cxx index 6dcb77e..8c08a90 100644 --- a/tests/target-triplet/driver.cxx +++ b/tests/target-triplet/driver.cxx @@ -74,6 +74,10 @@ main () "i686-w64-mingw32", "i686", "w64", "mingw32", "", "windows")); + assert (test ("x86_64-w64-windows-gnu", + "x86_64-w64-mingw32", + "x86_64", "w64", "mingw32", "", "windows")); + assert (test ("i686-lfs-linux-gnu", "i686-lfs-linux-gnu", "i686", "lfs", "linux-gnu", "", "linux")); @@ -82,6 +86,10 @@ main () "x86_64-linux-gnu", "x86_64", "", "linux-gnu", "", "linux")); + assert (test ("x86_64-redhat-linux", + "x86_64-redhat-linux-gnu", + "x86_64", 
"redhat", "linux-gnu", "", "linux")); + assert (test ("x86_64-linux-gnux32", "x86_64-linux-gnux32", "x86_64", "", "linux-gnux32", "", "linux")); diff --git a/tests/timestamp/driver.cxx b/tests/timestamp/driver.cxx index 11f328a..956b295 100644 --- a/tests/timestamp/driver.cxx +++ b/tests/timestamp/driver.cxx @@ -178,15 +178,15 @@ main () assert (parse (".384902285 Feb 21 19:31:10 2016", "%[.N] %b %d %H:%M:%S %Y", - "." + ns (384902285) + " Feb 21 19:31:10 2016")); + '.' + ns (384902285) + " Feb 21 19:31:10 2016")); assert (parse (".384902285 2016-02-21 19:31:10", "%[.N] %Y-%m-%d %H:%M:%S", - "." + ns (384902285) + " 2016-02-21 19:31:10")); + '.' + ns (384902285) + " 2016-02-21 19:31:10")); assert (parse (".3849022852016-02-21 19:31:10", "%[.N]%Y-%m-%d %H:%M:%S", - "." + ns (384902285) + "2016-02-21 19:31:10")); + '.' + ns (384902285) + "2016-02-21 19:31:10")); assert (parse ("Feb 1 2016", "%b %e %Y", "Feb 1 2016")); assert (parse ("Feb 11 2016", "%b %e %Y", "Feb 11 2016")); diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx index 9419a79..fee2748 100644 --- a/tests/wildcard/driver.cxx +++ b/tests/wildcard/driver.cxx @@ -4,11 +4,14 @@ #include <map> #include <string> #include <vector> -#include <algorithm> // sort() -#include <exception> #include <iostream> +#include <algorithm> // sort() +#include <exception> +#include <functional> +#include <system_error> #include <libbutl/path.hxx> +#include <libbutl/path-io.hxx> #include <libbutl/utility.hxx> // operator<<(ostream, exception) #include <libbutl/optional.hxx> #include <libbutl/filesystem.hxx> @@ -59,8 +62,13 @@ int _CRT_glob = 0; // through contains only the specified entry. The start directory is used if // the first pattern component is a self-matching wildcard. // +// -d (print|stop) +// If a inaccessible/dangling link is encountered, then print its path to +// stderr and, optionally, stop the search. Meaningful in combination with +// -sd and must follow it, if specified in the command line. 
+// // -i -// Pass psflags::ignorable_components to the match/search functions. +// Pass path_match_flags::match_absent to the match/search functions. // Meaningful in combination with -sd or -sp options and must follow it, if // specified in the command line. // @@ -93,6 +101,9 @@ try bool sort (true); path_match_flags flags (path_match_flags::follow_symlinks); + bool dangle_stop (false); + function<bool (const dir_entry&)> dangle_func; + int i (2); for (; i != argc; ++i) { @@ -101,6 +112,34 @@ try sort = false; else if (o == "-i") flags |= path_match_flags::match_absent; + else if (o == "-d") + { + ++i; + + assert (op == "-sd" && i != argc); + + string v (argv[i]); + + if (v == "print") + { + dangle_func = [] (const dir_entry& de) + { + cerr << de.base () / de.path () << endl; + return true; + }; + } + else if (v == "stop") + { + dangle_func = [&dangle_stop] (const dir_entry& de) + { + cerr << de.base () / de.path () << endl; + dangle_stop = true; + return false; + }; + } + else + assert (false); + } else break; // End of options. } @@ -166,10 +205,13 @@ try }; if (!entry) - path_search (pattern, add, start, flags); + path_search (pattern, add, start, flags, dangle_func); else path_search (pattern, *entry, add, start, flags); + if (dangle_stop) + return 1; + // It the search succeeds, then test search in the directory tree // represented by each matched path. Otherwise, if the directory tree is // specified, then make sure that it doesn't match the pattern. 
@@ -230,8 +272,13 @@ catch (const invalid_path& e) cerr << e << ": " << e.path << endl; return 2; } +catch (const system_error& e) +{ + cerr << e << endl; + return 3; +} catch (const exception& e) { cerr << e << endl; - return 2; + return 4; } diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript index 5f6a767..baa51aa 100644 --- a/tests/wildcard/testscript +++ b/tests/wildcard/testscript @@ -650,12 +650,14 @@ { mkdir a; touch --no-cleanup a/b; - ^ln -s b a/l &a/l; + ln -s b a/l &a/l; rm a/b; touch a/c; - $* a/* >/'a/c' + $* a/* 2>! == 3; + $* -d 'print' a/* >/'a/c' 2>/'a/l'; + $* -d 'stop' a/* >! 2>/'a/l' == 1 } } |